< Summary

Information
Class: Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand<TItem>
Assembly: Orchestrator
File(s): /home/runner/work/KicktippAi/KicktippAi/src/Orchestrator/Commands/Observability/ExportExperimentAnalysis/ExportExperimentAnalysisCommand.cs
Line coverage
85%
Covered lines: 439
Uncovered lines: 77
Coverable lines: 516
Total lines: 950
Line coverage: 85%
Branch coverage
65%
Covered branches: 184
Total branches: 280
Branch coverage: 65.7%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

MethodBranch coverage Crap Score Cyclomatic complexity Line coverage
.ctor(...)100%11100%
ExecuteAsync()100%22100%
LoadDatasetItemsAsync()83.33%121286.96%
LoadTracesAsync()80%101095.65%
ListRunTracesAsync()87.5%8893.33%
ListRunObservationsAsync()68.75%161688.89%
BuildBundle(...)66.67%66100%
BuildRows(...)78.57%1414100%
ValidateComparableRuns(...)80.77%302681.48%
EnsureDistinctDatasetItems(...)75%10866.67%
ListAllDatasetRunItemsAsync()100%22100%
LoadAggregateScoresAsync()62.5%321660%
EnumerateOffsetPaginatedItemsAsync<TItem>(...)100%44100%
EnumerateCursorPaginatedItemsAsync<TItem>(...)100%44100%
GetNextPageNumber<TItem>(...)50%22100%
DeserializeRunMetadata(...)50%5457.14%
ExtractPrediction(...)66.67%7666.67%
ExtractExpectedOutput(...)31.25%201675%
ExtractDatasetItemMetadata(...)50%2290%
CalculateKicktippPoints(...)100%66100%
SelectPredictionObservation(...)27.78%1818100%
ResolveSourceDatasetItemId(...)62.5%14854.55%
TryDeriveSourceDatasetItemId(...)50%9657.14%
TryExtractPredictionOutput(...)61.54%892654.55%
TryGetNullableIntProperty(...)66.67%6677.78%
TryParseScoreString(...)50%9872.73%
NormalizePredictionStatus(...)100%22100%
ResolveRunDisplayName(...)75%4485.71%
ResolvePrimaryMetricName(...)100%22100%
ResolvePrimaryMetricValue(...)100%22100%
IsAveragePrimaryMetricTask(...)100%22100%
ResolveOutputPath(...)16.67%19628.57%
GetRequiredStringProperty(...)50%6680%
GetRequiredIntProperty(...)50%4475%
GetOptionalStringProperty(...)50%88100%
GetOptionalIntProperty(...)100%66100%
.ctor(...)100%11100%

File(s)

/home/runner/work/KicktippAi/KicktippAi/src/Orchestrator/Commands/Observability/ExportExperimentAnalysis/ExportExperimentAnalysisCommand.cs

#LineLine coverage
 1using System.Globalization;
 2using System.Text.Json;
 3using EHonda.KicktippAi.Core;
 4using CursorBasedComposite = EHonda.Pagination.CursorBased.Composite;
 5using EHonda.Pagination.OffsetBased;
 6using OffsetBasedComposite = EHonda.Pagination.OffsetBased.Composite;
 7using Microsoft.Extensions.Logging;
 8using Orchestrator.Commands.Observability.Experiments;
 9using Orchestrator.Infrastructure.Langfuse;
 10using Spectre.Console;
 11using Spectre.Console.Cli;
 12
 13namespace Orchestrator.Commands.Observability.ExportExperimentAnalysis;
 14
 15public sealed class ExportExperimentAnalysisCommand : AsyncCommand<ExportExperimentAnalysisSettings>
 16{
 17    private readonly IAnsiConsole _console;
 18    private readonly ILangfusePublicApiClient _langfuseClient;
 19    private readonly ILogger<ExportExperimentAnalysisCommand> _logger;
 20
 121    public ExportExperimentAnalysisCommand(
 122        IAnsiConsole console,
 123        ILangfusePublicApiClient langfuseClient,
 124        ILogger<ExportExperimentAnalysisCommand> logger)
 25    {
 126        _console = console;
 127        _langfuseClient = langfuseClient;
 128        _logger = logger;
 129    }
 30
 31    protected override async Task<int> ExecuteAsync(CommandContext context, ExportExperimentAnalysisSettings settings, C
 32    {
 33        try
 34        {
 135            var runNames = settings.GetParsedRunNames();
 136            var runContexts = new List<RunContext>();
 37
 138            PreparedExperimentSupport.ReportProgress(
 139                $"Exporting experiment analysis for dataset '{settings.DatasetName}' across {runNames.Count} run(s).");
 140            _logger.LogInformation(
 141                "Exporting experiment analysis for dataset {DatasetName} across {RunCount} runs.",
 142                settings.DatasetName,
 143                runNames.Count);
 144            var dataset = await _langfuseClient.GetDatasetAsync(settings.DatasetName, cancellationToken)
 145                          ?? throw new InvalidOperationException($"Dataset '{settings.DatasetName}' could not be found."
 46
 147            for (var runIndex = 0; runIndex < runNames.Count; runIndex += 1)
 48            {
 149                var runName = runNames[runIndex];
 150                PreparedExperimentSupport.ReportProgress(
 151                    $"Loading run {runIndex + 1}/{runNames.Count}: '{runName}'.");
 152                var datasetRun = await _langfuseClient.GetDatasetRunAsync(settings.DatasetName, runName, cancellationTok
 153                    ?? throw new InvalidOperationException(
 154                        $"Dataset run '{runName}' could not be found in dataset '{settings.DatasetName}'.");
 55
 156                var runMetadata = DeserializeRunMetadata(datasetRun.Metadata, runName);
 157                var datasetRunItems = await ListAllDatasetRunItemsAsync(datasetRun, cancellationToken);
 158                EnsureDistinctDatasetItems(datasetRunItems, runName);
 159                var aggregateScores = await LoadAggregateScoresAsync(datasetRun.Id, runMetadata, cancellationToken);
 60
 161                runContexts.Add(new RunContext(datasetRun, runMetadata, datasetRunItems, aggregateScores));
 162            }
 63
 164            ValidateComparableRuns(runContexts);
 65
 166            PreparedExperimentSupport.ReportProgress("Loading dataset items for the comparable run set.");
 167            var datasetItemsById = await LoadDatasetItemsAsync(settings.DatasetName, runContexts, cancellationToken);
 168            PreparedExperimentSupport.ReportProgress("Loading traces and observations for the comparable run set.");
 169            var tracesById = await LoadTracesAsync(runContexts, cancellationToken);
 170            PreparedExperimentSupport.ReportProgress("Building normalized analysis rows.");
 171            var rows = BuildRows(runContexts, datasetItemsById, tracesById);
 172            var bundle = BuildBundle(settings.DatasetName, dataset, runContexts, rows);
 73
 174            var outputPath = ResolveOutputPath(settings, bundle.TaskType, settings.DatasetName);
 175            Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);
 176            await File.WriteAllTextAsync(
 177                outputPath,
 178                JsonSerializer.Serialize(bundle, PreparedExperimentCommandSupport.JsonOptions),
 179                cancellationToken);
 80
 181            PreparedExperimentSupport.ReportProgress(
 182                $"Wrote experiment analysis bundle with {bundle.Rows.Count} row(s) to '{outputPath}'.");
 83
 184            var summary = new
 185            {
 186                settings.DatasetName,
 187                bundle.TaskType,
 188                bundle.PrimaryMetricName,
 189                runCount = bundle.Runs.Count,
 190                rowCount = bundle.Rows.Count,
 191                outputPath
 192            };
 93
 194            _console.WriteLine(JsonSerializer.Serialize(summary, PreparedExperimentCommandSupport.JsonOptions));
 195            return 0;
 96        }
 197        catch (Exception ex)
 98        {
 199            _logger.LogError(ex, "Error exporting experiment analysis bundle");
 1100            _console.MarkupLine($"[red]Error:[/] {Markup.Escape(ex.Message)}");
 1101            return 1;
 102        }
 1103    }
 104
 105    private async Task<Dictionary<string, LangfuseDatasetItem>> LoadDatasetItemsAsync(
 106        string datasetName,
 107        IReadOnlyList<RunContext> runContexts,
 108        CancellationToken cancellationToken)
 109    {
 110        const int pageSize = 100;
 1111        var requiredDatasetItemIds = runContexts
 1112            .SelectMany(context => context.DatasetRunItems.Select(item => item.DatasetItemId))
 1113            .Distinct(StringComparer.Ordinal)
 1114            .ToHashSet(StringComparer.Ordinal);
 1115        var result = new Dictionary<string, LangfuseDatasetItem>(StringComparer.Ordinal);
 116
 1117        if (requiredDatasetItemIds.Count == 0)
 118        {
 0119            return result;
 120        }
 121
 1122        await foreach (var datasetItem in EnumerateOffsetPaginatedItemsAsync(
 1123                           (page, ct) => _langfuseClient.ListDatasetItemsAsync(
 1124                               new LangfuseListDatasetItemsRequest(DatasetName: datasetName, Page: page, Limit: pageSize
 1125                               ct),
 1126                           cancellationToken))
 127        {
 1128            if (requiredDatasetItemIds.Contains(datasetItem.Id))
 129            {
 1130                result[datasetItem.Id] = datasetItem;
 131            }
 132
 1133            if (result.Count == requiredDatasetItemIds.Count)
 134            {
 135                break;
 136            }
 137        }
 138
 1139        var missingDatasetItemIds = requiredDatasetItemIds
 1140            .Except(result.Keys, StringComparer.Ordinal)
 1141            .ToArray();
 1142        if (missingDatasetItemIds.Length > 0)
 143        {
 0144            throw new InvalidOperationException(
 0145                $"Dataset item(s) could not be loaded from Langfuse: {string.Join(", ", missingDatasetItemIds)}.");
 146        }
 147
 1148        _logger.LogInformation(
 1149            "Loaded {Count} dataset items for dataset {DatasetName} via paginated dataset listing.",
 1150            result.Count,
 1151            datasetName);
 152
 1153        return result;
 1154    }
 155
 156    private async Task<Dictionary<string, LangfuseTraceWithDetails>> LoadTracesAsync(
 157        IReadOnlyList<RunContext> runContexts,
 158        CancellationToken cancellationToken)
 159    {
 1160        var result = new Dictionary<string, LangfuseTraceWithDetails>(StringComparer.Ordinal);
 161
 1162        foreach (var runContext in runContexts)
 163        {
 1164            var expectedTraceIds = runContext.DatasetRunItems
 1165                .Select(item => item.TraceId)
 1166                .ToHashSet(StringComparer.Ordinal);
 1167            var traces = await ListRunTracesAsync(runContext.DatasetRun.Name, expectedTraceIds, cancellationToken);
 1168            var observationsByTraceId = await ListRunObservationsAsync(runContext.DatasetRun.Name, expectedTraceIds, can
 169
 1170            foreach (var traceId in expectedTraceIds)
 171            {
 1172                if (!traces.TryGetValue(traceId, out var trace))
 173                {
 0174                    throw new InvalidOperationException($"Trace '{traceId}' could not be loaded from Langfuse.");
 175                }
 176
 1177                result[traceId] = new LangfuseTraceWithDetails(
 1178                    trace.Id,
 1179                    trace.Name,
 1180                    trace.Metadata,
 1181                    trace.Output,
 1182                    [],
 1183                    observationsByTraceId.TryGetValue(traceId, out var observations) ? observations : [],
 1184                    trace.Tags);
 185            }
 1186        }
 187
 1188        _logger.LogInformation(
 1189            "Loaded {Count} traces for comparable export using per-run session batching.",
 1190            result.Count);
 191
 1192        return result;
 1193    }
 194
 195    private async Task<Dictionary<string, LangfuseTraceWithDetails>> ListRunTracesAsync(
 196        string runName,
 197        IReadOnlySet<string> expectedTraceIds,
 198        CancellationToken cancellationToken)
 199    {
 200        const int pageSize = 100;
 1201        var result = new Dictionary<string, LangfuseTraceWithDetails>(StringComparer.Ordinal);
 202
 1203        if (expectedTraceIds.Count == 0)
 204        {
 0205            return result;
 206        }
 207
 1208        await foreach (var trace in EnumerateOffsetPaginatedItemsAsync(
 1209                           (page, ct) => _langfuseClient.ListTracesAsync(
 1210                               new LangfuseListTracesRequest(SessionId: runName, Page: page, Limit: pageSize, Fields: "i
 1211                               ct),
 1212                           cancellationToken))
 213        {
 1214            if (expectedTraceIds.Contains(trace.Id))
 215            {
 1216                result[trace.Id] = trace;
 217            }
 218
 1219            if (result.Count == expectedTraceIds.Count)
 220            {
 221                break;
 222            }
 223        }
 224
 1225        _logger.LogInformation(
 1226            "Loaded {LoadedCount}/{ExpectedCount} trace shells for run {RunName} via sessionId listing.",
 1227            result.Count,
 1228            expectedTraceIds.Count,
 1229            runName);
 230
 1231        return result;
 1232    }
 233
 234    private async Task<Dictionary<string, IReadOnlyList<LangfuseObservationDetail>>> ListRunObservationsAsync(
 235        string runName,
 236        IReadOnlySet<string> expectedTraceIds,
 237        CancellationToken cancellationToken)
 238    {
 239        const int limit = 1000;
 1240        var observationsByTraceId = new Dictionary<string, List<LangfuseObservationDetail>>(StringComparer.Ordinal);
 241
 1242        if (expectedTraceIds.Count == 0)
 243        {
 0244            return observationsByTraceId.ToDictionary(
 0245                pair => pair.Key,
 0246                pair => (IReadOnlyList<LangfuseObservationDetail>)pair.Value,
 0247                StringComparer.Ordinal);
 248        }
 249
 1250        await foreach (var observation in EnumerateCursorPaginatedItemsAsync(
 1251                           (cursor, ct) => _langfuseClient.ListObservationsAsync(
 1252                               new LangfuseListObservationsRequest(SessionId: runName, Limit: limit, Cursor: cursor, Fie
 1253                               ct),
 1254                           cancellationToken))
 255        {
 1256            if (!expectedTraceIds.Contains(observation.TraceId))
 257            {
 258                continue;
 259            }
 260
 1261            if (!observationsByTraceId.TryGetValue(observation.TraceId, out var observations))
 262            {
 1263                observations = [];
 1264                observationsByTraceId[observation.TraceId] = observations;
 265            }
 266
 1267            observations.Add(observation);
 268        }
 269
 1270        _logger.LogInformation(
 1271            "Loaded observations for {TraceCount} traces in run {RunName} via sessionId observation listing.",
 1272            observationsByTraceId.Count,
 1273            runName);
 274
 1275        return observationsByTraceId.ToDictionary(
 1276            pair => pair.Key,
 1277            pair => (IReadOnlyList<LangfuseObservationDetail>)pair.Value,
 1278            StringComparer.Ordinal);
 1279    }
 280
 281    private static PreparedExperimentAnalysisBundle BuildBundle(
 282        string datasetName,
 283        LangfuseDataset dataset,
 284        IReadOnlyList<RunContext> runContexts,
 285        IReadOnlyList<PreparedExperimentAnalysisRow> rows)
 286    {
 1287        var taskType = runContexts[0].RunMetadata.TaskType ?? throw new InvalidOperationException("Run metadata missing 
 1288        var primaryMetricName = ResolvePrimaryMetricName(taskType);
 1289        var runSummaries = runContexts.Select(context => new PreparedExperimentAnalysisRun(
 1290                context.DatasetRun.Name,
 1291                context.DatasetRun.Id,
 1292                taskType,
 1293                ResolveRunDisplayName(context.RunMetadata, context.DatasetRun.Name),
 1294                context.RunMetadata.PromptKey,
 1295                context.RunMetadata.PromptSource,
 1296                context.RunMetadata.LangfusePromptName,
 1297                context.RunMetadata.LangfusePromptLabel,
 1298                context.RunMetadata.LangfusePromptVersion,
 1299                context.RunMetadata.ReasoningEffort,
 1300                context.RunMetadata.MaxOutputTokenCount,
 1301                context.RunMetadata.SliceKind,
 1302                context.RunMetadata.SliceKey,
 1303                context.RunMetadata.SourcePoolKey,
 1304                context.RunMetadata.SelectedItemIdsHash,
 1305                context.RunMetadata.SelectedItemIdsCount,
 1306                context.RunMetadata.SampleSize,
 1307                context.RunMetadata.MatchCount,
 1308                context.RunMetadata.Repetitions,
 1309                context.RunMetadata.Parallelism,
 1310                context.RunMetadata.EvaluationTimestampPolicyKey,
 1311                context.RunMetadata.EvaluationTime,
 1312                context.RunMetadata.StartedAtUtc,
 1313                context.RunMetadata.BatchStrategy,
 1314                context.RunMetadata.BatchSize,
 1315                context.RunMetadata.BatchCount,
 1316                context.AggregateScores,
 1317                ResolvePrimaryMetricValue(taskType, context.AggregateScores),
 1318                context.DatasetRunItems.Count,
 1319                context.RunMetadata.RunSubjectKind,
 1320                context.RunMetadata.RunSubjectId,
 1321                context.RunMetadata.RunSubjectDisplayName))
 1322            .OrderBy(run => run.RunName, StringComparer.Ordinal)
 1323            .ToList();
 324
 1325        return new PreparedExperimentAnalysisBundle(
 1326            datasetName,
 1327            taskType,
 1328            primaryMetricName,
 1329            ExperimentArtifactSupport.FormatStartedAtUtc(DateTimeOffset.UtcNow),
 1330            dataset.Description,
 1331            LangfuseJsonUtilities.IsDefined(dataset.Metadata) ? dataset.Metadata : default,
 1332            runSummaries,
 1333            rows);
 334    }
 335
 336    private static List<PreparedExperimentAnalysisRow> BuildRows(
 337        IReadOnlyList<RunContext> runContexts,
 338        IReadOnlyDictionary<string, LangfuseDatasetItem> datasetItemsById,
 339        IReadOnlyDictionary<string, LangfuseTraceWithDetails> tracesById)
 340    {
 1341        var rows = new List<PreparedExperimentAnalysisRow>();
 342
 1343        foreach (var context in runContexts.OrderBy(context => context.DatasetRun.Name, StringComparer.Ordinal))
 344        {
 1345            foreach (var datasetRunItem in context.DatasetRunItems.OrderBy(item => item.DatasetItemId, StringComparer.Or
 346            {
 1347                var datasetItem = datasetItemsById[datasetRunItem.DatasetItemId];
 1348                var trace = tracesById[datasetRunItem.TraceId];
 1349                var predictionObservation = SelectPredictionObservation(trace, context.RunMetadata);
 1350                var prediction = ExtractPrediction(trace.Output, predictionObservation?.Output, context.DatasetRun.Name,
 1351                var expectedOutput = ExtractExpectedOutput(datasetItem.ExpectedOutput, context.DatasetRun.Name, datasetR
 1352                var metadata = ExtractDatasetItemMetadata(datasetItem.Input, datasetItem.Metadata, datasetRunItem.Datase
 1353                var kicktippPoints = CalculateKicktippPoints(prediction, expectedOutput);
 1354                var sourceDatasetItemId = ResolveSourceDatasetItemId(
 1355                    context.RunMetadata,
 1356                    datasetRunItem.DatasetItemId,
 1357                    trace.Metadata,
 1358                    context.DatasetRun.Name);
 359
 1360                rows.Add(new PreparedExperimentAnalysisRow(
 1361                    datasetRunItem.DatasetItemId,
 1362                    context.DatasetRun.Id,
 1363                    context.DatasetRun.Name,
 1364                    context.RunMetadata.TaskType ?? throw new InvalidOperationException($"Run '{context.DatasetRun.Name}
 1365                    ResolveRunDisplayName(context.RunMetadata, context.DatasetRun.Name),
 1366                    context.RunMetadata.PromptKey,
 1367                    context.RunMetadata.ReasoningEffort,
 1368                    context.RunMetadata.SliceKind,
 1369                    context.RunMetadata.SliceKey,
 1370                    context.RunMetadata.SourcePoolKey,
 1371                    datasetRunItem.DatasetItemId,
 1372                    sourceDatasetItemId,
 1373                    datasetRunItem.TraceId,
 1374                    predictionObservation?.Id,
 1375                    metadata.Matchday,
 1376                    metadata.HomeTeam,
 1377                    metadata.AwayTeam,
 1378                    metadata.StartsAt,
 1379                    metadata.TippSpielId,
 1380                    prediction.HomeGoals,
 1381                    prediction.AwayGoals,
 1382                    expectedOutput.HomeGoals,
 1383                    expectedOutput.AwayGoals,
 1384                    kicktippPoints,
 1385                    prediction.Status,
 1386                    context.RunMetadata.RunSubjectKind,
 1387                    context.RunMetadata.RunSubjectId,
 1388                    context.RunMetadata.RunSubjectDisplayName,
 1389                    metadata.FixtureIndex,
 1390                    metadata.RepetitionIndex));
 391            }
 392        }
 393
 1394        return rows;
 395    }
 396
 397    private static void ValidateComparableRuns(IReadOnlyList<RunContext> runContexts)
 398    {
 1399        if (runContexts.Count < 1)
 400        {
 0401            throw new InvalidOperationException("At least one run is required to export an experiment analysis bundle.")
 402        }
 403
 1404        var baseline = runContexts[0];
 1405        var baselineTaskType = baseline.RunMetadata.TaskType ?? throw new InvalidOperationException(
 1406            $"Run '{baseline.DatasetRun.Name}' is missing task type metadata.");
 1407        var baselineItemIds = baseline.DatasetRunItems
 1408            .Select(item => item.DatasetItemId)
 0409            .OrderBy(item => item, StringComparer.Ordinal)
 1410            .ToArray();
 411
 1412        foreach (var candidate in runContexts.Skip(1))
 413        {
 1414            var taskType = candidate.RunMetadata.TaskType ?? throw new InvalidOperationException(
 1415                $"Run '{candidate.DatasetRun.Name}' is missing task type metadata.");
 1416            if (!string.Equals(baselineTaskType, taskType, StringComparison.OrdinalIgnoreCase))
 417            {
 0418                throw new InvalidOperationException(
 0419                    $"Run '{candidate.DatasetRun.Name}' has task type '{taskType}', but expected '{baselineTaskType}'.")
 420            }
 421
 1422            if (!string.IsNullOrWhiteSpace(baseline.RunMetadata.SelectedItemIdsHash)
 1423                && !string.IsNullOrWhiteSpace(candidate.RunMetadata.SelectedItemIdsHash)
 1424                && !string.Equals(
 1425                    baseline.RunMetadata.SelectedItemIdsHash,
 1426                    candidate.RunMetadata.SelectedItemIdsHash,
 1427                    StringComparison.Ordinal))
 428            {
 0429                throw new InvalidOperationException(
 0430                    $"Run '{candidate.DatasetRun.Name}' has selectedItemIdsHash '{candidate.RunMetadata.SelectedItemIdsH
 431            }
 432
 1433            var candidateItemIds = candidate.DatasetRunItems
 1434                .Select(item => item.DatasetItemId)
 0435                .OrderBy(item => item, StringComparer.Ordinal)
 1436                .ToArray();
 437
 1438            if (!baselineItemIds.SequenceEqual(candidateItemIds, StringComparer.Ordinal))
 439            {
 1440                throw new InvalidOperationException(
 1441                    $"Run '{candidate.DatasetRun.Name}' does not contain the same prepared dataset item set as '{baselin
 442            }
 443        }
 1444    }
 445
 446    private static void EnsureDistinctDatasetItems(
 447        IReadOnlyList<LangfuseDatasetRunItem> datasetRunItems,
 448        string runName)
 449    {
 1450        var duplicateItemId = datasetRunItems
 1451            .GroupBy(item => item.DatasetItemId, StringComparer.Ordinal)
 1452            .FirstOrDefault(group => group.Count() > 1)
 1453            ?.Key;
 454
 1455        if (duplicateItemId is not null)
 456        {
 0457            throw new InvalidOperationException(
 0458                $"Run '{runName}' contains duplicate dataset item id '{duplicateItemId}', which is not supported for com
 459        }
 1460    }
 461
 462    private async Task<IReadOnlyList<LangfuseDatasetRunItem>> ListAllDatasetRunItemsAsync(
 463        LangfuseDatasetRunWithItems datasetRun,
 464        CancellationToken cancellationToken)
 465    {
 466        const int pageSize = 100;
 1467        var items = new List<LangfuseDatasetRunItem>();
 468
 1469        await foreach (var item in EnumerateOffsetPaginatedItemsAsync(
 1470                           (page, ct) => _langfuseClient.ListDatasetRunItemsAsync(
 1471                               datasetRun.DatasetId,
 1472                               datasetRun.Name,
 1473                               page,
 1474                               pageSize,
 1475                               ct),
 1476                           cancellationToken))
 477        {
 1478            items.Add(item);
 479        }
 480
 1481        return items;
 1482    }
 483
 484    private async Task<ExperimentAggregateScores> LoadAggregateScoresAsync(
 485        string datasetRunId,
 486        PreparedExperimentRunMetadata runMetadata,
 487        CancellationToken cancellationToken)
 488    {
 489        const int pageSize = 100;
 1490        var scores = new List<LangfuseScore>();
 491
 1492        await foreach (var score in EnumerateOffsetPaginatedItemsAsync(
 1493                           (page, ct) => _langfuseClient.ListScoresAsync(
 1494                               new LangfuseListScoresRequest(DatasetRunId: datasetRunId, Page: page, Limit: pageSize),
 1495                               ct),
 1496                           cancellationToken))
 497        {
 1498            scores.Add(score);
 499        }
 500
 1501        var total = scores.FirstOrDefault(score => string.Equals(score.Name, "total_kicktipp_points", StringComparison.O
 1502        var average = scores.FirstOrDefault(score => string.Equals(score.Name, "avg_kicktipp_points", StringComparison.O
 503
 1504        if (total?.Value is null || average?.Value is null)
 505        {
 0506            var runName = string.IsNullOrWhiteSpace(runMetadata.StartedAtUtc)
 0507                ? datasetRunId
 0508                : runMetadata.StartedAtUtc;
 0509            throw new InvalidOperationException(
 0510                $"Dataset run '{runName}' is missing one or more aggregate Langfuse scores (expected total_kicktipp_poin
 0511            );
 512        }
 513
 1514        return new ExperimentAggregateScores(total.Value.Value, average.Value.Value);
 1515    }
 516
 517    private IAsyncEnumerable<TItem> EnumerateOffsetPaginatedItemsAsync<TItem>(
 518        Func<int, CancellationToken, Task<LangfusePaginatedResponse<TItem>>> pageRetriever,
 519        CancellationToken cancellationToken)
 520    {
 1521        var paginationHandler = new OffsetBasedComposite.PaginationHandlerBuilder<LangfusePaginatedResponse<TItem>, TIte
 1522            .WithPageRetriever((previousPage, ct) => pageRetriever(GetNextPageNumber(previousPage), ct))
 1523            .WithOffsetStateExtractor(static page => new OffsetState<int>(checked(page.Meta.Page * page.Meta.Limit), pag
 1524            .WithItemExtractor(static page => page.Data)
 1525            .Build();
 526
 1527        return paginationHandler.GetAllItemsAsync(cancellationToken);
 528    }
 529
 530    private IAsyncEnumerable<TItem> EnumerateCursorPaginatedItemsAsync<TItem>(
 531        Func<string?, CancellationToken, Task<LangfuseCursorPaginatedResponse<TItem>>> pageRetriever,
 532        CancellationToken cancellationToken)
 533    {
 1534        var paginationHandler = new CursorBasedComposite.PaginationHandlerBuilder<LangfuseCursorPaginatedResponse<TItem>
 1535            .WithPageRetriever((previousPage, ct) => pageRetriever(previousPage?.Meta.Cursor, ct))
 1536            .WithCursorExtractor(static page => page.Meta.Cursor)
 1537            .WithItemExtractor(static page => page.Data)
 1538            .Build();
 539
 1540        return paginationHandler.GetAllItemsAsync(cancellationToken);
 541    }
 542
 543    private static int GetNextPageNumber<TItem>(LangfusePaginatedResponse<TItem>? previousPage)
 544    {
 1545        return previousPage is null ? 1 : previousPage.Meta.Page + 1;
 546    }
 547
 548    private static PreparedExperimentRunMetadata DeserializeRunMetadata(JsonElement metadata, string runName)
 549    {
 1550        if (!LangfuseJsonUtilities.IsDefined(metadata))
 551        {
 0552            throw new InvalidOperationException($"Dataset run '{runName}' is missing run metadata.");
 553        }
 554
 555        try
 556        {
 1557            var deserialized = metadata.Deserialize<PreparedExperimentRunMetadata>(PreparedExperimentCommandSupport.Json
 1558            return deserialized ?? throw new InvalidOperationException($"Dataset run '{runName}' metadata could not be d
 559        }
 0560        catch (JsonException ex)
 561        {
 0562            throw new InvalidOperationException($"Dataset run '{runName}' metadata could not be deserialized.", ex);
 563        }
 1564    }
 565
 566    private static PredictionOutput ExtractPrediction(
 567        JsonElement traceOutput,
 568        JsonElement? observationOutput,
 569        string runName,
 570        string traceId)
 571    {
 1572        if (TryExtractPredictionOutput(traceOutput, out var prediction))
 573        {
 1574            return prediction;
 575        }
 576
 1577        if (observationOutput is JsonElement candidate && TryExtractPredictionOutput(candidate, out prediction))
 578        {
 1579            return prediction;
 580        }
 581
 0582        throw new InvalidOperationException(
 0583            $"Run '{runName}' trace '{traceId}' does not expose a parseable prediction payload in trace or observation o
 584    }
 585
 586    private static ExpectedOutput ExtractExpectedOutput(JsonElement expectedOutput, string runName, string datasetItemId
 587    {
 1588        if (expectedOutput.ValueKind == JsonValueKind.Object)
 589        {
 1590            if (expectedOutput.TryGetProperty("homeGoals", out var homeGoals)
 1591                && expectedOutput.TryGetProperty("awayGoals", out var awayGoals)
 1592                && homeGoals.TryGetInt32(out var home)
 1593                && awayGoals.TryGetInt32(out var away))
 594            {
 0595                return new ExpectedOutput(home, away);
 596            }
 597
 1598            if (expectedOutput.TryGetProperty("score", out var score)
 1599                && score.ValueKind == JsonValueKind.String
 1600                && TryParseScoreString(score.GetString(), out var parsed))
 601            {
 1602                return parsed;
 603            }
 604        }
 605
 0606        throw new InvalidOperationException(
 0607            $"Run '{runName}' dataset item '{datasetItemId}' does not expose a parseable expected scoreline.");
 608    }
 609
 610    private static DatasetItemMetadata ExtractDatasetItemMetadata(JsonElement input, JsonElement metadata, string datase
 611    {
 1612        if (metadata.ValueKind != JsonValueKind.Object)
 613        {
 0614            throw new InvalidOperationException($"Dataset item '{datasetItemId}' metadata is missing or not an object.")
 615        }
 616
 1617        return new DatasetItemMetadata(
 1618            GetRequiredStringProperty(metadata, "homeTeam", datasetItemId),
 1619            GetRequiredStringProperty(metadata, "awayTeam", datasetItemId),
 1620            GetRequiredIntProperty(metadata, "matchday", datasetItemId),
 1621            GetRequiredStringProperty(input, "startsAt", datasetItemId),
 1622            GetOptionalStringProperty(metadata, "tippSpielId"),
 1623            GetOptionalIntProperty(metadata, "fixtureIndex"),
 1624            GetOptionalIntProperty(metadata, "repetitionIndex"));
 625    }
 626
 627    private static int CalculateKicktippPoints(PredictionOutput prediction, ExpectedOutput expectedOutput)
 628    {
 1629        if (!string.Equals(prediction.Status, "placed", StringComparison.OrdinalIgnoreCase)
 1630            || prediction.HomeGoals is not int homeGoals
 1631            || prediction.AwayGoals is not int awayGoals)
 632        {
 1633            return 0;
 634        }
 635
 1636        return PreparedExperimentSupport.CalculateScores(
 1637            new Prediction(homeGoals, awayGoals),
 1638            expectedOutput.HomeGoals,
 1639            expectedOutput.AwayGoals).KicktippPoints;
 640    }
 641
 642    private static LangfuseObservationDetail? SelectPredictionObservation(
 643        LangfuseTraceWithDetails trace,
 644        PreparedExperimentRunMetadata runMetadata)
 645    {
 1646        var observations = trace.Observations ?? [];
 1647        var preferredObservationName = string.IsNullOrWhiteSpace(runMetadata.ObservationName)
 1648            ? string.Equals(runMetadata.TaskType, "community-to-date", StringComparison.OrdinalIgnoreCase)
 1649                ? "community-match-prediction"
 1650                : "predict-match"
 1651            : runMetadata.ObservationName;
 652
 1653        return observations.FirstOrDefault(observation => string.Equals(observation.Name, preferredObservationName, Stri
 0654               ?? observations.FirstOrDefault(observation => string.Equals(observation.Name, "community-match-prediction
 0655               ?? observations.FirstOrDefault(observation => string.Equals(observation.Name, "predict-match", StringComp
 0656               ?? observations.FirstOrDefault(observation => string.Equals(observation.Type, "GENERATION", StringCompari
 657    }
 658
 659    private static string ResolveSourceDatasetItemId(
 660        PreparedExperimentRunMetadata runMetadata,
 661        string datasetItemId,
 662        JsonElement traceMetadata,
 663        string runName)
 664    {
 1665        if (runMetadata.DatasetItemIdMap.Count > 0)
 666        {
 1667            var reverseMatch = runMetadata.DatasetItemIdMap
 1668                .FirstOrDefault(pair => string.Equals(pair.Value, datasetItemId, StringComparison.Ordinal));
 1669            if (!string.IsNullOrWhiteSpace(reverseMatch.Key))
 670            {
 1671                return reverseMatch.Key;
 672            }
 673        }
 674
 1675        if (TryDeriveSourceDatasetItemId(datasetItemId, out var derived))
 676        {
 1677            return derived;
 678        }
 679
 0680        var fromTraceMetadata = GetOptionalStringProperty(traceMetadata, "sourceDatasetItemId");
 0681        if (!string.IsNullOrWhiteSpace(fromTraceMetadata))
 682        {
 0683            return fromTraceMetadata!;
 684        }
 685
 0686        throw new InvalidOperationException(
 0687            $"Run '{runName}' dataset item '{datasetItemId}' could not be mapped back to a source dataset item id.");
 688    }
 689
 690    private static bool TryDeriveSourceDatasetItemId(string datasetItemId, out string sourceDatasetItemId)
 691    {
 1692        var repeatedMatchSliceIndex = datasetItemId.IndexOf("__repeated-match-slice__", StringComparison.Ordinal);
 1693        if (repeatedMatchSliceIndex >= 0)
 694        {
 1695            sourceDatasetItemId = datasetItemId[..repeatedMatchSliceIndex];
 1696            return true;
 697        }
 698
 1699        var repeatedMatchIndex = datasetItemId.IndexOf("__repeated-match__", StringComparison.Ordinal);
 1700        if (repeatedMatchIndex >= 0)
 701        {
 1702            sourceDatasetItemId = datasetItemId[..repeatedMatchIndex];
 1703            return true;
 704        }
 705
 0706        var sliceIndex = datasetItemId.IndexOf("__slice__", StringComparison.Ordinal);
 0707        if (sliceIndex >= 0)
 708        {
 0709            sourceDatasetItemId = datasetItemId[..sliceIndex];
 0710            return true;
 711        }
 712
 0713        sourceDatasetItemId = string.Empty;
 0714        return false;
 715    }
 716
 717    private static bool TryExtractPredictionOutput(JsonElement value, out PredictionOutput prediction)
 718    {
 1719        if (value.ValueKind == JsonValueKind.Object)
 720        {
 1721            if (value.TryGetProperty("status", out var statusProperty)
 1722                && statusProperty.ValueKind == JsonValueKind.String)
 723            {
 1724                var status = NormalizePredictionStatus(statusProperty.GetString());
 1725                var hasHomeGoals = TryGetNullableIntProperty(value, "homeGoals", out var parsedHomeGoals);
 1726                var hasAwayGoals = TryGetNullableIntProperty(value, "awayGoals", out var parsedAwayGoals);
 727
 1728                if (hasHomeGoals && hasAwayGoals)
 729                {
 1730                    prediction = new PredictionOutput(status, parsedHomeGoals, parsedAwayGoals);
 1731                    return true;
 732                }
 733
 0734                if (string.Equals(status, "missed", StringComparison.OrdinalIgnoreCase))
 735                {
 0736                    prediction = new PredictionOutput("missed", null, null);
 0737                    return true;
 738                }
 739            }
 740
 1741            if (value.TryGetProperty("homeGoals", out var homeGoals)
 1742                && value.TryGetProperty("awayGoals", out var awayGoals)
 1743                && homeGoals.TryGetInt32(out var home)
 1744                && awayGoals.TryGetInt32(out var away))
 745            {
 1746                prediction = new PredictionOutput("placed", home, away);
 1747                return true;
 748            }
 749        }
 750
 1751        if (value.ValueKind == JsonValueKind.String)
 752        {
 0753            var raw = value.GetString();
 0754            if (TryParseScoreString(raw, out var parsedScore))
 755            {
 0756                prediction = new PredictionOutput("placed", parsedScore.HomeGoals, parsedScore.AwayGoals);
 0757                return true;
 758            }
 759
 0760            if (!string.IsNullOrWhiteSpace(raw))
 761            {
 762                try
 763                {
 0764                    using var document = JsonDocument.Parse(raw);
 0765                    if (TryExtractPredictionOutput(document.RootElement, out prediction))
 766                    {
 0767                        return true;
 768                    }
 0769                }
 0770                catch (JsonException)
 771                {
 0772                }
 773            }
 774        }
 775
 1776        prediction = default;
 1777        return false;
 0778    }
 779
 780    private static bool TryGetNullableIntProperty(JsonElement value, string propertyName, out int? result)
 781    {
 1782        result = null;
 1783        if (!value.TryGetProperty(propertyName, out var property))
 784        {
 0785            return false;
 786        }
 787
 1788        if (property.ValueKind == JsonValueKind.Null)
 789        {
 1790            return true;
 791        }
 792
 1793        if (property.TryGetInt32(out var parsed))
 794        {
 1795            result = parsed;
 1796            return true;
 797        }
 798
 0799        return false;
 800    }
 801
 802    private static bool TryParseScoreString(string? value, out ExpectedOutput expectedOutput)
 803    {
 1804        expectedOutput = default;
 1805        if (string.IsNullOrWhiteSpace(value))
 806        {
 0807            return false;
 808        }
 809
 1810        var segments = value.Split(':', StringSplitOptions.TrimEntries);
 1811        if (segments.Length != 2)
 812        {
 0813            return false;
 814        }
 815
 1816        if (!int.TryParse(segments[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out var homeGoals)
 1817            || !int.TryParse(segments[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out var awayGoals))
 818        {
 0819            return false;
 820        }
 821
 1822        expectedOutput = new ExpectedOutput(homeGoals, awayGoals);
 1823        return true;
 824    }
 825
 826    private static string NormalizePredictionStatus(string? status)
 827    {
 1828        return string.Equals(status, "missed", StringComparison.OrdinalIgnoreCase)
 1829            ? "missed"
 1830            : "placed";
 831    }
 832
 833    private static string ResolveRunDisplayName(PreparedExperimentRunMetadata runMetadata, string runName)
 834    {
 1835        if (!string.IsNullOrWhiteSpace(runMetadata.RunSubjectDisplayName))
 836        {
 1837            return runMetadata.RunSubjectDisplayName;
 838        }
 839
 1840        if (!string.IsNullOrWhiteSpace(runMetadata.Model))
 841        {
 1842            return PreparedExperimentSupport.BuildRunSubjectDisplayName(
 1843                runMetadata.Model,
 1844                runMetadata.ReasoningEffort);
 845        }
 846
 0847        throw new InvalidOperationException($"Run '{runName}' is missing comparable display metadata.");
 848    }
 849
 850    private static string ResolvePrimaryMetricName(string taskType)
 851    {
 1852        return IsAveragePrimaryMetricTask(taskType)
 1853            ? "avg_kicktipp_points"
 1854            : "total_kicktipp_points";
 855    }
 856
 857    private static double ResolvePrimaryMetricValue(string taskType, ExperimentAggregateScores aggregateScores)
 858    {
 1859        return IsAveragePrimaryMetricTask(taskType)
 1860            ? aggregateScores.AvgKicktippPoints
 1861            : aggregateScores.TotalKicktippPoints;
 862    }
 863
 864    private static bool IsAveragePrimaryMetricTask(string taskType)
 865    {
 1866        return string.Equals(taskType, "repeated-match", StringComparison.OrdinalIgnoreCase)
 1867               || string.Equals(taskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase);
 868    }
 869
 870    private static string ResolveOutputPath(
 871        ExportExperimentAnalysisSettings settings,
 872        string taskType,
 873        string datasetName)
 874    {
 1875        if (!string.IsNullOrWhiteSpace(settings.OutputPath))
 876        {
 1877            return Path.GetFullPath(settings.OutputPath);
 878        }
 879
 0880        var timestamp = DateTimeOffset.UtcNow.ToString("yyyy-MM-ddTHH-mm-ssZ", CultureInfo.InvariantCulture).ToLowerInva
 0881        var datasetSegments = datasetName.Split('/', StringSplitOptions.RemoveEmptyEntries)
 0882            .Select(segment => segment.Trim())
 0883            .Where(segment => segment.Length > 0)
 0884            .ToList();
 885
 0886        return Path.GetFullPath(Path.Combine(
 0887            ["artifacts", "langfuse-experiments", "analysis", taskType, .. datasetSegments, $"comparison-{timestamp}.jso
 888    }
 889
 890    private static string GetRequiredStringProperty(JsonElement metadata, string propertyName, string datasetItemId)
 891    {
 1892        if (!metadata.TryGetProperty(propertyName, out var property)
 1893            || property.ValueKind != JsonValueKind.String
 1894            || string.IsNullOrWhiteSpace(property.GetString()))
 895        {
 0896            throw new InvalidOperationException($"Dataset item '{datasetItemId}' metadata is missing '{propertyName}'.")
 897        }
 898
 1899        return property.GetString()!;
 900    }
 901
 902    private static int GetRequiredIntProperty(JsonElement metadata, string propertyName, string datasetItemId)
 903    {
 1904        if (!metadata.TryGetProperty(propertyName, out var property)
 1905            || !property.TryGetInt32(out var value))
 906        {
 0907            throw new InvalidOperationException($"Dataset item '{datasetItemId}' metadata is missing '{propertyName}'.")
 908        }
 909
 1910        return value;
 911    }
 912
 913    private static string? GetOptionalStringProperty(JsonElement metadata, string propertyName)
 914    {
 1915        return metadata.ValueKind == JsonValueKind.Object
 1916               && metadata.TryGetProperty(propertyName, out var property)
 1917               && property.ValueKind == JsonValueKind.String
 1918               && !string.IsNullOrWhiteSpace(property.GetString())
 1919            ? property.GetString()
 1920            : null;
 921    }
 922
 923    private static int? GetOptionalIntProperty(JsonElement metadata, string propertyName)
 924    {
 1925        return metadata.ValueKind == JsonValueKind.Object
 1926               && metadata.TryGetProperty(propertyName, out var property)
 1927               && property.TryGetInt32(out var value)
 1928            ? value
 1929            : null;
 930    }
 931
 1932    private sealed record RunContext(
 1933        LangfuseDatasetRunWithItems DatasetRun,
 1934        PreparedExperimentRunMetadata RunMetadata,
 1935        IReadOnlyList<LangfuseDatasetRunItem> DatasetRunItems,
 1936        ExperimentAggregateScores AggregateScores);
 937
 938    private readonly record struct PredictionOutput(string Status, int? HomeGoals, int? AwayGoals);
 939
 940    private readonly record struct ExpectedOutput(int HomeGoals, int AwayGoals);
 941
 942    private readonly record struct DatasetItemMetadata(
 943        string HomeTeam,
 944        string AwayTeam,
 945        int Matchday,
 946        string StartsAt,
 947        string? TippSpielId,
 948        int? FixtureIndex,
 949        int? RepetitionIndex);
 950}

Methods/Properties

.ctor(Spectre.Console.IAnsiConsole, Orchestrator.Infrastructure.Langfuse.ILangfusePublicApiClient, Microsoft.Extensions.Logging.ILogger<Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand>)
ExecuteAsync()
LoadDatasetItemsAsync()
LoadTracesAsync()
ListRunTracesAsync()
ListRunObservationsAsync()
BuildBundle(string, Orchestrator.Infrastructure.Langfuse.LangfuseDataset, System.Collections.Generic.IReadOnlyList<Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand.RunContext>, System.Collections.Generic.IReadOnlyList<Orchestrator.Commands.Observability.Experiments.PreparedExperimentAnalysisRow>)
BuildRows(System.Collections.Generic.IReadOnlyList<Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand.RunContext>, System.Collections.Generic.IReadOnlyDictionary<string, Orchestrator.Infrastructure.Langfuse.LangfuseDatasetItem>, System.Collections.Generic.IReadOnlyDictionary<string, Orchestrator.Infrastructure.Langfuse.LangfuseTraceWithDetails>)
ValidateComparableRuns(System.Collections.Generic.IReadOnlyList<Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand.RunContext>)
EnsureDistinctDatasetItems(System.Collections.Generic.IReadOnlyList<Orchestrator.Infrastructure.Langfuse.LangfuseDatasetRunItem>, string)
ListAllDatasetRunItemsAsync()
LoadAggregateScoresAsync()
EnumerateOffsetPaginatedItemsAsync<TItem>(System.Func<int, System.Threading.CancellationToken, System.Threading.Tasks.Task<Orchestrator.Infrastructure.Langfuse.LangfusePaginatedResponse<TItem>>>, System.Threading.CancellationToken)
EnumerateCursorPaginatedItemsAsync<TItem>(System.Func<string, System.Threading.CancellationToken, System.Threading.Tasks.Task<Orchestrator.Infrastructure.Langfuse.LangfuseCursorPaginatedResponse<TItem>>>, System.Threading.CancellationToken)
GetNextPageNumber<TItem>(Orchestrator.Infrastructure.Langfuse.LangfusePaginatedResponse<TItem>)
DeserializeRunMetadata(System.Text.Json.JsonElement, string)
ExtractPrediction(System.Text.Json.JsonElement, System.Nullable<System.Text.Json.JsonElement>, string, string)
ExtractExpectedOutput(System.Text.Json.JsonElement, string, string)
ExtractDatasetItemMetadata(System.Text.Json.JsonElement, System.Text.Json.JsonElement, string)
CalculateKicktippPoints(Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand.PredictionOutput, Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand.ExpectedOutput)
SelectPredictionObservation(Orchestrator.Infrastructure.Langfuse.LangfuseTraceWithDetails, Orchestrator.Commands.Observability.Experiments.PreparedExperimentRunMetadata)
ResolveSourceDatasetItemId(Orchestrator.Commands.Observability.Experiments.PreparedExperimentRunMetadata, string, System.Text.Json.JsonElement, string)
TryDeriveSourceDatasetItemId(string, out string)
TryExtractPredictionOutput(System.Text.Json.JsonElement, out Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand.PredictionOutput)
TryGetNullableIntProperty(System.Text.Json.JsonElement, string, out System.Nullable<int>)
TryParseScoreString(string, out Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisCommand.ExpectedOutput)
NormalizePredictionStatus(string)
ResolveRunDisplayName(Orchestrator.Commands.Observability.Experiments.PreparedExperimentRunMetadata, string)
ResolvePrimaryMetricName(string)
ResolvePrimaryMetricValue(string, Orchestrator.Commands.Observability.Experiments.ExperimentAggregateScores)
IsAveragePrimaryMetricTask(string)
ResolveOutputPath(Orchestrator.Commands.Observability.ExportExperimentAnalysis.ExportExperimentAnalysisSettings, string, string)
GetRequiredStringProperty(System.Text.Json.JsonElement, string, string)
GetRequiredIntProperty(System.Text.Json.JsonElement, string, string)
GetOptionalStringProperty(System.Text.Json.JsonElement, string)
GetOptionalIntProperty(System.Text.Json.JsonElement, string)
.ctor(Orchestrator.Infrastructure.Langfuse.LangfuseDatasetRunWithItems, Orchestrator.Commands.Observability.Experiments.PreparedExperimentRunMetadata, System.Collections.Generic.IReadOnlyList<Orchestrator.Infrastructure.Langfuse.LangfuseDatasetRunItem>, Orchestrator.Commands.Observability.Experiments.ExperimentAggregateScores)