< Summary

Information
Class: Orchestrator.Commands.Observability.Experiments.PreparedExperimentRunRequest
Assembly: Orchestrator
File(s): /home/runner/work/KicktippAi/KicktippAi/src/Orchestrator/Commands/Observability/Experiments/PreparedExperimentRunExecutor.cs
Line coverage
100%
Covered lines: 7
Uncovered lines: 0
Coverable lines: 7
Total lines: 1275
Line coverage: 100%
Branch coverage
N/A
Covered branches: 0
Total branches: 0
Branch coverage: N/A
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

MethodBranch coverage Crap Score Cyclomatic complexity Line coverage
.ctor(...)100%11100%

File(s)

/home/runner/work/KicktippAi/KicktippAi/src/Orchestrator/Commands/Observability/Experiments/PreparedExperimentRunExecutor.cs

#LineLine coverage
 1using System.Diagnostics;
 2using System.Text.Json;
 3using EHonda.KicktippAi.Core;
 4using OpenAiIntegration;
 5using Orchestrator.Infrastructure.Factories;
 6using Orchestrator.Infrastructure.Langfuse;
 7using Match = EHonda.KicktippAi.Core.Match;
 8
 9namespace Orchestrator.Commands.Observability.Experiments;
 10
 11internal sealed class PreparedExperimentRunExecutor
 12{
 13    private static readonly JsonSerializerOptions TraceJsonOptions = new(JsonSerializerDefaults.Web)
 14    {
           // Options passed to the JsonSerializer.Serialize calls in this class that produce trace output.
           // NOTE(review): JsonSerializerDefaults.Web is documented to already enable case-insensitive
           // property names and camelCase naming, so the two assignments below look redundant - confirm.
 15        PropertyNameCaseInsensitive = true,
 16        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
 17    };
 18
       // Collaborators assigned once by the constructor and never reassigned (readonly).
 19    private readonly IFirebaseServiceFactory _firebaseServiceFactory;
 20    private readonly IOpenAiServiceFactory _openAiServiceFactory;
 21    private readonly ILangfusePublicApiClient _langfuseClient;
 22
 23    public PreparedExperimentRunExecutor(
 24        IFirebaseServiceFactory firebaseServiceFactory,
 25        IOpenAiServiceFactory openAiServiceFactory,
 26        ILangfusePublicApiClient langfuseClient)
 27    {
           // Stores the three injected collaborators in the readonly fields; no validation or other work happens here.
           // firebaseServiceFactory: used by ExecuteAsync to create the prediction, context and match outcome repositories.
           // openAiServiceFactory: used by ExecuteAsync to create the prediction service.
           // langfuseClient: used for prompt lookup, dataset run retrieval and dataset run item creation.
 28        _firebaseServiceFactory = firebaseServiceFactory;
 29        _openAiServiceFactory = openAiServiceFactory;
 30        _langfuseClient = langfuseClient;
 31    }
 32
 33    public async Task<PreparedExperimentRunSummary> ExecuteAsync(
 34        string expectedTaskType,
 35        PreparedExperimentRunRequest request,
 36        CancellationToken cancellationToken)
 37    {
           // Runs one prepared experiment end to end:
           //   1. loads and validates the manifest at request.ManifestPath and checks it against expectedTaskType,
           //   2. resolves the run metadata and applies the batching defaults for the task type,
           //   3. optionally deletes an existing run (request.ReplaceRun), then executes every manifest item,
           //   4. posts run-level scores, waits for the dataset run items and returns a summary.
           // expectedTaskType: task type the manifest must declare; also selects the batching strategy.
           // request: manifest path, optional run metadata file, run name, model options and replace flag.
           // cancellationToken: forwarded to every awaited call in this method.
           // Throws InvalidOperationException when no executed item yields a dataset run id, or when the
           // dataset run cannot be retrieved afterwards. Exceptions from the helpers and from the per-item
           // execution are not caught here.
           // Batches are awaited one after another; the items inside a batch run concurrently through
           // Task.WhenAll, so a single failing item fails the whole batch and therefore the whole run.
 38        var manifest = await PreparedExperimentCommandSupport.LoadJsonFileAsync<PreparedExperimentManifest>(
 39            request.ManifestPath,
 40            cancellationToken);
 41        PreparedExperimentCommandSupport.ValidateManifest(manifest);
 42        PreparedExperimentCommandSupport.EnsureTaskType(manifest, expectedTaskType);
 43
           // Without a run metadata file the metadata is built from the manifest and the request options;
           // with one, the file is loaded and then normalized against the manifest and the options.
 44        var runMetadata = string.IsNullOrWhiteSpace(request.RunMetadataFile)
 45            ? PreparedExperimentSupport.BuildRunMetadata(manifest, request.Options)
 46            : PreparedExperimentCommandSupport.NormalizeRunMetadata(
 47                await PreparedExperimentCommandSupport.LoadJsonFileAsync<PreparedExperimentRunMetadata>(
 48                    request.RunMetadataFile,
 49                    cancellationToken),
 50                manifest,
 51                request.Options);
 52        runMetadata = ApplyBatchingDefaults(runMetadata, expectedTaskType);
 53
 54        var communityContext = GetCommunityContext(runMetadata, manifest);
 55        var datasetName = DeriveDatasetName(runMetadata, manifest);
           // The timestamp policy is only parsed when the metadata carries no explicit evaluation time.
 56        var explicitEvaluationTime = PreparedExperimentCommandSupport.ParseExplicitEvaluationTime(runMetadata);
 57        var evaluationTimestampPolicy = explicitEvaluationTime is null
 58            ? PreparedExperimentCommandSupport.ParseEvaluationTimestampPolicy(runMetadata)
 59            : null;
 60        var deletedExistingRun = await DeleteExistingRunIfRequestedAsync(
 61            datasetName,
 62            request.RunName,
 63            request.ReplaceRun,
 64            cancellationToken);
 65
 66        var predictionRepository = _firebaseServiceFactory.CreatePredictionRepository();
 67        var contextRepository = _firebaseServiceFactory.CreateContextRepository();
 68        var matchOutcomeRepository = _firebaseServiceFactory.CreateMatchOutcomeRepository();
 69        var promptRoute = await ResolvePromptRouteAsync(runMetadata, cancellationToken);
           // When the prompt route carries trace metadata, its Version replaces LangfusePromptVersion in the
           // run metadata, so everything derived from runMetadata below sees the resolved version.
 70        if (promptRoute.TraceMetadata is { } promptTraceMetadata)
 71        {
 72            runMetadata = runMetadata with
 73            {
 74                LangfusePromptVersion = promptTraceMetadata.Version
 75            };
 76        }
 77
           // Starts from the FlexProcessingWithStandardFallback preset and overrides the prompt trace metadata,
           // reasoning effort and, when the metadata provides one, the max output token count.
 78        var predictionServiceOptions = PredictionServiceOptions.FlexProcessingWithStandardFallback with
 79        {
 80            LangfusePromptTraceMetadata = promptRoute.TraceMetadata,
 81            ReasoningEffort = runMetadata.ReasoningEffort,
 82            MaxOutputTokenCount = runMetadata.MaxOutputTokenCount
 83                                  ?? PredictionServiceOptions.FlexProcessingWithStandardFallback.MaxOutputTokenCount
 84        };
 85        var predictionService = promptRoute.TemplateProvider is null
 86            ? _openAiServiceFactory.CreatePredictionService(
 87                request.Options.Model,
 88                predictionServiceOptions)
 89            : _openAiServiceFactory.CreatePredictionService(
 90                request.Options.Model,
 91                predictionServiceOptions,
 92                promptRoute.TemplateProvider);
           // Prompt reconstruction uses the route template provider when there is one, otherwise an
           // InstructionsTemplateProvider over PromptsFileProvider.Create().
 93        var reconstructionService = new MatchPromptReconstructionService(
 94            predictionRepository,
 95            contextRepository,
 96            promptRoute.TemplateProvider ?? new InstructionsTemplateProvider(PromptsFileProvider.Create()));
 97
 98        var outcomesByKey = await LoadOutcomesAsync(matchOutcomeRepository, communityContext, manifest, cancellationToke
 99        var experimentName = PreparedExperimentSupport.DeriveExperimentName(runMetadata, request.RunName);
 100        var traceTags = PreparedExperimentSupport.DeriveTraceTags(runMetadata);
 101        var propagatedMetadata = PreparedExperimentSupport.DerivePropagatedMetadata(runMetadata);
 102        var runMetadataPayload = PreparedExperimentSupport.BuildLangfuseExperimentMetadata(
 103            runMetadata,
 104            experimentName,
 105            request.RunName,
 106            new Dictionary<string, string?>
 107            {
 108                ["openaiServiceTierStrategy"] = "flex-first-standard-fallback",
 109                ["openaiReasoningEffort"] = runMetadata.ReasoningEffort
 110            });
            // datasetRunId is taken from the first batch result that carries one (see the null-coalescing
            // assignment in the loop) and is required after the loop.
 111        var batches = BuildBatches(manifest.Items, runMetadata, expectedTaskType);
 112        var executionSummaries = new List<PreparedExperimentExecutionSummary>();
 113        string? datasetRunId = null;
 114        var completedExecutionCount = 0;
 115
 116        PreparedExperimentSupport.ReportProgress(
 117            $"Starting {expectedTaskType} run '{request.RunName}' for model '{request.Options.Model}' with sample size {
 118
 119        for (var batchIndex = 0; batchIndex < batches.Count; batchIndex += 1)
 120        {
 121            var batch = batches[batchIndex];
 122            var batchStart = completedExecutionCount + 1;
 123            var batchEnd = completedExecutionCount + batch.Count;
 124
 125            PreparedExperimentSupport.ReportProgress(
 126                $"Batch {batchIndex + 1}/{batches.Count}: executions {batchStart}-{batchEnd} of {manifest.Items.Count}."
 127
 128            var batchResults = await Task.WhenAll(batch.Select(item => ExecuteItemAsync(
 129                item,
 130                request,
 131                experimentName,
 132                datasetName,
 133                runMetadata,
 134                explicitEvaluationTime,
 135                evaluationTimestampPolicy,
 136                predictionRepository,
 137                reconstructionService,
 138                predictionService,
 139                outcomesByKey,
 140                traceTags,
 141                propagatedMetadata,
 142                runMetadataPayload,
 143                cancellationToken)));
 144
 145            foreach (var batchResult in batchResults)
 146            {
 147                datasetRunId ??= batchResult.DatasetRunId;
 148                executionSummaries.Add(batchResult.Summary);
 149            }
 150
 151            completedExecutionCount += batchResults.Length;
 152            PreparedExperimentSupport.ReportProgress(
 153                $"Completed batch {batchIndex + 1}/{batches.Count}: {completedExecutionCount}/{manifest.Items.Count} exe
 154        }
 155
 156        if (string.IsNullOrWhiteSpace(datasetRunId))
 157        {
 158            throw new InvalidOperationException($"Dataset run '{request.RunName}' did not return a datasetRunId.");
 159        }
 160
 161        var aggregateScores = await PostRunScoresAsync(datasetRunId, runMetadata, executionSummaries, cancellationToken)
 162        var datasetRun = await _langfuseClient.GetDatasetRunAsync(datasetName, request.RunName, cancellationToken)
 163            ?? throw new InvalidOperationException(
 164                $"Dataset run '{request.RunName}' could not be retrieved from dataset '{datasetName}'.");
 165        var datasetRunItems = await WaitForDatasetRunItemsAsync(
 166            datasetRun.DatasetId,
 167            request.RunName,
 168            manifest.Items.Count,
 169            cancellationToken);
 170
 171        return new PreparedExperimentRunSummary(
 172            datasetName,
 173            request.RunName,
 174            request.RunName,
 175            runMetadata.TaskType ?? expectedTaskType,
 176            request.Options.Model,
 177            deletedExistingRun,
 178            manifest.Items.Count,
 179            runMetadata.BatchStrategy ?? expectedTaskType,
 180            runMetadata.BatchSize,
 181            runMetadata.BatchCount,
 182            runMetadata.Parallelism,
 183            executionSummaries.Count,
 184            1,
 185            aggregateScores,
 186            [new PreparedExperimentDatasetRunSummary(
 187                1,
 188                request.RunName,
 189                datasetRunId,
 190                datasetRunItems.Meta.TotalItems,
 191                aggregateScores,
 192                executionSummaries.FirstOrDefault(),
 193                executionSummaries.LastOrDefault())],
 194            executionSummaries.FirstOrDefault(),
 195            executionSummaries.LastOrDefault());
 196    }
 197
 198    public async Task<PreparedExperimentRunSummary> ExecuteCommunityToDateAsync(
 199        PreparedExperimentCommunityRunRequest request,
 200        CancellationToken cancellationToken)
 201    {
            // Creates one dataset run per selected participant of a community-to-date manifest, all sharing
            // one run family name, and returns a summary across every participant run.
            // No prediction service is involved: each item is handed to ExecuteCommunityItemAsync together
            // with the predictions already stored on the participant.
            // request: manifest path, optional dataset name and run family name overrides, batch size,
            //          run description and the replace flag applied to every participant run.
            // cancellationToken: forwarded to every awaited call in this method.
            // Throws InvalidOperationException when the manifest has no participants, when no dataset name
            // can be determined, when a participant run yields no dataset run id, or when a dataset run
            // cannot be retrieved afterwards.
            // Participants are processed sequentially; within a participant, batches are awaited one after
            // another and the items of a batch run concurrently through Task.WhenAll.
            // If a participant has several predictions for the same SourceDatasetItemId, the first one in
            // enumeration order is used (GroupBy followed by First).
            // The returned summary reports the overall scores computed by SummarizeScores over the item
            // scores of all participants, and hard-codes the task type, the model label and the
            // simple-batched strategy as string literals.
 202        var manifest = await PreparedExperimentCommandSupport.LoadJsonFileAsync<PreparedExperimentManifest>(
 203            request.ManifestPath,
 204            cancellationToken);
 205        PreparedExperimentCommandSupport.ValidateManifest(manifest);
 206        PreparedExperimentCommandSupport.EnsureTaskType(manifest, "community-to-date");
 207
 208        if (manifest.Participants.Count == 0)
 209        {
 210            throw new InvalidOperationException("Community-to-date manifests must contain at least one participant.");
 211        }
 212
            // An explicit request.DatasetName (trimmed) wins over the slice dataset name from the manifest.
 213        var datasetName = string.IsNullOrWhiteSpace(request.DatasetName)
 214            ? manifest.SliceDatasetName
 215            : request.DatasetName.Trim();
 216        if (string.IsNullOrWhiteSpace(datasetName))
 217        {
 218            throw new InvalidOperationException("No dataset name was provided for the community-to-date run.");
 219        }
 220
            // The start time is captured once and reused for the generated run family name and for the
            // run metadata of every participant.
 221        var startedAtUtc = ExperimentArtifactSupport.FormatStartedAtUtc(DateTimeOffset.UtcNow);
 222        var batchSize = request.BatchSize;
 223        var participants = SelectParticipants(manifest, request);
 224        var runFamilyName = string.IsNullOrWhiteSpace(request.RunFamilyName)
 225            ? BuildCommunityRunFamilyName(manifest, startedAtUtc)
 226            : request.RunFamilyName.Trim();
 227
 228        var datasetRunSummaries = new List<PreparedExperimentDatasetRunSummary>();
 229        var executionSummaries = new List<PreparedExperimentExecutionSummary>();
 230        var scoreEntries = new List<ExperimentItemScores>();
 231        var deletedAnyExistingRun = false;
 232
 233        PreparedExperimentSupport.ReportProgress(
 234            $"Starting community-to-date run family '{runFamilyName}' with {participants.Count} participant run(s) and s
 235
 236        for (var participantIndex = 0; participantIndex < participants.Count; participantIndex += 1)
 237        {
 238            var participant = participants[participantIndex];
 239            var runName = BuildCommunityParticipantRunName(runFamilyName, participant);
 240            var runMetadata = BuildCommunityRunMetadata(manifest, participant, datasetName, startedAtUtc, batchSize);
 241            var deletedExistingRun = await DeleteExistingRunIfRequestedAsync(
 242                datasetName,
 243                runName,
 244                request.ReplaceRuns,
 245                cancellationToken);
 246            deletedAnyExistingRun |= deletedExistingRun;
 247
 248            var traceTags = PreparedExperimentSupport.DeriveTraceTags(runMetadata);
 249            var propagatedMetadata = PreparedExperimentSupport.DerivePropagatedMetadata(runMetadata);
 250            var experimentName = runFamilyName;
 251            var runMetadataPayload = PreparedExperimentSupport.BuildLangfuseExperimentMetadata(
 252                runMetadata,
 253                experimentName,
 254                runName);
 255            var predictionsBySourceDatasetItemId = participant.Predictions
 256                .GroupBy(prediction => prediction.SourceDatasetItemId, StringComparer.Ordinal)
 257                .ToDictionary(group => group.Key, group => group.First(), StringComparer.Ordinal);
 258            var batches = PreparedExperimentSupport.CreateBatchChunks(manifest.Items, batchSize);
 259            var participantScoreEntries = new List<ExperimentItemScores>();
 260            var participantExecutionSummaries = new List<PreparedExperimentExecutionSummary>();
 261            string? datasetRunId = null;
 262            var completedExecutionCount = 0;
 263
 264            PreparedExperimentSupport.ReportProgress(
 265                $"Participant {participantIndex + 1}/{participants.Count}: starting run '{runName}' for '{participant.Di
 266
 267            for (var batchIndex = 0; batchIndex < batches.Count; batchIndex += 1)
 268            {
 269                var batch = batches[batchIndex];
 270                var batchStart = completedExecutionCount + 1;
 271                var batchEnd = completedExecutionCount + batch.Count;
 272
 273                PreparedExperimentSupport.ReportProgress(
 274                    $"Participant {participant.DisplayName}: batch {batchIndex + 1}/{batches.Count}, executions {batchSt
 275
 276                var batchResults = await Task.WhenAll(batch.Select(item => ExecuteCommunityItemAsync(
 277                    item,
 278                    participant,
 279                    predictionsBySourceDatasetItemId,
 280                    runName,
 281                    experimentName,
 282                    request.RunDescription,
 283                    datasetName,
 284                    runMetadata,
 285                    traceTags,
 286                    propagatedMetadata,
 287                    runMetadataPayload,
 288                    cancellationToken)));
 289
 290                foreach (var batchResult in batchResults)
 291                {
 292                    datasetRunId ??= batchResult.DatasetRunId;
 293                    participantScoreEntries.Add(batchResult.Summary.Scores);
 294                    participantExecutionSummaries.Add(batchResult.Summary);
 295                }
 296
 297                completedExecutionCount += batchResults.Length;
 298            }
 299
 300            if (string.IsNullOrWhiteSpace(datasetRunId))
 301            {
 302                throw new InvalidOperationException($"Dataset run '{runName}' did not return a datasetRunId.");
 303            }
 304
 305            var aggregateScores = await PostRunScoresAsync(datasetRunId, runMetadata, participantExecutionSummaries, can
 306            var datasetRun = await _langfuseClient.GetDatasetRunAsync(datasetName, runName, cancellationToken)
 307                ?? throw new InvalidOperationException(
 308                    $"Dataset run '{runName}' could not be retrieved from dataset '{datasetName}'.");
 309            var datasetRunItems = await WaitForDatasetRunItemsAsync(
 310                datasetRun.DatasetId,
 311                runName,
 312                manifest.Items.Count,
 313                cancellationToken);
 314
 315            datasetRunSummaries.Add(new PreparedExperimentDatasetRunSummary(
 316                participantIndex + 1,
 317                runName,
 318                datasetRunId,
 319                datasetRunItems.Meta.TotalItems,
 320                aggregateScores,
 321                participantExecutionSummaries.FirstOrDefault(),
 322                participantExecutionSummaries.LastOrDefault()));
 323            executionSummaries.AddRange(participantExecutionSummaries);
 324            scoreEntries.AddRange(participantScoreEntries);
 325        }
 326
 327        var overallAggregateScores = PreparedExperimentSupport.SummarizeScores(scoreEntries);
 328        return new PreparedExperimentRunSummary(
 329            datasetName,
 330            runFamilyName,
 331            runFamilyName,
 332            "community-to-date",
 333            "community-predictions",
 334            deletedAnyExistingRun,
 335            manifest.Items.Count,
 336            "simple-batched",
 337            batchSize,
 338            null,
 339            null,
 340            executionSummaries.Count,
 341            datasetRunSummaries.Count,
 342            overallAggregateScores,
 343            datasetRunSummaries,
 344            executionSummaries.FirstOrDefault(),
 345            executionSummaries.LastOrDefault());
 346    }
 347
 348    private IReadOnlyList<IReadOnlyList<PreparedExperimentManifestItem>> BuildBatches(
 349        IReadOnlyList<PreparedExperimentManifestItem> items,
 350        PreparedExperimentRunMetadata runMetadata,
 351        string expectedTaskType)
 352    {
            // Splits the manifest items into the ordered batches that ExecuteAsync runs one after another.
            // items: manifest items to distribute.
            // runMetadata: source of BatchCount, Parallelism and BatchSize; missing values fall back to 3, 5 and 10.
            // expectedTaskType: selects the strategy; the repeated-match-slice comparison ignores case.
            // Returns the list of batches; every inner list is executed concurrently by the caller.
            // NOTE(review): the fallback literals duplicate the defaults in ApplyBatchingDefaults; keep them in sync.

            // Repeated match slices get their own interleaving of per-fixture workflows.
 353        if (string.Equals(expectedTaskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase))
 354        {
 355            return CreateRepeatedMatchSliceBatches(
 356                items,
 357                runMetadata.BatchCount ?? 3,
 358                runMetadata.Parallelism ?? 5);
 359        }
 360
            // Other warm-up task types are chunked by batch count, everything else by batch size.
 361        return IsWarmupBatchTask(expectedTaskType)
 362            ? PreparedExperimentSupport.CreateWarmupThenBatchChunks(items, runMetadata.BatchCount ?? 3)
 363            : PreparedExperimentSupport.CreateBatchChunks(items, runMetadata.BatchSize ?? 10);
 364    }
 365
 366    internal static IReadOnlyList<IReadOnlyList<PreparedExperimentManifestItem>> CreateRepeatedMatchSliceBatches(
 367        IReadOnlyList<PreparedExperimentManifestItem> items,
 368        int batchCount,
 369        int parallelism)
 370    {
            // Builds the batch plan for repeated-match-slice runs.
            // Items are grouped per source dataset item; each group is turned into its own sequence of
            // chunks (a workflow) by CreateWarmupThenBatchChunks, and up to parallelism workflows are then
            // merged step by step so that their chunks with the same index share one batch.
            // items: manifest items; repetitions of a fixture share a SourceDatasetItemId.
            // batchCount: forwarded unchanged to CreateWarmupThenBatchChunks for every workflow.
            // parallelism: maximum number of workflows merged into the same batches; must be at least 1.
            // Returns the batches in execution order; an empty items list yields an empty result.
            // Throws ArgumentOutOfRangeException when parallelism is below 1.
            // NOTE(review): batchCount is not validated here; presumably CreateWarmupThenBatchChunks does it - confirm.
 371        if (parallelism < 1)
 372        {
 373            throw new ArgumentOutOfRangeException(nameof(parallelism), parallelism, "Parallelism must be at least 1.");
 374        }
 375
            // One workflow per source dataset item. Workflows are ordered by their smallest FixtureIndex and
            // then by key; items inside a workflow are ordered by RepetitionIndex and then by slice item id.
            // A missing index is replaced by int.MaxValue, so such entries sort last.
 376        var fixtureWorkflows = items
 377            .GroupBy(item => item.SourceDatasetItemId, StringComparer.Ordinal)
 378            .OrderBy(group => group.Min(item => item.FixtureIndex ?? int.MaxValue))
 379            .ThenBy(group => group.Key, StringComparer.Ordinal)
 380            .Select(group =>
 381                PreparedExperimentSupport.CreateWarmupThenBatchChunks(
 382                    group
 383                        .OrderBy(item => item.RepetitionIndex ?? int.MaxValue)
 384                        .ThenBy(item => item.SliceDatasetItemId, StringComparer.Ordinal)
 385                        .ToList(),
 386                    batchCount))
 387            .ToList();
 388
 389        var batches = new List<IReadOnlyList<PreparedExperimentManifestItem>>();
            // Walk the workflows in windows of size parallelism. The loop condition guarantees that every
            // window holds at least one workflow, so the Max call below never sees an empty sequence.
 390        for (var workflowStart = 0; workflowStart < fixtureWorkflows.Count; workflowStart += parallelism)
 391        {
 392            var workflowGroup = fixtureWorkflows
 393                .Skip(workflowStart)
 394                .Take(parallelism)
 395                .ToList();
 396            var maxWorkflowBatchCount = workflowGroup.Max(workflow => workflow.Count);
 397            for (var workflowBatchIndex = 0; workflowBatchIndex < maxWorkflowBatchCount; workflowBatchIndex += 1)
 398            {
                    // Combine the chunk at this index from every workflow in the window that still has one;
                    // shorter workflows simply stop contributing.
 399                var batch = workflowGroup
 400                    .Where(workflow => workflowBatchIndex < workflow.Count)
 401                    .SelectMany(workflow => workflow[workflowBatchIndex])
 402                    .ToList();
                    // Skip steps where every contributing chunk was empty.
 403                if (batch.Count > 0)
 404                {
 405                    batches.Add(batch);
 406                }
 407            }
 408        }
 409
 410        return batches;
 411    }
 412
 413    private async Task<ExperimentPromptRoute> ResolvePromptRouteAsync(
 414        PreparedExperimentRunMetadata runMetadata,
 415        CancellationToken cancellationToken)
 416    {
            // Decides where the prompt template for the run comes from.
            // runMetadata: supplies PromptSource, IncludeJustification and the Langfuse prompt name, label and version.
            // cancellationToken: forwarded to the Langfuse prompt lookup.
            // Returns a route with null template provider and null trace metadata for the local source,
            // otherwise a route holding a LangfuseTextPromptTemplateProvider and trace metadata built from
            // the name and version of the fetched prompt.
            // Throws InvalidOperationException for an unsupported prompt source, for a Langfuse source
            // combined with IncludeJustification, and for a missing langfusePromptName.
            // Throws FileNotFoundException when the Langfuse client returns no prompt.
            // NOTE(review): the result of prompt.GetTextPrompt() is discarded; presumably the call only
            // verifies that the fetched prompt is a text prompt before it is used - confirm.

            // A blank prompt source means local; other values are compared after Trim and ToLowerInvariant.
 417        var promptSource = string.IsNullOrWhiteSpace(runMetadata.PromptSource)
 418            ? "local"
 419            : runMetadata.PromptSource.Trim().ToLowerInvariant();
 420
 421        if (promptSource == "local")
 422        {
 423            return new ExperimentPromptRoute(null, null);
 424        }
 425
 426        if (promptSource != "langfuse")
 427        {
 428            throw new InvalidOperationException($"Unsupported prompt source '{runMetadata.PromptSource}'.");
 429        }
 430
 431        if (runMetadata.IncludeJustification)
 432        {
 433            throw new InvalidOperationException(
 434                "The Langfuse prompt source POC only supports match prompts without justification.");
 435        }
 436
 437        if (string.IsNullOrWhiteSpace(runMetadata.LangfusePromptName))
 438        {
 439            throw new InvalidOperationException("Run metadata must contain langfusePromptName when promptSource is langf
 440        }
 441
 442        var prompt = await _langfuseClient.GetPromptAsync(
 443                         runMetadata.LangfusePromptName,
 444                         runMetadata.LangfusePromptLabel,
 445                         runMetadata.LangfusePromptVersion,
 446                         cancellationToken)
 447                     ?? throw new FileNotFoundException(
 448                         $"Langfuse prompt '{runMetadata.LangfusePromptName}' was not found.");
 449
 450        _ = prompt.GetTextPrompt();
 451        var templateProvider = new LangfuseTextPromptTemplateProvider(
 452            _langfuseClient,
 453            runMetadata.LangfusePromptName,
 454            runMetadata.LangfusePromptLabel,
 455            runMetadata.LangfusePromptVersion,
 456            prompt);
 457
 458        return new ExperimentPromptRoute(
 459            templateProvider,
 460            new LangfusePromptTraceMetadata(prompt.Name, prompt.Version));
 461    }
 462
 463    private static PreparedExperimentRunMetadata ApplyBatchingDefaults(
 464        PreparedExperimentRunMetadata runMetadata,
 465        string expectedTaskType)
 466    {
            // Returns a copy of runMetadata (built with a with-expression) whose batching fields are
            // filled in for the given task type; the instance passed in is not modified.
            // runMetadata: metadata whose BatchStrategy, BatchCount, BatchSize and Parallelism are adjusted.
            // expectedTaskType: decides between the warm-up layout and the simple batched layout.

            // Warm-up tasks: a blank strategy becomes warmup-plus-batches, BatchCount defaults to 3 and
            // BatchSize is cleared. Parallelism only receives a default (5) for repeated-match-slice;
            // for other warm-up tasks the provided value is kept as is.
 467        if (IsWarmupBatchTask(expectedTaskType))
 468        {
 469            return runMetadata with
 470            {
 471                BatchStrategy = string.IsNullOrWhiteSpace(runMetadata.BatchStrategy)
 472                    ? "warmup-plus-batches"
 473                    : runMetadata.BatchStrategy,
 474                BatchCount = runMetadata.BatchCount ?? 3,
 475                BatchSize = null,
 476                Parallelism = string.Equals(expectedTaskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase
 477                    ? runMetadata.Parallelism ?? 5
 478                    : runMetadata.Parallelism
 479            };
 480        }
 481
            // Everything else: a blank strategy becomes simple-batched, BatchSize defaults to 10 and the
            // warm-up specific fields BatchCount and Parallelism are cleared.
 482        return runMetadata with
 483        {
 484            BatchStrategy = string.IsNullOrWhiteSpace(runMetadata.BatchStrategy)
 485                ? "simple-batched"
 486                : runMetadata.BatchStrategy,
 487            BatchSize = runMetadata.BatchSize ?? 10,
 488            BatchCount = null,
 489            Parallelism = null
 490        };
 491    }
 492
 493    private async Task<PreparedExperimentExecutionResult> ExecuteItemAsync(
 494        PreparedExperimentManifestItem item,
 495        PreparedExperimentRunRequest request,
 496        string experimentName,
 497        string datasetName,
 498        PreparedExperimentRunMetadata runMetadata,
 499        DateTimeOffset? explicitEvaluationTime,
 500        EvaluationTimestampPolicy? evaluationTimestampPolicy,
 501        IPredictionRepository predictionRepository,
 502        MatchPromptReconstructionService reconstructionService,
 503        IPredictionService predictionService,
 504        IReadOnlyDictionary<string, PersistedMatchOutcome> outcomesByKey,
 505        IReadOnlyList<string> traceTags,
 506        IReadOnlyDictionary<string, string> propagatedMetadata,
 507        JsonElement runMetadataPayload,
 508        CancellationToken cancellationToken)
 509    {
            // Executes a single manifest item: checks that a completed outcome exists, reconstructs the
            // prompt context for the evaluation timestamp, registers a dataset run item for the trace,
            // asks the prediction service for a prediction, scores it against the outcome and posts the score.
            // Returns the dataset run id reported for the created run item together with the execution summary.
            // Throws InvalidOperationException when the outcome is missing or incomplete, when neither an
            // explicit evaluation time nor a timestamp policy is available, when no trace id can be
            // obtained from the activity, or when the prediction service returns null.
            // The outcome key is built from home team, away team and matchday of the item.
 510        var outcomeKey = BuildOutcomeKey(item.HomeTeam, item.AwayTeam, item.Matchday);
 511        if (!outcomesByKey.TryGetValue(outcomeKey, out var outcome))
 512        {
 513            throw new InvalidOperationException(
 514                $"No persisted match outcome was found for {item.HomeTeam} vs {item.AwayTeam} on matchday {item.Matchday
 515        }
 516
 517        if (!outcome.HasOutcome || outcome.HomeGoals is null || outcome.AwayGoals is null)
 518        {
 519            throw new InvalidOperationException(
 520                $"The selected match does not have a completed persisted outcome yet: {item.HomeTeam} vs {item.AwayTeam}
 521        }
 522
            // The stored match is looked up with a null model config; when none is stored, a Match built
            // from the item and the outcome is used instead.
 523        var storedMatch = await predictionRepository.GetStoredMatchAsync(
 524            item.HomeTeam,
 525            item.AwayTeam,
 526            item.Matchday,
 527            (PredictionModelConfig?)null,
 528            null,
 529            cancellationToken);
 530
 531        var promptMatch = storedMatch is null
 532            ? ExperimentArtifactSupport.RehydrateForPromptOutput(new Match(item.HomeTeam, item.AwayTeam, outcome.StartsA
 533            : ExperimentArtifactSupport.RehydrateForPromptOutput(storedMatch);
            // An explicit evaluation time wins; otherwise the timestamp is resolved from the policy.
 534        var evaluationTimestamp = explicitEvaluationTime
 535            ?? EvaluationTimestampResolver.Resolve(
 536                promptMatch,
 537                evaluationTimestampPolicy ?? throw new InvalidOperationException(
 538                    "Run metadata must contain either evaluationTime or evaluationTimestampPolicy."));
 539        var selection = MatchContextDocumentCatalog.ForMatch(item.HomeTeam, item.AwayTeam, runMetadata.CommunityContext!
 540        var reconstructedPrompt = await reconstructionService.ReconstructMatchPredictionPromptAtTimestampAsync(
 541            promptMatch,
 542            request.Options.Model,
 543            runMetadata.CommunityContext!,
 544            evaluationTimestamp,
 545            selection.RequiredDocumentNames,
 546            selection.OptionalDocumentNames,
 547            runMetadata.IncludeJustification,
 548            cancellationToken);
 549
 550        var contextDocuments = reconstructedPrompt.ResolvedContextDocuments
 551            .Select(document => new DocumentContext(document.DocumentName, document.Content))
 552            .ToList();
 553        var telemetryMetadata = new PredictionTelemetryMetadata(
 554            HomeTeam: item.HomeTeam,
 555            AwayTeam: item.AwayTeam,
 556            RepredictionIndex: 0);
 557
            // The activity may be null (every later use is null-conditional); in that case no trace id is
            // available and the check further down throws.
 558        using var activity = Telemetry.Source.StartActivity("experiment-item-run");
 559        ConfigureTraceContext(
 560            activity,
 561            request.RunName,
 562            experimentName,
 563            request.RunDescription,
 564            datasetName,
 565            runMetadata,
 566            item,
 567            outcome.TippSpielId,
 568            traceTags,
 569            propagatedMetadata,
 570            evaluationTimestamp,
 571            predictionService.GetMatchPromptPath(runMetadata.IncludeJustification));
 572
 573        SetExperimentItemMetadata(activity, CreateExperimentItemMetadataJson(item));
 574        SetExperimentItemExpectedOutput(
 575            activity,
 576            CreateExperimentItemExpectedOutputJson(outcome.HomeGoals.Value, outcome.AwayGoals.Value));
 577        SetTraceAndRootObservationInput(activity, CreateExperimentItemInputJson(item));
 578
 579        var traceId = activity?.TraceId.ToString();
 580        if (string.IsNullOrWhiteSpace(traceId))
 581        {
 582            throw new InvalidOperationException(
 583                $"Trace creation failed for {item.HomeTeam} vs {item.AwayTeam}; no trace id was available.");
 584        }
 585
            // The dataset run item is created before the prediction call, so it already exists when the
            // prediction fails further down.
 586        var datasetRunItem = await _langfuseClient.CreateDatasetRunItemAsync(
 587            new LangfuseCreateDatasetRunItemRequest(
 588                request.RunName,
 589                item.SliceDatasetItemId,
 590                traceId,
 591                request.RunDescription,
 592                runMetadataPayload,
 593                activity?.SpanId.ToString()),
 594            cancellationToken);
 595        SetExperimentRunId(activity, datasetRunItem.DatasetRunId);
 596
 597        var prediction = await predictionService.PredictMatchAsync(
 598            promptMatch,
 599            contextDocuments,
 600            runMetadata.IncludeJustification,
 601            telemetryMetadata,
 602            cancellationToken);
 603
            // A null prediction is written to the trace output as an error object before throwing.
 604        if (prediction is null)
 605        {
 606            SetTraceAndRootObservationOutput(
 607                activity,
 608                JsonSerializer.Serialize(new { error = "Failed to generate prediction" }, TraceJsonOptions));
 609            throw new InvalidOperationException(
 610                $"Failed to generate prediction for {item.HomeTeam} vs {item.AwayTeam} on matchday {item.Matchday}.");
 611        }
 612
 613        SetTraceAndRootObservationOutput(activity, JsonSerializer.Serialize(prediction, TraceJsonOptions));
 614
 615        var itemScores = PreparedExperimentSupport.CalculateScores(prediction, outcome.HomeGoals.Value, outcome.AwayGoal
 616        await PostItemScoreAsync(
 617            datasetRunItem.DatasetRunId,
 618            datasetName,
 619            request.RunName,
 620            experimentName,
 621            runMetadata,
 622            item,
 623            traceId,
 624            activity?.SpanId.ToString(),
 625            itemScores,
 626            cancellationToken);
 627
 628        return new PreparedExperimentExecutionResult(
 629            datasetRunItem.DatasetRunId,
 630            new PreparedExperimentExecutionSummary(
 631                item.SliceDatasetItemId,
 632                item.SourceDatasetItemId,
 633                request.RunName,
 634                traceId,
 635                prediction,
 636                itemScores,
 637                traceTags,
 638                null,
 639                "placed",
 640                item.FixtureIndex,
 641                item.RepetitionIndex));
 642    }
 643
    /// <summary>
    /// Records one participant's community prediction for a single manifest item: emits the
    /// "experiment-item-run" trace, links it to the Langfuse dataset run, and posts the item score.
    /// </summary>
    /// <remarks>
    /// If <paramref name="predictionsBySourceDatasetItemId"/> has no entry for the item's
    /// <c>SourceDatasetItemId</c>, a placeholder prediction with status "missed" and 0 Kicktipp
    /// points is used instead, so the item still produces a trace and a score.
    /// </remarks>
    /// <returns>
    /// The dataset run id returned by <c>CreateDatasetRunItemAsync</c> together with the execution
    /// summary for this item.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The started activity was null or exposed no trace id.
    /// </exception>
 644    private async Task<PreparedExperimentExecutionResult> ExecuteCommunityItemAsync(
 645        PreparedExperimentManifestItem item,
 646        PreparedExperimentParticipantManifest participant,
 647        IReadOnlyDictionary<string, PreparedExperimentParticipantPrediction> predictionsBySourceDatasetItemId,
 648        string runName,
 649        string experimentName,
 650        string? runDescription,
 651        string datasetName,
 652        PreparedExperimentRunMetadata runMetadata,
 653        IReadOnlyList<string> traceTags,
 654        IReadOnlyDictionary<string, string> propagatedMetadata,
 655        JsonElement runMetadataPayload,
 656        CancellationToken cancellationToken)
 657    {
        // Fall back to a synthetic "missed" prediction worth 0 points when the participant has no tip for this item.
 658        var participantPrediction = predictionsBySourceDatasetItemId.TryGetValue(item.SourceDatasetItemId, out var predi
 659            ? prediction
 660            : new PreparedExperimentParticipantPrediction
 661            {
 662                SourceDatasetItemId = item.SourceDatasetItemId,
 663                Status = "missed",
 664                KicktippPoints = 0
 665            };
 666
 667        var predictionPayload = CreateCommunityPredictionPayload(participantPrediction);
 668
        // Root activity for the item; the trace context, input, output and item metadata are tagged onto it.
 669        using var activity = Telemetry.Source.StartActivity("experiment-item-run");
 670        ConfigureTraceContext(
 671            activity,
 672            runName,
 673            experimentName,
 674            runDescription,
 675            datasetName,
 676            runMetadata,
 677            item,
 678            item.TippSpielId,
 679            traceTags,
 680            propagatedMetadata);
 681
 682        SetExperimentItemMetadata(activity, CreateExperimentItemMetadataJson(item));
 683        SetTraceAndRootObservationInput(activity, CreateExperimentItemInputJson(item));
 684        SetTraceAndRootObservationOutput(activity, predictionPayload.GetRawText());
 685
        // The activity may be null; without a trace id the dataset run item cannot be linked, so fail fast.
 686        var traceId = activity?.TraceId.ToString();
 687        if (string.IsNullOrWhiteSpace(traceId))
 688        {
 689            throw new InvalidOperationException(
 690                $"Trace creation failed for {item.HomeTeam} vs {item.AwayTeam}; no trace id was available.");
 691        }
 692
 693        var datasetRunItem = await _langfuseClient.CreateDatasetRunItemAsync(
 694            new LangfuseCreateDatasetRunItemRequest(
 695                runName,
 696                item.SliceDatasetItemId,
 697                traceId,
 698                runDescription,
 699                runMetadataPayload,
 700                activity?.SpanId.ToString()),
 701            cancellationToken);
 702        SetExperimentRunId(activity, datasetRunItem.DatasetRunId);
 703
        // Child observation describing the participant's prediction; its span id (when available)
        // is preferred over the root span id as the score's observation id below.
 704        string? predictionObservationId = null;
 705        using (var observation = Telemetry.Source.StartActivity(runMetadata.ObservationName ?? "community-match-predicti
 706        {
 707            predictionObservationId = observation?.SpanId.ToString();
 708            ConfigureCommunityPredictionObservation(observation, participant, participantPrediction, item, predictionPay
 709        }
 710
 711        var itemScores = new ExperimentItemScores(participantPrediction.KicktippPoints);
 712        await PostItemScoreAsync(
 713            datasetRunItem.DatasetRunId,
 714            datasetName,
 715            runName,
 716            experimentName,
 717            runMetadata,
 718            item,
 719            traceId,
 720            predictionObservationId ?? activity?.SpanId.ToString(),
 721            itemScores,
 722            cancellationToken);
 723
 724        return new PreparedExperimentExecutionResult(
 725            datasetRunItem.DatasetRunId,
 726            new PreparedExperimentExecutionSummary(
 727                item.SliceDatasetItemId,
 728                item.SourceDatasetItemId,
 729                runName,
 730                traceId,
 731                CreatePredictionOrNull(participantPrediction),
 732                itemScores,
 733                traceTags,
 734                null,
 735                participantPrediction.Status,
 736                item.FixtureIndex,
 737                item.RepetitionIndex));
 738    }
 739
    /// <summary>
    /// Summarizes the scores of all executed items and posts the run-level aggregates
    /// ("total_kicktipp_points" and "avg_kicktipp_points") to Langfuse for the given dataset run.
    /// </summary>
    /// <remarks>
    /// Both scores carry the serialized <paramref name="runMetadata"/> as metadata, use the
    /// "sdk-experiment" environment, and get an id from
    /// <c>PreparedExperimentSupport.CreateScoreId</c> based on the score name and dataset run id.
    /// </remarks>
    /// <returns>The aggregate scores computed by <c>PreparedExperimentSupport.SummarizeExecutionScores</c>.</returns>
 740    private async Task<ExperimentAggregateScores> PostRunScoresAsync(
 741        string datasetRunId,
 742        PreparedExperimentRunMetadata runMetadata,
 743        IReadOnlyList<PreparedExperimentExecutionSummary> executionSummaries,
 744        CancellationToken cancellationToken)
 745    {
 746        var aggregateScores = PreparedExperimentSupport.SummarizeExecutionScores(
 747            executionSummaries,
 748            runMetadata.TaskType);
 749        var runMetadataPayload = JsonSerializer.SerializeToElement(runMetadata, PreparedExperimentCommandSupport.JsonOpt
 750
        // Total points across the run.
 751        await _langfuseClient.CreateScoreAsync(
 752            new LangfuseCreateScoreRequest(
 753                "total_kicktipp_points",
 754                aggregateScores.TotalKicktippPoints,
 755                DatasetRunId: datasetRunId,
 756                Comment: $"Aggregate score for {runMetadata.SampleSize} item(s)",
 757                Id: PreparedExperimentSupport.CreateScoreId("total_kicktipp_points", datasetRunId),
 758                Metadata: runMetadataPayload,
 759                Environment: "sdk-experiment"),
 760            cancellationToken);
 761
        // Average points per item, posted as a second run-level score.
 762        await _langfuseClient.CreateScoreAsync(
 763            new LangfuseCreateScoreRequest(
 764                "avg_kicktipp_points",
 765                aggregateScores.AvgKicktippPoints,
 766                DatasetRunId: datasetRunId,
 767                Comment: $"Aggregate score for {runMetadata.SampleSize} item(s)",
 768                Id: PreparedExperimentSupport.CreateScoreId("avg_kicktipp_points", datasetRunId),
 769                Metadata: runMetadataPayload,
 770                Environment: "sdk-experiment"),
 771            cancellationToken);
 772
 773        return aggregateScores;
 774    }
 775
 776    private async Task PostItemScoreAsync(
 777        string datasetRunId,
 778        string datasetName,
 779        string runName,
 780        string experimentName,
 781        PreparedExperimentRunMetadata runMetadata,
 782        PreparedExperimentManifestItem item,
 783        string traceId,
 784        string? observationId,
 785        ExperimentItemScores itemScores,
 786        CancellationToken cancellationToken)
 787    {
 788        var metadata = JsonSerializer.SerializeToElement(new
 789        {
 790            datasetRunId,
 791            datasetRunName = runName,
 792            datasetName,
 793            datasetItemId = item.SliceDatasetItemId,
 794            sourceDatasetItemId = item.SourceDatasetItemId,
 795            experiment_name = experimentName,
 796            experiment_run_name = runName,
 797            task = runMetadata.TaskType,
 798            runSubjectKind = runMetadata.RunSubjectKind,
 799            runSubjectId = runMetadata.RunSubjectId,
 800            runSubjectDisplayName = runMetadata.RunSubjectDisplayName,
 801            reasoningEffort = runMetadata.ReasoningEffort,
 802            item.HomeTeam,
 803            item.AwayTeam,
 804            item.Matchday,
 805            item.TippSpielId,
 806            item.FixtureIndex,
 807            item.RepetitionIndex
 808        }, PreparedExperimentCommandSupport.JsonOptions);
 809
 810        await _langfuseClient.CreateScoreAsync(
 811            new LangfuseCreateScoreRequest(
 812                "kicktipp_points",
 813                itemScores.KicktippPoints,
 814                TraceId: traceId,
 815                ObservationId: string.IsNullOrWhiteSpace(observationId) ? null : observationId,
 816                DataType: "NUMERIC",
 817                Comment: $"Item score for {item.HomeTeam} vs {item.AwayTeam}",
 818                Id: PreparedExperimentSupport.CreateScoreId(
 819                    "kicktipp_points",
 820                    datasetRunId,
 821                    traceId,
 822                    observationId,
 823                    item.SliceDatasetItemId),
 824                Metadata: metadata,
 825                Environment: "sdk-experiment"),
 826            cancellationToken);
 827    }
 828
    /// <summary>
    /// Polls Langfuse for the dataset run items of <paramref name="runName"/> until the reported
    /// total reaches <paramref name="expectedCount"/>.
    /// </summary>
    /// <remarks>
    /// Makes up to six attempts, reporting progress and waiting two seconds after each attempt that
    /// falls short, then performs one final fetch whose result is returned regardless of its total.
    /// The list call is always made with the arguments <c>1</c> and a limit equal to
    /// <paramref name="expectedCount"/> clamped to 1..100 (presumably page number and page size —
    /// confirm against the client), so the returned page can hold fewer items than expected.
    /// </remarks>
 829    private async Task<LangfusePaginatedResponse<LangfuseDatasetRunItem>> WaitForDatasetRunItemsAsync(
 830        string datasetId,
 831        string runName,
 832        int expectedCount,
 833        CancellationToken cancellationToken)
 834    {
 835        var limit = Math.Min(100, Math.Max(1, expectedCount));
 836
 837        for (var attempt = 0; attempt < 6; attempt += 1)
 838        {
 839            var datasetRunItems = await _langfuseClient.ListDatasetRunItemsAsync(
 840                datasetId,
 841                runName,
 842                1,
 843                limit,
 844                cancellationToken);
 845
            // Completion is judged by the reported total, not by the number of items on this page.
 846            if (datasetRunItems.Meta.TotalItems >= expectedCount)
 847            {
 848                return datasetRunItems;
 849            }
 850
 851            PreparedExperimentSupport.ReportProgress(
 852                $"Waiting for Langfuse dataset run items for '{runName}': {datasetRunItems.Meta.TotalItems}/{expectedCou
 853            await Task.Delay(TimeSpan.FromSeconds(2), cancellationToken);
 854        }
 855
        // Attempts exhausted: return whatever Langfuse reports now, even if still incomplete.
 856        return await _langfuseClient.ListDatasetRunItemsAsync(datasetId, runName, 1, limit, cancellationToken);
 857    }
 858
 859    private async Task<bool> DeleteExistingRunIfRequestedAsync(
 860        string datasetName,
 861        string runName,
 862        bool replaceRun,
 863        CancellationToken cancellationToken)
 864    {
 865        if (!replaceRun)
 866        {
 867            return false;
 868        }
 869
 870        return await _langfuseClient.DeleteDatasetRunAsync(datasetName, runName, cancellationToken);
 871    }
 872
 873    private static IReadOnlyDictionary<string, PersistedMatchOutcome> LoadOutcomeDictionary(
 874        IEnumerable<PersistedMatchOutcome> outcomes)
 875    {
 876        return outcomes.ToDictionary(
 877            outcome => BuildOutcomeKey(outcome.HomeTeam, outcome.AwayTeam, outcome.Matchday),
 878            StringComparer.OrdinalIgnoreCase);
 879    }
 880
    /// <summary>
    /// Loads the persisted outcomes for every distinct matchday referenced by the manifest items
    /// (in ascending matchday order) and merges them into one case-insensitive lookup keyed by
    /// outcome key.
    /// </summary>
    /// <remarks>
    /// Entries are merged with the dictionary indexer, so an outcome loaded for a later matchday
    /// overwrites an earlier one with the same key instead of throwing.
    /// </remarks>
 881    private static async Task<IReadOnlyDictionary<string, PersistedMatchOutcome>> LoadOutcomesAsync(
 882        IMatchOutcomeRepository matchOutcomeRepository,
 883        string communityContext,
 884        PreparedExperimentManifest manifest,
 885        CancellationToken cancellationToken)
 886    {
 887        var dictionary = new Dictionary<string, PersistedMatchOutcome>(StringComparer.OrdinalIgnoreCase);
 888
 889        foreach (var matchday in manifest.Items.Select(item => item.Matchday).Distinct().OrderBy(matchday => matchday))
 890        {
 891            var outcomes = await matchOutcomeRepository.GetMatchdayOutcomesAsync(matchday, communityContext, cancellatio
 892            foreach (var pair in LoadOutcomeDictionary(outcomes))
 893            {
 894                dictionary[pair.Key] = pair.Value;
 895            }
 896        }
 897
 898        return dictionary;
 899    }
 900
    /// <summary>
    /// Tags the root activity of an experiment item with the Langfuse trace name, environment,
    /// session, trace tags, experiment identifiers and trace metadata.
    /// </summary>
    /// <remarks>
    /// The caller-supplied <paramref name="propagatedMetadata"/> entries are written first, followed
    /// by the fixed keys set here; whether a later write to the same key replaces the earlier one
    /// depends on <c>LangfuseActivityPropagation.SetTraceMetadata</c>. Every call tolerates a null
    /// <paramref name="activity"/> at this call site (null-conditional access or a helper taking a
    /// nullable activity).
    /// </remarks>
 901    private static void ConfigureTraceContext(
 902        Activity? activity,
 903        string runName,
 904        string experimentName,
 905        string? runDescription,
 906        string datasetName,
 907        PreparedExperimentRunMetadata runMetadata,
 908        PreparedExperimentManifestItem item,
 909        string? tippSpielId,
 910        IReadOnlyList<string> traceTags,
 911        IReadOnlyDictionary<string, string> propagatedMetadata,
 912        DateTimeOffset? evaluationTimestamp = null,
 913        string? promptTemplatePath = null)
 914    {
 915        activity?.SetTag("langfuse.trace.name", "experiment-item-run");
 916        LangfuseActivityPropagation.SetEnvironment(activity, "sdk-experiment");
 917        LangfuseActivityPropagation.SetSessionId(activity, runName);
 918        LangfuseActivityPropagation.SetTraceTags(activity, traceTags);
 919
        // NOTE(review): the experiment name receives runName, while experimentName is only recorded
        // as "experiment_name" trace metadata below — confirm this is intentional.
 920        LangfuseActivityPropagation.SetExperimentName(activity, runName);
 921        LangfuseActivityPropagation.SetExperimentDescription(activity, runDescription);
 922        LangfuseActivityPropagation.SetExperimentItemId(activity, item.SliceDatasetItemId);
 923        LangfuseActivityPropagation.SetExperimentItemRootObservationId(activity, activity?.SpanId.ToString());
 924
 925        foreach (var metadata in propagatedMetadata)
 926        {
 927            LangfuseActivityPropagation.SetTraceMetadata(activity, metadata.Key, metadata.Value);
 928        }
 929
 930        LangfuseActivityPropagation.SetTraceMetadata(activity, "experiment_name", experimentName);
 931        LangfuseActivityPropagation.SetTraceMetadata(activity, "experiment_run_name", runName);
 932        LangfuseActivityPropagation.SetTraceMetadata(activity, "datasetName", datasetName, propagateToObservations: fals
 933        LangfuseActivityPropagation.SetTraceMetadata(activity, "datasetItemId", item.SliceDatasetItemId, propagateToObse
 934        LangfuseActivityPropagation.SetTraceMetadata(activity, "dataset_item_id", item.SliceDatasetItemId, propagateToOb
 935        LangfuseActivityPropagation.SetTraceMetadata(activity, "sourceDatasetItemId", item.SourceDatasetItemId, propagat
 936        LangfuseActivityPropagation.SetTraceMetadata(activity, "community", runMetadata.CommunityContext, propagateToObs
 937        LangfuseActivityPropagation.SetTraceMetadata(activity, "matchday", item.Matchday.ToString(), propagateToObservat
 938        LangfuseActivityPropagation.SetTraceMetadata(activity, "selectedMatch", $"{item.HomeTeam} vs {item.AwayTeam}", p
        // Team names are also written as delimited filter values under "homeTeams", "awayTeams" and "teams".
 939        LangfuseActivityPropagation.SetTraceMetadata(
 940            activity,
 941            "homeTeams",
 942            PredictionTelemetryMetadata.BuildDelimitedFilterValue([item.HomeTeam]),
 943            propagateToObservations: false);
 944        LangfuseActivityPropagation.SetTraceMetadata(
 945            activity,
 946            "awayTeams",
 947            PredictionTelemetryMetadata.BuildDelimitedFilterValue([item.AwayTeam]),
 948            propagateToObservations: false);
 949        LangfuseActivityPropagation.SetTraceMetadata(
 950            activity,
 951            "teams",
 952            PredictionTelemetryMetadata.BuildDelimitedFilterValue([item.HomeTeam, item.AwayTeam]),
 953            propagateToObservations: false);
        // The evaluation timestamp is optional and is written in round-trip ("O") format when present.
 954        if (evaluationTimestamp is not null)
 955        {
 956            LangfuseActivityPropagation.SetTraceMetadata(
 957                activity,
 958                "evaluationTimestamp",
 959                evaluationTimestamp.Value.ToString("O"),
 960                propagateToObservations: false);
 961        }
 962
 963        LangfuseActivityPropagation.SetTraceMetadata(activity, "tippSpielId", tippSpielId, propagateToObservations: fals
 964        LangfuseActivityPropagation.SetTraceMetadata(activity, "promptTemplatePath", promptTemplatePath, propagateToObse
 965    }
 966
 967    private static void SetTraceAndRootObservationInput(Activity? activity, string inputJson)
 968    {
 969        if (activity is null || string.IsNullOrWhiteSpace(inputJson))
 970        {
 971            return;
 972        }
 973
 974        activity.SetTag("langfuse.trace.input", inputJson);
 975        activity.SetTag("langfuse.observation.input", inputJson);
 976    }
 977
 978    private static void SetExperimentItemExpectedOutput(Activity? activity, string expectedOutputJson)
 979    {
 980        if (activity is null || string.IsNullOrWhiteSpace(expectedOutputJson))
 981        {
 982            return;
 983        }
 984
 985        activity.SetTag("langfuse.experiment.item.expected_output", expectedOutputJson);
 986    }
 987
 988    private static void SetExperimentItemMetadata(Activity? activity, string metadataJson)
 989    {
 990        if (activity is null || string.IsNullOrWhiteSpace(metadataJson))
 991        {
 992            return;
 993        }
 994
 995        activity.SetTag("langfuse.experiment.item.metadata", metadataJson);
 996    }
 997
 998    private static void SetTraceAndRootObservationOutput(Activity? activity, string outputJson)
 999    {
 1000        if (activity is null || string.IsNullOrWhiteSpace(outputJson))
 1001        {
 1002            return;
 1003        }
 1004
 1005        activity.SetTag("langfuse.trace.output", outputJson);
 1006        activity.SetTag("langfuse.observation.output", outputJson);
 1007    }
 1008
 1009    private static void SetExperimentRunId(Activity? activity, string datasetRunId)
 1010    {
 1011        if (activity is null || string.IsNullOrWhiteSpace(datasetRunId))
 1012        {
 1013            return;
 1014        }
 1015
 1016        LangfuseActivityPropagation.SetExperimentRunId(activity, datasetRunId);
 1017    }
 1018
 1019    private static string CreateExperimentItemInputJson(PreparedExperimentManifestItem item)
 1020    {
 1021        return JsonSerializer.Serialize(new
 1022        {
 1023            fixture = $"{item.HomeTeam} vs {item.AwayTeam}",
 1024            item.StartsAt
 1025        }, TraceJsonOptions);
 1026    }
 1027
 1028    private static string CreateExperimentItemExpectedOutputJson(int homeGoals, int awayGoals)
 1029    {
 1030        return JsonSerializer.Serialize(new
 1031        {
 1032            score = $"{homeGoals}:{awayGoals}"
 1033        }, TraceJsonOptions);
 1034    }
 1035
 1036    private static string CreateExperimentItemMetadataJson(PreparedExperimentManifestItem item)
 1037    {
 1038        return JsonSerializer.Serialize(new
 1039        {
 1040            item.SourceDatasetItemId,
 1041            item.SliceDatasetItemId,
 1042            item.HomeTeam,
 1043            item.AwayTeam,
 1044            item.Matchday,
 1045            item.TippSpielId,
 1046            item.FixtureIndex,
 1047            item.RepetitionIndex
 1048        }, TraceJsonOptions);
 1049    }
 1050
    /// <summary>
    /// Picks the manifest participants a community run should cover, in a stable order.
    /// </summary>
    /// <remarks>
    /// Participants are ordered by display name (ordinal, case-insensitive) and then by participant
    /// id (ordinal). If <c>request.ParticipantIds</c> is non-empty, only those participants are kept
    /// and every requested id must exist in the manifest. If <c>request.ParticipantLimit</c> is set,
    /// only the first that many participants of the ordered list are kept.
    /// </remarks>
    /// <exception cref="InvalidOperationException">
    /// A requested participant id is not in the manifest, or no participant remains after filtering.
    /// </exception>
 1051    private static IReadOnlyList<PreparedExperimentParticipantManifest> SelectParticipants(
 1052        PreparedExperimentManifest manifest,
 1053        PreparedExperimentCommunityRunRequest request)
 1054    {
 1055        var participants = manifest.Participants
 1056            .OrderBy(participant => participant.DisplayName, StringComparer.OrdinalIgnoreCase)
 1057            .ThenBy(participant => participant.ParticipantId, StringComparer.Ordinal)
 1058            .ToList();
 1059
 1060        if (request.ParticipantIds.Count > 0)
 1061        {
            // NOTE(review): Contains uses the comparer of the ParticipantIds set, while the missing-id
            // check below uses Ordinal — confirm callers build the set with an ordinal comparer.
 1062            var filteredParticipants = participants
 1063                .Where(participant => request.ParticipantIds.Contains(participant.ParticipantId))
 1064                .ToList();
 1065            var missingParticipantIds = request.ParticipantIds
 1066                .Except(filteredParticipants.Select(participant => participant.ParticipantId), StringComparer.Ordinal)
 1067                .OrderBy(participantId => participantId, StringComparer.Ordinal)
 1068                .ToList();
 1069            if (missingParticipantIds.Count > 0)
 1070            {
 1071                throw new InvalidOperationException(
 1072                    $"The community-to-date manifest does not contain participant id(s): {string.Join(", ", missingParti
 1073            }
 1074
 1075            participants = filteredParticipants;
 1076        }
 1077
        // The limit is applied after ordering and filtering.
 1078        if (request.ParticipantLimit is not null)
 1079        {
 1080            participants = participants.Take(request.ParticipantLimit.Value).ToList();
 1081        }
 1082
 1083        if (participants.Count == 0)
 1084        {
 1085            throw new InvalidOperationException("No participants remain after applying the requested community-to-date f
 1086        }
 1087
 1088        return participants;
 1089    }
 1090
    /// <summary>
    /// Builds the run metadata for one participant's "community-to-date" run from the manifest.
    /// </summary>
    /// <remarks>
    /// Several fields fall back when the manifest leaves them blank or empty: <c>SliceKind</c> and
    /// <c>SampleMethod</c> default to "community-to-date"; <c>SelectedItemIdsHash</c> is computed
    /// from the selected item ids (or, if there are none, from the items' slice dataset item ids);
    /// <c>SampleSize</c> falls back to the manifest item count. The participant's display name is
    /// used for both <c>Model</c> and <c>RunSubjectDisplayName</c>.
    /// </remarks>
 1091    private static PreparedExperimentRunMetadata BuildCommunityRunMetadata(
 1092        PreparedExperimentManifest manifest,
 1093        PreparedExperimentParticipantManifest participant,
 1094        string datasetName,
 1095        string startedAtUtc,
 1096        int batchSize)
 1097    {
 1098        return new PreparedExperimentRunMetadata
 1099        {
 1100            Runner = "community-match-experiment-runner",
 1101            TaskType = "community-to-date",
 1102            CommunityContext = manifest.CommunityContext,
 1103            Competition = manifest.Competition,
 1104            SourceDatasetName = manifest.SourceDatasetName,
 1105            DatasetName = datasetName,
 1106            SliceKind = string.IsNullOrWhiteSpace(manifest.SliceKind)
 1107                ? "community-to-date"
 1108                : manifest.SliceKind,
 1109            SliceKey = manifest.SliceKey,
 1110            SourcePoolKey = manifest.SourcePoolKey,
            // Reuse the manifest's hash when present; otherwise derive it from the selected ids,
            // or from the items themselves when no explicit selection exists.
 1111            SelectedItemIdsHash = string.IsNullOrWhiteSpace(manifest.SelectedItemIdsHash)
 1112                ? ExperimentArtifactSupport.ComputeSelectedItemIdsHash(
 1113                    manifest.SelectedItemIds.Count > 0
 1114                        ? manifest.SelectedItemIds
 1115                        : manifest.Items.Select(item => item.SliceDatasetItemId))
 1116                : manifest.SelectedItemIdsHash,
 1117            SelectedItemIdsCount = manifest.SelectedItemIds.Count > 0 ? manifest.SelectedItemIds.Count : manifest.Items.
 1118            SampleSize = manifest.SampleSize > 0 ? manifest.SampleSize : manifest.Items.Count,
 1119            StartedAtUtc = startedAtUtc,
 1120            SampleSeed = manifest.SampleSeed,
 1121            SampleMethod = string.IsNullOrWhiteSpace(manifest.SampleMethod)
 1122                ? "community-to-date"
 1123                : manifest.SampleMethod,
 1124            IncludeJustification = false,
 1125            SourceDatasetKind = "community-to-date",
 1126            DatasetItemIdMap = PreparedExperimentSupport.CreateDatasetItemIdMap(manifest),
 1127            Model = participant.DisplayName,
 1128            ObservationName = "community-match-prediction",
 1129            RunSubjectKind = "participant",
 1130            RunSubjectId = participant.ParticipantId,
 1131            RunSubjectDisplayName = participant.DisplayName,
 1132            BatchStrategy = "simple-batched",
 1133            BatchSize = batchSize,
 1134            BatchCount = null
 1135        };
 1136    }
 1137
    /// <summary>
    /// Builds the shared name prefix for all participant runs of one community execution:
    /// <c>community-to-date__{community}__{slice}__{timestamp}</c>.
    /// </summary>
    /// <remarks>
    /// The community and slice tokens are slugified; a blank <c>SliceKey</c> is replaced by a
    /// fallback literal before slugifying. The timestamp token comes from
    /// <c>BuildRunTimestampToken</c>.
    /// </remarks>
 1138    private static string BuildCommunityRunFamilyName(PreparedExperimentManifest manifest, string startedAtUtc)
 1139    {
 1140        var communityToken = ExperimentArtifactSupport.Slugify(manifest.CommunityContext);
 1141        var sliceToken = ExperimentArtifactSupport.Slugify(string.IsNullOrWhiteSpace(manifest.SliceKey) ? "community-to-
 1142        return $"community-to-date__{communityToken}__{sliceToken}__{BuildRunTimestampToken(startedAtUtc)}";
 1143    }
 1144
    /// <summary>
    /// Builds the run name for one participant by appending a slugified participant token
    /// (derived from the display name and participant id) to the run family name, separated by "__".
    /// </summary>
 1145    private static string BuildCommunityParticipantRunName(
 1146        string runFamilyName,
 1147        PreparedExperimentParticipantManifest participant)
 1148    {
 1149        var participantToken = ExperimentArtifactSupport.Slugify($"{participant.DisplayName}-{participant.ParticipantId}
 1150        return $"{runFamilyName}__{participantToken}";
 1151    }
 1152
 1153    private static string BuildRunTimestampToken(string startedAtUtc)
 1154    {
 1155        return startedAtUtc.ToLowerInvariant().Replace(':', '-');
 1156    }
 1157
 1158    private static JsonElement CreateCommunityPredictionPayload(PreparedExperimentParticipantPrediction prediction)
 1159    {
 1160        return JsonSerializer.SerializeToElement(new
 1161        {
 1162            status = prediction.Status,
 1163            homeGoals = prediction.HomeGoals,
 1164            awayGoals = prediction.AwayGoals,
 1165            kicktippPoints = prediction.KicktippPoints
 1166        }, PreparedExperimentCommandSupport.JsonOptions);
 1167    }
 1168
 1169    private static Prediction? CreatePredictionOrNull(PreparedExperimentParticipantPrediction prediction)
 1170    {
 1171        return prediction.HomeGoals is int homeGoals && prediction.AwayGoals is int awayGoals
 1172            ? new Prediction(homeGoals, awayGoals)
 1173            : null;
 1174    }
 1175
 1176    private static void ConfigureCommunityPredictionObservation(
 1177        Activity? activity,
 1178        PreparedExperimentParticipantManifest participant,
 1179        PreparedExperimentParticipantPrediction prediction,
 1180        PreparedExperimentManifestItem item,
 1181        JsonElement predictionPayload)
 1182    {
 1183        if (activity is null)
 1184        {
 1185            return;
 1186        }
 1187
 1188        activity.SetTag("langfuse.observation.type", "generation");
 1189        activity.SetTag("gen_ai.request.model", "kicktipp-community");
 1190        activity.SetTag("langfuse.observation.input", JsonSerializer.Serialize(new
 1191        {
 1192            source = "kicktipp-community",
 1193            participantId = participant.ParticipantId,
 1194            participantDisplayName = participant.DisplayName,
 1195            item.SourceDatasetItemId,
 1196            item.TippSpielId
 1197        }, TraceJsonOptions));
 1198        activity.SetTag("langfuse.observation.output", predictionPayload.GetRawText());
 1199        new PredictionTelemetryMetadata(item.HomeTeam, item.AwayTeam).ApplyToObservation(activity);
 1200        activity.SetTag("langfuse.observation.metadata.participantId", participant.ParticipantId);
 1201        activity.SetTag("langfuse.observation.metadata.participantDisplayName", participant.DisplayName);
 1202        activity.SetTag("langfuse.observation.metadata.predictionStatus", prediction.Status);
 1203        activity.SetTag("langfuse.observation.metadata.sourceDatasetItemId", item.SourceDatasetItemId);
 1204
 1205        if (!string.IsNullOrWhiteSpace(item.TippSpielId))
 1206        {
 1207            activity.SetTag("langfuse.observation.metadata.tippSpielId", item.TippSpielId);
 1208        }
 1209    }
 1210
    /// <summary>
    /// Resolves the dataset name for a run: the run metadata's <c>DatasetName</c> if it is not
    /// null, otherwise the manifest's <c>SliceDatasetName</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Both candidate names are null.</exception>
    // NOTE(review): only null triggers the fallback here; an empty or whitespace name is returned
    // as-is, unlike GetCommunityContext, which checks IsNullOrWhiteSpace — confirm this is intended.
 1211    private static string DeriveDatasetName(PreparedExperimentRunMetadata runMetadata, PreparedExperimentManifest manife
 1212    {
 1213        return runMetadata.DatasetName
 1214               ?? manifest.SliceDatasetName
 1215               ?? throw new InvalidOperationException("No dataset name was provided for the experiment run.");
 1216    }
 1217
    /// <summary>
    /// Resolves the community context for a run: the run metadata's value when it is not blank,
    /// otherwise the manifest's value when that is not blank.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Both the run metadata and the manifest have a null, empty or whitespace community context.
    /// </exception>
 1218    private static string GetCommunityContext(PreparedExperimentRunMetadata runMetadata, PreparedExperimentManifest mani
 1219    {
 1220        return !string.IsNullOrWhiteSpace(runMetadata.CommunityContext)
 1221            ? runMetadata.CommunityContext
 1222            : !string.IsNullOrWhiteSpace(manifest.CommunityContext)
 1223                ? manifest.CommunityContext
 1224                : throw new InvalidOperationException("Run metadata or manifest must contain communityContext.");
 1225    }
 1226
 1227    private static string BuildOutcomeKey(string homeTeam, string awayTeam, int matchday)
 1228    {
 1229        return string.Join("|", matchday, homeTeam.Trim(), awayTeam.Trim());
 1230    }
 1231
 1232    private static bool IsWarmupBatchTask(string taskType)
 1233    {
 1234        return string.Equals(taskType, "repeated-match", StringComparison.OrdinalIgnoreCase)
 1235            || string.Equals(taskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase);
 1236    }
 1237
    /// <summary>
    /// Builds a short human-readable description of how the run is batched, based on the task type
    /// (compared case-insensitively).
    /// </summary>
    /// <remarks>
    /// "repeated-match-slice" reports the parallelism (5 when unset) and a warmup plus the batch
    /// count (3 when unset, never below 0). "repeated-match" reports a warmup plus
    /// <paramref name="batchTotal"/> - 1 additional batches (never below 0). Any other task type
    /// reports the configured batch size.
    /// </remarks>
 1238    private static string DescribeBatching(PreparedExperimentRunMetadata runMetadata, int batchTotal)
 1239    {
 1240        if (string.Equals(runMetadata.TaskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase))
 1241        {
 1242            return $"parallelism {runMetadata.Parallelism ?? 5}, warmup plus {Math.Max(0, runMetadata.BatchCount ?? 3)} 
 1243        }
 1244
 1245        return string.Equals(runMetadata.TaskType, "repeated-match", StringComparison.OrdinalIgnoreCase)
 1246            ? $"warmup plus {Math.Max(0, batchTotal - 1)} additional batch(es)"
 1247            : $"batch size {runMetadata.BatchSize}";
 1248    }
 1249
    /// <summary>
    /// Result of executing a single experiment item: the Langfuse dataset run id the item was
    /// linked to, plus the execution summary for that item.
    /// </summary>
 1250    private sealed record PreparedExperimentExecutionResult(
 1251        string DatasetRunId,
 1252        PreparedExperimentExecutionSummary Summary);
 1253
    /// <summary>
    /// Pairs an optional instructions template provider with optional Langfuse prompt trace
    /// metadata. Both members are nullable.
    /// </summary>
    // NOTE(review): how a route is resolved and consumed is not visible in this part of the file.
 1254    private sealed record ExperimentPromptRoute(
 1255        IInstructionsTemplateProvider? TemplateProvider,
 1256        LangfusePromptTraceMetadata? TraceMetadata);
 1257}
 1258
/// <summary>
/// Input for a prepared experiment run: the manifest path, the run name, an optional description,
/// an optional run metadata file, whether an existing run should be replaced, and the run options.
/// </summary>
// NOTE(review): field meanings are inferred from their names; the consuming code is outside this view.
11259internal sealed record PreparedExperimentRunRequest(
11260    string ManifestPath,
11261    string RunName,
11262    string? RunDescription,
11263    string? RunMetadataFile,
11264    bool ReplaceRun,
11265    PreparedExperimentRunOptions Options);
 1266
/// <summary>
/// Input for a community ("community-to-date") experiment run across manifest participants.
/// </summary>
/// <remarks>
/// <c>ParticipantIds</c>, when non-empty, restricts the run to those participants, and
/// <c>ParticipantLimit</c>, when set, caps how many participants are processed (see
/// <c>SelectParticipants</c>). The remaining members are presumably the manifest location, optional
/// naming overrides, the replace flag and the batch size — their consumers are outside this view.
/// </remarks>
 1267internal sealed record PreparedExperimentCommunityRunRequest(
 1268    string ManifestPath,
 1269    string? RunFamilyName,
 1270    string? RunDescription,
 1271    string? DatasetName,
 1272    bool ReplaceRuns,
 1273    int BatchSize,
 1274    int? ParticipantLimit,
 1275    IReadOnlySet<string> ParticipantIds);