| | | 1 | | using System.Diagnostics; |
| | | 2 | | using System.Text.Json; |
| | | 3 | | using EHonda.KicktippAi.Core; |
| | | 4 | | using OpenAiIntegration; |
| | | 5 | | using Orchestrator.Infrastructure.Factories; |
| | | 6 | | using Orchestrator.Infrastructure.Langfuse; |
| | | 7 | | using Match = EHonda.KicktippAi.Core.Match; |
| | | 8 | | |
| | | 9 | | namespace Orchestrator.Commands.Observability.Experiments; |
| | | 10 | | |
| | | 11 | | internal sealed class PreparedExperimentRunExecutor |
| | | 12 | | { |
/// <summary>
/// JSON options used when payloads are serialized for trace output.
/// </summary>
/// <remarks>
/// <see cref="JsonSerializerDefaults.Web"/> already selects camel-case property names and
/// case-insensitive property matching; both settings are restated explicitly here.
/// </remarks>
private static readonly JsonSerializerOptions TraceJsonOptions = new(JsonSerializerDefaults.Web)
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    PropertyNameCaseInsensitive = true
};

// Collaborators handed in through the constructor.
private readonly IFirebaseServiceFactory _firebaseServiceFactory;
private readonly IOpenAiServiceFactory _openAiServiceFactory;
private readonly ILangfusePublicApiClient _langfuseClient;

/// <summary>
/// Creates the executor and stores the supplied collaborators unchanged.
/// </summary>
/// <param name="firebaseServiceFactory">Factory for the prediction, context and match-outcome repositories.</param>
/// <param name="openAiServiceFactory">Factory for the prediction service.</param>
/// <param name="langfuseClient">Client used for dataset runs, dataset run items and prompts.</param>
public PreparedExperimentRunExecutor(
    IFirebaseServiceFactory firebaseServiceFactory,
    IOpenAiServiceFactory openAiServiceFactory,
    ILangfusePublicApiClient langfuseClient) =>
    (_firebaseServiceFactory, _openAiServiceFactory, _langfuseClient) =
        (firebaseServiceFactory, openAiServiceFactory, langfuseClient);
| | | 32 | | |
| | | 33 | | /* Runs a prepared experiment described by a manifest file: executes all manifest items batch by batch, aggregates scores through PostRunScoresAsync, reads the dataset run back and returns a summary. Throws InvalidOperationException when no item result carries a datasetRunId or when the dataset run cannot be retrieved afterwards. */ public async Task<PreparedExperimentRunSummary> ExecuteAsync( |
| | | 34 | | string expectedTaskType, /* task type the manifest is checked against via EnsureTaskType; also selects batching defaults and batch layout */ |
| | | 35 | | PreparedExperimentRunRequest request, /* read in this method: ManifestPath, RunMetadataFile, Options, RunName, ReplaceRun; the whole request is also handed to ExecuteItemAsync */ |
| | | 36 | | CancellationToken cancellationToken) /* passed on to the awaited calls below */ |
| | | 37 | | { |
| | | 38 | | var manifest = await PreparedExperimentCommandSupport.LoadJsonFileAsync<PreparedExperimentManifest>( |
| | | 39 | | request.ManifestPath, |
| | | 40 | | cancellationToken); |
| | | 41 | | PreparedExperimentCommandSupport.ValidateManifest(manifest); |
| | | 42 | | PreparedExperimentCommandSupport.EnsureTaskType(manifest, expectedTaskType); |
| | | 43 | | /* Without a run-metadata file the metadata is built from the manifest and the request options; otherwise the file is loaded and passed through NormalizeRunMetadata. */ |
| | | 44 | | var runMetadata = string.IsNullOrWhiteSpace(request.RunMetadataFile) |
| | | 45 | | ? PreparedExperimentSupport.BuildRunMetadata(manifest, request.Options) |
| | | 46 | | : PreparedExperimentCommandSupport.NormalizeRunMetadata( |
| | | 47 | | await PreparedExperimentCommandSupport.LoadJsonFileAsync<PreparedExperimentRunMetadata>( |
| | | 48 | | request.RunMetadataFile, |
| | | 49 | | cancellationToken), |
| | | 50 | | manifest, |
| | | 51 | | request.Options); |
| | | 52 | | runMetadata = ApplyBatchingDefaults(runMetadata, expectedTaskType); /* overlays the batching defaults for this task type */ |
| | | 53 | | |
| | | 54 | | var communityContext = GetCommunityContext(runMetadata, manifest); |
| | | 55 | | var datasetName = DeriveDatasetName(runMetadata, manifest); |
| | | 56 | | var explicitEvaluationTime = PreparedExperimentCommandSupport.ParseExplicitEvaluationTime(runMetadata); |
| | | 57 | | var evaluationTimestampPolicy = explicitEvaluationTime is null /* the policy is only parsed when no explicit evaluation time exists */ |
| | | 58 | | ? PreparedExperimentCommandSupport.ParseEvaluationTimestampPolicy(runMetadata) |
| | | 59 | | : null; |
| | | 60 | | var deletedExistingRun = await DeleteExistingRunIfRequestedAsync( /* awaited before any item is executed; the helper itself is not shown in this part of the file */ |
| | | 61 | | datasetName, |
| | | 62 | | request.RunName, |
| | | 63 | | request.ReplaceRun, |
| | | 64 | | cancellationToken); |
| | | 65 | | |
| | | 66 | | var predictionRepository = _firebaseServiceFactory.CreatePredictionRepository(); |
| | | 67 | | var contextRepository = _firebaseServiceFactory.CreateContextRepository(); |
| | | 68 | | var matchOutcomeRepository = _firebaseServiceFactory.CreateMatchOutcomeRepository(); |
| | | 69 | | var promptRoute = await ResolvePromptRouteAsync(runMetadata, cancellationToken); |
| | | 70 | | if (promptRoute.TraceMetadata is { } promptTraceMetadata) /* when the route carries trace metadata, its prompt version is recorded in the run metadata */ |
| | | 71 | | { |
| | | 72 | | runMetadata = runMetadata with |
| | | 73 | | { |
| | | 74 | | LangfusePromptVersion = promptTraceMetadata.Version |
| | | 75 | | }; |
| | | 76 | | } |
| | | 77 | | /* Prediction options: the FlexProcessingWithStandardFallback preset with prompt trace metadata and reasoning effort from the run; the max output token count of the preset is kept when the run metadata has none. */ |
| | | 78 | | var predictionServiceOptions = PredictionServiceOptions.FlexProcessingWithStandardFallback with |
| | | 79 | | { |
| | | 80 | | LangfusePromptTraceMetadata = promptRoute.TraceMetadata, |
| | | 81 | | ReasoningEffort = runMetadata.ReasoningEffort, |
| | | 82 | | MaxOutputTokenCount = runMetadata.MaxOutputTokenCount |
| | | 83 | | ?? PredictionServiceOptions.FlexProcessingWithStandardFallback.MaxOutputTokenCount |
| | | 84 | | }; |
| | | 85 | | var predictionService = promptRoute.TemplateProvider is null /* the overload taking a template provider is used only when the route resolved one */ |
| | | 86 | | ? _openAiServiceFactory.CreatePredictionService( |
| | | 87 | | request.Options.Model, |
| | | 88 | | predictionServiceOptions) |
| | | 89 | | : _openAiServiceFactory.CreatePredictionService( |
| | | 90 | | request.Options.Model, |
| | | 91 | | predictionServiceOptions, |
| | | 92 | | promptRoute.TemplateProvider); |
| | | 93 | | var reconstructionService = new MatchPromptReconstructionService( |
| | | 94 | | predictionRepository, |
| | | 95 | | contextRepository, |
| | | 96 | | promptRoute.TemplateProvider ?? new InstructionsTemplateProvider(PromptsFileProvider.Create())); /* without a route provider, a provider over PromptsFileProvider.Create() is used */ |
| | | 97 | | |
| | | 98 | | var outcomesByKey = await LoadOutcomesAsync(matchOutcomeRepository, communityContext, manifest, cancellationToke |
| | | 99 | | var experimentName = PreparedExperimentSupport.DeriveExperimentName(runMetadata, request.RunName); |
| | | 100 | | var traceTags = PreparedExperimentSupport.DeriveTraceTags(runMetadata); |
| | | 101 | | var propagatedMetadata = PreparedExperimentSupport.DerivePropagatedMetadata(runMetadata); |
| | | 102 | | var runMetadataPayload = PreparedExperimentSupport.BuildLangfuseExperimentMetadata( |
| | | 103 | | runMetadata, |
| | | 104 | | experimentName, |
| | | 105 | | request.RunName, |
| | | 106 | | new Dictionary<string, string?> |
| | | 107 | | { |
| | | 108 | | ["openaiServiceTierStrategy"] = "flex-first-standard-fallback", |
| | | 109 | | ["openaiReasoningEffort"] = runMetadata.ReasoningEffort |
| | | 110 | | }); |
| | | 111 | | var batches = BuildBatches(manifest.Items, runMetadata, expectedTaskType); |
| | | 112 | | var executionSummaries = new List<PreparedExperimentExecutionSummary>(); |
| | | 113 | | string? datasetRunId = null; /* receives the first non-null id reported by an item result in the loop below */ |
| | | 114 | | var completedExecutionCount = 0; /* only feeds the progress messages */ |
| | | 115 | | /* The loop below awaits the batches one after another; the items of one batch are started together and awaited with Task.WhenAll, and their summaries are collected in result order. */ |
| | | 116 | | PreparedExperimentSupport.ReportProgress( |
| | | 117 | | $"Starting {expectedTaskType} run '{request.RunName}' for model '{request.Options.Model}' with sample size { |
| | | 118 | | |
| | | 119 | | for (var batchIndex = 0; batchIndex < batches.Count; batchIndex += 1) |
| | | 120 | | { |
| | | 121 | | var batch = batches[batchIndex]; |
| | | 122 | | var batchStart = completedExecutionCount + 1; |
| | | 123 | | var batchEnd = completedExecutionCount + batch.Count; |
| | | 124 | | |
| | | 125 | | PreparedExperimentSupport.ReportProgress( |
| | | 126 | | $"Batch {batchIndex + 1}/{batches.Count}: executions {batchStart}-{batchEnd} of {manifest.Items.Count}." |
| | | 127 | | |
| | | 128 | | var batchResults = await Task.WhenAll(batch.Select(item => ExecuteItemAsync( |
| | | 129 | | item, |
| | | 130 | | request, |
| | | 131 | | experimentName, |
| | | 132 | | datasetName, |
| | | 133 | | runMetadata, |
| | | 134 | | explicitEvaluationTime, |
| | | 135 | | evaluationTimestampPolicy, |
| | | 136 | | predictionRepository, |
| | | 137 | | reconstructionService, |
| | | 138 | | predictionService, |
| | | 139 | | outcomesByKey, |
| | | 140 | | traceTags, |
| | | 141 | | propagatedMetadata, |
| | | 142 | | runMetadataPayload, |
| | | 143 | | cancellationToken))); |
| | | 144 | | |
| | | 145 | | foreach (var batchResult in batchResults) |
| | | 146 | | { |
| | | 147 | | datasetRunId ??= batchResult.DatasetRunId; |
| | | 148 | | executionSummaries.Add(batchResult.Summary); |
| | | 149 | | } |
| | | 150 | | |
| | | 151 | | completedExecutionCount += batchResults.Length; |
| | | 152 | | PreparedExperimentSupport.ReportProgress( |
| | | 153 | | $"Completed batch {batchIndex + 1}/{batches.Count}: {completedExecutionCount}/{manifest.Items.Count} exe |
| | | 154 | | } |
| | | 155 | | /* datasetRunId is still unset here when no item result carried one, which includes the case of an empty batch list. */ |
| | | 156 | | if (string.IsNullOrWhiteSpace(datasetRunId)) |
| | | 157 | | { |
| | | 158 | | throw new InvalidOperationException($"Dataset run '{request.RunName}' did not return a datasetRunId."); |
| | | 159 | | } |
| | | 160 | | |
| | | 161 | | var aggregateScores = await PostRunScoresAsync(datasetRunId, runMetadata, executionSummaries, cancellationToken) |
| | | 162 | | var datasetRun = await _langfuseClient.GetDatasetRunAsync(datasetName, request.RunName, cancellationToken) |
| | | 163 | | ?? throw new InvalidOperationException( |
| | | 164 | | $"Dataset run '{request.RunName}' could not be retrieved from dataset '{datasetName}'."); |
| | | 165 | | var datasetRunItems = await WaitForDatasetRunItemsAsync( /* helper not shown here; it is given the number of manifest items */ |
| | | 166 | | datasetRun.DatasetId, |
| | | 167 | | request.RunName, |
| | | 168 | | manifest.Items.Count, |
| | | 169 | | cancellationToken); |
| | | 170 | | /* NOTE(review): the summary arguments are positional. By position the two literal 1 values line up with datasetRunSummaries.Count and participantIndex + 1 in ExecuteCommunityToDateAsync, and the run name is passed for two consecutive arguments; confirm against the record definitions. */ |
| | | 171 | | return new PreparedExperimentRunSummary( |
| | | 172 | | datasetName, |
| | | 173 | | request.RunName, |
| | | 174 | | request.RunName, |
| | | 175 | | runMetadata.TaskType ?? expectedTaskType, |
| | | 176 | | request.Options.Model, |
| | | 177 | | deletedExistingRun, |
| | | 178 | | manifest.Items.Count, |
| | | 179 | | runMetadata.BatchStrategy ?? expectedTaskType, |
| | | 180 | | runMetadata.BatchSize, |
| | | 181 | | runMetadata.BatchCount, |
| | | 182 | | runMetadata.Parallelism, |
| | | 183 | | executionSummaries.Count, |
| | | 184 | | 1, |
| | | 185 | | aggregateScores, |
| | | 186 | | [new PreparedExperimentDatasetRunSummary( |
| | | 187 | | 1, |
| | | 188 | | request.RunName, |
| | | 189 | | datasetRunId, |
| | | 190 | | datasetRunItems.Meta.TotalItems, |
| | | 191 | | aggregateScores, |
| | | 192 | | executionSummaries.FirstOrDefault(), |
| | | 193 | | executionSummaries.LastOrDefault())], |
| | | 194 | | executionSummaries.FirstOrDefault(), |
| | | 195 | | executionSummaries.LastOrDefault()); |
| | | 196 | | } |
| | | 197 | | |
| | | 198 | | /* Executes a community-to-date manifest. For each selected participant, in order, one dataset run is produced in which every manifest item is handed to ExecuteCommunityItemAsync together with the predictions of that participant (when several predictions share a SourceDatasetItemId, the first one is kept). Within a participant run the batches are awaited one after another and the items of a batch via Task.WhenAll. Per run, scores go through PostRunScoresAsync and the dataset run is read back; the returned summary spans all participant runs and its overall scores are summarized from every item score. Throws InvalidOperationException when the manifest has no participants, when no dataset name can be determined, when a run yields no datasetRunId, or when a run cannot be retrieved. */ public async Task<PreparedExperimentRunSummary> ExecuteCommunityToDateAsync( |
| | | 199 | | PreparedExperimentCommunityRunRequest request, /* read in this method: ManifestPath, DatasetName, BatchSize, RunFamilyName, ReplaceRuns, RunDescription */ |
| | | 200 | | CancellationToken cancellationToken) /* passed on to the awaited calls below */ |
| | | 201 | | { |
| | | 202 | | var manifest = await PreparedExperimentCommandSupport.LoadJsonFileAsync<PreparedExperimentManifest>( |
| | | 203 | | request.ManifestPath, |
| | | 204 | | cancellationToken); |
| | | 205 | | PreparedExperimentCommandSupport.ValidateManifest(manifest); |
| | | 206 | | PreparedExperimentCommandSupport.EnsureTaskType(manifest, "community-to-date"); |
| | | 207 | | |
| | | 208 | | if (manifest.Participants.Count == 0) |
| | | 209 | | { |
| | | 210 | | throw new InvalidOperationException("Community-to-date manifests must contain at least one participant."); |
| | | 211 | | } |
| | | 212 | | /* Dataset name: a non-blank request value wins (trimmed); otherwise the slice dataset name of the manifest is used. */ |
| | | 213 | | var datasetName = string.IsNullOrWhiteSpace(request.DatasetName) |
| | | 214 | | ? manifest.SliceDatasetName |
| | | 215 | | : request.DatasetName.Trim(); |
| | | 216 | | if (string.IsNullOrWhiteSpace(datasetName)) |
| | | 217 | | { |
| | | 218 | | throw new InvalidOperationException("No dataset name was provided for the community-to-date run."); |
| | | 219 | | } |
| | | 220 | | |
| | | 221 | | var startedAtUtc = ExperimentArtifactSupport.FormatStartedAtUtc(DateTimeOffset.UtcNow); /* one start timestamp for the whole family; used for the default family name and for the metadata of every participant run */ |
| | | 222 | | var batchSize = request.BatchSize; |
| | | 223 | | var participants = SelectParticipants(manifest, request); |
| | | 224 | | var runFamilyName = string.IsNullOrWhiteSpace(request.RunFamilyName) /* a non-blank request value wins (trimmed); otherwise a name is built from manifest and start timestamp */ |
| | | 225 | | ? BuildCommunityRunFamilyName(manifest, startedAtUtc) |
| | | 226 | | : request.RunFamilyName.Trim(); |
| | | 227 | | /* Accumulators across all participant runs. */ |
| | | 228 | | var datasetRunSummaries = new List<PreparedExperimentDatasetRunSummary>(); |
| | | 229 | | var executionSummaries = new List<PreparedExperimentExecutionSummary>(); |
| | | 230 | | var scoreEntries = new List<ExperimentItemScores>(); |
| | | 231 | | var deletedAnyExistingRun = false; /* becomes true as soon as one participant run reports a deleted predecessor */ |
| | | 232 | | |
| | | 233 | | PreparedExperimentSupport.ReportProgress( |
| | | 234 | | $"Starting community-to-date run family '{runFamilyName}' with {participants.Count} participant run(s) and s |
| | | 235 | | |
| | | 236 | | for (var participantIndex = 0; participantIndex < participants.Count; participantIndex += 1) |
| | | 237 | | { |
| | | 238 | | var participant = participants[participantIndex]; |
| | | 239 | | var runName = BuildCommunityParticipantRunName(runFamilyName, participant); |
| | | 240 | | var runMetadata = BuildCommunityRunMetadata(manifest, participant, datasetName, startedAtUtc, batchSize); |
| | | 241 | | var deletedExistingRun = await DeleteExistingRunIfRequestedAsync( |
| | | 242 | | datasetName, |
| | | 243 | | runName, |
| | | 244 | | request.ReplaceRuns, |
| | | 245 | | cancellationToken); |
| | | 246 | | deletedAnyExistingRun |= deletedExistingRun; |
| | | 247 | | |
| | | 248 | | var traceTags = PreparedExperimentSupport.DeriveTraceTags(runMetadata); |
| | | 249 | | var propagatedMetadata = PreparedExperimentSupport.DerivePropagatedMetadata(runMetadata); |
| | | 250 | | var experimentName = runFamilyName; |
| | | 251 | | var runMetadataPayload = PreparedExperimentSupport.BuildLangfuseExperimentMetadata( |
| | | 252 | | runMetadata, |
| | | 253 | | experimentName, |
| | | 254 | | runName); |
| | | 255 | | var predictionsBySourceDatasetItemId = participant.Predictions |
| | | 256 | | .GroupBy(prediction => prediction.SourceDatasetItemId, StringComparer.Ordinal) |
| | | 257 | | .ToDictionary(group => group.Key, group => group.First(), StringComparer.Ordinal); |
| | | 258 | | var batches = PreparedExperimentSupport.CreateBatchChunks(manifest.Items, batchSize); |
| | | 259 | | var participantScoreEntries = new List<ExperimentItemScores>(); |
| | | 260 | | var participantExecutionSummaries = new List<PreparedExperimentExecutionSummary>(); |
| | | 261 | | string? datasetRunId = null; |
| | | 262 | | var completedExecutionCount = 0; |
| | | 263 | | |
| | | 264 | | PreparedExperimentSupport.ReportProgress( |
| | | 265 | | $"Participant {participantIndex + 1}/{participants.Count}: starting run '{runName}' for '{participant.Di |
| | | 266 | | /* Batches of this participant run are processed sequentially; completedExecutionCount is only used to compute the range shown in the progress message. */ |
| | | 267 | | for (var batchIndex = 0; batchIndex < batches.Count; batchIndex += 1) |
| | | 268 | | { |
| | | 269 | | var batch = batches[batchIndex]; |
| | | 270 | | var batchStart = completedExecutionCount + 1; |
| | | 271 | | var batchEnd = completedExecutionCount + batch.Count; |
| | | 272 | | |
| | | 273 | | PreparedExperimentSupport.ReportProgress( |
| | | 274 | | $"Participant {participant.DisplayName}: batch {batchIndex + 1}/{batches.Count}, executions {batchSt |
| | | 275 | | |
| | | 276 | | var batchResults = await Task.WhenAll(batch.Select(item => ExecuteCommunityItemAsync( |
| | | 277 | | item, |
| | | 278 | | participant, |
| | | 279 | | predictionsBySourceDatasetItemId, |
| | | 280 | | runName, |
| | | 281 | | experimentName, |
| | | 282 | | request.RunDescription, |
| | | 283 | | datasetName, |
| | | 284 | | runMetadata, |
| | | 285 | | traceTags, |
| | | 286 | | propagatedMetadata, |
| | | 287 | | runMetadataPayload, |
| | | 288 | | cancellationToken))); |
| | | 289 | | |
| | | 290 | | foreach (var batchResult in batchResults) |
| | | 291 | | { |
| | | 292 | | datasetRunId ??= batchResult.DatasetRunId; |
| | | 293 | | participantScoreEntries.Add(batchResult.Summary.Scores); |
| | | 294 | | participantExecutionSummaries.Add(batchResult.Summary); |
| | | 295 | | } |
| | | 296 | | |
| | | 297 | | completedExecutionCount += batchResults.Length; |
| | | 298 | | } |
| | | 299 | | |
| | | 300 | | if (string.IsNullOrWhiteSpace(datasetRunId)) |
| | | 301 | | { |
| | | 302 | | throw new InvalidOperationException($"Dataset run '{runName}' did not return a datasetRunId."); |
| | | 303 | | } |
| | | 304 | | |
| | | 305 | | var aggregateScores = await PostRunScoresAsync(datasetRunId, runMetadata, participantExecutionSummaries, can |
| | | 306 | | var datasetRun = await _langfuseClient.GetDatasetRunAsync(datasetName, runName, cancellationToken) |
| | | 307 | | ?? throw new InvalidOperationException( |
| | | 308 | | $"Dataset run '{runName}' could not be retrieved from dataset '{datasetName}'."); |
| | | 309 | | var datasetRunItems = await WaitForDatasetRunItemsAsync( |
| | | 310 | | datasetRun.DatasetId, |
| | | 311 | | runName, |
| | | 312 | | manifest.Items.Count, |
| | | 313 | | cancellationToken); |
| | | 314 | | |
| | | 315 | | datasetRunSummaries.Add(new PreparedExperimentDatasetRunSummary( |
| | | 316 | | participantIndex + 1, |
| | | 317 | | runName, |
| | | 318 | | datasetRunId, |
| | | 319 | | datasetRunItems.Meta.TotalItems, |
| | | 320 | | aggregateScores, |
| | | 321 | | participantExecutionSummaries.FirstOrDefault(), |
| | | 322 | | participantExecutionSummaries.LastOrDefault())); |
| | | 323 | | executionSummaries.AddRange(participantExecutionSummaries); |
| | | 324 | | scoreEntries.AddRange(participantScoreEntries); |
| | | 325 | | } |
| | | 326 | | |
| | | 327 | | var overallAggregateScores = PreparedExperimentSupport.SummarizeScores(scoreEntries); |
| | | 328 | | return new PreparedExperimentRunSummary( |
| | | 329 | | datasetName, |
| | | 330 | | runFamilyName, |
| | | 331 | | runFamilyName, |
| | | 332 | | "community-to-date", |
| | | 333 | | "community-predictions", |
| | | 334 | | deletedAnyExistingRun, |
| | | 335 | | manifest.Items.Count, |
| | | 336 | | "simple-batched", |
| | | 337 | | batchSize, |
| | | 338 | | null, |
| | | 339 | | null, |
| | | 340 | | executionSummaries.Count, |
| | | 341 | | datasetRunSummaries.Count, |
| | | 342 | | overallAggregateScores, |
| | | 343 | | datasetRunSummaries, |
| | | 344 | | executionSummaries.FirstOrDefault(), |
| | | 345 | | executionSummaries.LastOrDefault()); |
| | | 346 | | } |
| | | 347 | | |
/// <summary>
/// Splits the manifest items into batches, choosing the layout from the task type.
/// </summary>
/// <param name="items">Manifest items to distribute.</param>
/// <param name="runMetadata">
/// Source of <c>BatchCount</c>, <c>BatchSize</c> and <c>Parallelism</c>; unset values fall back to
/// 3, 10 and 5 respectively.
/// </param>
/// <param name="expectedTaskType">Task type that selects the batching scheme.</param>
/// <returns>The batches produced by the selected scheme.</returns>
private IReadOnlyList<IReadOnlyList<PreparedExperimentManifestItem>> BuildBatches(
    IReadOnlyList<PreparedExperimentManifestItem> items,
    PreparedExperimentRunMetadata runMetadata,
    string expectedTaskType)
{
    // Repeated match slices are checked first and use their own fixture-aware layout.
    var isRepeatedMatchSlice = string.Equals(
        expectedTaskType,
        "repeated-match-slice",
        StringComparison.OrdinalIgnoreCase);
    if (isRepeatedMatchSlice)
    {
        var repeatedBatchCount = runMetadata.BatchCount ?? 3;
        var repeatedParallelism = runMetadata.Parallelism ?? 5;
        return CreateRepeatedMatchSliceBatches(items, repeatedBatchCount, repeatedParallelism);
    }

    // Other warm-up task types are split by batch count.
    if (IsWarmupBatchTask(expectedTaskType))
    {
        return PreparedExperimentSupport.CreateWarmupThenBatchChunks(items, runMetadata.BatchCount ?? 3);
    }

    // Everything else is chunked by batch size.
    return PreparedExperimentSupport.CreateBatchChunks(items, runMetadata.BatchSize ?? 10);
}
| | | 365 | | |
/// <summary>
/// Builds the batch layout for repeated-match-slice runs.
/// </summary>
/// <remarks>
/// Items are grouped per <c>SourceDatasetItemId</c>; each group is ordered by repetition and split
/// with <c>PreparedExperimentSupport.CreateWarmupThenBatchChunks</c> into its own sequence of chunks.
/// Groups are then taken <paramref name="parallelism"/> at a time, and the chunks sitting at the same
/// position within those groups are concatenated into one batch.
/// </remarks>
/// <param name="items">Manifest items to distribute.</param>
/// <param name="batchCount">Batch count forwarded to <c>CreateWarmupThenBatchChunks</c> for every group.</param>
/// <param name="parallelism">Number of groups whose chunks are merged into the same batches; at least 1.</param>
/// <returns>The merged batches; empty merges are left out.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="parallelism"/> is below 1.</exception>
internal static IReadOnlyList<IReadOnlyList<PreparedExperimentManifestItem>> CreateRepeatedMatchSliceBatches(
    IReadOnlyList<PreparedExperimentManifestItem> items,
    int batchCount,
    int parallelism)
{
    if (parallelism < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(parallelism), parallelism, "Parallelism must be at least 1.");
    }

    // Groups ordered by their smallest fixture index (missing indices last), ties broken by key.
    var fixturesInOrder = items
        .GroupBy(item => item.SourceDatasetItemId, StringComparer.Ordinal)
        .OrderBy(fixture => fixture.Min(item => item.FixtureIndex ?? int.MaxValue))
        .ThenBy(fixture => fixture.Key, StringComparer.Ordinal);

    // One chunk sequence per group, built from its items ordered by repetition index.
    var fixtureWorkflows = fixturesInOrder
        .Select(fixture =>
        {
            var repetitions = fixture
                .OrderBy(item => item.RepetitionIndex ?? int.MaxValue)
                .ThenBy(item => item.SliceDatasetItemId, StringComparer.Ordinal)
                .ToList();
            return PreparedExperimentSupport.CreateWarmupThenBatchChunks(repetitions, batchCount);
        })
        .ToList();

    var mergedBatches = new List<IReadOnlyList<PreparedExperimentManifestItem>>();
    foreach (var wave in fixtureWorkflows.Chunk(parallelism))
    {
        var stepCount = wave.Max(workflow => workflow.Count);
        for (var step = 0; step < stepCount; step++)
        {
            // Concatenate the chunk at this position from every workflow that is long enough.
            var merged = new List<PreparedExperimentManifestItem>();
            foreach (var workflow in wave)
            {
                if (step < workflow.Count)
                {
                    merged.AddRange(workflow[step]);
                }
            }

            if (merged.Count > 0)
            {
                mergedBatches.Add(merged);
            }
        }
    }

    return mergedBatches;
}
| | | 412 | | |
| | | 413 | | /* Determines the prompt source of a run. A blank PromptSource counts as local and yields a route without template provider and without trace metadata. For langfuse the named prompt is fetched through the Langfuse client, GetTextPrompt is called on it with the result discarded (presumably so that a prompt without text content fails at this point; confirm), and the returned route carries a LangfuseTextPromptTemplateProvider plus name and version of the fetched prompt. Throws InvalidOperationException for any other source, when IncludeJustification is set, or when LangfusePromptName is blank; throws FileNotFoundException when the prompt lookup returns null. */ private async Task<ExperimentPromptRoute> ResolvePromptRouteAsync( |
| | | 414 | | PreparedExperimentRunMetadata runMetadata, /* read here: PromptSource, IncludeJustification, LangfusePromptName, LangfusePromptLabel, LangfusePromptVersion */ |
| | | 415 | | CancellationToken cancellationToken) /* passed to the prompt lookup */ |
| | | 416 | | { |
| | | 417 | | var promptSource = string.IsNullOrWhiteSpace(runMetadata.PromptSource) /* normalized with Trim and ToLowerInvariant before it is compared */ |
| | | 418 | | ? "local" |
| | | 419 | | : runMetadata.PromptSource.Trim().ToLowerInvariant(); |
| | | 420 | | |
| | | 421 | | if (promptSource == "local") |
| | | 422 | | { |
| | | 423 | | return new ExperimentPromptRoute(null, null); |
| | | 424 | | } |
| | | 425 | | |
| | | 426 | | if (promptSource != "langfuse") |
| | | 427 | | { |
| | | 428 | | throw new InvalidOperationException($"Unsupported prompt source '{runMetadata.PromptSource}'."); |
| | | 429 | | } |
| | | 430 | | /* From here on the source is langfuse; the checks below reject run metadata this route does not handle. */ |
| | | 431 | | if (runMetadata.IncludeJustification) |
| | | 432 | | { |
| | | 433 | | throw new InvalidOperationException( |
| | | 434 | | "The Langfuse prompt source POC only supports match prompts without justification."); |
| | | 435 | | } |
| | | 436 | | |
| | | 437 | | if (string.IsNullOrWhiteSpace(runMetadata.LangfusePromptName)) |
| | | 438 | | { |
| | | 439 | | throw new InvalidOperationException("Run metadata must contain langfusePromptName when promptSource is langf |
| | | 440 | | } |
| | | 441 | | |
| | | 442 | | var prompt = await _langfuseClient.GetPromptAsync( |
| | | 443 | | runMetadata.LangfusePromptName, |
| | | 444 | | runMetadata.LangfusePromptLabel, |
| | | 445 | | runMetadata.LangfusePromptVersion, |
| | | 446 | | cancellationToken) |
| | | 447 | | ?? throw new FileNotFoundException( |
| | | 448 | | $"Langfuse prompt '{runMetadata.LangfusePromptName}' was not found."); |
| | | 449 | | |
| | | 450 | | _ = prompt.GetTextPrompt(); |
| | | 451 | | var templateProvider = new LangfuseTextPromptTemplateProvider( |
| | | 452 | | _langfuseClient, |
| | | 453 | | runMetadata.LangfusePromptName, |
| | | 454 | | runMetadata.LangfusePromptLabel, |
| | | 455 | | runMetadata.LangfusePromptVersion, |
| | | 456 | | prompt); |
| | | 457 | | |
| | | 458 | | return new ExperimentPromptRoute( |
| | | 459 | | templateProvider, |
| | | 460 | | new LangfusePromptTraceMetadata(prompt.Name, prompt.Version)); |
| | | 461 | | } |
| | | 462 | | |
/// <summary>
/// Returns a copy of the run metadata whose batching fields are filled in for the given task type.
/// </summary>
/// <remarks>
/// Warm-up task types keep or default the batch count (3), clear the batch size and, for
/// repeated-match-slice only, default the parallelism to 5. All other task types keep or default
/// the batch size (10) and clear batch count and parallelism. A blank batch strategy is replaced by
/// the name of the applied scheme; a non-blank one is kept.
/// </remarks>
/// <param name="runMetadata">Metadata to copy; the instance itself is not modified.</param>
/// <param name="expectedTaskType">Task type that decides which defaults apply.</param>
/// <returns>The adjusted copy.</returns>
private static PreparedExperimentRunMetadata ApplyBatchingDefaults(
    PreparedExperimentRunMetadata runMetadata,
    string expectedTaskType)
{
    // Simple batching: only the batch size is meaningful.
    if (!IsWarmupBatchTask(expectedTaskType))
    {
        return runMetadata with
        {
            BatchStrategy = string.IsNullOrWhiteSpace(runMetadata.BatchStrategy)
                ? "simple-batched"
                : runMetadata.BatchStrategy,
            BatchSize = runMetadata.BatchSize ?? 10,
            BatchCount = null,
            Parallelism = null
        };
    }

    // Warm-up batching: only repeated match slices receive a default parallelism.
    var isRepeatedMatchSlice = string.Equals(
        expectedTaskType,
        "repeated-match-slice",
        StringComparison.OrdinalIgnoreCase);

    return runMetadata with
    {
        BatchStrategy = string.IsNullOrWhiteSpace(runMetadata.BatchStrategy)
            ? "warmup-plus-batches"
            : runMetadata.BatchStrategy,
        BatchCount = runMetadata.BatchCount ?? 3,
        BatchSize = null,
        Parallelism = isRepeatedMatchSlice
            ? runMetadata.Parallelism ?? 5
            : runMetadata.Parallelism
    };
}
| | | 492 | | |
| | | 493 | | /* Executes a single manifest item inside an experiment-item-run activity: looks up the persisted outcome, reconstructs the match prompt at the evaluation timestamp, creates a dataset run item for the trace, requests the prediction, calculates its scores against the outcome and posts them through PostItemScoreAsync. Returns the dataset run id together with an execution summary. Throws InvalidOperationException when the outcome is missing or incomplete, when neither an explicit evaluation time nor a timestamp policy is available, when no trace id exists, or when the prediction service returns null. */ private async Task<PreparedExperimentExecutionResult> ExecuteItemAsync( |
| | | 494 | | PreparedExperimentManifestItem item, |
| | | 495 | | PreparedExperimentRunRequest request, /* read here: RunName, RunDescription, Options.Model */ |
| | | 496 | | string experimentName, |
| | | 497 | | string datasetName, |
| | | 498 | | PreparedExperimentRunMetadata runMetadata, |
| | | 499 | | DateTimeOffset? explicitEvaluationTime, /* when set, used directly as the evaluation timestamp */ |
| | | 500 | | EvaluationTimestampPolicy? evaluationTimestampPolicy, /* consulted only when explicitEvaluationTime is null */ |
| | | 501 | | IPredictionRepository predictionRepository, |
| | | 502 | | MatchPromptReconstructionService reconstructionService, |
| | | 503 | | IPredictionService predictionService, |
| | | 504 | | IReadOnlyDictionary<string, PersistedMatchOutcome> outcomesByKey, /* looked up with BuildOutcomeKey(home team, away team, matchday) */ |
| | | 505 | | IReadOnlyList<string> traceTags, |
| | | 506 | | IReadOnlyDictionary<string, string> propagatedMetadata, |
| | | 507 | | JsonElement runMetadataPayload, /* sent along with the dataset run item */ |
| | | 508 | | CancellationToken cancellationToken) |
| | | 509 | | { |
| | | 510 | | var outcomeKey = BuildOutcomeKey(item.HomeTeam, item.AwayTeam, item.Matchday); |
| | | 511 | | if (!outcomesByKey.TryGetValue(outcomeKey, out var outcome)) |
| | | 512 | | { |
| | | 513 | | throw new InvalidOperationException( |
| | | 514 | | $"No persisted match outcome was found for {item.HomeTeam} vs {item.AwayTeam} on matchday {item.Matchday |
| | | 515 | | } |
| | | 516 | | |
| | | 517 | | if (!outcome.HasOutcome || outcome.HomeGoals is null || outcome.AwayGoals is null) |
| | | 518 | | { |
| | | 519 | | throw new InvalidOperationException( |
| | | 520 | | $"The selected match does not have a completed persisted outcome yet: {item.HomeTeam} vs {item.AwayTeam} |
| | | 521 | | } |
| | | 522 | | /* A stored match is preferred for the prompt; without one a Match is built from item and outcome data. Both paths go through RehydrateForPromptOutput. */ |
| | | 523 | | var storedMatch = await predictionRepository.GetStoredMatchAsync( |
| | | 524 | | item.HomeTeam, |
| | | 525 | | item.AwayTeam, |
| | | 526 | | item.Matchday, |
| | | 527 | | (PredictionModelConfig?)null, |
| | | 528 | | null, |
| | | 529 | | cancellationToken); |
| | | 530 | | |
| | | 531 | | var promptMatch = storedMatch is null |
| | | 532 | | ? ExperimentArtifactSupport.RehydrateForPromptOutput(new Match(item.HomeTeam, item.AwayTeam, outcome.StartsA |
| | | 533 | | : ExperimentArtifactSupport.RehydrateForPromptOutput(storedMatch); |
| | | 534 | | var evaluationTimestamp = explicitEvaluationTime /* the explicit time wins; otherwise the timestamp is resolved for this match from the policy */ |
| | | 535 | | ?? EvaluationTimestampResolver.Resolve( |
| | | 536 | | promptMatch, |
| | | 537 | | evaluationTimestampPolicy ?? throw new InvalidOperationException( |
| | | 538 | | "Run metadata must contain either evaluationTime or evaluationTimestampPolicy.")); |
| | | 539 | | var selection = MatchContextDocumentCatalog.ForMatch(item.HomeTeam, item.AwayTeam, runMetadata.CommunityContext! |
| | | 540 | | var reconstructedPrompt = await reconstructionService.ReconstructMatchPredictionPromptAtTimestampAsync( |
| | | 541 | | promptMatch, |
| | | 542 | | request.Options.Model, |
| | | 543 | | runMetadata.CommunityContext!, |
| | | 544 | | evaluationTimestamp, |
| | | 545 | | selection.RequiredDocumentNames, |
| | | 546 | | selection.OptionalDocumentNames, |
| | | 547 | | runMetadata.IncludeJustification, |
| | | 548 | | cancellationToken); |
| | | 549 | | /* Only document name and content of the resolved context documents are handed to the prediction service. */ |
| | | 550 | | var contextDocuments = reconstructedPrompt.ResolvedContextDocuments |
| | | 551 | | .Select(document => new DocumentContext(document.DocumentName, document.Content)) |
| | | 552 | | .ToList(); |
| | | 553 | | var telemetryMetadata = new PredictionTelemetryMetadata( |
| | | 554 | | HomeTeam: item.HomeTeam, |
| | | 555 | | AwayTeam: item.AwayTeam, |
| | | 556 | | RepredictionIndex: 0); |
| | | 557 | | /* StartActivity can return null, which is why activity is accessed with the null-conditional operator below. */ |
| | | 558 | | using var activity = Telemetry.Source.StartActivity("experiment-item-run"); |
| | | 559 | | ConfigureTraceContext( |
| | | 560 | | activity, |
| | | 561 | | request.RunName, |
| | | 562 | | experimentName, |
| | | 563 | | request.RunDescription, |
| | | 564 | | datasetName, |
| | | 565 | | runMetadata, |
| | | 566 | | item, |
| | | 567 | | outcome.TippSpielId, |
| | | 568 | | traceTags, |
| | | 569 | | propagatedMetadata, |
| | | 570 | | evaluationTimestamp, |
| | | 571 | | predictionService.GetMatchPromptPath(runMetadata.IncludeJustification)); |
| | | 572 | | |
| | | 573 | | SetExperimentItemMetadata(activity, CreateExperimentItemMetadataJson(item)); |
| | | 574 | | SetExperimentItemExpectedOutput( |
| | | 575 | | activity, |
| | | 576 | | CreateExperimentItemExpectedOutputJson(outcome.HomeGoals.Value, outcome.AwayGoals.Value)); |
| | | 577 | | SetTraceAndRootObservationInput(activity, CreateExperimentItemInputJson(item)); |
| | | 578 | | /* A missing trace id, for example from a null activity, aborts the item before the Langfuse client is called. */ |
| | | 579 | | var traceId = activity?.TraceId.ToString(); |
| | | 580 | | if (string.IsNullOrWhiteSpace(traceId)) |
| | | 581 | | { |
| | | 582 | | throw new InvalidOperationException( |
| | | 583 | | $"Trace creation failed for {item.HomeTeam} vs {item.AwayTeam}; no trace id was available."); |
| | | 584 | | } |
| | | 585 | | /* The dataset run item is created before the prediction is requested; its DatasetRunId is attached to the activity and returned to the caller. */ |
| | | 586 | | var datasetRunItem = await _langfuseClient.CreateDatasetRunItemAsync( |
| | | 587 | | new LangfuseCreateDatasetRunItemRequest( |
| | | 588 | | request.RunName, |
| | | 589 | | item.SliceDatasetItemId, |
| | | 590 | | traceId, |
| | | 591 | | request.RunDescription, |
| | | 592 | | runMetadataPayload, |
| | | 593 | | activity?.SpanId.ToString()), |
| | | 594 | | cancellationToken); |
| | | 595 | | SetExperimentRunId(activity, datasetRunItem.DatasetRunId); |
| | | 596 | | |
| | | 597 | | var prediction = await predictionService.PredictMatchAsync( |
| | | 598 | | promptMatch, |
| | | 599 | | contextDocuments, |
| | | 600 | | runMetadata.IncludeJustification, |
| | | 601 | | telemetryMetadata, |
| | | 602 | | cancellationToken); |
| | | 603 | | /* A null prediction is first written to the trace output as an error payload and then raised as a failure. */ |
| | | 604 | | if (prediction is null) |
| | | 605 | | { |
| | | 606 | | SetTraceAndRootObservationOutput( |
| | | 607 | | activity, |
| | | 608 | | JsonSerializer.Serialize(new { error = "Failed to generate prediction" }, TraceJsonOptions)); |
| | | 609 | | throw new InvalidOperationException( |
| | | 610 | | $"Failed to generate prediction for {item.HomeTeam} vs {item.AwayTeam} on matchday {item.Matchday}."); |
| | | 611 | | } |
| | | 612 | | |
| | | 613 | | SetTraceAndRootObservationOutput(activity, JsonSerializer.Serialize(prediction, TraceJsonOptions)); |
| | | 614 | | |
| | | 615 | | var itemScores = PreparedExperimentSupport.CalculateScores(prediction, outcome.HomeGoals.Value, outcome.AwayGoal |
| | | 616 | | await PostItemScoreAsync( |
| | | 617 | | datasetRunItem.DatasetRunId, |
| | | 618 | | datasetName, |
| | | 619 | | request.RunName, |
| | | 620 | | experimentName, |
| | | 621 | | runMetadata, |
| | | 622 | | item, |
| | | 623 | | traceId, |
| | | 624 | | activity?.SpanId.ToString(), |
| | | 625 | | itemScores, |
| | | 626 | | cancellationToken); |
| | | 627 | | |
| | | 628 | | return new PreparedExperimentExecutionResult( |
| | | 629 | | datasetRunItem.DatasetRunId, |
| | | 630 | | new PreparedExperimentExecutionSummary( |
| | | 631 | | item.SliceDatasetItemId, |
| | | 632 | | item.SourceDatasetItemId, |
| | | 633 | | request.RunName, |
| | | 634 | | traceId, |
| | | 635 | | prediction, |
| | | 636 | | itemScores, |
| | | 637 | | traceTags, |
| | | 638 | | null, /* NOTE(review): positional argument; its parameter name is not visible in this part of the file */ |
| | | 639 | | "placed", |
| | | 640 | | item.FixtureIndex, |
| | | 641 | | item.RepetitionIndex)); |
| | | 642 | | } |
| | | 643 | | |
| | | 644 | | /* Records one community participant's stored prediction for a manifest item: starts the item trace, creates the Langfuse dataset run item, posts the item score, and returns the execution result. */ private async Task<PreparedExperimentExecutionResult> ExecuteCommunityItemAsync( |
| | | 645 | | PreparedExperimentManifestItem item, |
| | | 646 | | PreparedExperimentParticipantManifest participant, |
| | | 647 | | IReadOnlyDictionary<string, PreparedExperimentParticipantPrediction> predictionsBySourceDatasetItemId, |
| | | 648 | | string runName, |
| | | 649 | | string experimentName, |
| | | 650 | | string? runDescription, |
| | | 651 | | string datasetName, |
| | | 652 | | PreparedExperimentRunMetadata runMetadata, |
| | | 653 | | IReadOnlyList<string> traceTags, |
| | | 654 | | IReadOnlyDictionary<string, string> propagatedMetadata, |
| | | 655 | | JsonElement runMetadataPayload, |
| | | 656 | | CancellationToken cancellationToken) |
| | | 657 | | { /* When the lookup has no entry for this item's source id, a synthetic prediction with Status "missed" and 0 points is used instead. The next row is cut off at the listing's column limit. */ |
| | | 658 | | var participantPrediction = predictionsBySourceDatasetItemId.TryGetValue(item.SourceDatasetItemId, out var predi |
| | | 659 | | ? prediction |
| | | 660 | | : new PreparedExperimentParticipantPrediction |
| | | 661 | | { |
| | | 662 | | SourceDatasetItemId = item.SourceDatasetItemId, |
| | | 663 | | Status = "missed", |
| | | 664 | | KicktippPoints = 0 |
| | | 665 | | }; |
| | | 666 | | |
| | | 667 | | var predictionPayload = CreateCommunityPredictionPayload(participantPrediction); |
| | | 668 | | /* Root activity for this item; the trace id and span id used below are read from it. */ |
| | | 669 | | using var activity = Telemetry.Source.StartActivity("experiment-item-run"); |
| | | 670 | | ConfigureTraceContext( |
| | | 671 | | activity, |
| | | 672 | | runName, |
| | | 673 | | experimentName, |
| | | 674 | | runDescription, |
| | | 675 | | datasetName, |
| | | 676 | | runMetadata, |
| | | 677 | | item, |
| | | 678 | | item.TippSpielId, |
| | | 679 | | traceTags, |
| | | 680 | | propagatedMetadata); |
| | | 681 | | /* The stored prediction payload is written as the trace and root-observation output; nothing is generated here. */ |
| | | 682 | | SetExperimentItemMetadata(activity, CreateExperimentItemMetadataJson(item)); |
| | | 683 | | SetTraceAndRootObservationInput(activity, CreateExperimentItemInputJson(item)); |
| | | 684 | | SetTraceAndRootObservationOutput(activity, predictionPayload.GetRawText()); |
| | | 685 | | /* A null activity yields a null trace id, which is treated as a hard failure for this item. */ |
| | | 686 | | var traceId = activity?.TraceId.ToString(); |
| | | 687 | | if (string.IsNullOrWhiteSpace(traceId)) |
| | | 688 | | { |
| | | 689 | | throw new InvalidOperationException( |
| | | 690 | | $"Trace creation failed for {item.HomeTeam} vs {item.AwayTeam}; no trace id was available."); |
| | | 691 | | } |
| | | 692 | | /* Link the trace and the root span id to the dataset item under this run name; the returned DatasetRunId is stamped back onto the activity. */ |
| | | 693 | | var datasetRunItem = await _langfuseClient.CreateDatasetRunItemAsync( |
| | | 694 | | new LangfuseCreateDatasetRunItemRequest( |
| | | 695 | | runName, |
| | | 696 | | item.SliceDatasetItemId, |
| | | 697 | | traceId, |
| | | 698 | | runDescription, |
| | | 699 | | runMetadataPayload, |
| | | 700 | | activity?.SpanId.ToString()), |
| | | 701 | | cancellationToken); |
| | | 702 | | SetExperimentRunId(activity, datasetRunItem.DatasetRunId); |
| | | 703 | | /* Child activity that represents the prediction itself. Its span id stays null if the activity was not created; the score call below then falls back to the root span id. Rows 705 and 708 are cut off at the listing's column limit. */ |
| | | 704 | | string? predictionObservationId = null; |
| | | 705 | | using (var observation = Telemetry.Source.StartActivity(runMetadata.ObservationName ?? "community-match-predicti |
| | | 706 | | { |
| | | 707 | | predictionObservationId = observation?.SpanId.ToString(); |
| | | 708 | | ConfigureCommunityPredictionObservation(observation, participant, participantPrediction, item, predictionPay |
| | | 709 | | } |
| | | 710 | | /* The item score is the participant's stored KicktippPoints value; it is not recomputed here. */ |
| | | 711 | | var itemScores = new ExperimentItemScores(participantPrediction.KicktippPoints); |
| | | 712 | | await PostItemScoreAsync( |
| | | 713 | | datasetRunItem.DatasetRunId, |
| | | 714 | | datasetName, |
| | | 715 | | runName, |
| | | 716 | | experimentName, |
| | | 717 | | runMetadata, |
| | | 718 | | item, |
| | | 719 | | traceId, |
| | | 720 | | predictionObservationId ?? activity?.SpanId.ToString(), |
| | | 721 | | itemScores, |
| | | 722 | | cancellationToken); |
| | | 723 | | /* The summary carries a Prediction only when both goal counts are present (see CreatePredictionOrNull) and reports the prediction's own Status. */ |
| | | 724 | | return new PreparedExperimentExecutionResult( |
| | | 725 | | datasetRunItem.DatasetRunId, |
| | | 726 | | new PreparedExperimentExecutionSummary( |
| | | 727 | | item.SliceDatasetItemId, |
| | | 728 | | item.SourceDatasetItemId, |
| | | 729 | | runName, |
| | | 730 | | traceId, |
| | | 731 | | CreatePredictionOrNull(participantPrediction), |
| | | 732 | | itemScores, |
| | | 733 | | traceTags, |
| | | 734 | | null, |
| | | 735 | | participantPrediction.Status, |
| | | 736 | | item.FixtureIndex, |
| | | 737 | | item.RepetitionIndex)); |
| | | 738 | | } |
| | | 739 | | |
| | | 740 | | /* Summarizes the execution scores for a run, posts total_kicktipp_points and avg_kicktipp_points as scores attached to the dataset run, and returns the aggregate. */ private async Task<ExperimentAggregateScores> PostRunScoresAsync( |
| | | 741 | | string datasetRunId, |
| | | 742 | | PreparedExperimentRunMetadata runMetadata, |
| | | 743 | | IReadOnlyList<PreparedExperimentExecutionSummary> executionSummaries, |
| | | 744 | | CancellationToken cancellationToken) |
| | | 745 | | { /* Row 749 is cut off at the listing's column limit; the serializer options argument is only partly visible. */ |
| | | 746 | | var aggregateScores = PreparedExperimentSupport.SummarizeExecutionScores( |
| | | 747 | | executionSummaries, |
| | | 748 | | runMetadata.TaskType); |
| | | 749 | | var runMetadataPayload = JsonSerializer.SerializeToElement(runMetadata, PreparedExperimentCommandSupport.JsonOpt |
| | | 750 | | /* Score ids are derived from the score name and the dataset run id via CreateScoreId. */ |
| | | 751 | | await _langfuseClient.CreateScoreAsync( |
| | | 752 | | new LangfuseCreateScoreRequest( |
| | | 753 | | "total_kicktipp_points", |
| | | 754 | | aggregateScores.TotalKicktippPoints, |
| | | 755 | | DatasetRunId: datasetRunId, |
| | | 756 | | Comment: $"Aggregate score for {runMetadata.SampleSize} item(s)", |
| | | 757 | | Id: PreparedExperimentSupport.CreateScoreId("total_kicktipp_points", datasetRunId), |
| | | 758 | | Metadata: runMetadataPayload, |
| | | 759 | | Environment: "sdk-experiment"), |
| | | 760 | | cancellationToken); |
| | | 761 | | /* Same comment, metadata and environment as the call above; only the score name and value differ. */ |
| | | 762 | | await _langfuseClient.CreateScoreAsync( |
| | | 763 | | new LangfuseCreateScoreRequest( |
| | | 764 | | "avg_kicktipp_points", |
| | | 765 | | aggregateScores.AvgKicktippPoints, |
| | | 766 | | DatasetRunId: datasetRunId, |
| | | 767 | | Comment: $"Aggregate score for {runMetadata.SampleSize} item(s)", |
| | | 768 | | Id: PreparedExperimentSupport.CreateScoreId("avg_kicktipp_points", datasetRunId), |
| | | 769 | | Metadata: runMetadataPayload, |
| | | 770 | | Environment: "sdk-experiment"), |
| | | 771 | | cancellationToken); |
| | | 772 | | |
| | | 773 | | return aggregateScores; |
| | | 774 | | } |
| | | 775 | | |
| | | 776 | | /* Posts the NUMERIC kicktipp_points score for a single item, attached to its trace and, when one is supplied, to an observation. */ private async Task PostItemScoreAsync( |
| | | 777 | | string datasetRunId, |
| | | 778 | | string datasetName, |
| | | 779 | | string runName, |
| | | 780 | | string experimentName, |
| | | 781 | | PreparedExperimentRunMetadata runMetadata, |
| | | 782 | | PreparedExperimentManifestItem item, |
| | | 783 | | string traceId, |
| | | 784 | | string? observationId, |
| | | 785 | | ExperimentItemScores itemScores, |
| | | 786 | | CancellationToken cancellationToken) |
| | | 787 | | { /* Score metadata: explicitly named members plus members projected from item; the emitted JSON names for the projected ones depend on PreparedExperimentCommandSupport.JsonOptions, which is not visible here. */ |
| | | 788 | | var metadata = JsonSerializer.SerializeToElement(new |
| | | 789 | | { |
| | | 790 | | datasetRunId, |
| | | 791 | | datasetRunName = runName, |
| | | 792 | | datasetName, |
| | | 793 | | datasetItemId = item.SliceDatasetItemId, |
| | | 794 | | sourceDatasetItemId = item.SourceDatasetItemId, |
| | | 795 | | experiment_name = experimentName, |
| | | 796 | | experiment_run_name = runName, |
| | | 797 | | task = runMetadata.TaskType, |
| | | 798 | | runSubjectKind = runMetadata.RunSubjectKind, |
| | | 799 | | runSubjectId = runMetadata.RunSubjectId, |
| | | 800 | | runSubjectDisplayName = runMetadata.RunSubjectDisplayName, |
| | | 801 | | reasoningEffort = runMetadata.ReasoningEffort, |
| | | 802 | | item.HomeTeam, |
| | | 803 | | item.AwayTeam, |
| | | 804 | | item.Matchday, |
| | | 805 | | item.TippSpielId, |
| | | 806 | | item.FixtureIndex, |
| | | 807 | | item.RepetitionIndex |
| | | 808 | | }, PreparedExperimentCommandSupport.JsonOptions); |
| | | 809 | | /* A blank observation id is sent as null in ObservationId, while the raw observationId value is still what is passed to CreateScoreId. */ |
| | | 810 | | await _langfuseClient.CreateScoreAsync( |
| | | 811 | | new LangfuseCreateScoreRequest( |
| | | 812 | | "kicktipp_points", |
| | | 813 | | itemScores.KicktippPoints, |
| | | 814 | | TraceId: traceId, |
| | | 815 | | ObservationId: string.IsNullOrWhiteSpace(observationId) ? null : observationId, |
| | | 816 | | DataType: "NUMERIC", |
| | | 817 | | Comment: $"Item score for {item.HomeTeam} vs {item.AwayTeam}", |
| | | 818 | | Id: PreparedExperimentSupport.CreateScoreId( |
| | | 819 | | "kicktipp_points", |
| | | 820 | | datasetRunId, |
| | | 821 | | traceId, |
| | | 822 | | observationId, |
| | | 823 | | item.SliceDatasetItemId), |
| | | 824 | | Metadata: metadata, |
| | | 825 | | Environment: "sdk-experiment"), |
| | | 826 | | cancellationToken); |
| | | 827 | | } |
| | | 828 | | |
| | | 829 | | /* Polls Langfuse for the run's dataset run items, up to six attempts with a two-second delay after each short result, until Meta.TotalItems reaches expectedCount; otherwise returns one final listing. */ private async Task<LangfusePaginatedResponse<LangfuseDatasetRunItem>> WaitForDatasetRunItemsAsync( |
| | | 830 | | string datasetId, |
| | | 831 | | string runName, |
| | | 832 | | int expectedCount, |
| | | 833 | | CancellationToken cancellationToken) |
| | | 834 | | { /* The limit is clamped to the range 1..100. The literal 1 passed to the list call is presumably the page number - confirm against ILangfusePublicApiClient. */ |
| | | 835 | | var limit = Math.Min(100, Math.Max(1, expectedCount)); |
| | | 836 | | |
| | | 837 | | for (var attempt = 0; attempt < 6; attempt += 1) |
| | | 838 | | { |
| | | 839 | | var datasetRunItems = await _langfuseClient.ListDatasetRunItemsAsync( |
| | | 840 | | datasetId, |
| | | 841 | | runName, |
| | | 842 | | 1, |
| | | 843 | | limit, |
| | | 844 | | cancellationToken); |
| | | 845 | | /* Completion is judged by the reported total, not by the number of items in the returned page. */ |
| | | 846 | | if (datasetRunItems.Meta.TotalItems >= expectedCount) |
| | | 847 | | { |
| | | 848 | | return datasetRunItems; |
| | | 849 | | } |
| | | 850 | | /* Not complete yet: report progress, then wait before the next attempt. Row 852 is cut off at the listing's column limit. */ |
| | | 851 | | PreparedExperimentSupport.ReportProgress( |
| | | 852 | | $"Waiting for Langfuse dataset run items for '{runName}': {datasetRunItems.Meta.TotalItems}/{expectedCou |
| | | 853 | | await Task.Delay(TimeSpan.FromSeconds(2), cancellationToken); |
| | | 854 | | } |
| | | 855 | | /* Attempts exhausted: return whatever Langfuse reports now, even if it is still short of expectedCount. */ |
| | | 856 | | return await _langfuseClient.ListDatasetRunItemsAsync(datasetId, runName, 1, limit, cancellationToken); |
| | | 857 | | } |
| | | 858 | | |
| | | 859 | | /* Deletes the named dataset run only when replaceRun is true. Returns false without calling Langfuse otherwise; else returns the client's result. */ private async Task<bool> DeleteExistingRunIfRequestedAsync( |
| | | 860 | | string datasetName, |
| | | 861 | | string runName, |
| | | 862 | | bool replaceRun, |
| | | 863 | | CancellationToken cancellationToken) |
| | | 864 | | { |
| | | 865 | | if (!replaceRun) |
| | | 866 | | { |
| | | 867 | | return false; |
| | | 868 | | } |
| | | 869 | | |
| | | 870 | | return await _langfuseClient.DeleteDatasetRunAsync(datasetName, runName, cancellationToken); |
| | | 871 | | } |
| | | 872 | | |
| | | 873 | | /* Indexes outcomes by the key built from home team, away team and matchday (see BuildOutcomeKey), compared case-insensitively. NOTE(review): ToDictionary throws ArgumentException if two outcomes produce the same key. */ private static IReadOnlyDictionary<string, PersistedMatchOutcome> LoadOutcomeDictionary( |
| | | 874 | | IEnumerable<PersistedMatchOutcome> outcomes) |
| | | 875 | | { |
| | | 876 | | return outcomes.ToDictionary( |
| | | 877 | | outcome => BuildOutcomeKey(outcome.HomeTeam, outcome.AwayTeam, outcome.Matchday), |
| | | 878 | | StringComparer.OrdinalIgnoreCase); |
| | | 879 | | } |
| | | 880 | | |
| | | 881 | | /* Loads outcomes for every distinct matchday referenced by the manifest items, in ascending matchday order, and merges them into one case-insensitive lookup. */ private static async Task<IReadOnlyDictionary<string, PersistedMatchOutcome>> LoadOutcomesAsync( |
| | | 882 | | IMatchOutcomeRepository matchOutcomeRepository, |
| | | 883 | | string communityContext, |
| | | 884 | | PreparedExperimentManifest manifest, |
| | | 885 | | CancellationToken cancellationToken) |
| | | 886 | | { |
| | | 887 | | var dictionary = new Dictionary<string, PersistedMatchOutcome>(StringComparer.OrdinalIgnoreCase); |
| | | 888 | | /* One repository call per matchday, awaited sequentially. Row 891 is cut off at the listing's column limit. */ |
| | | 889 | | foreach (var matchday in manifest.Items.Select(item => item.Matchday).Distinct().OrderBy(matchday => matchday)) |
| | | 890 | | { |
| | | 891 | | var outcomes = await matchOutcomeRepository.GetMatchdayOutcomesAsync(matchday, communityContext, cancellatio |
| | | 892 | | foreach (var pair in LoadOutcomeDictionary(outcomes)) |
| | | 893 | | { /* Indexer assignment: a key that is already present is overwritten rather than rejected. */ |
| | | 894 | | dictionary[pair.Key] = pair.Value; |
| | | 895 | | } |
| | | 896 | | } |
| | | 897 | | |
| | | 898 | | return dictionary; |
| | | 899 | | } |
| | | 900 | | |
| | | 901 | | /* Applies the Langfuse trace name, environment, session id, tags, experiment fields and trace metadata for one experiment item to the given activity. */ private static void ConfigureTraceContext( |
| | | 902 | | Activity? activity, |
| | | 903 | | string runName, |
| | | 904 | | string experimentName, |
| | | 905 | | string? runDescription, |
| | | 906 | | string datasetName, |
| | | 907 | | PreparedExperimentRunMetadata runMetadata, |
| | | 908 | | PreparedExperimentManifestItem item, |
| | | 909 | | string? tippSpielId, |
| | | 910 | | IReadOnlyList<string> traceTags, |
| | | 911 | | IReadOnlyDictionary<string, string> propagatedMetadata, |
| | | 912 | | DateTimeOffset? evaluationTimestamp = null, |
| | | 913 | | string? promptTemplatePath = null) |
| | | 914 | | { /* The run name is used as the session id. */ |
| | | 915 | | activity?.SetTag("langfuse.trace.name", "experiment-item-run"); |
| | | 916 | | LangfuseActivityPropagation.SetEnvironment(activity, "sdk-experiment"); |
| | | 917 | | LangfuseActivityPropagation.SetSessionId(activity, runName); |
| | | 918 | | LangfuseActivityPropagation.SetTraceTags(activity, traceTags); |
| | | 919 | | /* NOTE(review): SetExperimentName receives runName, while experimentName is only written as the experiment_name metadata entry further down - confirm this is intended. */ |
| | | 920 | | LangfuseActivityPropagation.SetExperimentName(activity, runName); |
| | | 921 | | LangfuseActivityPropagation.SetExperimentDescription(activity, runDescription); |
| | | 922 | | LangfuseActivityPropagation.SetExperimentItemId(activity, item.SliceDatasetItemId); |
| | | 923 | | LangfuseActivityPropagation.SetExperimentItemRootObservationId(activity, activity?.SpanId.ToString()); |
| | | 924 | | /* Caller-supplied metadata is written first; the fixed keys below are set afterwards. */ |
| | | 925 | | foreach (var metadata in propagatedMetadata) |
| | | 926 | | { |
| | | 927 | | LangfuseActivityPropagation.SetTraceMetadata(activity, metadata.Key, metadata.Value); |
| | | 928 | | } |
| | | 929 | | /* Rows 932-938 are cut off at the listing's column limit; their trailing arguments are only partly visible. */ |
| | | 930 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "experiment_name", experimentName); |
| | | 931 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "experiment_run_name", runName); |
| | | 932 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "datasetName", datasetName, propagateToObservations: fals |
| | | 933 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "datasetItemId", item.SliceDatasetItemId, propagateToObse |
| | | 934 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "dataset_item_id", item.SliceDatasetItemId, propagateToOb |
| | | 935 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "sourceDatasetItemId", item.SourceDatasetItemId, propagat |
| | | 936 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "community", runMetadata.CommunityContext, propagateToObs |
| | | 937 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "matchday", item.Matchday.ToString(), propagateToObservat |
| | | 938 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "selectedMatch", $"{item.HomeTeam} vs {item.AwayTeam}", p |
| | | 939 | | LangfuseActivityPropagation.SetTraceMetadata( |
| | | 940 | | activity, |
| | | 941 | | "homeTeams", |
| | | 942 | | PredictionTelemetryMetadata.BuildDelimitedFilterValue([item.HomeTeam]), |
| | | 943 | | propagateToObservations: false); |
| | | 944 | | LangfuseActivityPropagation.SetTraceMetadata( |
| | | 945 | | activity, |
| | | 946 | | "awayTeams", |
| | | 947 | | PredictionTelemetryMetadata.BuildDelimitedFilterValue([item.AwayTeam]), |
| | | 948 | | propagateToObservations: false); |
| | | 949 | | LangfuseActivityPropagation.SetTraceMetadata( |
| | | 950 | | activity, |
| | | 951 | | "teams", |
| | | 952 | | PredictionTelemetryMetadata.BuildDelimitedFilterValue([item.HomeTeam, item.AwayTeam]), |
| | | 953 | | propagateToObservations: false); |
| | | 954 | | if (evaluationTimestamp is not null) |
| | | 955 | | { |
| | | 956 | | LangfuseActivityPropagation.SetTraceMetadata( |
| | | 957 | | activity, |
| | | 958 | | "evaluationTimestamp", |
| | | 959 | | evaluationTimestamp.Value.ToString("O"), |
| | | 960 | | propagateToObservations: false); |
| | | 961 | | } |
| | | 962 | | /* tippSpielId and promptTemplatePath may be null here; how SetTraceMetadata treats a null value is not visible in this listing. Rows 963-964 are also cut off. */ |
| | | 963 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "tippSpielId", tippSpielId, propagateToObservations: fals |
| | | 964 | | LangfuseActivityPropagation.SetTraceMetadata(activity, "promptTemplatePath", promptTemplatePath, propagateToObse |
| | | 965 | | } |
| | | 966 | | |
| | | 967 | | /* Writes the same JSON to both the trace input tag and the observation input tag. Does nothing when the activity is null or the JSON is null, empty or whitespace. */ private static void SetTraceAndRootObservationInput(Activity? activity, string inputJson) |
| | | 968 | | { |
| | | 969 | | if (activity is null || string.IsNullOrWhiteSpace(inputJson)) |
| | | 970 | | { |
| | | 971 | | return; |
| | | 972 | | } |
| | | 973 | | |
| | | 974 | | activity.SetTag("langfuse.trace.input", inputJson); |
| | | 975 | | activity.SetTag("langfuse.observation.input", inputJson); |
| | | 976 | | } |
| | | 977 | | |
| | | 978 | | /* Writes the expected-output JSON to the langfuse.experiment.item.expected_output tag. Does nothing when the activity is null or the JSON is null, empty or whitespace. */ private static void SetExperimentItemExpectedOutput(Activity? activity, string expectedOutputJson) |
| | | 979 | | { |
| | | 980 | | if (activity is null || string.IsNullOrWhiteSpace(expectedOutputJson)) |
| | | 981 | | { |
| | | 982 | | return; |
| | | 983 | | } |
| | | 984 | | |
| | | 985 | | activity.SetTag("langfuse.experiment.item.expected_output", expectedOutputJson); |
| | | 986 | | } |
| | | 987 | | |
| | | 988 | | /* Writes the item metadata JSON to the langfuse.experiment.item.metadata tag. Does nothing when the activity is null or the JSON is null, empty or whitespace. */ private static void SetExperimentItemMetadata(Activity? activity, string metadataJson) |
| | | 989 | | { |
| | | 990 | | if (activity is null || string.IsNullOrWhiteSpace(metadataJson)) |
| | | 991 | | { |
| | | 992 | | return; |
| | | 993 | | } |
| | | 994 | | |
| | | 995 | | activity.SetTag("langfuse.experiment.item.metadata", metadataJson); |
| | | 996 | | } |
| | | 997 | | |
| | | 998 | | /* Writes the same JSON to both the trace output tag and the observation output tag. Does nothing when the activity is null or the JSON is null, empty or whitespace. */ private static void SetTraceAndRootObservationOutput(Activity? activity, string outputJson) |
| | | 999 | | { |
| | | 1000 | | if (activity is null || string.IsNullOrWhiteSpace(outputJson)) |
| | | 1001 | | { |
| | | 1002 | | return; |
| | | 1003 | | } |
| | | 1004 | | |
| | | 1005 | | activity.SetTag("langfuse.trace.output", outputJson); |
| | | 1006 | | activity.SetTag("langfuse.observation.output", outputJson); |
| | | 1007 | | } |
| | | 1008 | | |
| | | 1009 | | /* Forwards the dataset run id to LangfuseActivityPropagation.SetExperimentRunId. Does nothing when the activity is null or the id is null, empty or whitespace. */ private static void SetExperimentRunId(Activity? activity, string datasetRunId) |
| | | 1010 | | { |
| | | 1011 | | if (activity is null || string.IsNullOrWhiteSpace(datasetRunId)) |
| | | 1012 | | { |
| | | 1013 | | return; |
| | | 1014 | | } |
| | | 1015 | | |
| | | 1016 | | LangfuseActivityPropagation.SetExperimentRunId(activity, datasetRunId); |
| | | 1017 | | } |
| | | 1018 | | |
| | | 1019 | | /* Serializes the item's input as JSON with TraceJsonOptions: a fixture label of the form "Home vs Away" plus the item's StartsAt value. */ private static string CreateExperimentItemInputJson(PreparedExperimentManifestItem item) |
| | | 1020 | | { |
| | | 1021 | | return JsonSerializer.Serialize(new |
| | | 1022 | | { |
| | | 1023 | | fixture = $"{item.HomeTeam} vs {item.AwayTeam}", |
| | | 1024 | | item.StartsAt |
| | | 1025 | | }, TraceJsonOptions); |
| | | 1026 | | } |
| | | 1027 | | |
| | | 1028 | | /* Serializes the expected result as JSON with TraceJsonOptions: a single score member of the form "home:away". */ private static string CreateExperimentItemExpectedOutputJson(int homeGoals, int awayGoals) |
| | | 1029 | | { |
| | | 1030 | | return JsonSerializer.Serialize(new |
| | | 1031 | | { |
| | | 1032 | | score = $"{homeGoals}:{awayGoals}" |
| | | 1033 | | }, TraceJsonOptions); |
| | | 1034 | | } |
| | | 1035 | | |
| | | 1036 | | /* Serializes the item's identifying fields (dataset item ids, teams, matchday, TippSpielId, fixture and repetition indices) as JSON with TraceJsonOptions. */ private static string CreateExperimentItemMetadataJson(PreparedExperimentManifestItem item) |
| | | 1037 | | { |
| | | 1038 | | return JsonSerializer.Serialize(new |
| | | 1039 | | { |
| | | 1040 | | item.SourceDatasetItemId, |
| | | 1041 | | item.SliceDatasetItemId, |
| | | 1042 | | item.HomeTeam, |
| | | 1043 | | item.AwayTeam, |
| | | 1044 | | item.Matchday, |
| | | 1045 | | item.TippSpielId, |
| | | 1046 | | item.FixtureIndex, |
| | | 1047 | | item.RepetitionIndex |
| | | 1048 | | }, TraceJsonOptions); |
| | | 1049 | | } |
| | | 1050 | | |
| | | 1051 | | /* Returns the manifest participants ordered by display name (case-insensitive) and then participant id (ordinal), after applying the optional id filter and the optional limit. Throws InvalidOperationException when a requested id is unknown or when no participant remains. */ private static IReadOnlyList<PreparedExperimentParticipantManifest> SelectParticipants( |
| | | 1052 | | PreparedExperimentManifest manifest, |
| | | 1053 | | PreparedExperimentCommunityRunRequest request) |
| | | 1054 | | { |
| | | 1055 | | var participants = manifest.Participants |
| | | 1056 | | .OrderBy(participant => participant.DisplayName, StringComparer.OrdinalIgnoreCase) |
| | | 1057 | | .ThenBy(participant => participant.ParticipantId, StringComparer.Ordinal) |
| | | 1058 | | .ToList(); |
| | | 1059 | | /* An empty ParticipantIds set means no filtering. Otherwise every requested id must exist in the manifest. */ |
| | | 1060 | | if (request.ParticipantIds.Count > 0) |
| | | 1061 | | { |
| | | 1062 | | var filteredParticipants = participants |
| | | 1063 | | .Where(participant => request.ParticipantIds.Contains(participant.ParticipantId)) |
| | | 1064 | | .ToList(); |
| | | 1065 | | var missingParticipantIds = request.ParticipantIds |
| | | 1066 | | .Except(filteredParticipants.Select(participant => participant.ParticipantId), StringComparer.Ordinal) |
| | | 1067 | | .OrderBy(participantId => participantId, StringComparer.Ordinal) |
| | | 1068 | | .ToList(); |
| | | 1069 | | if (missingParticipantIds.Count > 0) |
| | | 1070 | | { /* Row 1072 is cut off at the listing's column limit. */ |
| | | 1071 | | throw new InvalidOperationException( |
| | | 1072 | | $"The community-to-date manifest does not contain participant id(s): {string.Join(", ", missingParti |
| | | 1073 | | } |
| | | 1074 | | |
| | | 1075 | | participants = filteredParticipants; |
| | | 1076 | | } |
| | | 1077 | | /* The limit is applied after filtering, so it keeps the first entries in the sort order established above. */ |
| | | 1078 | | if (request.ParticipantLimit is not null) |
| | | 1079 | | { |
| | | 1080 | | participants = participants.Take(request.ParticipantLimit.Value).ToList(); |
| | | 1081 | | } |
| | | 1082 | | |
| | | 1083 | | if (participants.Count == 0) |
| | | 1084 | | { /* Row 1085 is cut off at the listing's column limit. */ |
| | | 1085 | | throw new InvalidOperationException("No participants remain after applying the requested community-to-date f |
| | | 1086 | | } |
| | | 1087 | | |
| | | 1088 | | return participants; |
| | | 1089 | | } |
| | | 1090 | | |
| | | 1091 | | /* Builds the run metadata for one participant's community-to-date run. Blank SliceKind and SampleMethod default to "community-to-date"; a blank SelectedItemIdsHash is computed from SelectedItemIds, or from the items' slice ids when that list is empty; a non-positive SampleSize falls back to the item count. */ private static PreparedExperimentRunMetadata BuildCommunityRunMetadata( |
| | | 1092 | | PreparedExperimentManifest manifest, |
| | | 1093 | | PreparedExperimentParticipantManifest participant, |
| | | 1094 | | string datasetName, |
| | | 1095 | | string startedAtUtc, |
| | | 1096 | | int batchSize) |
| | | 1097 | | { /* Row 1117 is cut off at the listing's column limit. The participant's display name is stored in Model as well as in RunSubjectDisplayName. */ |
| | | 1098 | | return new PreparedExperimentRunMetadata |
| | | 1099 | | { |
| | | 1100 | | Runner = "community-match-experiment-runner", |
| | | 1101 | | TaskType = "community-to-date", |
| | | 1102 | | CommunityContext = manifest.CommunityContext, |
| | | 1103 | | Competition = manifest.Competition, |
| | | 1104 | | SourceDatasetName = manifest.SourceDatasetName, |
| | | 1105 | | DatasetName = datasetName, |
| | | 1106 | | SliceKind = string.IsNullOrWhiteSpace(manifest.SliceKind) |
| | | 1107 | | ? "community-to-date" |
| | | 1108 | | : manifest.SliceKind, |
| | | 1109 | | SliceKey = manifest.SliceKey, |
| | | 1110 | | SourcePoolKey = manifest.SourcePoolKey, |
| | | 1111 | | SelectedItemIdsHash = string.IsNullOrWhiteSpace(manifest.SelectedItemIdsHash) |
| | | 1112 | | ? ExperimentArtifactSupport.ComputeSelectedItemIdsHash( |
| | | 1113 | | manifest.SelectedItemIds.Count > 0 |
| | | 1114 | | ? manifest.SelectedItemIds |
| | | 1115 | | : manifest.Items.Select(item => item.SliceDatasetItemId)) |
| | | 1116 | | : manifest.SelectedItemIdsHash, |
| | | 1117 | | SelectedItemIdsCount = manifest.SelectedItemIds.Count > 0 ? manifest.SelectedItemIds.Count : manifest.Items. |
| | | 1118 | | SampleSize = manifest.SampleSize > 0 ? manifest.SampleSize : manifest.Items.Count, |
| | | 1119 | | StartedAtUtc = startedAtUtc, |
| | | 1120 | | SampleSeed = manifest.SampleSeed, |
| | | 1121 | | SampleMethod = string.IsNullOrWhiteSpace(manifest.SampleMethod) |
| | | 1122 | | ? "community-to-date" |
| | | 1123 | | : manifest.SampleMethod, |
| | | 1124 | | IncludeJustification = false, |
| | | 1125 | | SourceDatasetKind = "community-to-date", |
| | | 1126 | | DatasetItemIdMap = PreparedExperimentSupport.CreateDatasetItemIdMap(manifest), |
| | | 1127 | | Model = participant.DisplayName, |
| | | 1128 | | ObservationName = "community-match-prediction", |
| | | 1129 | | RunSubjectKind = "participant", |
| | | 1130 | | RunSubjectId = participant.ParticipantId, |
| | | 1131 | | RunSubjectDisplayName = participant.DisplayName, |
| | | 1132 | | BatchStrategy = "simple-batched", |
| | | 1133 | | BatchSize = batchSize, |
| | | 1134 | | BatchCount = null |
| | | 1135 | | }; |
| | | 1136 | | } |
| | | 1137 | | |
| | | 1138 | | /* Builds the run family name from a fixed "community-to-date" prefix, the slugified community context, a slugified slice token and the timestamp token, joined by double underscores. Row 1141 is cut off at the listing's column limit, so the full fallback used for a blank SliceKey is not visible. */ private static string BuildCommunityRunFamilyName(PreparedExperimentManifest manifest, string startedAtUtc) |
| | | 1139 | | { |
| | | 1140 | | var communityToken = ExperimentArtifactSupport.Slugify(manifest.CommunityContext); |
| | | 1141 | | var sliceToken = ExperimentArtifactSupport.Slugify(string.IsNullOrWhiteSpace(manifest.SliceKey) ? "community-to- |
| | | 1142 | | return $"community-to-date__{communityToken}__{sliceToken}__{BuildRunTimestampToken(startedAtUtc)}"; |
| | | 1143 | | } |
| | | 1144 | | |
| | | 1145 | | /* Appends a slugified participant token, built from the display name and participant id, to the run family name, separated by a double underscore. Row 1149 is cut off at the listing's column limit. */ private static string BuildCommunityParticipantRunName( |
| | | 1146 | | string runFamilyName, |
| | | 1147 | | PreparedExperimentParticipantManifest participant) |
| | | 1148 | | { |
| | | 1149 | | var participantToken = ExperimentArtifactSupport.Slugify($"{participant.DisplayName}-{participant.ParticipantId} |
| | | 1150 | | return $"{runFamilyName}__{participantToken}"; |
| | | 1151 | | } |
| | | 1152 | | |
| | | 1153 | | /* Lower-cases the timestamp text (invariant culture) and replaces every ':' with '-'. Used as the last segment of the run family name. */ private static string BuildRunTimestampToken(string startedAtUtc) |
| | | 1154 | | { |
| | | 1155 | | return startedAtUtc.ToLowerInvariant().Replace(':', '-'); |
| | | 1156 | | } |
| | | 1157 | | |
| | | 1158 | | /* Serializes the participant prediction's status, home goals, away goals and Kicktipp points to a JsonElement using PreparedExperimentCommandSupport.JsonOptions. */ private static JsonElement CreateCommunityPredictionPayload(PreparedExperimentParticipantPrediction prediction) |
| | | 1159 | | { |
| | | 1160 | | return JsonSerializer.SerializeToElement(new |
| | | 1161 | | { |
| | | 1162 | | status = prediction.Status, |
| | | 1163 | | homeGoals = prediction.HomeGoals, |
| | | 1164 | | awayGoals = prediction.AwayGoals, |
| | | 1165 | | kicktippPoints = prediction.KicktippPoints |
| | | 1166 | | }, PreparedExperimentCommandSupport.JsonOptions); |
| | | 1167 | | } |
| | | 1168 | | |
| | | 1169 | | /* Returns a Prediction only when both HomeGoals and AwayGoals hold a value; otherwise returns null. */ private static Prediction? CreatePredictionOrNull(PreparedExperimentParticipantPrediction prediction) |
| | | 1170 | | { |
| | | 1171 | | return prediction.HomeGoals is int homeGoals && prediction.AwayGoals is int awayGoals |
| | | 1172 | | ? new Prediction(homeGoals, awayGoals) |
| | | 1173 | | : null; |
| | | 1174 | | } |
| | | 1175 | | |
| | | 1176 | | /* Tags the child observation for a community prediction: observation type, model name, input and output JSON, and participant and item metadata. Does nothing when the activity is null. */ private static void ConfigureCommunityPredictionObservation( |
| | | 1177 | | Activity? activity, |
| | | 1178 | | PreparedExperimentParticipantManifest participant, |
| | | 1179 | | PreparedExperimentParticipantPrediction prediction, |
| | | 1180 | | PreparedExperimentManifestItem item, |
| | | 1181 | | JsonElement predictionPayload) |
| | | 1182 | | { |
| | | 1183 | | if (activity is null) |
| | | 1184 | | { |
| | | 1185 | | return; |
| | | 1186 | | } |
| | | 1187 | | /* The observation is tagged as type "generation" with the fixed model name "kicktipp-community". */ |
| | | 1188 | | activity.SetTag("langfuse.observation.type", "generation"); |
| | | 1189 | | activity.SetTag("gen_ai.request.model", "kicktipp-community"); |
| | | 1190 | | activity.SetTag("langfuse.observation.input", JsonSerializer.Serialize(new |
| | | 1191 | | { |
| | | 1192 | | source = "kicktipp-community", |
| | | 1193 | | participantId = participant.ParticipantId, |
| | | 1194 | | participantDisplayName = participant.DisplayName, |
| | | 1195 | | item.SourceDatasetItemId, |
| | | 1196 | | item.TippSpielId |
| | | 1197 | | }, TraceJsonOptions)); |
| | | 1198 | | activity.SetTag("langfuse.observation.output", predictionPayload.GetRawText()); |
| | | 1199 | | new PredictionTelemetryMetadata(item.HomeTeam, item.AwayTeam).ApplyToObservation(activity); |
| | | 1200 | | activity.SetTag("langfuse.observation.metadata.participantId", participant.ParticipantId); |
| | | 1201 | | activity.SetTag("langfuse.observation.metadata.participantDisplayName", participant.DisplayName); |
| | | 1202 | | activity.SetTag("langfuse.observation.metadata.predictionStatus", prediction.Status); |
| | | 1203 | | activity.SetTag("langfuse.observation.metadata.sourceDatasetItemId", item.SourceDatasetItemId); |
| | | 1204 | | /* The tippSpielId metadata tag is written only when the item has a non-blank value. */ |
| | | 1205 | | if (!string.IsNullOrWhiteSpace(item.TippSpielId)) |
| | | 1206 | | { |
| | | 1207 | | activity.SetTag("langfuse.observation.metadata.tippSpielId", item.TippSpielId); |
| | | 1208 | | } |
| | | 1209 | | } |
| | | 1210 | | |
| | | 1211 | | /* Resolves the dataset name: run metadata first, then the manifest's SliceDatasetName; throws InvalidOperationException when both are null. Only null triggers the fallback; an empty string is returned as is. The signature row is cut off at the listing's column limit. */ private static string DeriveDatasetName(PreparedExperimentRunMetadata runMetadata, PreparedExperimentManifest manife |
| | | 1212 | | { |
| | | 1213 | | return runMetadata.DatasetName |
| | | 1214 | | ?? manifest.SliceDatasetName |
| | | 1215 | | ?? throw new InvalidOperationException("No dataset name was provided for the experiment run."); |
| | | 1216 | | } |
| | | 1217 | | |
| | | 1218 | | /* Resolves the community context: the run metadata value when it is not blank, else the manifest value when it is not blank; throws InvalidOperationException when both are blank. The signature row is cut off at the listing's column limit. */ private static string GetCommunityContext(PreparedExperimentRunMetadata runMetadata, PreparedExperimentManifest mani |
| | | 1219 | | { |
| | | 1220 | | return !string.IsNullOrWhiteSpace(runMetadata.CommunityContext) |
| | | 1221 | | ? runMetadata.CommunityContext |
| | | 1222 | | : !string.IsNullOrWhiteSpace(manifest.CommunityContext) |
| | | 1223 | | ? manifest.CommunityContext |
| | | 1224 | | : throw new InvalidOperationException("Run metadata or manifest must contain communityContext."); |
| | | 1225 | | } |
| | | 1226 | | |
| | | 1227 | | /* Builds the outcome lookup key: matchday, trimmed home team and trimmed away team, joined by a vertical bar. Case is not normalized here; the dictionaries that use the key supply a case-insensitive comparer. */ private static string BuildOutcomeKey(string homeTeam, string awayTeam, int matchday) |
| | | 1228 | | { |
| | | 1229 | | return string.Join("|", matchday, homeTeam.Trim(), awayTeam.Trim()); |
| | | 1230 | | } |
| | | 1231 | | |
| | | 1232 | | /* Returns true when the task type is "repeated-match" or "repeated-match-slice", compared case-insensitively. */ private static bool IsWarmupBatchTask(string taskType) |
| | | 1233 | | { |
| | | 1234 | | return string.Equals(taskType, "repeated-match", StringComparison.OrdinalIgnoreCase) |
| | | 1235 | | || string.Equals(taskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase); |
| | | 1236 | | } |
| | | 1237 | | |
| | | 1238 | | /* Returns a human-readable description of the batching used for a run, chosen by task type (case-insensitive). */ private static string DescribeBatching(PreparedExperimentRunMetadata runMetadata, int batchTotal) |
| | | 1239 | | { |
| | | 1240 | | if (string.Equals(runMetadata.TaskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase)) |
| | | 1241 | | { /* Parallelism defaults to 5 and BatchCount to 3 when unset; the batch count is floored at 0. Row 1242 is cut off at the listing's column limit, so the rest of the message is not visible. */ |
| | | 1242 | | return $"parallelism {runMetadata.Parallelism ?? 5}, warmup plus {Math.Max(0, runMetadata.BatchCount ?? 3)} |
| | | 1243 | | } |
| | | 1244 | | /* repeated-match reports the batches after the warmup batch (batchTotal minus one, floored at 0); every other task type reports the configured batch size. */ |
| | | 1245 | | return string.Equals(runMetadata.TaskType, "repeated-match", StringComparison.OrdinalIgnoreCase) |
| | | 1246 | | ? $"warmup plus {Math.Max(0, batchTotal - 1)} additional batch(es)" |
| | | 1247 | | : $"batch size {runMetadata.BatchSize}"; |
| | | 1248 | | } |
| | | 1249 | | |
| | | 1250 | | /* Result of executing one item: the Langfuse dataset run id returned for the item, together with the item's execution summary. */ private sealed record PreparedExperimentExecutionResult( |
| | | 1251 | | string DatasetRunId, |
| | | 1252 | | PreparedExperimentExecutionSummary Summary); |
| | | 1253 | | |
| | 1 | 1254 | | /* Pairs an optional instructions template provider with optional Langfuse prompt trace metadata; both members are nullable. Its usage is outside this part of the listing. */ private sealed record ExperimentPromptRoute( |
| | 1 | 1255 | | IInstructionsTemplateProvider? TemplateProvider, |
| | 1 | 1256 | | LangfusePromptTraceMetadata? TraceMetadata); |
| | | 1257 | | } |
| | | 1258 | | |
| | | 1259 | | /* Request for a prepared experiment run: manifest path, run name, optional description and metadata file, a replace-run flag, and run options. */ internal sealed record PreparedExperimentRunRequest( |
| | | 1260 | | string ManifestPath, |
| | | 1261 | | string RunName, |
| | | 1262 | | string? RunDescription, |
| | | 1263 | | string? RunMetadataFile, |
| | | 1264 | | bool ReplaceRun, |
| | | 1265 | | PreparedExperimentRunOptions Options); |
| | | 1266 | | |
| | | 1267 | | /* Request for community-to-date runs. ParticipantIds filters participants when it is non-empty and ParticipantLimit caps the selection when it is set (see SelectParticipants). */ internal sealed record PreparedExperimentCommunityRunRequest( |
| | | 1268 | | string ManifestPath, |
| | | 1269 | | string? RunFamilyName, |
| | | 1270 | | string? RunDescription, |
| | | 1271 | | string? DatasetName, |
| | | 1272 | | bool ReplaceRuns, |
| | | 1273 | | int BatchSize, |
| | | 1274 | | int? ParticipantLimit, |
| | | 1275 | | IReadOnlySet<string> ParticipantIds); |