| | | 1 | | using System.Globalization; |
| | | 2 | | using System.Security.Cryptography; |
| | | 3 | | using System.Text; |
| | | 4 | | using System.Text.Json; |
| | | 5 | | using System.Text.Json.Nodes; |
| | | 6 | | using EHonda.KicktippAi.Core; |
| | | 7 | | |
| | | 8 | | namespace Orchestrator.Commands.Observability.Experiments; |
| | | 9 | | |
| | | 10 | | internal static class PreparedExperimentSupport |
| | | 11 | | { |
/// <summary>
/// Splits <paramref name="items"/> into consecutive chunks of at most <paramref name="batchSize"/> elements.
/// Every chunk except possibly the last holds exactly <paramref name="batchSize"/> elements.
/// </summary>
/// <typeparam name="T">Element type of the list being chunked.</typeparam>
/// <param name="items">Items to split; an empty list produces an empty result.</param>
/// <param name="batchSize">Maximum number of items per chunk; must be at least 1.</param>
/// <returns>The chunks in source order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is <see langword="null"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="batchSize"/> is less than 1.</exception>
public static IReadOnlyList<IReadOnlyList<T>> CreateBatchChunks<T>(IReadOnlyList<T> items, int batchSize)
{
    ArgumentNullException.ThrowIfNull(items);

    // A non-positive batch size would never advance through the list, so reject it up front.
    if (batchSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be at least 1.");
    }

    var chunks = new List<IReadOnlyList<T>>();
    var start = 0;
    while (start < items.Count)
    {
        // Clamp against the remaining count so the last chunk may be short and 'start' cannot overflow.
        var length = Math.Min(batchSize, items.Count - start);
        var chunk = new List<T>(length);
        for (var offset = 0; offset < length; offset++)
        {
            chunk.Add(items[start + offset]);
        }

        chunks.Add(chunk);
        start += length;
    }

    return chunks;
}
| | | 22 | | |
/// <summary>
/// Produces a single-item warm-up chunk holding the first item, followed by the remaining items split into
/// up to <paramref name="batchCount"/> contiguous chunks whose sizes differ by at most one.
/// </summary>
/// <typeparam name="T">Element type of the list being chunked.</typeparam>
/// <param name="items">Items to split; an empty list produces an empty result.</param>
/// <param name="batchCount">Desired number of chunks after the warm-up chunk; must be at least 1.</param>
/// <returns>The warm-up chunk followed by the batch chunks, in source order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is <see langword="null"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="batchCount"/> is less than 1.</exception>
public static IReadOnlyList<IReadOnlyList<T>> CreateWarmupThenBatchChunks<T>(IReadOnlyList<T> items, int batchCount)
{
    ArgumentNullException.ThrowIfNull(items);

    if (batchCount < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(batchCount), batchCount, "Batch count must be at least 1.");
    }

    if (items.Count == 0)
    {
        return [];
    }

    IReadOnlyList<T> warmup = new List<T> { items[0] };
    var result = new List<IReadOnlyList<T>> { warmup };

    var tailCount = items.Count - 1;
    if (tailCount == 0)
    {
        return result;
    }

    // Never create more batches than there are items left after the warm-up.
    var effectiveBatches = Math.Min(batchCount, tailCount);
    var smallSize = tailCount / effectiveBatches;
    var largerBatches = tailCount % effectiveBatches;

    // 'cursor' indexes into the original list; position 0 was consumed by the warm-up chunk.
    var cursor = 1;
    for (var batch = 0; batch < effectiveBatches; batch++)
    {
        // The first 'largerBatches' batches absorb the division remainder, one extra item each.
        var size = batch < largerBatches ? smallSize + 1 : smallSize;
        var slice = new List<T>(size);
        for (var end = cursor + size; cursor < end; cursor++)
        {
            slice.Add(items[cursor]);
        }

        result.Add(slice);
    }

    return result;
}
| | | 62 | | |
/// <summary>
/// Scores a prediction against the expected result.
/// </summary>
/// <remarks>
/// Points awarded: 4 for the exact score, 3 for the same non-zero goal difference,
/// 2 for the same tendency (sign of the goal difference), otherwise 0. A draw predicted
/// with the wrong score earns 2, because the goal-difference tier excludes a zero difference.
/// </remarks>
/// <param name="prediction">Predicted home and away goals.</param>
/// <param name="expectedHomeGoals">Expected home goals.</param>
/// <param name="expectedAwayGoals">Expected away goals.</param>
/// <returns>The scores wrapping the awarded points.</returns>
public static ExperimentItemScores CalculateScores(Prediction prediction, int expectedHomeGoals, int expectedAwayGoals)
{
    var predictedMargin = prediction.HomeGoals - prediction.AwayGoals;
    var expectedMargin = expectedHomeGoals - expectedAwayGoals;

    var isExact = prediction.HomeGoals == expectedHomeGoals && prediction.AwayGoals == expectedAwayGoals;
    var sameTendency = Math.Sign(predictedMargin) == Math.Sign(expectedMargin);
    var sameNonDrawMargin = sameTendency && predictedMargin == expectedMargin && expectedMargin != 0;

    // Tiers are checked from most to least specific; the first match wins.
    var points = (isExact, sameNonDrawMargin, sameTendency) switch
    {
        (true, _, _) => 4,
        (_, true, _) => 3,
        (_, _, true) => 2,
        _ => 0
    };

    return new ExperimentItemScores(points);
}
| | | 90 | | |
/// <summary>
/// Aggregates per-item scores into a total and a per-item average.
/// </summary>
/// <param name="scoreEntries">Scores to aggregate.</param>
/// <returns>The summed points and their mean; the mean is 0 when there are no entries.</returns>
public static ExperimentAggregateScores SummarizeScores(IReadOnlyList<ExperimentItemScores> scoreEntries)
{
    var pointsTotal = scoreEntries.Sum(static entry => entry.KicktippPoints);

    // Avoid dividing by zero for an empty list.
    if (scoreEntries.Count == 0)
    {
        return new ExperimentAggregateScores(pointsTotal, 0d);
    }

    var mean = (double)pointsTotal / scoreEntries.Count;
    return new ExperimentAggregateScores(pointsTotal, mean);
}
| | | 97 | | |
/// <summary>
/// Aggregates execution summaries into a total and an average whose meaning depends on the task type.
/// </summary>
/// <remarks>
/// For every task type except <c>repeated-match-slice</c> (compared case-insensitively) the average is
/// points per execution summary, or 0 for an empty list. For <c>repeated-match-slice</c> the summaries are
/// grouped by repetition index and the average is taken over the per-repetition point totals.
/// </remarks>
/// <param name="executionSummaries">Execution summaries to aggregate.</param>
/// <param name="taskType">Task type selecting the averaging rule; may be <see langword="null"/>.</param>
/// <returns>The summed points and the task-specific average.</returns>
/// <exception cref="InvalidOperationException">
/// The task type is <c>repeated-match-slice</c> and the list is empty or a summary has no repetition index,
/// neither on the summary itself nor derivable from its dataset item id.
/// </exception>
public static ExperimentAggregateScores SummarizeExecutionScores(
    IReadOnlyList<PreparedExperimentExecutionSummary> executionSummaries,
    string? taskType)
{
    var pointsTotal = executionSummaries.Sum(static summary => summary.Scores.KicktippPoints);

    var isRepeatedMatchSlice = string.Equals(taskType, "repeated-match-slice", StringComparison.OrdinalIgnoreCase);
    if (!isRepeatedMatchSlice)
    {
        var mean = executionSummaries.Count == 0 ? 0d : (double)pointsTotal / executionSummaries.Count;
        return new ExperimentAggregateScores(pointsTotal, mean);
    }

    // Resolve every repetition index first: prefer the explicit value, fall back to the dataset item id.
    var indexed =
        (from summary in executionSummaries
         let repetition = summary.RepetitionIndex ?? TryParseRepeatedMatchSliceRepetition(summary.DatasetItemId)
         select new { summary.Scores.KicktippPoints, Repetition = repetition })
        .ToList();

    if (indexed.Count == 0 || indexed.Exists(entry => entry.Repetition is null))
    {
        throw new InvalidOperationException(
            "Repeated-match-slice scoring requires a repetition index on each execution summary.");
    }

    // One total per repetition; the reported average is the mean of those totals.
    var perRepetitionTotals =
        from entry in indexed
        group entry.KicktippPoints by entry.Repetition!.Value into repetitionGroup
        select repetitionGroup.Sum();

    return new ExperimentAggregateScores(pointsTotal, perRepetitionTotals.Average());
}
| | | 130 | | |
/// <summary>
/// Builds an experiment name from the slugified task type, community context and slice key of the run.
/// </summary>
/// <param name="runMetadata">Run metadata supplying the name parts.</param>
/// <param name="runName">Fallback name used when fewer than two parts are present.</param>
/// <returns>
/// The non-blank parts, slugified and joined with <c>__</c>, when at least two are present;
/// otherwise <paramref name="runName"/>.
/// </returns>
public static string DeriveExperimentName(PreparedExperimentRunMetadata runMetadata, string runName)
{
    var slugs = new List<string>(3);
    foreach (var candidate in new[] { runMetadata.TaskType, runMetadata.CommunityContext, runMetadata.SliceKey })
    {
        if (!string.IsNullOrWhiteSpace(candidate))
        {
            slugs.Add(ExperimentArtifactSupport.Slugify(candidate));
        }
    }

    if (slugs.Count < 2)
    {
        return runName;
    }

    return string.Join("__", slugs);
}
| | | 147 | | |
/// <summary>
/// Serializes the run metadata to JSON and adds the experiment name, the experiment run name and any extra fields.
/// </summary>
/// <param name="runMetadata">Run metadata serialized as the base object.</param>
/// <param name="experimentName">Value stored under <c>experiment_name</c>.</param>
/// <param name="experimentRunName">Value stored under <c>experiment_run_name</c>.</param>
/// <param name="extraFields">
/// Optional additional entries; entries whose key or value is null or whitespace are ignored.
/// Later assignments overwrite existing properties with the same name.
/// </param>
/// <returns>The combined metadata as a <see cref="JsonElement"/>.</returns>
public static JsonElement BuildLangfuseExperimentMetadata(
    PreparedExperimentRunMetadata runMetadata,
    string experimentName,
    string experimentRunName,
    IReadOnlyDictionary<string, string?>? extraFields = null)
{
    // Fall back to an empty object when the metadata does not serialize to a JSON object.
    var payload = JsonSerializer.SerializeToNode(runMetadata, PreparedExperimentCommandSupport.JsonOptions) is JsonObject serialized
        ? serialized
        : new JsonObject();

    payload["experiment_name"] = experimentName;
    payload["experiment_run_name"] = experimentRunName;

    var usableExtras = extraFields?.Where(
        field => !string.IsNullOrWhiteSpace(field.Key) && !string.IsNullOrWhiteSpace(field.Value));
    if (usableExtras is not null)
    {
        foreach (var field in usableExtras)
        {
            payload[field.Key] = field.Value;
        }
    }

    return JsonSerializer.SerializeToElement(payload, PreparedExperimentCommandSupport.JsonOptions);
}
| | | 174 | | |
| | | 175 | | public static string CreateScoreId(string scoreName, params string?[] components) |
| | | 176 | | { |
| | 1 | 177 | | var joined = string.Join( |
| | 1 | 178 | | "\n", |
| | 1 | 179 | | new[] { scoreName } |
| | 1 | 180 | | .Concat(components.Where(component => !string.IsNullOrWhiteSpace(component)).Select(component => compone |
| | 1 | 181 | | var hash = SHA256.HashData(Encoding.UTF8.GetBytes(joined)); |
| | 1 | 182 | | return $"exp-score-{Convert.ToHexString(hash).ToLowerInvariant()}"; |
| | | 183 | | } |
| | | 184 | | |
/// <summary>
/// Assembles the <see cref="PreparedExperimentRunMetadata"/> for a run from the prepared manifest and the run options.
/// </summary>
/// <remarks>
/// When <c>EvaluationTimeParser.ParseOrNull</c> yields no explicit evaluation time, a timestamp policy is parsed
/// from the options' policy kind and offset (blank values fall back to <c>EvaluationTimestampPolicy.RelativeKind</c>
/// and <c>-12:00:00</c>) and the policy key is derived from it. Otherwise the policy key is the literal
/// <c>exact-time</c> and no policy metadata is attached. The start timestamp is taken from the current UTC time.
/// </remarks>
/// <param name="manifest">Prepared experiment manifest describing the slice.</param>
/// <param name="options">Options of the run being started.</param>
/// <returns>The populated run metadata.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="manifest"/> or <paramref name="options"/> is <see langword="null"/>.
/// </exception>
public static PreparedExperimentRunMetadata BuildRunMetadata(
    PreparedExperimentManifest manifest,
    PreparedExperimentRunOptions options)
{
    ArgumentNullException.ThrowIfNull(manifest);
    ArgumentNullException.ThrowIfNull(options);

    var explicitEvaluationTime = EvaluationTimeParser.ParseOrNull(options.EvaluationTime);

    // Default to the exact-time key; it is replaced below when a relative policy applies.
    EvaluationTimestampPolicy? relativePolicy = null;
    var policyKey = "exact-time";
    if (explicitEvaluationTime is null)
    {
        var kind = string.IsNullOrWhiteSpace(options.EvaluationPolicyKind)
            ? EvaluationTimestampPolicy.RelativeKind
            : options.EvaluationPolicyKind;
        var offset = string.IsNullOrWhiteSpace(options.EvaluationPolicyOffset)
            ? "-12:00:00"
            : options.EvaluationPolicyOffset;
        relativePolicy = EvaluationTimestampPolicyParser.Parse(kind, offset);
        policyKey = ExperimentArtifactSupport.BuildRelativeEvaluationPolicyKey(relativePolicy);
    }

    // Blank reasoning effort is treated as "not set"; otherwise it is trimmed and lower-cased.
    var reasoningEffort = string.IsNullOrWhiteSpace(options.ReasoningEffort)
        ? null
        : options.ReasoningEffort.Trim().ToLowerInvariant();
    var subjectDisplayName = BuildRunSubjectDisplayName(options.Model, reasoningEffort);
    var subjectId = reasoningEffort is null
        ? options.Model
        : $"{options.Model}:reasoning-effort:{reasoningEffort}";

    var taskType = ResolveTaskType(manifest);
    var communityContext = ResolveCommunityContext(manifest);
    var sliceKind = ResolveSliceKind(manifest);

    // Use the manifest's hash when present; otherwise hash the explicit selection, or the slice item ids
    // when no explicit selection exists.
    var selectedItemIdsHash = string.IsNullOrWhiteSpace(manifest.SelectedItemIdsHash)
        ? ExperimentArtifactSupport.ComputeSelectedItemIdsHash(
            manifest.SelectedItemIds.Count > 0
                ? manifest.SelectedItemIds
                : manifest.Items.Select(item => item.SliceDatasetItemId))
        : manifest.SelectedItemIdsHash;
    var selectedItemIdsCount = manifest.SelectedItemIds.Count > 0
        ? manifest.SelectedItemIds.Count
        : manifest.Items.Count;
    var sampleSize = manifest.SampleSize > 0 ? manifest.SampleSize : manifest.Items.Count;

    var policyMetadata = relativePolicy is null
        ? null
        : new PreparedExperimentEvaluationTimestampPolicyMetadata
        {
            Kind = relativePolicy.Kind,
            Reference = relativePolicy.Reference,
            Offset = relativePolicy.Offset.ToTimeSpan().ToString("c", CultureInfo.InvariantCulture)
        };

    return new PreparedExperimentRunMetadata
    {
        Runner = "match-experiment-runner",
        TaskType = taskType,
        CommunityContext = communityContext,
        Competition = manifest.Competition,
        SourceDatasetName = manifest.SourceDatasetName,
        DatasetName = options.DatasetName ?? manifest.SliceDatasetName,
        PromptKey = options.PromptKey,
        PromptSource = options.PromptSource,
        LangfusePromptName = options.LangfusePromptName,
        LangfusePromptLabel = options.LangfusePromptLabel,
        LangfusePromptVersion = options.LangfusePromptVersion,
        ReasoningEffort = reasoningEffort,
        MaxOutputTokenCount = options.MaxOutputTokenCount,
        SliceKind = sliceKind,
        SliceKey = manifest.SliceKey,
        SourcePoolKey = manifest.SourcePoolKey,
        SelectedItemIdsHash = selectedItemIdsHash,
        SelectedItemIdsCount = selectedItemIdsCount,
        SampleSize = sampleSize,
        MatchCount = manifest.MatchCount,
        Repetitions = manifest.Repetitions,
        EvaluationTimestampPolicyKey = policyKey,
        EvaluationTimestampPolicy = policyMetadata,
        EvaluationTime = explicitEvaluationTime?.ToString("O", CultureInfo.InvariantCulture),
        StartedAtUtc = ExperimentArtifactSupport.FormatStartedAtUtc(DateTimeOffset.UtcNow),
        SampleSeed = manifest.SampleSeed,
        SampleMethod = ResolveSampleMethod(manifest),
        IncludeJustification = options.IncludeJustification,
        PromptVersion = options.PromptKey,
        SourceDatasetKind = DeriveSourceDatasetKind(manifest),
        DatasetItemIdMap = CreateDatasetItemIdMap(manifest),
        Model = options.Model,
        ObservationName = "predict-match",
        RunSubjectKind = "model",
        RunSubjectId = subjectId,
        RunSubjectDisplayName = subjectDisplayName,
        BatchStrategy = options.BatchStrategy,
        BatchSize = options.BatchSize,
        BatchCount = options.BatchCount,
        Parallelism = options.Parallelism
    };
}
| | | 276 | | |
/// <summary>
/// Maps each source dataset item id in the manifest to its slice dataset item id.
/// </summary>
/// <param name="manifest">Manifest whose items are mapped.</param>
/// <returns>
/// A dictionary keyed by source dataset item id (ordinal comparison). When any source id occurs on more
/// than one item, no one-to-one mapping exists and an empty dictionary is returned instead.
/// </returns>
public static IReadOnlyDictionary<string, string> CreateDatasetItemIdMap(PreparedExperimentManifest manifest)
{
    var itemsBySourceId = manifest.Items.ToLookup(item => item.SourceDatasetItemId, StringComparer.Ordinal);

    // A source id shared by several items cannot be mapped unambiguously.
    if (itemsBySourceId.Any(entries => entries.Count() > 1))
    {
        return new Dictionary<string, string>();
    }

    var map = new Dictionary<string, string>(StringComparer.Ordinal);
    foreach (var entries in itemsBySourceId)
    {
        map.Add(entries.Key, entries.Single().SliceDatasetItemId);
    }

    return map;
}
| | | 293 | | |
/// <summary>
/// Derives the list of <c>prefix:value</c> trace tags describing a run.
/// </summary>
/// <remarks>
/// Tags are emitted in a fixed order and only for values that are present: string values must be non-blank,
/// nullable values must be non-null. Numeric values are formatted with the invariant culture so the tags do
/// not depend on the culture of the machine running the experiment. Duplicate tags are removed using
/// ordinal comparison.
/// </remarks>
/// <param name="runMetadata">Run metadata to derive tags from.</param>
/// <returns>The distinct tags.</returns>
public static IReadOnlyList<string> DeriveTraceTags(PreparedExperimentRunMetadata runMetadata)
{
    var tags = new List<string>();

    // Adds "prefix:value" when the value is present and not blank.
    void AddTag(string prefix, string? value)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            tags.Add($"{prefix}:{value}");
        }
    }

    AddTag("task", runMetadata.TaskType);
    AddTag("community", runMetadata.CommunityContext);
    AddTag("slice", runMetadata.SliceKey);
    AddTag("model", runMetadata.Model);
    AddTag("slice-kind", runMetadata.SliceKind);
    AddTag("match-count", runMetadata.MatchCount?.ToString(CultureInfo.InvariantCulture));
    AddTag("repetitions", runMetadata.Repetitions?.ToString(CultureInfo.InvariantCulture));
    AddTag("prompt", runMetadata.PromptKey);
    AddTag("prompt-source", runMetadata.PromptSource);
    AddTag("langfuse-prompt", runMetadata.LangfusePromptName);
    AddTag("langfuse-prompt-label", runMetadata.LangfusePromptLabel);

    // These two are tagged whenever they are non-null, without the blank check applied to the string values.
    if (runMetadata.LangfusePromptVersion is { } promptVersion)
    {
        tags.Add($"langfuse-prompt-version:{promptVersion.ToString(CultureInfo.InvariantCulture)}");
    }

    AddTag("reasoning-effort", runMetadata.ReasoningEffort);

    if (runMetadata.MaxOutputTokenCount is { } maxOutputTokens)
    {
        tags.Add($"max-output-tokens:{maxOutputTokens.ToString(CultureInfo.InvariantCulture)}");
    }

    return tags.Distinct(StringComparer.Ordinal).ToList();
}
| | | 370 | | |
/// <summary>
/// Collects the run metadata values that are propagated as flat string key/value pairs.
/// </summary>
/// <remarks>
/// Each candidate is passed through <c>AddIfValid</c>, so blank values and values longer than 200 characters
/// are left out. Numeric values are formatted with the invariant culture.
/// </remarks>
/// <param name="runMetadata">Run metadata to read the values from.</param>
/// <returns>A dictionary with ordinal key comparison containing the accepted entries.</returns>
public static IReadOnlyDictionary<string, string> DerivePropagatedMetadata(PreparedExperimentRunMetadata runMetadata)
{
    (string Key, string? Value)[] candidates =
    [
        ("communityContext", runMetadata.CommunityContext),
        ("evaluationTime", runMetadata.EvaluationTime),
        ("evaluationTimestampPolicyKey", runMetadata.EvaluationTimestampPolicyKey),
        ("model", runMetadata.Model),
        ("promptKey", runMetadata.PromptKey),
        ("promptSource", runMetadata.PromptSource),
        ("langfusePromptName", runMetadata.LangfusePromptName),
        ("langfusePromptLabel", runMetadata.LangfusePromptLabel),
        ("langfusePromptVersion", runMetadata.LangfusePromptVersion?.ToString(CultureInfo.InvariantCulture)),
        ("reasoningEffort", runMetadata.ReasoningEffort),
        ("maxOutputTokens", runMetadata.MaxOutputTokenCount?.ToString(CultureInfo.InvariantCulture)),
        ("sampleMethod", runMetadata.SampleMethod),
        ("matchCount", runMetadata.MatchCount?.ToString(CultureInfo.InvariantCulture)),
        ("repetitions", runMetadata.Repetitions?.ToString(CultureInfo.InvariantCulture)),
        ("parallelism", runMetadata.Parallelism?.ToString(CultureInfo.InvariantCulture)),
        ("selectedItemIdsHash", runMetadata.SelectedItemIdsHash),
        ("sliceKind", runMetadata.SliceKind),
        ("sliceKey", runMetadata.SliceKey),
        ("startedAtUtc", runMetadata.StartedAtUtc),
        ("task", runMetadata.TaskType),
        ("observationName", runMetadata.ObservationName),
        ("runSubjectKind", runMetadata.RunSubjectKind),
        ("runSubjectId", runMetadata.RunSubjectId),
        ("runSubjectDisplayName", runMetadata.RunSubjectDisplayName)
    ];

    var metadata = new Dictionary<string, string>(StringComparer.Ordinal);
    foreach (var (key, value) in candidates)
    {
        AddIfValid(metadata, key, value);
    }

    return metadata;
}
| | | 400 | | |
/// <summary>
/// Builds the display name of a run subject from the model name and an optional reasoning effort.
/// </summary>
/// <param name="model">Model name.</param>
/// <param name="reasoningEffort">Optional reasoning effort; blank values are treated as absent.</param>
/// <returns>
/// <paramref name="model"/> alone when no reasoning effort is given; otherwise the model followed by the
/// trimmed, lower-cased reasoning effort in parentheses.
/// </returns>
public static string BuildRunSubjectDisplayName(string model, string? reasoningEffort)
{
    if (string.IsNullOrWhiteSpace(reasoningEffort))
    {
        return model;
    }

    var effortLabel = reasoningEffort.Trim().ToLowerInvariant();
    return $"{model} ({effortLabel})";
}
| | | 407 | | |
/// <summary>
/// Determines the task type of a manifest.
/// </summary>
/// <remarks>
/// An explicit, non-blank task type on the manifest wins. Otherwise the type is derived from the resolved
/// slice kind and sample method (compared case-insensitively), checked in this order:
/// <c>community-to-date</c>, <c>repeated-match-slice</c>, then the single/repeated match variants which all
/// map to <c>repeated-match</c>. Anything else is reported as <c>slice</c>.
/// </remarks>
/// <param name="manifest">Manifest to inspect.</param>
/// <returns>The resolved task type.</returns>
public static string ResolveTaskType(PreparedExperimentManifest manifest)
{
    if (!string.IsNullOrWhiteSpace(manifest.TaskType))
    {
        return manifest.TaskType;
    }

    var sliceKind = ResolveSliceKind(manifest);
    var sampleMethod = ResolveSampleMethod(manifest);

    static bool Is(string candidate, string expected) =>
        string.Equals(candidate, expected, StringComparison.OrdinalIgnoreCase);

    bool EitherIs(string expected) => Is(sliceKind, expected) || Is(sampleMethod, expected);

    if (EitherIs("community-to-date"))
    {
        return "community-to-date";
    }

    if (EitherIs("repeated-match-slice"))
    {
        return "repeated-match-slice";
    }

    var isRepeatedMatch = Is(sliceKind, "single-match")
        || Is(sliceKind, "repeated-match")
        || Is(sampleMethod, "repeat-single-match")
        || Is(sampleMethod, "repeated-match");

    return isRepeatedMatch ? "repeated-match" : "slice";
}
| | | 440 | | |
/// <summary>
/// Writes a progress line, prefixed with <c>[progress] </c>, to the standard error stream.
/// </summary>
/// <param name="message">Text to report.</param>
public static void ReportProgress(string message) =>
    Console.Error.WriteLine("[progress] " + message);
| | | 445 | | |
/// <summary>
/// Stores <paramref name="value"/> under <paramref name="key"/> when the value is non-blank and at most
/// 200 characters long; otherwise leaves the dictionary untouched. An existing entry is overwritten.
/// </summary>
/// <param name="metadata">Dictionary to write to.</param>
/// <param name="key">Key of the entry.</param>
/// <param name="value">Candidate value.</param>
private static void AddIfValid(IDictionary<string, string> metadata, string key, string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return;
    }

    if (value.Length > 200)
    {
        return;
    }

    metadata[key] = value;
}
| | | 453 | | |
/// <summary>
/// Returns the source dataset kind of a manifest, which is its resolved task type.
/// </summary>
/// <param name="manifest">Manifest to inspect.</param>
/// <returns>The value produced by <see cref="ResolveTaskType"/>.</returns>
private static string DeriveSourceDatasetKind(PreparedExperimentManifest manifest) =>
    ResolveTaskType(manifest);
| | | 458 | | |
/// <summary>
/// Determines the community context of a manifest.
/// </summary>
/// <remarks>
/// An explicit, non-blank community context wins. Otherwise the last non-empty <c>/</c>-separated segment
/// of the source dataset name is used.
/// </remarks>
/// <param name="manifest">Manifest to inspect.</param>
/// <returns>The resolved community context.</returns>
/// <exception cref="InvalidOperationException">
/// The manifest has neither a community context nor a source dataset name, or the source dataset name
/// consists only of <c>/</c> separators.
/// </exception>
private static string ResolveCommunityContext(PreparedExperimentManifest manifest)
{
    if (!string.IsNullOrWhiteSpace(manifest.CommunityContext))
    {
        return manifest.CommunityContext;
    }

    if (string.IsNullOrWhiteSpace(manifest.SourceDatasetName))
    {
        throw new InvalidOperationException("Slice manifest must contain communityContext or sourceDatasetName.");
    }

    // A name such as "/" or "//" has no segments; report that explicitly instead of letting
    // Last() fail with "Sequence contains no elements".
    var lastSegment = manifest.SourceDatasetName
        .Split('/', StringSplitOptions.RemoveEmptyEntries)
        .LastOrDefault();

    if (lastSegment is null)
    {
        throw new InvalidOperationException(
            $"Slice manifest sourceDatasetName '{manifest.SourceDatasetName}' does not contain a community segment.");
    }

    return lastSegment;
}
| | | 475 | | |
/// <summary>
/// Returns the manifest's sample method, or <c>random-sample</c> when it is blank.
/// </summary>
/// <param name="manifest">Manifest to inspect.</param>
/// <returns>The effective sample method.</returns>
private static string ResolveSampleMethod(PreparedExperimentManifest manifest)
{
    if (string.IsNullOrWhiteSpace(manifest.SampleMethod))
    {
        return "random-sample";
    }

    return manifest.SampleMethod;
}
| | | 482 | | |
/// <summary>
/// Returns the manifest's slice kind, or <c>random-sample</c> when it is blank.
/// </summary>
/// <param name="manifest">Manifest to inspect.</param>
/// <returns>The effective slice kind.</returns>
private static string ResolveSliceKind(PreparedExperimentManifest manifest)
{
    if (string.IsNullOrWhiteSpace(manifest.SliceKind))
    {
        return "random-sample";
    }

    return manifest.SliceKind;
}
| | | 489 | | |
/// <summary>
/// Extracts the repetition index from a dataset item id that contains the
/// <c>__repeated-match-slice__</c> marker.
/// </summary>
/// <param name="datasetItemId">Dataset item id to inspect.</param>
/// <returns>
/// The integer following the last <c>__</c> separator, or <see langword="null"/> when the marker is
/// missing or the trailing text is empty or not an integer.
/// </returns>
private static int? TryParseRepeatedMatchSliceRepetition(string datasetItemId)
{
    if (!datasetItemId.Contains("__repeated-match-slice__", StringComparison.Ordinal))
    {
        return null;
    }

    // The marker itself ends with "__", so a separator is guaranteed to exist at this point.
    var suffixStart = datasetItemId.LastIndexOf("__", StringComparison.Ordinal) + 2;
    var suffix = datasetItemId[suffixStart..];

    // An empty suffix fails to parse and therefore yields null as well.
    if (int.TryParse(suffix, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed))
    {
        return parsed;
    }

    return null;
}
| | | 512 | | } |