| | | 1 | | using System.Globalization; |
| | | 2 | | using System.Text.Json; |
| | | 3 | | using System.Text.Json.Serialization; |
| | | 4 | | |
| | | 5 | | namespace Orchestrator.Commands.Observability.Experiments; |
| | | 6 | | |
/// <summary>
/// Caller-supplied options for a prepared experiment run. <c>NormalizeRunMetadata</c> in this file
/// merges them with the stored run metadata and the slice manifest.
/// </summary>
/// <param name="Model">Requested model; run metadata naming a different model is rejected during normalization.</param>
/// <param name="PromptKey">Prompt key used when the run metadata does not specify one.</param>
/// <param name="IncludeJustification">Not read in this file; NOTE(review): presumably toggles justification output, confirm with the consumers.</param>
/// <param name="EvaluationTime">Raw evaluation time text; not read in this file.</param>
/// <param name="EvaluationPolicyKind">Raw evaluation policy kind; not read in this file.</param>
/// <param name="EvaluationPolicyOffset">Raw evaluation policy offset; not read in this file.</param>
/// <param name="DatasetName">Dataset name preferred over the manifest's slice dataset name when the run metadata has none.</param>
/// <param name="PromptSource">Prompt source used when the run metadata does not specify one.</param>
/// <param name="LangfusePromptName">Langfuse prompt name used when the run metadata does not specify one.</param>
/// <param name="LangfusePromptLabel">Langfuse prompt label used when the run metadata does not specify one.</param>
/// <param name="LangfusePromptVersion">Langfuse prompt version used when the run metadata does not specify one.</param>
/// <param name="BatchStrategy">Batch strategy used when the run metadata does not specify one.</param>
/// <param name="BatchSize">When set, overrides the batch size stored in the run metadata.</param>
/// <param name="BatchCount">When set, overrides the batch count stored in the run metadata.</param>
/// <param name="ReasoningEffort">Reasoning effort used when the run metadata does not specify one.</param>
/// <param name="MaxOutputTokenCount">When set, overrides the output token limit stored in the run metadata.</param>
/// <param name="Parallelism">When set, overrides the parallelism stored in the run metadata.</param>
internal sealed record PreparedExperimentRunOptions(
    string Model,
    string PromptKey,
    bool IncludeJustification,
    string? EvaluationTime,
    string? EvaluationPolicyKind,
    string? EvaluationPolicyOffset,
    string? DatasetName,
    string PromptSource,
    string? LangfusePromptName,
    string? LangfusePromptLabel,
    int? LangfusePromptVersion,
    string BatchStrategy,
    int? BatchSize = null,
    int? BatchCount = null,
    string? ReasoningEffort = null,
    int? MaxOutputTokenCount = null,
    int? Parallelism = null);
| | | 25 | | |
| | | 26 | | internal static class PreparedExperimentCommandSupport |
| | | 27 | | { |
/// <summary>
/// Serializer settings used for prepared-experiment JSON files: web defaults with case-insensitive
/// property matching, camelCase property names, indented output, and null-valued properties
/// omitted when writing.
/// </summary>
internal static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions(JsonSerializerDefaults.Web)
{
    WriteIndented = true,
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    PropertyNameCaseInsensitive = true,
};
| | | 35 | | |
/// <summary>
/// Reads a JSON file and deserializes its content into <typeparamref name="T"/> using <c>JsonOptions</c>.
/// </summary>
/// <typeparam name="T">Type to deserialize the file content into.</typeparam>
/// <param name="path">File path; it is resolved to an absolute path before reading.</param>
/// <param name="cancellationToken">Token passed to the file read.</param>
/// <returns>The deserialized value.</returns>
/// <exception cref="InvalidOperationException">Deserialization produced <c>null</c>.</exception>
public static async Task<T> LoadJsonFileAsync<T>(string path, CancellationToken cancellationToken)
{
    var resolvedPath = Path.GetFullPath(path);
    var json = await File.ReadAllTextAsync(resolvedPath, cancellationToken);

    var result = JsonSerializer.Deserialize<T>(json, JsonOptions);
    if (result is null)
    {
        // The resolved path goes into the message so the failing file can be identified.
        throw new InvalidOperationException($"JSON file '{resolvedPath}' could not be deserialized.");
    }

    return result;
}
| | | 43 | | |
/// <summary>
/// Returns a copy of <paramref name="runMetadata"/> with unset fields filled in from
/// <paramref name="manifest"/> and <paramref name="options"/>.
/// </summary>
/// <remarks>
/// Values already present in the run metadata are kept for most fields. The exceptions visible here are
/// <c>Model</c>, which is always taken from the options, and <c>MaxOutputTokenCount</c>, <c>BatchSize</c>,
/// <c>BatchCount</c> and <c>Parallelism</c>, where a non-null option value takes precedence over the metadata.
/// NOTE(review): several assignments in this listing are cut off at the right margin, so their final
/// fallback values are not described here.
/// </remarks>
/// <param name="runMetadata">Stored run metadata to normalize.</param>
/// <param name="manifest">Slice manifest that supplies dataset, slice and sampling defaults.</param>
/// <param name="options">Options for this run.</param>
/// <returns>A new metadata instance created with a <c>with</c> expression.</returns>
/// <exception cref="InvalidOperationException">
/// The run metadata names a non-blank model that differs from <c>options.Model</c>.
/// </exception>
| | | 44 | | public static PreparedExperimentRunMetadata NormalizeRunMetadata( |
| | | 45 | | PreparedExperimentRunMetadata runMetadata, |
| | | 46 | | PreparedExperimentManifest manifest, |
| | | 47 | | PreparedExperimentRunOptions options) |
| | | 48 | | { |
// A model recorded in the metadata must equal the requested model exactly (ordinal comparison).
| | | 49 | | if (!string.IsNullOrWhiteSpace(runMetadata.Model) |
| | | 50 | | && !string.Equals(runMetadata.Model, options.Model, StringComparison.Ordinal)) |
| | | 51 | | { |
| | | 52 | | throw new InvalidOperationException( |
| | | 53 | | $"Run metadata model '{runMetadata.Model}' does not match requested model '{options.Model}'."); |
| | | 54 | | } |
| | | 55 | | |
// The metadata value wins when present and is trimmed and lower-cased; the option value is used unchanged.
| | | 56 | | var normalizedReasoningEffort = string.IsNullOrWhiteSpace(runMetadata.ReasoningEffort) |
| | | 57 | | ? options.ReasoningEffort |
| | | 58 | | : runMetadata.ReasoningEffort.Trim().ToLowerInvariant(); |
// Default subject id is the model, suffixed with ":reasoning-effort:<effort>" when an effort is known.
| | | 59 | | var runSubjectId = string.IsNullOrWhiteSpace(runMetadata.RunSubjectId) |
| | | 60 | | ? string.IsNullOrWhiteSpace(normalizedReasoningEffort) |
| | | 61 | | ? options.Model |
| | | 62 | | : $"{options.Model}:reasoning-effort:{normalizedReasoningEffort}" |
| | | 63 | | : runMetadata.RunSubjectId; |
| | | 64 | | var runSubjectDisplayName = string.IsNullOrWhiteSpace(runMetadata.RunSubjectDisplayName) |
| | | 65 | | ? PreparedExperimentSupport.BuildRunSubjectDisplayName(options.Model, normalizedReasoningEffort) |
| | | 66 | | : runMetadata.RunSubjectDisplayName; |
| | | 67 | | |
| | | 68 | | return runMetadata with |
| | | 69 | | { |
| | | 70 | | Runner = string.IsNullOrWhiteSpace(runMetadata.Runner) ? "match-experiment-runner" : runMetadata.Runner, |
| | | 71 | | TaskType = string.IsNullOrWhiteSpace(runMetadata.TaskType) |
| | | 72 | | ? PreparedExperimentSupport.ResolveTaskType(manifest) |
| | | 73 | | : runMetadata.TaskType, |
| | | 74 | | CommunityContext = string.IsNullOrWhiteSpace(runMetadata.CommunityContext) |
| | | 75 | | ? manifest.CommunityContext |
| | | 76 | | : runMetadata.CommunityContext, |
// Always the requested model; a conflicting metadata value was rejected above.
| | | 77 | | Model = options.Model, |
| | | 78 | | Competition = string.IsNullOrWhiteSpace(runMetadata.Competition) ? manifest.Competition : runMetadata.Compet
| | | 79 | | SourceDatasetName = string.IsNullOrWhiteSpace(runMetadata.SourceDatasetName) |
| | | 80 | | ? manifest.SourceDatasetName |
| | | 81 | | : runMetadata.SourceDatasetName, |
| | | 82 | | DatasetName = string.IsNullOrWhiteSpace(runMetadata.DatasetName) |
| | | 83 | | ? options.DatasetName ?? manifest.SliceDatasetName |
| | | 84 | | : runMetadata.DatasetName, |
| | | 85 | | PromptKey = string.IsNullOrWhiteSpace(runMetadata.PromptKey) ? options.PromptKey : runMetadata.PromptKey, |
| | | 86 | | PromptSource = string.IsNullOrWhiteSpace(runMetadata.PromptSource) ? options.PromptSource : runMetadata.Prom
| | | 87 | | LangfusePromptName = string.IsNullOrWhiteSpace(runMetadata.LangfusePromptName) ? options.LangfusePromptName
| | | 88 | | LangfusePromptLabel = string.IsNullOrWhiteSpace(runMetadata.LangfusePromptLabel) ? options.LangfusePromptLab
| | | 89 | | LangfusePromptVersion = runMetadata.LangfusePromptVersion ?? options.LangfusePromptVersion, |
| | | 90 | | ReasoningEffort = normalizedReasoningEffort, |
// A non-null option value overrides the metadata value here.
| | | 91 | | MaxOutputTokenCount = options.MaxOutputTokenCount ?? runMetadata.MaxOutputTokenCount, |
| | | 92 | | SliceKind = string.IsNullOrWhiteSpace(runMetadata.SliceKind) ? manifest.SliceKind : runMetadata.SliceKind, |
| | | 93 | | SliceKey = string.IsNullOrWhiteSpace(runMetadata.SliceKey) ? manifest.SliceKey : runMetadata.SliceKey, |
| | | 94 | | SourcePoolKey = string.IsNullOrWhiteSpace(runMetadata.SourcePoolKey) ? manifest.SourcePoolKey : runMetadata.
// Falls back to the manifest's selected-id count, then to the manifest item count.
| | | 95 | | SelectedItemIdsCount = runMetadata.SelectedItemIdsCount > 0 |
| | | 96 | | ? runMetadata.SelectedItemIdsCount |
| | | 97 | | : manifest.SelectedItemIds.Count > 0 ? manifest.SelectedItemIds.Count : manifest.Items.Count, |
// Precedence: metadata hash, then manifest hash, then a hash computed from the selected ids
// (or from the items' slice dataset item ids when no ids are selected).
| | | 98 | | SelectedItemIdsHash = string.IsNullOrWhiteSpace(runMetadata.SelectedItemIdsHash) |
| | | 99 | | ? string.IsNullOrWhiteSpace(manifest.SelectedItemIdsHash) |
| | | 100 | | ? ExperimentArtifactSupport.ComputeSelectedItemIdsHash( |
| | | 101 | | manifest.SelectedItemIds.Count > 0 |
| | | 102 | | ? manifest.SelectedItemIds |
| | | 103 | | : manifest.Items.Select(item => item.SliceDatasetItemId)) |
| | | 104 | | : manifest.SelectedItemIdsHash |
| | | 105 | | : runMetadata.SelectedItemIdsHash, |
| | | 106 | | SampleSize = runMetadata.SampleSize > 0 ? runMetadata.SampleSize : manifest.SampleSize > 0 ? manifest.Sample
| | | 107 | | MatchCount = runMetadata.MatchCount ?? manifest.MatchCount, |
| | | 108 | | Repetitions = runMetadata.Repetitions ?? manifest.Repetitions, |
| | | 109 | | SampleSeed = runMetadata.SampleSeed ?? manifest.SampleSeed, |
| | | 110 | | SampleMethod = string.IsNullOrWhiteSpace(runMetadata.SampleMethod) ? manifest.SampleMethod : runMetadata.Sam
// With no prompt version recorded, the prompt key (metadata first, then options) stands in for it.
| | | 111 | | PromptVersion = string.IsNullOrWhiteSpace(runMetadata.PromptVersion) |
| | | 112 | | ? string.IsNullOrWhiteSpace(runMetadata.PromptKey) ? options.PromptKey : runMetadata.PromptKey |
| | | 113 | | : runMetadata.PromptVersion, |
// Uses the same resolved task type as the TaskType fallback above.
| | | 114 | | SourceDatasetKind = string.IsNullOrWhiteSpace(runMetadata.SourceDatasetKind) |
| | | 115 | | ? PreparedExperimentSupport.ResolveTaskType(manifest) |
| | | 116 | | : runMetadata.SourceDatasetKind, |
| | | 117 | | DatasetItemIdMap = runMetadata.DatasetItemIdMap.Count > 0 |
| | | 118 | | ? runMetadata.DatasetItemIdMap |
| | | 119 | | : PreparedExperimentSupport.CreateDatasetItemIdMap(manifest), |
| | | 120 | | BatchStrategy = string.IsNullOrWhiteSpace(runMetadata.BatchStrategy) ? options.BatchStrategy : runMetadata.B
// Non-null option values override the metadata for the three settings below.
| | | 121 | | BatchSize = options.BatchSize ?? runMetadata.BatchSize, |
| | | 122 | | BatchCount = options.BatchCount ?? runMetadata.BatchCount, |
| | | 123 | | Parallelism = options.Parallelism ?? runMetadata.Parallelism, |
| | | 124 | | RunSubjectId = runSubjectId, |
| | | 125 | | RunSubjectDisplayName = runSubjectDisplayName |
| | | 126 | | }; |
| | | 127 | | } |
| | | 128 | | |
/// <summary>
/// Reads the explicit evaluation time recorded in the run metadata, if there is one.
/// </summary>
/// <param name="runMetadata">Run metadata whose <c>EvaluationTime</c> text is inspected.</param>
/// <returns>
/// <c>null</c> when the evaluation time is null, empty or whitespace; otherwise the parsed timestamp.
/// </returns>
public static DateTimeOffset? ParseExplicitEvaluationTime(PreparedExperimentRunMetadata runMetadata)
{
    var rawEvaluationTime = runMetadata.EvaluationTime;
    if (string.IsNullOrWhiteSpace(rawEvaluationTime))
    {
        return null;
    }

    // An invariant-culture round-trip parse is tried first; text it rejects is handed to EvaluationTimeParser.
    var isRoundtripFormat = DateTimeOffset.TryParse(
        rawEvaluationTime,
        CultureInfo.InvariantCulture,
        DateTimeStyles.RoundtripKind,
        out var roundtripValue);
    if (!isRoundtripFormat)
    {
        return Commands.Observability.EvaluationTimeParser.Parse(rawEvaluationTime);
    }

    return roundtripValue;
}
| | | 147 | | |
/// <summary>
/// Converts the evaluation timestamp policy stored in the run metadata into an
/// <see cref="EvaluationTimestampPolicy"/>.
/// </summary>
/// <param name="runMetadata">Run metadata that must carry an evaluation timestamp policy.</param>
/// <returns>The policy produced by <c>EvaluationTimestampPolicyParser.Parse</c> from the stored kind and offset.</returns>
/// <exception cref="InvalidOperationException">
/// The policy is missing, or its reference differs (ignoring case) from
/// <c>EvaluationTimestampPolicy.StartsAtReference</c>.
/// </exception>
public static EvaluationTimestampPolicy ParseEvaluationTimestampPolicy(PreparedExperimentRunMetadata runMetadata)
{
    var storedPolicy = runMetadata.EvaluationTimestampPolicy
        ?? throw new InvalidOperationException("Run metadata must contain evaluationTimestampPolicy.");

    // Only the starts-at reference is accepted; the comparison ignores case.
    var usesStartsAtReference = string.Equals(
        storedPolicy.Reference,
        EvaluationTimestampPolicy.StartsAtReference,
        StringComparison.OrdinalIgnoreCase);
    if (!usesStartsAtReference)
    {
        throw new InvalidOperationException(
            $"Evaluation policy reference must be '{EvaluationTimestampPolicy.StartsAtReference}'.");
    }

    return EvaluationTimestampPolicyParser.Parse(storedPolicy.Kind, storedPolicy.Offset);
}
| | | 168 | | |
/// <summary>
/// Checks that a slice manifest is usable; the first violation found is reported by throwing.
/// </summary>
/// <param name="manifest">Manifest whose <c>Items</c> are validated.</param>
/// <exception cref="InvalidOperationException">
/// The manifest has no items, or an item has a blank source dataset item id, a blank or duplicate
/// slice dataset item id, a blank home or away team, or a matchday below 1.
/// </exception>
| | | 169 | | public static void ValidateManifest(PreparedExperimentManifest manifest) |
| | | 170 | | { |
| | | 171 | | if (manifest.Items.Count == 0) |
| | | 172 | | { |
| | | 173 | | throw new InvalidOperationException("Slice manifest must contain at least one item."); |
| | | 174 | | } |
| | | 175 | | |
// Slice dataset item ids seen so far; compared ordinally to detect duplicates.
| | | 176 | | var seenHostedIds = new HashSet<string>(StringComparer.Ordinal); |
| | | 177 | | foreach (var item in manifest.Items) |
| | | 178 | | { |
| | | 179 | | if (string.IsNullOrWhiteSpace(item.SourceDatasetItemId)) |
| | | 180 | | { |
| | | 181 | | throw new InvalidOperationException("Each slice manifest item must contain sourceDatasetItemId."); |
| | | 182 | | } |
| | | 183 | | |
| | | 184 | | if (string.IsNullOrWhiteSpace(item.SliceDatasetItemId)) |
| | | 185 | | { |
| | | 186 | | throw new InvalidOperationException("Each slice manifest item must contain sliceDatasetItemId."); |
| | | 187 | | } |
| | | 188 | | |
// HashSet.Add returns false when the id is already present.
| | | 189 | | if (!seenHostedIds.Add(item.SliceDatasetItemId)) |
| | | 190 | | { |
| | | 191 | | throw new InvalidOperationException($"Duplicate slice dataset item id '{item.SliceDatasetItemId}' found
| | | 192 | | } |
| | | 193 | | |
| | | 194 | | if (string.IsNullOrWhiteSpace(item.HomeTeam) || string.IsNullOrWhiteSpace(item.AwayTeam)) |
| | | 195 | | { |
| | | 196 | | throw new InvalidOperationException("Each slice manifest item must contain non-empty homeTeam and awayTe
| | | 197 | | } |
| | | 198 | | |
| | | 199 | | if (item.Matchday < 1) |
| | | 200 | | { |
| | | 201 | | throw new InvalidOperationException($"Slice manifest item '{item.SliceDatasetItemId}' has an invalid mat
| | | 202 | | } |
| | | 203 | | } |
| | | 204 | | } |
| | | 205 | | |
/// <summary>
/// Verifies that the task type resolved from the manifest is the one the calling command expects.
/// </summary>
/// <param name="manifest">Manifest passed to <c>PreparedExperimentSupport.ResolveTaskType</c>.</param>
/// <param name="expectedTaskType">Task type required by the caller; compared ignoring case.</param>
/// <exception cref="InvalidOperationException">The resolved task type differs from the expected one.</exception>
public static void EnsureTaskType(PreparedExperimentManifest manifest, string expectedTaskType)
{
    var resolvedTaskType = PreparedExperimentSupport.ResolveTaskType(manifest);
    if (string.Equals(resolvedTaskType, expectedTaskType, StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    throw new InvalidOperationException(
        $"The manifest describes a '{resolvedTaskType}' dataset, but this command expects '{expectedTaskType}'.");
}
| | | 215 | | } |