| | | 1 | | using System.Globalization; |
| | | 2 | | using System.Text.Json; |
| | | 3 | | using EHonda.KicktippAi.Core; |
| | | 4 | | using Microsoft.Extensions.Logging; |
| | | 5 | | using NodaTime; |
| | | 6 | | using Orchestrator.Commands.Observability.Experiments; |
| | | 7 | | using Orchestrator.Commands.Observability.ExportExperimentDataset; |
| | | 8 | | using Orchestrator.Infrastructure.Factories; |
| | | 9 | | using Spectre.Console; |
| | | 10 | | using Spectre.Console.Cli; |
| | | 11 | | |
| | | 12 | | namespace Orchestrator.Commands.Observability.PrepareRepeatedMatchSlice; |
| | | 13 | | |
| | | 14 | | public sealed class PrepareRepeatedMatchSliceCommand : AsyncCommand<PrepareRepeatedMatchSliceSettings> |
| | | 15 | | { |
| | | 16 | | private readonly IAnsiConsole _console; |
| | | 17 | | private readonly IFirebaseServiceFactory _firebaseServiceFactory; |
| | | 18 | | private readonly ILogger<PrepareRepeatedMatchSliceCommand> _logger; |
| | | 19 | | |
/// <summary>
/// Creates the command and stores the services it works with.
/// </summary>
/// <param name="console">Console that receives the JSON summary and the error output.</param>
/// <param name="firebaseServiceFactory">Factory used to create the match outcome repository.</param>
/// <param name="logger">Logger that records failures of the command.</param>
public PrepareRepeatedMatchSliceCommand(
    IAnsiConsole console,
    IFirebaseServiceFactory firebaseServiceFactory,
    ILogger<PrepareRepeatedMatchSliceCommand> logger)
    => (_console, _firebaseServiceFactory, _logger) = (console, firebaseServiceFactory, logger);
| | | 29 | | |
/// <summary>
/// Prepares a repeated-match-slice experiment artifact. The method loads completed match outcomes
/// for the requested matchdays, picks <c>settings.MatchCount</c> of them with a seeded shuffle,
/// repeats every pick <c>settings.Repetitions</c> times, and writes <c>slice-dataset.json</c> and
/// <c>slice-manifest.json</c> into the resolved output directory. A JSON summary of the run is
/// written to the console afterwards.
/// </summary>
/// <remarks>
/// When no sample seed is supplied, the current UTC date formatted as <c>yyyyMMdd</c> is parsed
/// and used as the seed, so the default selection differs from one UTC day to the next.
/// When no slice key is supplied, it defaults to <c>random-{MatchCount}x{Repetitions}-seed-{seed}</c>.
/// The selected matches are ordered by their source dataset item id (ordinal) before being repeated.
/// NOTE(review): the catch-all handler also turns a cancellation thrown inside the try block into
/// exit code 1 - confirm that this is intended.
/// </remarks>
/// <param name="context">Command context supplied by the CLI framework; not read by this method.</param>
/// <param name="settings">Parsed command settings (matchdays, counts, seed, keys, output options).</param>
/// <param name="cancellationToken">Token passed on to the repository load and the file writes.</param>
/// <returns>
/// <c>0</c> on success; <c>1</c> when any exception is thrown. The exception is logged and its
/// message is printed to the console.
/// </returns>
| | | 30 | | protected override async Task<int> ExecuteAsync( |
| | | 31 | | CommandContext context, |
| | | 32 | | PrepareRepeatedMatchSliceSettings settings, |
| | | 33 | | CancellationToken cancellationToken) |
| | | 34 | | { |
| | | 35 | | try |
| | | 36 | | { |
| | 1 | 37 | | var matchOutcomeRepository = _firebaseServiceFactory.CreateMatchOutcomeRepository(); |
| | 1 | 38 | | var matchdays = ParseMatchdays(settings.Matchdays); |
| | 1 | 39 | | var startsAfter = EvaluationTimeParser.ParseOrNull(settings.StartsAfter); |
| | 1 | 40 | | var normalizedStartsAfter = EvaluationTimeParser.NormalizeOrNull(settings.StartsAfter); |
| | 1 | 41 | | var availableItems = await LoadSourceItemsAsync( |
| | 1 | 42 | | matchOutcomeRepository, |
| | 1 | 43 | | settings.CommunityContext, |
| | 1 | 44 | | matchdays, |
| | 1 | 45 | | startsAfter, |
| | 1 | 46 | | cancellationToken); |
| | | 47 | | |
| | 1 | 48 | | if (availableItems.Count == 0) |
| | | 49 | | { |
| | 0 | 50 | | throw new InvalidOperationException("No completed historical matches were found for the requested repeat |
| | | 51 | | } |
| | | 52 | | |
| | 1 | 53 | | var sampleSeed = settings.SampleSeed ?? int.Parse( |
| | 1 | 54 | | DateTimeOffset.UtcNow.ToString("yyyyMMdd", CultureInfo.InvariantCulture), |
| | 1 | 55 | | CultureInfo.InvariantCulture); |
| | 1 | 56 | | var sliceKey = string.IsNullOrWhiteSpace(settings.SliceKey) |
| | 1 | 57 | | ? $"random-{settings.MatchCount}x{settings.Repetitions}-seed-{sampleSeed}" |
| | 1 | 58 | | : settings.SliceKey.Trim(); |
| | 1 | 59 | | var sourcePoolKey = string.IsNullOrWhiteSpace(settings.SourcePoolKey) |
| | 1 | 60 | | ? BuildDefaultSourcePoolKey(matchdays, startsAfter) |
| | 1 | 61 | | : settings.SourcePoolKey.Trim(); |
| | 1 | 62 | | var selectedItems = SelectRandomItems(availableItems, settings.MatchCount, sampleSeed) |
| | 1 | 63 | | .OrderBy(item => item.SourceDatasetItemId, StringComparer.Ordinal) |
| | 1 | 64 | | .ToList(); |
| | 1 | 65 | | var repeatedItems = ExpandRepeatedItems(selectedItems, sliceKey, settings.Repetitions); |
| | | 66 | | |
| | 1 | 67 | | var sourceDatasetName = ExperimentArtifactSupport.BuildSourceDatasetName(settings.CommunityContext); |
| | 1 | 68 | | var sliceDatasetName = settings.DatasetName |
| | 1 | 69 | | ?? $"{sourceDatasetName}/repeated-match-slices/{sourcePoolKey}/{sliceKey}"; |
| | 1 | 70 | | var datasetDescription = string.IsNullOrWhiteSpace(settings.DatasetDescription) |
| | 1 | 71 | | ? null |
| | 1 | 72 | | : settings.DatasetDescription.Trim(); |
| | 1 | 73 | | var outputDirectory = ResolveOutputDirectory(settings.OutputDirectory, settings.CommunityContext, sourcePool |
| | 1 | 74 | | var sliceArtifactPath = Path.Combine(outputDirectory, "slice-dataset.json"); |
| | 1 | 75 | | var sliceManifestPath = Path.Combine(outputDirectory, "slice-manifest.json"); |
| | | 76 | | |
| | 1 | 77 | | Directory.CreateDirectory(outputDirectory); |
| | | 78 | | |
| | 1 | 79 | | var bundle = PreparedExperimentBundleBuilder.Build( |
| | 1 | 80 | | repeatedItems, |
| | 1 | 81 | | settings.CommunityContext, |
| | 1 | 82 | | sourceDatasetName, |
| | 1 | 83 | | sliceDatasetName, |
| | 1 | 84 | | sliceKey, |
| | 1 | 85 | | "repeated-match-slice", |
| | 1 | 86 | | "repeated-match-slice", |
| | 1 | 87 | | sourcePoolKey, |
| | 1 | 88 | | sampleSeed, |
| | 1 | 89 | | datasetDescription, |
| | 1 | 90 | | BuildDatasetMetadata(settings.MatchCount, settings.Repetitions, normalizedStartsAfter, datasetDescriptio |
| | 1 | 91 | | settings.MatchCount, |
| | 1 | 92 | | settings.Repetitions); |
| | 1 | 93 | | var manifest = bundle.Manifest with |
| | 1 | 94 | | { |
| | 1 | 95 | | TaskType = "repeated-match-slice", |
| | 1 | 96 | | StartsAfter = normalizedStartsAfter |
| | 1 | 97 | | }; |
| | | 98 | | |
| | 1 | 99 | | await WriteJsonFileAsync(sliceArtifactPath, bundle.Artifact, cancellationToken); |
| | 1 | 100 | | await WriteJsonFileAsync(sliceManifestPath, manifest, cancellationToken); |
| | | 101 | | |
| | 1 | 102 | | var summary = new |
| | 1 | 103 | | { |
| | 1 | 104 | | mode = "repeated-match-slice", |
| | 1 | 105 | | sourceDatasetName, |
| | 1 | 106 | | datasetName = manifest.SliceDatasetName, |
| | 1 | 107 | | manifest.CommunityContext, |
| | 1 | 108 | | manifest.SourcePoolKey, |
| | 1 | 109 | | manifest.SliceKey, |
| | 1 | 110 | | manifest.SliceKind, |
| | 1 | 111 | | manifest.SampleMethod, |
| | 1 | 112 | | settings.MatchCount, |
| | 1 | 113 | | settings.Repetitions, |
| | 1 | 114 | | manifest.SampleSize, |
| | 1 | 115 | | manifest.SampleSeed, |
| | 1 | 116 | | matchdays, |
| | 1 | 117 | | manifest.StartsAfter, |
| | 1 | 118 | | datasetDescription = bundle.Artifact.DatasetDescription, |
| | 1 | 119 | | datasetMetadata = bundle.Artifact.DatasetMetadata, |
| | 1 | 120 | | manifest.SelectedItemIds, |
| | 1 | 121 | | manifest.SelectedItemIdsHash, |
| | 1 | 122 | | outputDirectory, |
| | 1 | 123 | | sliceArtifactPath, |
| | 1 | 124 | | sliceManifestPath |
| | 1 | 125 | | }; |
| | | 126 | | |
| | 1 | 127 | | _console.WriteLine(JsonSerializer.Serialize(summary, PreparedExperimentCommandSupport.JsonOptions)); |
| | 1 | 128 | | return 0; |
| | | 129 | | } |
| | 0 | 130 | | catch (Exception ex) |
| | | 131 | | { |
| | 0 | 132 | | _logger.LogError(ex, "Error preparing repeated-match-slice experiment artifact"); |
| | 0 | 133 | | _console.MarkupLine($"[red]Error:[/] {Markup.Escape(ex.Message)}"); |
| | 0 | 134 | | return 1; |
| | | 135 | | } |
| | 1 | 136 | | } |
| | | 137 | | |
/// <summary>
/// Loads the match outcomes of every requested matchday from the repository and converts the
/// completed ones into source items for sampling.
/// </summary>
/// <param name="matchOutcomeRepository">Repository queried once per matchday.</param>
/// <param name="communityContext">Community context passed to the repository query.</param>
/// <param name="matchdays">Matchdays to load, queried one after another in the given order.</param>
/// <param name="startsAfter">Optional cutoff; matches starting at or before it are dropped.</param>
/// <param name="cancellationToken">Cancellation token for the repository calls.</param>
/// <returns>The collected source items, ordered by source dataset item id (ordinal comparison).</returns>
| | | 138 | | private static async Task<IReadOnlyList<PreparedExperimentSourceItem>> LoadSourceItemsAsync( |
| | | 139 | | IMatchOutcomeRepository matchOutcomeRepository, |
| | | 140 | | string communityContext, |
| | | 141 | | IReadOnlyList<int> matchdays, |
| | | 142 | | DateTimeOffset? startsAfter, |
| | | 143 | | CancellationToken cancellationToken) |
| | | 144 | | { |
| | 1 | 145 | | var sourceItems = new List<PreparedExperimentSourceItem>(); |
// Convert the cutoff once so it can be compared against the outcome start instants below.
| | 1 | 146 | | var startsAfterInstant = startsAfter is null |
| | 1 | 147 | | ? (Instant?)null |
| | 1 | 148 | | : Instant.FromDateTimeOffset(startsAfter.Value); |
| | | 149 | | |
| | 1 | 150 | | foreach (var matchday in matchdays) |
| | | 151 | | { |
| | 1 | 152 | | var outcomes = await matchOutcomeRepository.GetMatchdayOutcomesAsync(matchday, communityContext, cancellatio |
| | 1 | 153 | | foreach (var outcome in outcomes) |
| | | 154 | | { |
// Skip matches that do not have a complete final score.
| | 1 | 155 | | if (!outcome.HasOutcome || outcome.HomeGoals is null || outcome.AwayGoals is null) |
| | | 156 | | { |
| | | 157 | | continue; |
| | | 158 | | } |
| | | 159 | | |
// Keep only matches that start strictly after the cutoff, when one is given.
| | 1 | 160 | | if (startsAfterInstant is not null && outcome.StartsAt.ToInstant() <= startsAfterInstant.Value) |
| | | 161 | | { |
| | | 162 | | continue; |
| | | 163 | | } |
| | | 164 | | |
| | 1 | 165 | | var datasetItem = ExperimentArtifactSupport.BuildHostedDatasetItem(outcome); |
// NOTE(review): the same dataset item id fills the first three positional arguments;
// confirm against the parameter order of PreparedExperimentSourceItem.
| | 1 | 166 | | sourceItems.Add(new PreparedExperimentSourceItem( |
| | 1 | 167 | | datasetItem.Id, |
| | 1 | 168 | | datasetItem.Id, |
| | 1 | 169 | | datasetItem.Id, |
| | 1 | 170 | | datasetItem.Metadata.Competition, |
| | 1 | 171 | | datasetItem.Metadata.Season, |
| | 1 | 172 | | datasetItem.Metadata.CommunityContext, |
| | 1 | 173 | | datasetItem.Metadata.Matchday, |
| | 1 | 174 | | datasetItem.Metadata.MatchdayLabel, |
| | 1 | 175 | | datasetItem.Metadata.HomeTeam, |
| | 1 | 176 | | datasetItem.Metadata.AwayTeam, |
| | 1 | 177 | | GetStartsAt(datasetItem), |
| | 1 | 178 | | datasetItem.Metadata.TippSpielId, |
| | 1 | 179 | | datasetItem.ExpectedOutput.HomeGoals, |
| | 1 | 180 | | datasetItem.ExpectedOutput.AwayGoals)); |
| | | 181 | | } |
| | | 182 | | } |
| | | 183 | | |
// Ordinal ordering by id gives the sampler a stable input order for a given set of items.
| | 1 | 184 | | return sourceItems |
| | 1 | 185 | | .OrderBy(item => item.SourceDatasetItemId, StringComparer.Ordinal) |
| | 1 | 186 | | .ToList(); |
| | 1 | 187 | | } |
| | | 188 | | |
/// <summary>
/// Produces <paramref name="repetitions"/> copies of every selected item, each with its own
/// slice dataset item id and 1-based fixture and repetition indices.
/// </summary>
/// <param name="selectedItems">Sampled items; their position determines the fixture index.</param>
/// <param name="sliceKey">Slice key that becomes part of every generated item id.</param>
/// <param name="repetitions">Number of copies to create per selected item.</param>
/// <returns>The copies, grouped by fixture and ordered by repetition within each fixture.</returns>
private static IReadOnlyList<PreparedExperimentSourceItem> ExpandRepeatedItems(
    IReadOnlyList<PreparedExperimentSourceItem> selectedItems,
    string sliceKey,
    int repetitions)
{
    var fixtureCount = selectedItems.Count;
    var expanded = new List<PreparedExperimentSourceItem>(fixtureCount * repetitions);

    // Both indices are 1-based because they end up in the generated ids and in the items themselves.
    var fixtureNumber = 0;
    foreach (var fixture in selectedItems)
    {
        fixtureNumber++;

        for (var repetitionNumber = 1; repetitionNumber <= repetitions; repetitionNumber++)
        {
            var itemId = ExperimentArtifactSupport.BuildRepeatedMatchSliceDatasetItemId(
                fixture.SourceDatasetItemId,
                sliceKey,
                fixtureNumber,
                fixtureCount,
                repetitionNumber,
                repetitions);

            var copy = fixture with
            {
                SliceDatasetItemId = itemId,
                FixtureIndex = fixtureNumber,
                RepetitionIndex = repetitionNumber
            };
            expanded.Add(copy);
        }
    }

    return expanded;
}
| | | 218 | | |
/// <summary>
/// Reads the non-blank <c>startsAt</c> string from the JSON input of a dataset item.
/// </summary>
/// <param name="item">Dataset item whose <c>Input</c> is expected to be a JSON object.</param>
/// <returns>The raw <c>startsAt</c> string exactly as stored in the input.</returns>
/// <exception cref="InvalidOperationException">
/// The input is not an object, has no <c>startsAt</c> property, or the property is not a non-blank string.
/// </exception>
private static string GetStartsAt(HostedMatchExperimentDatasetItem item)
{
    var input = item.Input;

    if (input.ValueKind == JsonValueKind.Object
        && input.TryGetProperty("startsAt", out var property)
        && property is { ValueKind: JsonValueKind.String }
        && property.GetString() is { } text
        && !string.IsNullOrWhiteSpace(text))
    {
        return text;
    }

    throw new InvalidOperationException($"Dataset item '{item.Id}' is missing input.startsAt.");
}
| | | 231 | | |
/// <summary>
/// Picks <paramref name="count"/> items by shuffling a copy of <paramref name="items"/> with a
/// seeded random generator and taking the leading entries.
/// </summary>
/// <param name="items">Candidate items; the caller's list is not modified.</param>
/// <param name="count">Number of items to pick.</param>
/// <param name="seed">Seed for the random generator that drives the shuffle.</param>
/// <returns>The first <paramref name="count"/> items of the shuffled copy.</returns>
/// <exception cref="InvalidOperationException">Fewer than <paramref name="count"/> items are available.</exception>
private static IReadOnlyList<PreparedExperimentSourceItem> SelectRandomItems(
    IReadOnlyList<PreparedExperimentSourceItem> items,
    int count,
    int seed)
{
    var available = items.Count;
    if (count > available)
    {
        throw new InvalidOperationException(
            $"Requested match count {count} exceeds available dataset item count {items.Count}.");
    }

    var pool = items.ToArray();
    var generator = new Random(seed);

    // Shuffle from the back: the slot at (remaining - 1) swaps with a random slot in [0, remaining).
    // The whole array is shuffled, in this exact order, so a seed keeps selecting the same items.
    for (var remaining = pool.Length; remaining > 1; remaining--)
    {
        var pick = generator.Next(remaining);
        var last = remaining - 1;

        var held = pool[last];
        pool[last] = pool[pick];
        pool[pick] = held;
    }

    return pool.Take(count).ToList();
}
| | | 253 | | |
/// <summary>
/// Parses a comma-separated list of matchday numbers.
/// </summary>
/// <param name="matchdays">
/// Text such as <c>"1, 2,5"</c>. Empty segments and surrounding whitespace are ignored.
/// A null, empty or whitespace-only value selects matchdays 1 through 34.
/// </param>
/// <returns>The distinct matchdays in ascending order, as a read-only list.</returns>
/// <exception cref="FormatException">
/// A segment is not a valid 32-bit integer, including values that are out of range. The message
/// names the offending segment.
/// </exception>
private static IReadOnlyList<int> ParseMatchdays(string? matchdays)
{
    if (string.IsNullOrWhiteSpace(matchdays))
    {
        return Enumerable.Range(1, 34).ToList().AsReadOnly();
    }

    // A sorted set yields the distinct, ascending sequence directly.
    var parsed = new SortedSet<int>();
    var segments = matchdays.Split(
        ',',
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

    foreach (var segment in segments)
    {
        // TryParse lets the error name the offending segment; the message is shown to the CLI user.
        if (!int.TryParse(segment, NumberStyles.Integer, CultureInfo.InvariantCulture, out var matchday))
        {
            throw new FormatException(
                $"Invalid matchday '{segment}': expected a comma-separated list of whole numbers, for example \"1,2,3\".");
        }

        parsed.Add(matchday);
    }

    return parsed.ToList().AsReadOnly();
}
| | | 269 | | |
/// <summary>
/// Derives the default source pool key from the matchday selection and the optional cutoff.
/// </summary>
/// <param name="matchdays">Matchdays in the pool; exactly 1 through 34 in order yields <c>all-matchdays</c>.</param>
/// <param name="startsAfter">Optional cutoff; when set, a lower-case UTC timestamp suffix is appended.</param>
/// <returns>
/// <c>all-matchdays</c> or <c>matchdays-{a}-{b}-...</c>, followed by
/// <c>-after-{yyyyMMdd}t{HHmmss}z</c> when a cutoff is given.
/// </returns>
private static string BuildDefaultSourcePoolKey(IReadOnlyList<int> matchdays, DateTimeOffset? startsAfter)
{
    string poolKey;
    if (matchdays.SequenceEqual(Enumerable.Range(1, 34)))
    {
        poolKey = "all-matchdays";
    }
    else
    {
        poolKey = "matchdays-" + string.Join("-", matchdays);
    }

    if (startsAfter is not { } cutoff)
    {
        return poolKey;
    }

    // The cutoff is rendered in UTC so the key does not depend on the offset it was entered with.
    var stamp = cutoff.UtcDateTime
        .ToString("yyyyMMdd't'HHmmss'z'", CultureInfo.InvariantCulture)
        .ToLowerInvariant();

    return poolKey + "-after-" + stamp;
}
| | | 287 | | |
/// <summary>
/// Builds the metadata dictionary stored with the slice dataset.
/// </summary>
/// <param name="matchCount">Number of distinct matches in the slice.</param>
/// <param name="repetitions">Number of repetitions per match.</param>
/// <param name="startsAfter">Optional cutoff text; added under <c>startsAfter</c> only when not blank.</param>
/// <param name="datasetDescription">Optional description; added under <c>datasetDescription</c> only when not blank.</param>
/// <returns>
/// A dictionary with <c>matchCount</c>, <c>repetitions</c> and <c>predictionCount</c>
/// (their product), plus the optional entries.
/// </returns>
private static IReadOnlyDictionary<string, object?> BuildDatasetMetadata(
    int matchCount,
    int repetitions,
    string? startsAfter,
    string? datasetDescription)
{
    var entries = new Dictionary<string, object?>();
    entries["matchCount"] = matchCount;
    entries["repetitions"] = repetitions;
    entries["predictionCount"] = matchCount * repetitions;

    AddWhenPresent("startsAfter", startsAfter);
    AddWhenPresent("datasetDescription", datasetDescription);

    return entries;

    // Optional text values are recorded only when they carry non-whitespace content.
    void AddWhenPresent(string key, string? text)
    {
        if (!string.IsNullOrWhiteSpace(text))
        {
            entries[key] = text;
        }
    }
}
| | | 313 | | |
/// <summary>
/// Determines the absolute directory that receives the slice files.
/// </summary>
/// <param name="outputDirectoryOverride">Explicit directory; used as-is when it is not blank.</param>
/// <param name="communityContext">Community context; slugified into a path segment of the default location.</param>
/// <param name="sourcePoolKey">Source pool key; a path segment of the default location.</param>
/// <param name="sliceKey">Slice key; the last path segment of the default location.</param>
/// <returns>
/// The full path of the override, or of
/// <c>artifacts/langfuse-experiments/repeated-match-slices/{community}/{sourcePoolKey}/{sliceKey}</c>.
/// A relative path is resolved by <c>Path.GetFullPath</c>.
/// </returns>
private static string ResolveOutputDirectory(
    string? outputDirectoryOverride,
    string communityContext,
    string sourcePoolKey,
    string sliceKey)
{
    var target = string.IsNullOrWhiteSpace(outputDirectoryOverride)
        ? Path.Combine(
            "artifacts",
            "langfuse-experiments",
            "repeated-match-slices",
            ExperimentArtifactSupport.Slugify(communityContext),
            sourcePoolKey,
            sliceKey)
        : outputDirectoryOverride;

    return Path.GetFullPath(target);
}
| | | 333 | | |
/// <summary>
/// Serializes <paramref name="value"/> to JSON and writes the text to <paramref name="path"/>.
/// </summary>
/// <typeparam name="T">Type of the value being serialized.</typeparam>
/// <param name="path">Destination file path.</param>
/// <param name="value">Object to serialize.</param>
/// <param name="cancellationToken">
/// Cancellation token. NOTE(review): the end of the call is cut off in this view; presumably the
/// token is passed to <c>File.WriteAllTextAsync</c> - confirm.
/// </param>
/// <returns>The task returned by <c>File.WriteAllTextAsync</c>.</returns>
| | | 334 | | private static Task WriteJsonFileAsync<T>(string path, T value, CancellationToken cancellationToken) |
| | | 335 | | { |
| | 1 | 336 | | return File.WriteAllTextAsync(path, JsonSerializer.Serialize(value, PreparedExperimentCommandSupport.JsonOptions |
| | | 337 | | } |
| | | 338 | | } |