| | | 1 | | using System.Globalization; |
| | | 2 | | using System.Text.Json; |
| | | 3 | | using EHonda.KicktippAi.Core; |
| | | 4 | | using Microsoft.Extensions.Logging; |
| | | 5 | | using NodaTime; |
| | | 6 | | using Orchestrator.Commands.Observability.Experiments; |
| | | 7 | | using Orchestrator.Commands.Observability.ExportExperimentDataset; |
| | | 8 | | using Orchestrator.Infrastructure.Factories; |
| | | 9 | | using Spectre.Console; |
| | | 10 | | using Spectre.Console.Cli; |
| | | 11 | | |
| | | 12 | | namespace Orchestrator.Commands.Observability.PrepareSlice; |
| | | 13 | | |
| | | 14 | | public sealed class PrepareSliceCommand : AsyncCommand<PrepareSliceSettings> |
| | | 15 | | { |
| | | 16 | | private readonly IAnsiConsole _console; |
| | | 17 | | private readonly IFirebaseServiceFactory _firebaseServiceFactory; |
| | | 18 | | private readonly ILogger<PrepareSliceCommand> _logger; |
| | | 19 | | |
/// <summary>
/// Creates the command and stores the collaborators it is given.
/// </summary>
/// <param name="console">Console that receives the JSON summary and error messages.</param>
/// <param name="firebaseServiceFactory">Factory used to create the match outcome repository.</param>
/// <param name="logger">Logger used to record failures.</param>
public PrepareSliceCommand(
    IAnsiConsole console,
    IFirebaseServiceFactory firebaseServiceFactory,
    ILogger<PrepareSliceCommand> logger)
{
    // Plain field capture; no validation is performed on the arguments.
    (_console, _firebaseServiceFactory, _logger) = (console, firebaseServiceFactory, logger);
}
| | | 29 | | |
/// <summary>
/// Builds a slice of completed historical matches and writes it to disk as
/// "slice-dataset.json" and "slice-manifest.json", then prints a JSON summary to the console.
/// </summary>
/// <returns>0 when the slice was written; 1 when any exception was caught.</returns>
// NOTE(review): several lines in this method (signature, exception message, slice id call,
// output directory call) are cut off in this view; their missing tails are not documented here.
| | | 30 | | protected override async Task<int> ExecuteAsync(CommandContext context, PrepareSliceSettings settings, CancellationT |
| | | 31 | | { |
| | | 32 | | try |
| | | 33 | | { |
| | 1 | 34 | | var matchOutcomeRepository = _firebaseServiceFactory.CreateMatchOutcomeRepository(); |
| | 1 | 35 | | var matchdays = ParseMatchdays(settings.Matchdays); |
// The same StartsAfter setting is kept in two forms: a parsed value used for filtering and
// key building, and a normalized string used for dataset metadata and the manifest.
| | 1 | 36 | | var startsAfter = EvaluationTimeParser.ParseOrNull(settings.StartsAfter); |
| | 1 | 37 | | var normalizedStartsAfter = EvaluationTimeParser.NormalizeOrNull(settings.StartsAfter); |
| | 1 | 38 | | var availableItems = await LoadSourceItemsAsync( |
| | 1 | 39 | | matchOutcomeRepository, |
| | 1 | 40 | | settings.CommunityContext, |
| | 1 | 41 | | matchdays, |
| | 1 | 42 | | startsAfter, |
| | 1 | 43 | | cancellationToken); |
| | | 44 | | |
| | 1 | 45 | | if (availableItems.Count == 0) |
| | | 46 | | { |
| | 0 | 47 | | throw new InvalidOperationException("No completed historical matches were found for the requested slice |
| | | 48 | | } |
| | | 49 | | |
// Without an explicit seed, the current UTC date (yyyyMMdd read as an integer) is used,
// so the default seed changes from one day to the next.
| | 1 | 50 | | var sampleSeed = settings.SampleSeed ?? int.Parse( |
| | 1 | 51 | | DateTimeOffset.UtcNow.ToString("yyyyMMdd", CultureInfo.InvariantCulture), |
| | 1 | 52 | | CultureInfo.InvariantCulture); |
// Slice key and source pool key fall back to generated defaults when the setting is blank.
| | 1 | 53 | | var sliceKey = string.IsNullOrWhiteSpace(settings.SliceKey) |
| | 1 | 54 | | ? $"random-{settings.SampleSize}-seed-{sampleSeed}" |
| | 1 | 55 | | : settings.SliceKey.Trim(); |
| | 1 | 56 | | var sourcePoolKey = string.IsNullOrWhiteSpace(settings.SourcePoolKey) |
| | 1 | 57 | | ? BuildDefaultSourcePoolKey(matchdays, startsAfter) |
| | 1 | 58 | | : settings.SourcePoolKey.Trim(); |
// The random sample is re-sorted by source item id (ordinal) so the output order does not
// depend on shuffle order; each selected item then receives its slice dataset item id.
| | 1 | 59 | | var selectedItems = SelectRandomItems(availableItems, settings.SampleSize, sampleSeed) |
| | 1 | 60 | | .OrderBy(item => item.SourceDatasetItemId, StringComparer.Ordinal) |
| | 1 | 61 | | .Select(item => item with |
| | 1 | 62 | | { |
| | 1 | 63 | | SliceDatasetItemId = ExperimentArtifactSupport.BuildSliceDatasetItemId(item.SourceDatasetItemId, sli |
| | 1 | 64 | | }) |
| | 1 | 65 | | .ToList(); |
| | | 66 | | |
| | 1 | 67 | | var sourceDatasetName = ExperimentArtifactSupport.BuildSourceDatasetName(settings.CommunityContext); |
// An explicit DatasetName setting wins; otherwise the name is derived from the source
// dataset name, the source pool key and the slice key.
| | 1 | 68 | | var sliceDatasetName = settings.DatasetName |
| | 1 | 69 | | ?? $"{sourceDatasetName}/slices/{sourcePoolKey}/{sliceKey}"; |
| | 1 | 70 | | var outputDirectory = ResolveOutputDirectory(settings.OutputDirectory, settings.CommunityContext, sourcePool |
| | 1 | 71 | | var sliceArtifactPath = Path.Combine(outputDirectory, "slice-dataset.json"); |
| | 1 | 72 | | var sliceManifestPath = Path.Combine(outputDirectory, "slice-manifest.json"); |
| | | 73 | | |
| | 1 | 74 | | Directory.CreateDirectory(outputDirectory); |
| | | 75 | | |
| | 1 | 76 | | var bundle = PreparedExperimentBundleBuilder.Build( |
| | 1 | 77 | | selectedItems, |
| | 1 | 78 | | settings.CommunityContext, |
| | 1 | 79 | | sourceDatasetName, |
| | 1 | 80 | | sliceDatasetName, |
| | 1 | 81 | | sliceKey, |
| | 1 | 82 | | settings.SliceKind.Trim(), |
| | 1 | 83 | | settings.SampleMethod.Trim(), |
| | 1 | 84 | | sourcePoolKey, |
| | 1 | 85 | | sampleSeed, |
| | 1 | 86 | | extraDatasetMetadata: BuildSliceDatasetMetadata(normalizedStartsAfter)); |
// The builder's manifest is copied with StartsAfter set to the normalized cutoff string.
| | 1 | 87 | | var manifest = bundle.Manifest with |
| | 1 | 88 | | { |
| | 1 | 89 | | StartsAfter = normalizedStartsAfter |
| | 1 | 90 | | }; |
| | | 91 | | |
| | 1 | 92 | | await WriteJsonFileAsync(sliceArtifactPath, bundle.Artifact, cancellationToken); |
| | 1 | 93 | | await WriteJsonFileAsync(sliceManifestPath, manifest, cancellationToken); |
| | | 94 | | |
// Anonymous object serialized to the console below as the command's summary output.
| | 1 | 95 | | var summary = new |
| | 1 | 96 | | { |
| | 1 | 97 | | mode = "slice", |
| | 1 | 98 | | sourceDatasetName, |
| | 1 | 99 | | datasetName = manifest.SliceDatasetName, |
| | 1 | 100 | | manifest.CommunityContext, |
| | 1 | 101 | | manifest.SourcePoolKey, |
| | 1 | 102 | | manifest.SliceKey, |
| | 1 | 103 | | manifest.SliceKind, |
| | 1 | 104 | | manifest.SampleMethod, |
| | 1 | 105 | | manifest.SampleSize, |
| | 1 | 106 | | manifest.SampleSeed, |
| | 1 | 107 | | matchdays, |
| | 1 | 108 | | manifest.StartsAfter, |
| | 1 | 109 | | manifest.SelectedItemIds, |
| | 1 | 110 | | manifest.SelectedItemIdsHash, |
| | 1 | 111 | | outputDirectory, |
| | 1 | 112 | | sliceArtifactPath, |
| | 1 | 113 | | sliceManifestPath |
| | 1 | 114 | | }; |
| | | 115 | | |
| | 1 | 116 | | _console.WriteLine(JsonSerializer.Serialize(summary, PreparedExperimentCommandSupport.JsonOptions)); |
| | 1 | 117 | | return 0; |
| | | 118 | | } |
// Every exception type is caught here (which includes cancellation): it is logged, its
// message is printed with markup escaped, and the command reports exit code 1.
| | 0 | 119 | | catch (Exception ex) |
| | | 120 | | { |
| | 0 | 121 | | _logger.LogError(ex, "Error preparing slice experiment artifact"); |
| | 0 | 122 | | _console.MarkupLine($"[red]Error:[/] {Markup.Escape(ex.Message)}"); |
| | 0 | 123 | | return 1; |
| | | 124 | | } |
| | 1 | 125 | | } |
| | | 126 | | |
/// <summary>
/// Collects the source items for the requested matchdays: one item per outcome that has a
/// result and, when a cutoff is given, starts strictly after that cutoff.
/// </summary>
/// <param name="matchOutcomeRepository">Repository queried once per matchday.</param>
/// <param name="communityContext">Community context passed to the repository query.</param>
/// <param name="matchdays">Matchday numbers to load.</param>
/// <param name="startsAfter">Optional cutoff; outcomes starting at or before it are skipped.</param>
/// <param name="cancellationToken">Cancellation token for the load.</param>
/// <returns>The collected items, ordered by SourceDatasetItemId using ordinal comparison.</returns>
| | | 127 | | private static async Task<IReadOnlyList<PreparedExperimentSourceItem>> LoadSourceItemsAsync( |
| | | 128 | | IMatchOutcomeRepository matchOutcomeRepository, |
| | | 129 | | string communityContext, |
| | | 130 | | IReadOnlyList<int> matchdays, |
| | | 131 | | DateTimeOffset? startsAfter, |
| | | 132 | | CancellationToken cancellationToken) |
| | | 133 | | { |
| | 1 | 134 | | var sourceItems = new List<PreparedExperimentSourceItem>(); |
// Convert the cutoff to a NodaTime Instant once so it can be compared with each outcome's start.
| | 1 | 135 | | var startsAfterInstant = startsAfter is null |
| | 1 | 136 | | ? (Instant?)null |
| | 1 | 137 | | : Instant.FromDateTimeOffset(startsAfter.Value); |
| | | 138 | | |
| | 1 | 139 | | foreach (var matchday in matchdays) |
| | | 140 | | { |
// NOTE(review): the call below is cut off in this view; its remaining arguments are not visible.
| | 1 | 141 | | var outcomes = await matchOutcomeRepository.GetMatchdayOutcomesAsync(matchday, communityContext, cancellatio |
| | 1 | 142 | | foreach (var outcome in outcomes) |
| | | 143 | | { |
// Skip outcomes without a recorded result or with either goal count missing.
| | 1 | 144 | | if (!outcome.HasOutcome || outcome.HomeGoals is null || outcome.AwayGoals is null) |
| | | 145 | | { |
| | | 146 | | continue; |
| | | 147 | | } |
| | | 148 | | |
// The cutoff is exclusive: an outcome starting exactly at the cutoff is skipped too.
| | 1 | 149 | | if (startsAfterInstant is not null && outcome.StartsAt.ToInstant() <= startsAfterInstant.Value) |
| | | 150 | | { |
| | | 151 | | continue; |
| | | 152 | | } |
| | | 153 | | |
| | 1 | 154 | | var datasetItem = ExperimentArtifactSupport.BuildHostedDatasetItem(outcome); |
// The dataset item id is passed for the first three constructor arguments.
// NOTE(review): presumably the third is a placeholder for the slice item id that
// ExecuteAsync assigns later — confirm against PreparedExperimentSourceItem.
| | 1 | 155 | | sourceItems.Add(new PreparedExperimentSourceItem( |
| | 1 | 156 | | datasetItem.Id, |
| | 1 | 157 | | datasetItem.Id, |
| | 1 | 158 | | datasetItem.Id, |
| | 1 | 159 | | datasetItem.Metadata.Competition, |
| | 1 | 160 | | datasetItem.Metadata.Season, |
| | 1 | 161 | | datasetItem.Metadata.CommunityContext, |
| | 1 | 162 | | datasetItem.Metadata.Matchday, |
| | 1 | 163 | | datasetItem.Metadata.MatchdayLabel, |
| | 1 | 164 | | datasetItem.Metadata.HomeTeam, |
| | 1 | 165 | | datasetItem.Metadata.AwayTeam, |
| | 1 | 166 | | GetStartsAt(datasetItem), |
| | 1 | 167 | | datasetItem.Metadata.TippSpielId, |
| | 1 | 168 | | datasetItem.ExpectedOutput.HomeGoals, |
| | 1 | 169 | | datasetItem.ExpectedOutput.AwayGoals)); |
| | | 170 | | } |
| | | 171 | | } |
| | | 172 | | |
// Ordinal ordering by source id makes the returned list independent of load order.
| | 1 | 173 | | return sourceItems |
| | 1 | 174 | | .OrderBy(item => item.SourceDatasetItemId, StringComparer.Ordinal) |
| | 1 | 175 | | .ToList(); |
| | 1 | 176 | | } |
| | | 177 | | |
/// <summary>
/// Reads the "startsAt" string from the JSON input of a hosted dataset item.
/// </summary>
/// <param name="item">Dataset item whose <c>Input</c> is inspected.</param>
/// <returns>The non-blank "startsAt" string.</returns>
/// <exception cref="InvalidOperationException">
/// The input is not a JSON object, has no "startsAt" property, or that property is not a
/// non-blank string.
/// </exception>
private static string GetStartsAt(HostedMatchExperimentDatasetItem item)
{
    var input = item.Input;

    if (input.ValueKind == JsonValueKind.Object
        && input.TryGetProperty("startsAt", out var property)
        && property.ValueKind == JsonValueKind.String)
    {
        var text = property.GetString();
        if (!string.IsNullOrWhiteSpace(text))
        {
            return text;
        }
    }

    // Every failed check ends up here with the same message.
    throw new InvalidOperationException($"Dataset item '{item.Id}' is missing input.startsAt.");
}
| | | 190 | | |
/// <summary>
/// Picks <paramref name="count"/> items by shuffling a copy of <paramref name="items"/> with a
/// seeded <see cref="Random"/> and taking the first <paramref name="count"/> entries.
/// </summary>
/// <param name="items">Pool to sample from; the caller's list is not modified.</param>
/// <param name="count">Number of items to return.</param>
/// <param name="seed">Seed for the random generator.</param>
/// <returns>The first <paramref name="count"/> items of the shuffled copy.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="count"/> is larger than the number of available items.
/// </exception>
private static IReadOnlyList<PreparedExperimentSourceItem> SelectRandomItems(
    IReadOnlyList<PreparedExperimentSourceItem> items,
    int count,
    int seed)
{
    if (count > items.Count)
    {
        throw new InvalidOperationException(
            $"Requested sample size {count} exceeds available dataset item count {items.Count}.");
    }

    var shuffled = new List<PreparedExperimentSourceItem>(items);
    var rng = new Random(seed);

    // Fisher-Yates shuffle walking from the last slot down to slot 1; the order of the
    // rng.Next calls determines the sample, so it must not be rearranged.
    var slot = shuffled.Count - 1;
    while (slot > 0)
    {
        var pick = rng.Next(slot + 1);
        var held = shuffled[slot];
        shuffled[slot] = shuffled[pick];
        shuffled[pick] = held;
        slot--;
    }

    return shuffled.Take(count).ToList();
}
| | | 212 | | |
/// <summary>
/// Parses a comma-separated list of matchday numbers.
/// </summary>
/// <param name="matchdays">
/// Text such as "1, 5,12"; entries are trimmed and empty entries are ignored.
/// When null or blank, matchdays 1 through 34 are returned.
/// </param>
/// <returns>The distinct matchday numbers in ascending order, as a read-only list.</returns>
/// <exception cref="FormatException">An entry is not a valid integer.</exception>
/// <exception cref="OverflowException">An entry does not fit in an <see cref="int"/>.</exception>
private static IReadOnlyList<int> ParseMatchdays(string? matchdays)
{
    if (string.IsNullOrWhiteSpace(matchdays))
    {
        return Enumerable.Range(1, 34).ToList().AsReadOnly();
    }

    // A sorted set removes duplicates and orders ascending in one step.
    var unique = new SortedSet<int>();
    var segments = matchdays.Split(
        ',',
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    foreach (var segment in segments)
    {
        unique.Add(int.Parse(segment, CultureInfo.InvariantCulture));
    }

    return unique.ToList().AsReadOnly();
}
| | | 228 | | |
/// <summary>
/// Builds the default source pool key from the matchday selection and optional cutoff.
/// </summary>
/// <param name="matchdays">
/// Selected matchdays; exactly 1 through 34 in order yields "all-matchdays", anything else
/// yields "matchdays-" followed by the numbers joined with '-'.
/// </param>
/// <param name="startsAfter">
/// Optional cutoff; when present, "-after-" plus the cutoff in UTC formatted as
/// yyyyMMdd't'HHmmss'z' is appended.
/// </param>
/// <returns>The source pool key.</returns>
private static string BuildDefaultSourcePoolKey(IReadOnlyList<int> matchdays, DateTimeOffset? startsAfter)
{
    string poolKey;
    if (Enumerable.Range(1, 34).SequenceEqual(matchdays))
    {
        poolKey = "all-matchdays";
    }
    else
    {
        poolKey = "matchdays-" + string.Join('-', matchdays);
    }

    if (startsAfter is not { } cutoff)
    {
        return poolKey;
    }

    // The cutoff is converted to UTC first so equal instants give the same key.
    var stamp = cutoff
        .ToUniversalTime()
        .ToString("yyyyMMdd't'HHmmss'z'", CultureInfo.InvariantCulture)
        .ToLowerInvariant();

    return poolKey + "-after-" + stamp;
}
| | | 246 | | |
/// <summary>
/// Builds the extra dataset metadata carrying the "startsAfter" cutoff.
/// </summary>
/// <param name="startsAfter">Cutoff text, or null/blank when no cutoff applies.</param>
/// <returns>
/// A dictionary with the single key "startsAfter", or null when
/// <paramref name="startsAfter"/> is null or blank.
/// </returns>
private static IReadOnlyDictionary<string, object?>? BuildSliceDatasetMetadata(string? startsAfter)
    => string.IsNullOrWhiteSpace(startsAfter)
        ? null
        : new Dictionary<string, object?> { ["startsAfter"] = startsAfter };
| | | 259 | | |
/// <summary>
/// Resolves the absolute directory that receives the slice files.
/// </summary>
/// <param name="outputDirectoryOverride">Explicit directory; used as-is when not blank.</param>
/// <param name="communityContext">Community context, slugified into a path segment.</param>
/// <param name="sourcePoolKey">Source pool key, used verbatim as a path segment.</param>
/// <param name="sliceKey">Slice key, used verbatim as a path segment.</param>
/// <returns>
/// The full path of the override, or of
/// artifacts/langfuse-experiments/slices/{community slug}/{sourcePoolKey}/{sliceKey}
/// when no override is given.
/// </returns>
private static string ResolveOutputDirectory(
    string? outputDirectoryOverride,
    string communityContext,
    string sourcePoolKey,
    string sliceKey)
{
    // The default location is only computed when no override is supplied.
    var target = string.IsNullOrWhiteSpace(outputDirectoryOverride)
        ? Path.Combine(
            "artifacts",
            "langfuse-experiments",
            "slices",
            ExperimentArtifactSupport.Slugify(communityContext),
            sourcePoolKey,
            sliceKey)
        : outputDirectoryOverride;

    return Path.GetFullPath(target);
}
| | | 279 | | |
/// <summary>
/// Serializes <paramref name="value"/> to JSON using PreparedExperimentCommandSupport.JsonOptions
/// and writes the text to <paramref name="path"/> via File.WriteAllTextAsync.
/// </summary>
/// <param name="path">Destination file path.</param>
/// <param name="value">Object to serialize.</param>
/// <param name="cancellationToken">Cancellation token for the write.</param>
/// <returns>The task returned by File.WriteAllTextAsync.</returns>
// NOTE(review): the statement below is cut off in this view; presumably cancellationToken is
// passed as the final argument — confirm against the full source.
| | | 280 | | private static Task WriteJsonFileAsync<T>(string path, T value, CancellationToken cancellationToken) |
| | | 281 | | { |
| | 1 | 282 | | return File.WriteAllTextAsync(path, JsonSerializer.Serialize(value, PreparedExperimentCommandSupport.JsonOptions |
| | | 283 | | } |
| | | 284 | | } |