| | | 1 | | using System.Globalization; |
| | | 2 | | using System.Text; |
| | | 3 | | using System.Text.Json; |
| | | 4 | | using EHonda.KicktippAi.Core; |
| | | 5 | | using Microsoft.Extensions.Logging; |
| | | 6 | | using NodaTime; |
| | | 7 | | using OpenAiIntegration; |
| | | 8 | | using Orchestrator.Infrastructure.Factories; |
| | | 9 | | using Spectre.Console; |
| | | 10 | | using Spectre.Console.Cli; |
| | | 11 | | |
| | | 12 | | namespace Orchestrator.Commands.Observability.ExportExperimentDataset; |
| | | 13 | | |
| | | 14 | | public sealed class ExportExperimentDatasetCommand : AsyncCommand<ExportExperimentDatasetSettings> |
| | | 15 | | { |
/// <summary>Season label written into the metadata of every exported dataset item.</summary>
private const string Season = "2025/2026";

/// <summary>Time zone whose UTC offset is applied to match start times before they are rendered.</summary>
private static readonly DateTimeZone BundesligaTimeZone = DateTimeZoneProviders.Tzdb["Europe/Berlin"];

/// <summary>Serializer settings for the exported artifact: camelCase property names, indented output.</summary>
private static readonly JsonSerializerOptions OutputJsonOptions = new JsonSerializerOptions
{
    WriteIndented = true,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};

private readonly IAnsiConsole _console;
private readonly IFirebaseServiceFactory _firebaseServiceFactory;
private readonly ILogger<ExportExperimentDatasetCommand> _logger;

/// <summary>
/// Creates the command with the services it depends on.
/// </summary>
/// <param name="console">Console used for progress and error output.</param>
/// <param name="firebaseServiceFactory">Factory that provides the match outcome repository.</param>
/// <param name="logger">Logger that receives export failures.</param>
public ExportExperimentDatasetCommand(
    IAnsiConsole console,
    IFirebaseServiceFactory firebaseServiceFactory,
    ILogger<ExportExperimentDatasetCommand> logger)
    => (_console, _firebaseServiceFactory, _logger) = (console, firebaseServiceFactory, logger);
| | | 37 | | |
| | | 38 | | public override async Task<int> ExecuteAsync(CommandContext context, ExportExperimentDatasetSettings settings) |
| | | 39 | | { |
| | | 40 | | try |
| | | 41 | | { |
| | 0 | 42 | | var matchOutcomeRepository = _firebaseServiceFactory.CreateMatchOutcomeRepository(); |
| | 0 | 43 | | var matchdays = ParseMatchdays(settings.Matchdays); |
| | 0 | 44 | | var datasetName = BuildDatasetName(settings.CommunityContext); |
| | | 45 | | |
| | 0 | 46 | | _console.MarkupLine($"[green]Exporting hosted experiment dataset:[/] [yellow]{Markup.Escape(datasetName)}[/] |
| | | 47 | | |
| | 0 | 48 | | var items = new List<HostedMatchExperimentDatasetItem>(); |
| | | 49 | | |
| | 0 | 50 | | foreach (var matchday in matchdays) |
| | | 51 | | { |
| | 0 | 52 | | var outcomes = await matchOutcomeRepository.GetMatchdayOutcomesAsync(matchday, settings.CommunityContext |
| | | 53 | | |
| | 0 | 54 | | foreach (var outcome in outcomes) |
| | | 55 | | { |
| | 0 | 56 | | if (!outcome.HasOutcome || outcome.HomeGoals is null || outcome.AwayGoals is null) |
| | | 57 | | { |
| | | 58 | | continue; |
| | | 59 | | } |
| | | 60 | | |
| | 0 | 61 | | items.Add(BuildItem(outcome)); |
| | | 62 | | } |
| | | 63 | | } |
| | | 64 | | |
| | 0 | 65 | | items.Sort((left, right) => string.Compare(left.Id, right.Id, StringComparison.Ordinal)); |
| | | 66 | | |
| | 0 | 67 | | var export = new ExportedExperimentDataset(datasetName, items.AsReadOnly()); |
| | 0 | 68 | | var outputPath = ResolveOutputPath(settings); |
| | 0 | 69 | | Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!); |
| | | 70 | | |
| | 0 | 71 | | await File.WriteAllTextAsync( |
| | 0 | 72 | | outputPath, |
| | 0 | 73 | | JsonSerializer.Serialize(export, OutputJsonOptions)); |
| | | 74 | | |
| | 0 | 75 | | _console.MarkupLine($"[green]Wrote dataset artifact:[/] [yellow]{Markup.Escape(outputPath)}[/]"); |
| | 0 | 76 | | _console.MarkupLine($"[blue]Exported items:[/] {items.Count}"); |
| | | 77 | | |
| | 0 | 78 | | if (items.Count > 0) |
| | | 79 | | { |
| | 0 | 80 | | _console.MarkupLine($"[blue]First item id:[/] {Markup.Escape(items[0].Id)}"); |
| | 0 | 81 | | _console.MarkupLine($"[blue]Last item id:[/] {Markup.Escape(items[^1].Id)}"); |
| | | 82 | | } |
| | | 83 | | |
| | 0 | 84 | | return 0; |
| | | 85 | | } |
| | 0 | 86 | | catch (Exception ex) |
| | | 87 | | { |
| | 0 | 88 | | _logger.LogError(ex, "Error exporting hosted experiment dataset"); |
| | 0 | 89 | | _console.MarkupLine($"[red]Error:[/] {Markup.Escape(ex.Message)}"); |
| | 0 | 90 | | return 1; |
| | | 91 | | } |
| | 0 | 92 | | } |
| | | 93 | | |
/// <summary>
/// Converts one persisted match outcome into a dataset item consisting of an id,
/// the match JSON used as input, the expected score and descriptive metadata.
/// </summary>
/// <param name="outcome">
/// Outcome to convert. Its home and away goals are read via <c>.Value</c>, so the
/// caller is expected to have filtered out outcomes without a final score.
/// </param>
/// <returns>The dataset item for <paramref name="outcome"/>.</returns>
/// <exception cref="InvalidOperationException">The outcome has no <c>TippSpielId</c>.</exception>
private static HostedMatchExperimentDatasetItem BuildItem(PersistedMatchOutcome outcome)
{
    if (outcome.TippSpielId is not { } tippSpielId)
    {
        throw new InvalidOperationException(
            $"Persisted outcome for {outcome.HomeTeam} vs {outcome.AwayTeam} is missing tippspielId.");
    }

    var matchJson = PredictionPromptComposer.CreateMatchJson(RehydrateForPromptOutput(outcome));
    using var parsedMatch = JsonDocument.Parse(matchJson);

    var itemId = BuildItemId(outcome.Competition, outcome.CommunityContext, tippSpielId);

    // Clone detaches the element from the document, which is disposed when this method returns.
    var input = parsedMatch.RootElement.Clone();

    var expectedOutput = new HostedMatchExperimentExpectedOutput(
        outcome.HomeGoals!.Value,
        outcome.AwayGoals!.Value);

    var metadata = new HostedMatchExperimentMetadata(
        outcome.Competition,
        Season,
        outcome.CommunityContext,
        outcome.Matchday,
        $"md{outcome.Matchday:00}",
        outcome.HomeTeam,
        outcome.AwayTeam,
        tippSpielId);

    return new HostedMatchExperimentDatasetItem(itemId, input, expectedOutput, metadata);
}
| | | 118 | | |
/// <summary>
/// Builds a <c>Match</c> from a persisted outcome, expressing the start time in a
/// fixed-offset zone whose offset is the one <c>BundesligaTimeZone</c> has at that instant.
/// </summary>
/// <param name="outcome">Persisted outcome supplying teams, start time and matchday.</param>
/// <returns>A match carrying the re-zoned start time.</returns>
private static Match RehydrateForPromptOutput(PersistedMatchOutcome outcome)
{
    var kickoff = outcome.StartsAt.ToInstant();

    // NOTE(review): a fixed-offset zone is used instead of the tz-database zone itself,
    // presumably so the rendered prompt shows a plain UTC offset; confirm against the prompt composer.
    var fixedOffsetZone = DateTimeZone.ForOffset(BundesligaTimeZone.GetUtcOffset(kickoff));

    return new Match(
        outcome.HomeTeam,
        outcome.AwayTeam,
        kickoff.InZone(fixedOffsetZone),
        outcome.Matchday);
}
| | | 126 | | |
/// <summary>
/// Parses a comma-separated matchday selection into a sorted list without duplicates.
/// </summary>
/// <param name="matchdays">
/// Comma-separated matchday numbers such as <c>"1, 2,5"</c>. Surrounding whitespace and empty
/// segments are ignored. <c>null</c>, empty or whitespace-only input selects matchdays 1 to 34.
/// </param>
/// <returns>
/// The selected matchdays in ascending order. Input consisting only of separators yields an empty list.
/// </returns>
/// <exception cref="FormatException">A segment is not a valid integer.</exception>
/// <exception cref="ArgumentOutOfRangeException">A matchday lies outside 1 to 34.</exception>
private static IReadOnlyList<int> ParseMatchdays(string? matchdays)
{
    const int FirstMatchday = 1;
    const int LastMatchday = 34;

    if (string.IsNullOrWhiteSpace(matchdays))
    {
        return Enumerable.Range(FirstMatchday, LastMatchday - FirstMatchday + 1).ToList().AsReadOnly();
    }

    return matchdays
        .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .Select(ParseSegment)
        .Distinct()
        .OrderBy(matchday => matchday)
        .ToList()
        .AsReadOnly();

    // Parses one trimmed segment and rejects values that cannot be a matchday of the season,
    // so typos fail fast instead of silently querying a matchday that does not exist.
    int ParseSegment(string segment)
    {
        if (!int.TryParse(segment, NumberStyles.Integer, CultureInfo.InvariantCulture, out var matchday))
        {
            throw new FormatException($"Matchday '{segment}' is not a valid integer.");
        }

        if (matchday < FirstMatchday || matchday > LastMatchday)
        {
            throw new ArgumentOutOfRangeException(
                nameof(matchdays),
                matchday,
                $"Matchday must be between {FirstMatchday} and {LastMatchday}.");
        }

        return matchday;
    }
}
| | | 142 | | |
/// <summary>
/// Builds the hosted dataset name by appending the community context to the
/// fixed <c>match-predictions/bundesliga-2025-26/</c> prefix.
/// </summary>
/// <param name="communityContext">Community context used as the last path segment, unmodified.</param>
/// <returns>The dataset name.</returns>
private static string BuildDatasetName(string communityContext)
    => string.Concat("match-predictions/bundesliga-2025-26/", communityContext);
| | | 147 | | |
/// <summary>
/// Determines the absolute path of the dataset artifact.
/// </summary>
/// <param name="settings">
/// Command settings. A non-blank <c>OutputPath</c> is used as given; otherwise the path
/// defaults to <c>artifacts/langfuse-dataset/&lt;slugified community context&gt;.json</c>.
/// </param>
/// <returns>The full path, resolved by <see cref="Path.GetFullPath(string)"/>.</returns>
private static string ResolveOutputPath(ExportExperimentDatasetSettings settings)
{
    var candidate = string.IsNullOrWhiteSpace(settings.OutputPath)
        ? Path.Combine("artifacts", "langfuse-dataset", $"{Slugify(settings.CommunityContext)}.json")
        : settings.OutputPath;

    return Path.GetFullPath(candidate);
}
| | | 160 | | |
/// <summary>
/// Composes a dataset item id from the slugified competition, community context and
/// tippspiel id, joined by double underscores, with the last part prefixed by <c>ts</c>.
/// </summary>
/// <param name="competition">Competition name.</param>
/// <param name="communityContext">Community context name.</param>
/// <param name="tippSpielId">Tippspiel id of the match.</param>
/// <returns>An id of the form <c>competition__community__ts&lt;id&gt;</c>.</returns>
private static string BuildItemId(string competition, string communityContext, string tippSpielId)
    => $"{Slugify(competition)}__{Slugify(communityContext)}__ts{Slugify(tippSpielId)}";
| | | 169 | | |
/// <summary>
/// Turns arbitrary text into a lowercase slug: diacritics are removed, letters and digits
/// are kept, and every run of other characters becomes a single hyphen. The result never
/// starts or ends with a hyphen.
/// </summary>
/// <param name="value">Text to convert.</param>
/// <returns>The slug; empty when <paramref name="value"/> contains no letters or digits.</returns>
private static string Slugify(string value)
{
    // Canonical decomposition splits accented characters into a base character plus
    // combining marks, so the marks can be dropped individually below.
    var decomposed = value.Normalize(NormalizationForm.FormD);
    var slug = new StringBuilder(decomposed.Length);
    var separatorPending = false;

    for (var index = 0; index < decomposed.Length; index++)
    {
        var current = decomposed[index];

        if (CharUnicodeInfo.GetUnicodeCategory(current) == UnicodeCategory.NonSpacingMark)
        {
            continue;
        }

        if (!char.IsLetterOrDigit(current))
        {
            // Remember the separator but emit it only once another letter or digit follows;
            // this keeps leading, trailing and repeated hyphens out of the slug.
            separatorPending = slug.Length > 0;
            continue;
        }

        if (separatorPending)
        {
            slug.Append('-');
            separatorPending = false;
        }

        slug.Append(char.ToLowerInvariant(current));
    }

    return slug.ToString();
}
| | | 198 | | } |