| | | 1 | | using System.Text.Json; |
| | | 2 | | using System.Text.Json.Serialization; |
| | | 3 | | using Microsoft.Extensions.Logging; |
| | | 4 | | using Orchestrator.Commands.Observability.Experiments; |
| | | 5 | | using Orchestrator.Infrastructure.Langfuse; |
| | | 6 | | using Spectre.Console; |
| | | 7 | | using Spectre.Console.Cli; |
| | | 8 | | |
| | | 9 | | namespace Orchestrator.Commands.Observability.SyncDataset; |
| | | 10 | | |
| | | 11 | | public sealed class SyncDatasetCommand : AsyncCommand<SyncDatasetSettings> |
| | | 12 | | { |
| | 1 | 13 | | private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web) |
| | 1 | 14 | | { |
| | 1 | 15 | | PropertyNameCaseInsensitive = true, |
| | 1 | 16 | | PropertyNamingPolicy = JsonNamingPolicy.CamelCase, |
| | 1 | 17 | | WriteIndented = true, |
| | 1 | 18 | | DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull |
| | 1 | 19 | | }; |
| | | 20 | | |
| | 1 | 21 | | private static readonly JsonElement DefaultInputSchema = ParseJsonElement( |
| | 1 | 22 | | """ |
| | 1 | 23 | | { |
| | 1 | 24 | | "type": "object", |
| | 1 | 25 | | "properties": { |
| | 1 | 26 | | "homeTeam": { |
| | 1 | 27 | | "type": "string", |
| | 1 | 28 | | "minLength": 1, |
| | 1 | 29 | | "description": "Exact home team name from the persisted match outcome" |
| | 1 | 30 | | }, |
| | 1 | 31 | | "awayTeam": { |
| | 1 | 32 | | "type": "string", |
| | 1 | 33 | | "minLength": 1, |
| | 1 | 34 | | "description": "Exact away team name from the persisted match outcome" |
| | 1 | 35 | | }, |
| | 1 | 36 | | "startsAt": { |
| | 1 | 37 | | "type": "string", |
| | 1 | 38 | | "minLength": 1, |
| | 1 | 39 | | "description": "Localized match start timestamp string emitted by the .NET exporter" |
| | 1 | 40 | | } |
| | 1 | 41 | | }, |
| | 1 | 42 | | "required": ["homeTeam", "awayTeam", "startsAt"], |
| | 1 | 43 | | "additionalProperties": false |
| | 1 | 44 | | } |
| | 1 | 45 | | """); |
| | | 46 | | |
| | 1 | 47 | | private static readonly JsonElement DefaultExpectedOutputSchema = ParseJsonElement( |
| | 1 | 48 | | """ |
| | 1 | 49 | | { |
| | 1 | 50 | | "type": "object", |
| | 1 | 51 | | "properties": { |
| | 1 | 52 | | "homeGoals": { |
| | 1 | 53 | | "type": "integer", |
| | 1 | 54 | | "minimum": 0, |
| | 1 | 55 | | "description": "Actual home goals scored in the completed match" |
| | 1 | 56 | | }, |
| | 1 | 57 | | "awayGoals": { |
| | 1 | 58 | | "type": "integer", |
| | 1 | 59 | | "minimum": 0, |
| | 1 | 60 | | "description": "Actual away goals scored in the completed match" |
| | 1 | 61 | | } |
| | 1 | 62 | | }, |
| | 1 | 63 | | "required": ["homeGoals", "awayGoals"], |
| | 1 | 64 | | "additionalProperties": false |
| | 1 | 65 | | } |
| | 1 | 66 | | """); |
| | | 67 | | |
| | 1 | 68 | | private static readonly JsonElement MetadataSchema = ParseJsonElement( |
| | 1 | 69 | | """ |
| | 1 | 70 | | { |
| | 1 | 71 | | "type": "object", |
| | 1 | 72 | | "properties": { |
| | 1 | 73 | | "competition": { |
| | 1 | 74 | | "type": "string", |
| | 1 | 75 | | "minLength": 1, |
| | 1 | 76 | | "description": "Competition identifier" |
| | 1 | 77 | | }, |
| | 1 | 78 | | "season": { |
| | 1 | 79 | | "type": "string", |
| | 1 | 80 | | "minLength": 1, |
| | 1 | 81 | | "description": "Season label" |
| | 1 | 82 | | }, |
| | 1 | 83 | | "communityContext": { |
| | 1 | 84 | | "type": "string", |
| | 1 | 85 | | "minLength": 1, |
| | 1 | 86 | | "description": "Community context slug" |
| | 1 | 87 | | }, |
| | 1 | 88 | | "matchday": { |
| | 1 | 89 | | "type": "integer", |
| | 1 | 90 | | "minimum": 1, |
| | 1 | 91 | | "description": "Bundesliga matchday number" |
| | 1 | 92 | | }, |
| | 1 | 93 | | "matchdayLabel": { |
| | 1 | 94 | | "type": "string", |
| | 1 | 95 | | "minLength": 1, |
| | 1 | 96 | | "description": "Human-readable matchday label" |
| | 1 | 97 | | }, |
| | 1 | 98 | | "homeTeam": { |
| | 1 | 99 | | "type": "string", |
| | 1 | 100 | | "minLength": 1, |
| | 1 | 101 | | "description": "Exact home team name" |
| | 1 | 102 | | }, |
| | 1 | 103 | | "awayTeam": { |
| | 1 | 104 | | "type": "string", |
| | 1 | 105 | | "minLength": 1, |
| | 1 | 106 | | "description": "Exact away team name" |
| | 1 | 107 | | }, |
| | 1 | 108 | | "tippSpielId": { |
| | 1 | 109 | | "type": "string", |
| | 1 | 110 | | "minLength": 1, |
| | 1 | 111 | | "description": "Kicktipp match identifier" |
| | 1 | 112 | | }, |
| | 1 | 113 | | "fixtureIndex": { |
| | 1 | 114 | | "type": "integer", |
| | 1 | 115 | | "minimum": 1, |
| | 1 | 116 | | "description": "One-based fixture index for repeated-match-slice datasets" |
| | 1 | 117 | | }, |
| | 1 | 118 | | "repetitionIndex": { |
| | 1 | 119 | | "type": "integer", |
| | 1 | 120 | | "minimum": 1, |
| | 1 | 121 | | "description": "One-based repetition index for repeated-match-slice datasets" |
| | 1 | 122 | | } |
| | 1 | 123 | | }, |
| | 1 | 124 | | "required": [ |
| | 1 | 125 | | "competition", |
| | 1 | 126 | | "season", |
| | 1 | 127 | | "communityContext", |
| | 1 | 128 | | "matchday", |
| | 1 | 129 | | "matchdayLabel", |
| | 1 | 130 | | "homeTeam", |
| | 1 | 131 | | "awayTeam", |
| | 1 | 132 | | "tippSpielId" |
| | 1 | 133 | | ], |
| | 1 | 134 | | "additionalProperties": false |
| | 1 | 135 | | } |
| | 1 | 136 | | """); |
| | | 137 | | |
| | | 138 | | private readonly IAnsiConsole _console; |
| | | 139 | | private readonly ILangfusePublicApiClient _langfuseClient; |
| | | 140 | | private readonly ILogger<SyncDatasetCommand> _logger; |
| | | 141 | | |
| | 1 | 142 | | public SyncDatasetCommand( |
| | 1 | 143 | | IAnsiConsole console, |
| | 1 | 144 | | ILangfusePublicApiClient langfuseClient, |
| | 1 | 145 | | ILogger<SyncDatasetCommand> logger) |
| | | 146 | | { |
| | 1 | 147 | | _console = console; |
| | 1 | 148 | | _langfuseClient = langfuseClient; |
| | 1 | 149 | | _logger = logger; |
| | 1 | 150 | | } |
| | | 151 | | |
| | | 152 | | protected override async Task<int> ExecuteAsync(CommandContext context, SyncDatasetSettings settings, CancellationTo |
| | | 153 | | { |
| | | 154 | | try |
| | | 155 | | { |
| | 1 | 156 | | var artifact = await LoadArtifactAsync(settings.InputPath, cancellationToken); |
| | 1 | 157 | | var datasetName = settings.DatasetName ?? artifact.DatasetName; |
| | 1 | 158 | | if (string.IsNullOrWhiteSpace(datasetName)) |
| | | 159 | | { |
| | 0 | 160 | | throw new InvalidOperationException("Dataset name missing from artifact and no --dataset-name override w |
| | | 161 | | } |
| | | 162 | | |
| | 1 | 163 | | ValidateArtifact(artifact, datasetName); |
| | | 164 | | |
| | 1 | 165 | | PreparedExperimentSupport.ReportProgress( |
| | 1 | 166 | | $"Starting dataset sync for '{datasetName}' with {artifact.Items.Count} item(s). Dry run: {settings.DryR |
| | | 167 | | |
| | 1 | 168 | | var datasetDefinition = BuildDatasetDefinition(artifact, datasetName); |
| | 1 | 169 | | var dataset = settings.DryRun |
| | 1 | 170 | | ? CreateDryRunDataset(datasetDefinition) |
| | 1 | 171 | | : await CreateOrUpdateDatasetAsync(datasetDefinition, cancellationToken); |
| | | 172 | | |
| | 1 | 173 | | PreparedExperimentSupport.ReportProgress( |
| | 1 | 174 | | settings.DryRun |
| | 1 | 175 | | ? $"Validated dataset definition for '{datasetName}' in dry-run mode." |
| | 1 | 176 | | : $"Dataset '{datasetName}' is ready with id '{dataset.Id}'."); |
| | | 177 | | |
| | 1 | 178 | | var created = 0; |
| | 1 | 179 | | var updated = 0; |
| | 1 | 180 | | var unchanged = 0; |
| | 1 | 181 | | var failures = new List<object>(); |
| | | 182 | | |
| | 1 | 183 | | for (var itemIndex = 0; itemIndex < artifact.Items.Count; itemIndex += 1) |
| | | 184 | | { |
| | 1 | 185 | | var item = artifact.Items[itemIndex]; |
| | | 186 | | |
| | 1 | 187 | | if (settings.DryRun) |
| | | 188 | | { |
| | 1 | 189 | | created += 1; |
| | 1 | 190 | | continue; |
| | | 191 | | } |
| | | 192 | | |
| | | 193 | | try |
| | | 194 | | { |
| | 1 | 195 | | PreparedExperimentSupport.ReportProgress( |
| | 1 | 196 | | $"Syncing dataset item {itemIndex + 1}/{artifact.Items.Count}: {item.Id}."); |
| | 1 | 197 | | var disposition = await SyncDatasetItemAsync(datasetName, item, cancellationToken); |
| | | 198 | | switch (disposition) |
| | | 199 | | { |
| | | 200 | | case SyncDisposition.Created: |
| | 1 | 201 | | created += 1; |
| | 1 | 202 | | break; |
| | | 203 | | case SyncDisposition.Updated: |
| | 0 | 204 | | updated += 1; |
| | 0 | 205 | | break; |
| | | 206 | | default: |
| | 0 | 207 | | unchanged += 1; |
| | | 208 | | break; |
| | | 209 | | } |
| | 1 | 210 | | } |
| | 0 | 211 | | catch (Exception ex) |
| | | 212 | | { |
| | 0 | 213 | | PreparedExperimentSupport.ReportProgress( |
| | 0 | 214 | | $"Dataset item {itemIndex + 1}/{artifact.Items.Count} failed: {item.Id}. {ex.Message}"); |
| | 0 | 215 | | failures.Add(new |
| | 0 | 216 | | { |
| | 0 | 217 | | itemId = item.Id, |
| | 0 | 218 | | message = ex.Message |
| | 0 | 219 | | }); |
| | 0 | 220 | | } |
| | 1 | 221 | | } |
| | | 222 | | |
| | 1 | 223 | | if (failures.Count > 0) |
| | | 224 | | { |
| | 0 | 225 | | throw new InvalidOperationException($"Dataset sync failed for {failures.Count} item(s): {JsonSerializer. |
| | | 226 | | } |
| | | 227 | | |
| | 1 | 228 | | PreparedExperimentSupport.ReportProgress( |
| | 1 | 229 | | $"Completed dataset sync for '{datasetName}': created={created}, updated={updated}, unchanged={unchanged |
| | | 230 | | |
| | 1 | 231 | | var summary = new |
| | 1 | 232 | | { |
| | 1 | 233 | | datasetName, |
| | 1 | 234 | | datasetId = dataset.Id, |
| | 1 | 235 | | datasetInputSchemaKeys = GetSchemaKeys(dataset.InputSchema), |
| | 1 | 236 | | datasetExpectedOutputSchemaKeys = GetSchemaKeys(dataset.ExpectedOutputSchema), |
| | 1 | 237 | | dryRun = settings.DryRun, |
| | 1 | 238 | | itemCount = artifact.Items.Count, |
| | 1 | 239 | | created, |
| | 1 | 240 | | updated, |
| | 1 | 241 | | unchanged, |
| | 1 | 242 | | firstItemId = artifact.Items.FirstOrDefault()?.Id, |
| | 1 | 243 | | lastItemId = artifact.Items.LastOrDefault()?.Id |
| | 1 | 244 | | }; |
| | | 245 | | |
| | 1 | 246 | | _console.WriteLine(JsonSerializer.Serialize(summary, SerializerOptions)); |
| | 1 | 247 | | return 0; |
| | | 248 | | } |
| | 0 | 249 | | catch (Exception ex) |
| | | 250 | | { |
| | 0 | 251 | | _logger.LogError(ex, "Error executing sync-dataset command"); |
| | 0 | 252 | | _console.MarkupLine($"[red]Error:[/] {Markup.Escape(ex.Message)}"); |
| | 0 | 253 | | return 1; |
| | | 254 | | } |
| | 1 | 255 | | } |
| | | 256 | | |
| | | 257 | | private async Task<SyncDisposition> SyncDatasetItemAsync( |
| | | 258 | | string datasetName, |
| | | 259 | | SyncDatasetArtifactItem item, |
| | | 260 | | CancellationToken cancellationToken) |
| | | 261 | | { |
| | 1 | 262 | | var existingItem = await _langfuseClient.GetDatasetItemAsync(item.Id, cancellationToken); |
| | | 263 | | |
| | 1 | 264 | | if (existingItem is not null && HasSameCanonicalContent(existingItem, item, datasetName)) |
| | | 265 | | { |
| | 0 | 266 | | return SyncDisposition.Unchanged; |
| | | 267 | | } |
| | | 268 | | |
| | 1 | 269 | | await _langfuseClient.CreateDatasetItemAsync( |
| | 1 | 270 | | new LangfuseCreateDatasetItemRequest( |
| | 1 | 271 | | item.Id, |
| | 1 | 272 | | datasetName, |
| | 1 | 273 | | item.Input, |
| | 1 | 274 | | item.ExpectedOutput, |
| | 1 | 275 | | item.Metadata), |
| | 1 | 276 | | cancellationToken); |
| | | 277 | | |
| | 1 | 278 | | return existingItem is null ? SyncDisposition.Created : SyncDisposition.Updated; |
| | 1 | 279 | | } |
| | | 280 | | |
| | | 281 | | private static bool HasSameCanonicalContent(LangfuseDatasetItem existingItem, SyncDatasetArtifactItem item, string d |
| | | 282 | | { |
| | 0 | 283 | | if (!string.IsNullOrWhiteSpace(existingItem.DatasetName) |
| | 0 | 284 | | && !string.Equals(existingItem.DatasetName, datasetName, StringComparison.Ordinal)) |
| | | 285 | | { |
| | 0 | 286 | | throw new InvalidOperationException( |
| | 0 | 287 | | $"Dataset item '{item.Id}' already exists in dataset '{existingItem.DatasetName}', not '{datasetName}'." |
| | | 288 | | } |
| | | 289 | | |
| | 0 | 290 | | return LangfuseJsonUtilities.StableEquals(existingItem.Input, item.Input) |
| | 0 | 291 | | && LangfuseJsonUtilities.StableEquals(existingItem.ExpectedOutput, item.ExpectedOutput) |
| | 0 | 292 | | && LangfuseJsonUtilities.StableEquals(existingItem.Metadata, item.Metadata); |
| | | 293 | | } |
| | | 294 | | |
| | | 295 | | private async Task<LangfuseDataset> CreateOrUpdateDatasetAsync( |
| | | 296 | | LangfuseCreateDatasetRequest datasetDefinition, |
| | | 297 | | CancellationToken cancellationToken) |
| | | 298 | | { |
| | 1 | 299 | | var createdDataset = await _langfuseClient.CreateDatasetAsync(datasetDefinition, cancellationToken); |
| | 1 | 300 | | return createdDataset with |
| | 1 | 301 | | { |
| | 1 | 302 | | InputSchema = LangfuseJsonUtilities.IsDefined(createdDataset.InputSchema) |
| | 1 | 303 | | ? createdDataset.InputSchema |
| | 1 | 304 | | : datasetDefinition.InputSchema ?? default, |
| | 1 | 305 | | ExpectedOutputSchema = LangfuseJsonUtilities.IsDefined(createdDataset.ExpectedOutputSchema) |
| | 1 | 306 | | ? createdDataset.ExpectedOutputSchema |
| | 1 | 307 | | : datasetDefinition.ExpectedOutputSchema ?? default |
| | 1 | 308 | | }; |
| | 1 | 309 | | } |
| | | 310 | | |
| | | 311 | | private static LangfuseDataset CreateDryRunDataset(LangfuseCreateDatasetRequest datasetDefinition) |
| | | 312 | | { |
| | 1 | 313 | | return new LangfuseDataset( |
| | 1 | 314 | | string.Empty, |
| | 1 | 315 | | datasetDefinition.Name, |
| | 1 | 316 | | datasetDefinition.Description, |
| | 1 | 317 | | datasetDefinition.Metadata is JsonElement metadata ? metadata : default, |
| | 1 | 318 | | datasetDefinition.InputSchema ?? default, |
| | 1 | 319 | | datasetDefinition.ExpectedOutputSchema ?? default); |
| | | 320 | | } |
| | | 321 | | |
| | | 322 | | private static LangfuseCreateDatasetRequest BuildDatasetDefinition(SyncDatasetArtifact artifact, string datasetName) |
| | | 323 | | { |
| | 1 | 324 | | var firstItemMetadata = artifact.Items.FirstOrDefault()?.Metadata ?? default; |
| | 1 | 325 | | var competition = GetStringProperty(firstItemMetadata, "competition") ?? "bundesliga-2025-26"; |
| | 1 | 326 | | var season = GetStringProperty(firstItemMetadata, "season") ?? "2025/2026"; |
| | 1 | 327 | | var communityContext = GetStringProperty(firstItemMetadata, "communityContext") |
| | 1 | 328 | | ?? datasetName.Split('/', StringSplitOptions.RemoveEmptyEntries).Last(); |
| | | 329 | | |
| | 1 | 330 | | var datasetMetadata = LangfuseJsonUtilities.IsDefined(artifact.DatasetMetadata) |
| | 1 | 331 | | ? artifact.DatasetMetadata |
| | 1 | 332 | | : JsonSerializer.SerializeToElement(new |
| | 1 | 333 | | { |
| | 1 | 334 | | competition, |
| | 1 | 335 | | communityContext, |
| | 1 | 336 | | scope = "match-centric", |
| | 1 | 337 | | season |
| | 1 | 338 | | }, SerializerOptions); |
| | | 339 | | |
| | 1 | 340 | | return new LangfuseCreateDatasetRequest( |
| | 1 | 341 | | datasetName, |
| | 1 | 342 | | artifact.DatasetDescription |
| | 1 | 343 | | ?? $"Hosted dataset for {season} {communityContext} {competition} match experiments", |
| | 1 | 344 | | datasetMetadata, |
| | 1 | 345 | | LangfuseJsonUtilities.IsDefined(artifact.InputSchema) ? artifact.InputSchema : DefaultInputSchema, |
| | 1 | 346 | | LangfuseJsonUtilities.IsDefined(artifact.ExpectedOutputSchema) |
| | 1 | 347 | | ? artifact.ExpectedOutputSchema |
| | 1 | 348 | | : DefaultExpectedOutputSchema); |
| | | 349 | | } |
| | | 350 | | |
| | | 351 | | private static void ValidateArtifact(SyncDatasetArtifact artifact, string datasetName) |
| | | 352 | | { |
| | 1 | 353 | | Assert(!string.IsNullOrWhiteSpace(datasetName), "Dataset name must be a non-empty string."); |
| | | 354 | | |
| | 1 | 355 | | var inputSchema = LangfuseJsonUtilities.IsDefined(artifact.InputSchema) |
| | 1 | 356 | | ? artifact.InputSchema |
| | 1 | 357 | | : DefaultInputSchema; |
| | 1 | 358 | | var expectedOutputSchema = LangfuseJsonUtilities.IsDefined(artifact.ExpectedOutputSchema) |
| | 1 | 359 | | ? artifact.ExpectedOutputSchema |
| | 1 | 360 | | : DefaultExpectedOutputSchema; |
| | | 361 | | |
| | 1 | 362 | | var seenItemIds = new HashSet<string>(StringComparer.Ordinal); |
| | 1 | 363 | | foreach (var item in artifact.Items) |
| | | 364 | | { |
| | 1 | 365 | | Assert(!string.IsNullOrWhiteSpace(item.Id), "Each dataset item must have a non-empty string id."); |
| | 1 | 366 | | Assert(seenItemIds.Add(item.Id), $"Duplicate dataset item id '{item.Id}' found in artifact."); |
| | | 367 | | |
| | 1 | 368 | | ValidateValueAgainstSchema(item.Input, inputSchema, $"Dataset item '{item.Id}' input"); |
| | 1 | 369 | | ValidateValueAgainstSchema(item.ExpectedOutput, expectedOutputSchema, $"Dataset item '{item.Id}' expectedOut |
| | 1 | 370 | | ValidateValueAgainstSchema(item.Metadata, MetadataSchema, $"Dataset item '{item.Id}' metadata"); |
| | | 371 | | } |
| | 1 | 372 | | } |
| | | 373 | | |
| | | 374 | | private static void ValidateValueAgainstSchema(JsonElement value, JsonElement schema, string label) |
| | | 375 | | { |
| | 1 | 376 | | Assert(schema.ValueKind == JsonValueKind.Object, $"{label} schema must be an object."); |
| | | 377 | | |
| | 1 | 378 | | var schemaType = schema.GetProperty("type").GetString(); |
| | | 379 | | switch (schemaType) |
| | | 380 | | { |
| | | 381 | | case "object": |
| | 1 | 382 | | Assert(value.ValueKind == JsonValueKind.Object, $"{label} must be an object."); |
| | | 383 | | |
| | 1 | 384 | | var properties = schema.TryGetProperty("properties", out var propertyNode) |
| | 1 | 385 | | ? propertyNode |
| | 1 | 386 | | : default; |
| | 1 | 387 | | var required = schema.TryGetProperty("required", out var requiredNode) |
| | 1 | 388 | | ? requiredNode.EnumerateArray().Select(entry => entry.GetString()!).ToHashSet(StringComparer.Ordinal |
| | 1 | 389 | | : []; |
| | 1 | 390 | | var actualProperties = value.EnumerateObject().ToList(); |
| | 1 | 391 | | var allowedKeys = properties.ValueKind == JsonValueKind.Object |
| | 1 | 392 | | ? properties.EnumerateObject().Select(property => property.Name).ToHashSet(StringComparer.Ordinal) |
| | 1 | 393 | | : []; |
| | | 394 | | |
| | 1 | 395 | | if (schema.TryGetProperty("additionalProperties", out var additionalProperties) |
| | 1 | 396 | | && additionalProperties.ValueKind == JsonValueKind.False) |
| | | 397 | | { |
| | 1 | 398 | | var unexpectedKeys = actualProperties |
| | 1 | 399 | | .Select(property => property.Name) |
| | 1 | 400 | | .Where(name => !allowedKeys.Contains(name)) |
| | 1 | 401 | | .ToArray(); |
| | 1 | 402 | | Assert( |
| | 1 | 403 | | unexpectedKeys.Length == 0, |
| | 1 | 404 | | $"{label} must not contain unexpected keys: {string.Join(", ", unexpectedKeys)}."); |
| | | 405 | | } |
| | | 406 | | |
| | 1 | 407 | | foreach (var requiredKey in required) |
| | | 408 | | { |
| | 1 | 409 | | Assert(value.TryGetProperty(requiredKey, out _), $"{label}.{requiredKey} is required."); |
| | | 410 | | } |
| | | 411 | | |
| | 1 | 412 | | if (properties.ValueKind == JsonValueKind.Object) |
| | | 413 | | { |
| | 1 | 414 | | foreach (var actualProperty in actualProperties) |
| | | 415 | | { |
| | 1 | 416 | | if (properties.TryGetProperty(actualProperty.Name, out var propertySchema)) |
| | | 417 | | { |
| | 1 | 418 | | ValidateValueAgainstSchema(actualProperty.Value, propertySchema, $"{label}.{actualProperty.N |
| | | 419 | | } |
| | | 420 | | } |
| | | 421 | | } |
| | | 422 | | |
| | 1 | 423 | | return; |
| | | 424 | | |
| | | 425 | | case "string": |
| | 1 | 426 | | Assert(value.ValueKind == JsonValueKind.String, $"{label} must be a non-empty string."); |
| | 1 | 427 | | var stringValue = value.GetString() ?? string.Empty; |
| | 1 | 428 | | Assert(!string.IsNullOrWhiteSpace(stringValue), $"{label} must be a non-empty string."); |
| | | 429 | | |
| | 1 | 430 | | if (schema.TryGetProperty("minLength", out var minLengthNode)) |
| | | 431 | | { |
| | 1 | 432 | | Assert(stringValue.Length >= minLengthNode.GetInt32(), $"{label} must be at least {minLengthNode.Get |
| | | 433 | | } |
| | | 434 | | |
| | 1 | 435 | | if (schema.TryGetProperty("maxLength", out var maxLengthNode)) |
| | | 436 | | { |
| | 0 | 437 | | Assert(stringValue.Length <= maxLengthNode.GetInt32(), $"{label} must be at most {maxLengthNode.GetI |
| | | 438 | | } |
| | | 439 | | |
| | 1 | 440 | | return; |
| | | 441 | | |
| | | 442 | | case "integer": |
| | 1 | 443 | | Assert(value.ValueKind == JsonValueKind.Number, $"{label} must be an integer."); |
| | 1 | 444 | | if (!value.TryGetInt64(out var integerValue)) |
| | | 445 | | { |
| | 0 | 446 | | throw new InvalidOperationException($"{label} must be an integer."); |
| | | 447 | | } |
| | | 448 | | |
| | 1 | 449 | | if (schema.TryGetProperty("minimum", out var minimumIntegerNode)) |
| | | 450 | | { |
| | 1 | 451 | | Assert(integerValue >= minimumIntegerNode.GetInt64(), $"{label} must be at least {minimumIntegerNode |
| | | 452 | | } |
| | | 453 | | |
| | 1 | 454 | | if (schema.TryGetProperty("maximum", out var maximumIntegerNode)) |
| | | 455 | | { |
| | 0 | 456 | | Assert(integerValue <= maximumIntegerNode.GetInt64(), $"{label} must be at most {maximumIntegerNode. |
| | | 457 | | } |
| | | 458 | | |
| | 1 | 459 | | return; |
| | | 460 | | |
| | | 461 | | case "number": |
| | 0 | 462 | | Assert(value.ValueKind == JsonValueKind.Number, $"{label} must be a number."); |
| | 0 | 463 | | if (!value.TryGetDouble(out var numberValue)) |
| | | 464 | | { |
| | 0 | 465 | | throw new InvalidOperationException($"{label} must be a number."); |
| | | 466 | | } |
| | | 467 | | |
| | 0 | 468 | | if (schema.TryGetProperty("minimum", out var minimumNumberNode)) |
| | | 469 | | { |
| | 0 | 470 | | Assert(numberValue >= minimumNumberNode.GetDouble(), $"{label} must be at least {minimumNumberNode.G |
| | | 471 | | } |
| | | 472 | | |
| | 0 | 473 | | if (schema.TryGetProperty("maximum", out var maximumNumberNode)) |
| | | 474 | | { |
| | 0 | 475 | | Assert(numberValue <= maximumNumberNode.GetDouble(), $"{label} must be at most {maximumNumberNode.Ge |
| | | 476 | | } |
| | | 477 | | |
| | 0 | 478 | | return; |
| | | 479 | | |
| | | 480 | | case "boolean": |
| | 0 | 481 | | Assert(value.ValueKind is JsonValueKind.True or JsonValueKind.False, $"{label} must be a boolean."); |
| | 0 | 482 | | return; |
| | | 483 | | |
| | | 484 | | default: |
| | 0 | 485 | | throw new InvalidOperationException($"Unsupported schema type '{schemaType}' for {label}."); |
| | | 486 | | } |
| | | 487 | | } |
| | | 488 | | |
| | | 489 | | private static IReadOnlyList<string> GetSchemaKeys(JsonElement schema) |
| | | 490 | | { |
| | 1 | 491 | | if (!LangfuseJsonUtilities.IsDefined(schema) |
| | 1 | 492 | | || schema.ValueKind != JsonValueKind.Object |
| | 1 | 493 | | || !schema.TryGetProperty("properties", out var properties) |
| | 1 | 494 | | || properties.ValueKind != JsonValueKind.Object) |
| | | 495 | | { |
| | 0 | 496 | | return []; |
| | | 497 | | } |
| | | 498 | | |
| | 1 | 499 | | return properties.EnumerateObject() |
| | 1 | 500 | | .Select(property => property.Name) |
| | 1 | 501 | | .OrderBy(name => name, StringComparer.Ordinal) |
| | 1 | 502 | | .ToList(); |
| | | 503 | | } |
| | | 504 | | |
| | | 505 | | private static string? GetStringProperty(JsonElement element, string propertyName) |
| | | 506 | | { |
| | 1 | 507 | | if (!LangfuseJsonUtilities.IsDefined(element) |
| | 1 | 508 | | || element.ValueKind != JsonValueKind.Object |
| | 1 | 509 | | || !element.TryGetProperty(propertyName, out var property) |
| | 1 | 510 | | || property.ValueKind != JsonValueKind.String) |
| | | 511 | | { |
| | 0 | 512 | | return null; |
| | | 513 | | } |
| | | 514 | | |
| | 1 | 515 | | return property.GetString(); |
| | | 516 | | } |
| | | 517 | | |
| | | 518 | | private static async Task<SyncDatasetArtifact> LoadArtifactAsync(string inputPath, CancellationToken cancellationTok |
| | | 519 | | { |
| | 1 | 520 | | var absolutePath = Path.GetFullPath(inputPath); |
| | 1 | 521 | | var raw = await File.ReadAllTextAsync(absolutePath, cancellationToken); |
| | 1 | 522 | | var artifact = JsonSerializer.Deserialize<SyncDatasetArtifact>(raw, SerializerOptions); |
| | 1 | 523 | | return artifact ?? throw new InvalidOperationException($"Dataset artifact '{absolutePath}' could not be deserial |
| | 1 | 524 | | } |
| | | 525 | | |
| | | 526 | | private static JsonElement ParseJsonElement(string value) |
| | | 527 | | { |
| | 1 | 528 | | using var document = JsonDocument.Parse(value); |
| | 1 | 529 | | return document.RootElement.Clone(); |
| | 1 | 530 | | } |
| | | 531 | | |
| | | 532 | | private static void Assert(bool condition, string message) |
| | | 533 | | { |
| | 1 | 534 | | if (!condition) |
| | | 535 | | { |
| | 0 | 536 | | throw new InvalidOperationException(message); |
| | | 537 | | } |
| | 1 | 538 | | } |
| | | 539 | | |
| | | 540 | | private enum SyncDisposition |
| | | 541 | | { |
| | | 542 | | Created, |
| | | 543 | | Updated, |
| | | 544 | | Unchanged |
| | | 545 | | } |
| | | 546 | | |
| | 1 | 547 | | private sealed record SyncDatasetArtifact( |
| | 1 | 548 | | [property: JsonPropertyName("datasetName")] string? DatasetName, |
| | 1 | 549 | | [property: JsonPropertyName("datasetDescription")] string? DatasetDescription, |
| | 1 | 550 | | [property: JsonPropertyName("datasetMetadata")] JsonElement DatasetMetadata, |
| | 1 | 551 | | [property: JsonPropertyName("inputSchema")] JsonElement InputSchema, |
| | 1 | 552 | | [property: JsonPropertyName("expectedOutputSchema")] JsonElement ExpectedOutputSchema, |
| | 1 | 553 | | [property: JsonPropertyName("items")] IReadOnlyList<SyncDatasetArtifactItem> Items); |
| | | 554 | | |
| | 1 | 555 | | private sealed record SyncDatasetArtifactItem( |
| | 1 | 556 | | [property: JsonPropertyName("id")] string Id, |
| | 1 | 557 | | [property: JsonPropertyName("input")] JsonElement Input, |
| | 1 | 558 | | [property: JsonPropertyName("expectedOutput")] JsonElement ExpectedOutput, |
| | 1 | 559 | | [property: JsonPropertyName("metadata")] JsonElement Metadata); |
| | | 560 | | } |