| | | 1 | | using System.Collections.Generic; |
| | | 2 | | using System.ClientModel; |
| | | 3 | | using System.ClientModel.Primitives; |
| | | 4 | | using System.Diagnostics; |
| | | 5 | | using System.Globalization; |
| | | 6 | | using System.Linq; |
| | | 7 | | using System.Text.Json; |
| | | 8 | | using System.Text.Json.Serialization; |
| | | 9 | | using System.Text.RegularExpressions; |
| | | 10 | | using EHonda.KicktippAi.Core; |
| | | 11 | | using Microsoft.Extensions.Logging; |
| | | 12 | | using OpenAI.Chat; |
| | | 13 | | using OpenAI.Responses; |
| | | 14 | | using Polly; |
| | | 15 | | using Polly.Retry; |
| | | 16 | | using Polly.Timeout; |
| | | 17 | | |
| | | 18 | | namespace OpenAiIntegration; |
| | | 19 | | |
| | | 20 | | /// <summary> |
| | | 21 | | /// Service for predicting match outcomes using OpenAI models |
| | | 22 | | /// </summary> |
| | | 23 | | public class PredictionService : IPredictionService |
| | | 24 | | { |
// Retry budget used by the retry strategy that handles transient (5xx) OpenAI server failures.
| | | 25 | | private const int TransientOpenAiMaxRetryAttempts = 3; |
// Retry budget used by the retry strategy that handles retryable 429 rate-limit failures.
| | | 26 | | private const int RateLimitedOpenAiMaxRetryAttempts = 8; |
// Service tier requested first when flex processing is enabled.
| | | 27 | | private const string FlexServiceTier = "flex"; |
// Service tier the request is switched to when the flex attempt fails with a fallback-eligible error.
| | | 28 | | private const string DefaultServiceTier = "default"; |
// Base delay of the exponential, jittered backoff applied to transient server failures.
| | | 29 | | private static readonly TimeSpan TransientOpenAiRetryDelay = TimeSpan.FromSeconds(2); |
// Base delay of the computed exponential backoff used when no rate-limit reset header is available.
| | | 30 | | private static readonly TimeSpan RateLimitedOpenAiRetryBaseDelay = TimeSpan.FromSeconds(2); |
// Upper bound applied to every rate-limit retry delay (header-derived or computed).
| | | 31 | | private static readonly TimeSpan RateLimitedOpenAiRetryMaxDelay = TimeSpan.FromMinutes(2); |
| | | 32 | | |
// Dependencies and configuration captured by the constructor.
| | | 33 | | private readonly ResponsesClient _responsesClient; |
| | | 34 | | private readonly ILogger<PredictionService> _logger; |
| | | 35 | | private readonly ICostCalculationService _costCalculationService; |
| | | 36 | | private readonly ITokenUsageTracker _tokenUsageTracker; |
| | | 37 | | private readonly IInstructionsTemplateProvider _templateProvider; |
| | | 38 | | private readonly PredictionServiceOptions _options; |
| | | 39 | | private readonly string _model; |
// Instruction templates are wrapped in Lazy<T>; the constructor wires each one to the template provider,
// so a template is only loaded when its Value is first read.
| | | 40 | | private readonly Lazy<(string Template, string Path)> _instructionsTemplate; |
| | | 41 | | private readonly Lazy<(string Template, string Path)> _instructionsTemplateWithJustification; |
| | | 42 | | private readonly Lazy<(string Template, string Path)> _bonusInstructionsTemplate; |
| | | 43 | | |
/// <summary>
/// Creates a prediction service bound to a single OpenAI model.
/// </summary>
/// <param name="responsesClient">Client used to send requests to the OpenAI Responses API.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <param name="costCalculationService">Service used to log the cost breakdown of each request.</param>
/// <param name="tokenUsageTracker">Tracker that accumulates token usage per model.</param>
/// <param name="templateProvider">Provider the instruction templates are loaded from on first use.</param>
/// <param name="model">Model identifier used for requests, template lookup and usage tracking.</param>
/// <param name="options">Optional settings; <see cref="PredictionServiceOptions.Default"/> is used when <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when any argument other than <paramref name="options"/> is <c>null</c>.
/// </exception>
public PredictionService(
    ResponsesClient responsesClient,
    ILogger<PredictionService> logger,
    ICostCalculationService costCalculationService,
    ITokenUsageTracker tokenUsageTracker,
    IInstructionsTemplateProvider templateProvider,
    string model,
    PredictionServiceOptions? options = null)
{
    // Validate in declaration order so the first missing dependency is the one reported.
    ArgumentNullException.ThrowIfNull(responsesClient);
    ArgumentNullException.ThrowIfNull(logger);
    ArgumentNullException.ThrowIfNull(costCalculationService);
    ArgumentNullException.ThrowIfNull(tokenUsageTracker);
    ArgumentNullException.ThrowIfNull(templateProvider);
    ArgumentNullException.ThrowIfNull(model);

    _responsesClient = responsesClient;
    _logger = logger;
    _costCalculationService = costCalculationService;
    _tokenUsageTracker = tokenUsageTracker;
    _templateProvider = templateProvider;
    _options = options ?? PredictionServiceOptions.Default;
    _model = model;

    // Templates are resolved lazily: nothing is loaded until a prediction actually needs it.
    _instructionsTemplate = new(
        () => _templateProvider.LoadMatchTemplate(_model, includeJustification: false));
    _instructionsTemplateWithJustification = new(
        () => _templateProvider.LoadMatchTemplate(_model, includeJustification: true));
    _bonusInstructionsTemplate = new(
        () => _templateProvider.LoadBonusTemplate(_model));
}
| | | 68 | | |
/// <summary>
/// Generates a score prediction for a single match by sending the instruction prompt and the
/// match JSON to OpenAI and parsing the structured response.
/// </summary>
/// <param name="match">Match to predict; its teams and start time are logged and serialized into the user message.</param>
/// <param name="contextDocuments">Context documents combined with the instruction template.</param>
/// <param name="includeJustification">Selects the justification variant of the instructions and response schema.</param>
/// <param name="telemetryMetadata">Optional metadata forwarded to the Langfuse generation attributes.</param>
/// <param name="cancellationToken">Token forwarded to the OpenAI call.</param>
/// <returns>The parsed prediction, or <c>null</c> when any exception occurs while generating it.</returns>
| | | 69 | | public async Task<Prediction?> PredictMatchAsync( |
| | | 70 | | EHonda.KicktippAi.Core.Match match, |
| | | 71 | | IEnumerable<DocumentContext> contextDocuments, |
| | | 72 | | bool includeJustification = false, |
| | | 73 | | PredictionTelemetryMetadata? telemetryMetadata = null, |
| | | 74 | | CancellationToken cancellationToken = default) |
| | | 75 | | { |
| | | 76 | | _logger.LogInformation("Generating prediction for match: {HomeTeam} vs {AwayTeam} at {StartTime}", |
| | | 77 | | match.HomeTeam, match.AwayTeam, match.StartsAt); |
| | | 78 | | |
| | | 79 | | try |
| | | 80 | | { |
| | | 81 | | // Build the instructions by combining template with context |
| | | 82 | | var instructions = BuildInstructions(contextDocuments, includeJustification); |
| | | 83 | | |
| | | 84 | | // Create match JSON |
| | | 85 | | var matchJson = PredictionPromptComposer.CreateMatchJson(match); |
| | | 86 | | |
| | | 87 | | _logger.LogDebug("Instructions length: {InstructionsLength} characters", instructions.Length); |
// NOTE(review): Count() enumerates contextDocuments again after BuildInstructions has already received it,
// and it is evaluated even when Debug logging is disabled; a lazy sequence would be re-run here.
| | | 88 | | _logger.LogDebug("Context documents: {ContextCount}", contextDocuments.Count()); |
| | | 89 | | _logger.LogDebug("Match JSON: {MatchJson}", matchJson); |
| | | 90 | | |
| | | 91 | | // Create input items for the response |
| | | 92 | | var messages = new List<PredictionRequestMessage> |
| | | 93 | | { |
| | | 94 | | new("system", instructions), |
| | | 95 | | new("user", matchJson) |
| | | 96 | | }; |
| | | 97 | | |
| | | 98 | | _logger.LogDebug("Calling OpenAI API for prediction"); |
| | | 99 | | |
| | | 100 | | // Start an OTel activity for Langfuse generation tracking |
| | | 101 | | using var activity = Telemetry.Source.StartActivity("predict-match"); |
| | | 102 | | |
| | | 103 | | // Call OpenAI with structured output format |
| | | 104 | | var completion = await CompleteMatchResponseAsync(messages, includeJustification, cancellationToken); |
| | | 105 | | |
| | | 106 | | // Parse the structured response |
| | | 107 | | var predictionJson = completion.PredictionJson; |
| | | 108 | | _logger.LogDebug("Received prediction JSON: {PredictionJson}", predictionJson); |
| | | 109 | | |
| | | 110 | | var prediction = ParsePrediction(predictionJson); |
| | | 111 | | |
| | | 112 | | _logger.LogInformation("Prediction generated: {HomeGoals}-{AwayGoals} for {HomeTeam} vs {AwayTeam}", |
| | | 113 | | prediction.HomeGoals, prediction.AwayGoals, match.HomeTeam, match.AwayTeam); |
| | | 114 | | |
| | | 115 | | // Log token usage and cost breakdown |
| | | 116 | | var usage = completion.Usage; |
| | | 117 | | _logger.LogDebug("Token usage - Input: {InputTokens}, Output: {OutputTokens}, Total: {TotalTokens}", |
| | | 118 | | usage.InputTokenCount, usage.OutputTokenCount, usage.TotalTokenCount); |
| | | 119 | | |
| | | 120 | | // Set Langfuse generation attributes on the activity |
| | | 121 | | SetLangfuseGenerationAttributes(activity, messages, predictionJson, usage, telemetryMetadata, completion.Exe |
| | | 122 | | |
// ExecutionTelemetry is only attached on the flex-enabled path; when it is present, the final
// service tier is passed along so usage and cost are attributed to the tier that actually served the request.
| | | 123 | | // Add usage to tracker |
| | | 124 | | if (completion.ExecutionTelemetry is null) |
| | | 125 | | { |
| | | 126 | | _tokenUsageTracker.AddUsage(_model, usage); |
| | | 127 | | } |
| | | 128 | | else |
| | | 129 | | { |
| | | 130 | | _tokenUsageTracker.AddUsage(_model, usage, completion.ExecutionTelemetry.FinalServiceTier); |
| | | 131 | | } |
| | | 132 | | |
| | | 133 | | // Calculate and log costs |
| | | 134 | | if (completion.ExecutionTelemetry is null) |
| | | 135 | | { |
| | | 136 | | _costCalculationService.LogCostBreakdown(_model, usage); |
| | | 137 | | } |
| | | 138 | | else |
| | | 139 | | { |
| | | 140 | | _costCalculationService.LogCostBreakdown(_model, usage, completion.ExecutionTelemetry.FinalServiceTier); |
| | | 141 | | } |
| | | 142 | | |
| | | 143 | | return prediction; |
| | | 144 | | } |
// Best-effort contract: every failure is logged, echoed to stderr and reported to the caller as null.
// NOTE(review): this also catches OperationCanceledException, so a cancelled request surfaces as null
// rather than as a cancellation - confirm that is intended.
| | | 145 | | catch (Exception ex) |
| | | 146 | | { |
| | | 147 | | _logger.LogError(ex, "Error generating prediction for match: {HomeTeam} vs {AwayTeam}", |
| | | 148 | | match.HomeTeam, match.AwayTeam); |
| | | 149 | | Console.Error.WriteLine($"Prediction error for {match.HomeTeam} vs {match.AwayTeam}: {ex.Message}"); |
| | | 150 | | |
| | | 151 | | return null; |
| | | 152 | | } |
| | | 153 | | } |
| | | 154 | | |
/// <summary>
/// Sends a match prediction request through the shared structured-response flow.
/// </summary>
/// <param name="messages">Prompt messages to send.</param>
/// <param name="includeJustification">Selects the response schema variant that includes a justification.</param>
/// <param name="cancellationToken">Token forwarded to the request.</param>
/// <returns>The completed OpenAI response result.</returns>
private Task<OpenAiResponseResult> CompleteMatchResponseAsync(
    IReadOnlyList<PredictionRequestMessage> messages,
    bool includeJustification,
    CancellationToken cancellationToken)
{
    // The request options are rebuilt for every attempt because the service tier may change between attempts.
    Task<OpenAiResponseResult> SendAsync(string? serviceTier, CancellationToken ct)
    {
        var requestOptions = CreateMatchResponseOptions(
            messages,
            includeJustification,
            serviceTier,
            _options.ReasoningEffort);

        return CompleteResponseAsync(requestOptions, serviceTier, ct);
    }

    return CompleteStructuredResponseAsync(SendAsync, cancellationToken);
}
| | | 171 | | |
/// <summary>
/// Sends a bonus-question prediction request through the shared structured-response flow.
/// </summary>
/// <param name="messages">Prompt messages to send.</param>
/// <param name="bonusQuestion">Question the response schema is generated for.</param>
/// <param name="cancellationToken">Token forwarded to the request.</param>
/// <returns>The completed OpenAI response result.</returns>
private Task<OpenAiResponseResult> CompleteBonusResponseAsync(
    IReadOnlyList<PredictionRequestMessage> messages,
    BonusQuestion bonusQuestion,
    CancellationToken cancellationToken)
{
    // The request options are rebuilt for every attempt because the service tier may change between attempts.
    Task<OpenAiResponseResult> SendAsync(string? serviceTier, CancellationToken ct)
    {
        var requestOptions = CreateBonusResponseOptions(
            messages,
            bonusQuestion,
            serviceTier,
            _options.ReasoningEffort);

        return CompleteResponseAsync(requestOptions, serviceTier, ct);
    }

    return CompleteStructuredResponseAsync(SendAsync, cancellationToken);
}
| | | 188 | | |
/// <summary>
/// Runs a structured-response request, trying the flex service tier first and retrying once on the
/// default tier when the flex attempt fails with a fallback-eligible error.
/// </summary>
/// <param name="completeResponseAsync">
/// Delegate that performs one request for the given service tier (<c>null</c> means no tier is requested).
/// </param>
/// <param name="cancellationToken">Token forwarded to the delegate and the retry pipeline.</param>
/// <returns>
/// The delegate's result. On the flex-enabled path the result additionally carries execution telemetry
/// describing the tier used and whether the fallback was taken.
/// </returns>
| | | 189 | | private async Task<OpenAiResponseResult> CompleteStructuredResponseAsync( |
| | | 190 | | Func<string?, CancellationToken, Task<OpenAiResponseResult>> completeResponseAsync, |
| | | 191 | | CancellationToken cancellationToken) |
| | | 192 | | { |
// With flex processing disabled the request is sent without a service tier and no telemetry is attached.
| | | 193 | | if (_options.DisableFlexProcessing) |
| | | 194 | | { |
| | | 195 | | return await completeResponseAsync(null, cancellationToken); |
| | | 196 | | } |
| | | 197 | | |
// These two locals are captured by the retry callbacks below and mutated in OnRetry, so the second
// attempt of the pipeline sees the default tier.
| | | 198 | | string? requestedServiceTier = FlexServiceTier; |
| | | 199 | | var usedFallback = false; |
| | | 200 | | var pipeline = new ResiliencePipelineBuilder<OpenAiResponseResult>() |
| | | 201 | | // OpenAI documents that Flex processing can return 429 Resource Unavailable |
| | | 202 | | // when resources are insufficient, and recommends retrying with standard |
| | | 203 | | // processing when occasional higher cost is acceptable: |
| | | 204 | | // https://developers.openai.com/api/docs/guides/flex-processing |
| | | 205 | | // The Responses API reference documents service_tier=default as standard |
| | | 206 | | // pricing/performance, so flex 429 retries switch to that tier: |
| | | 207 | | // https://platform.openai.com/docs/api-reference/responses/create |
| | | 208 | | .AddRetry(new RetryStrategyOptions<OpenAiResponseResult> |
| | | 209 | | { |
| | | 210 | | MaxRetryAttempts = 1, |
| | | 211 | | Delay = TimeSpan.Zero, |
// Only a failure of the flex attempt is handled; once the tier has been switched to default,
// IsFlexProcessingRequest is false and a second failure is not retried by this strategy.
| | | 212 | | ShouldHandle = args => ValueTask.FromResult( |
| | | 213 | | IsFlexProcessingRequest(requestedServiceTier) && |
| | | 214 | | IsFlexFallbackFailure(args.Outcome.Exception, args.Context.CancellationToken)), |
| | | 215 | | OnRetry = args => |
| | | 216 | | { |
| | | 217 | | usedFallback = true; |
| | | 218 | | requestedServiceTier = DefaultServiceTier; |
| | | 219 | | _logger.LogWarning( |
| | | 220 | | args.Outcome.Exception, |
| | | 221 | | "OpenAI flex processing failed with a retryable failure; retrying prediction with default proces |
| | | 222 | | return default; |
| | | 223 | | } |
| | | 224 | | }) |
| | | 225 | | .Build(); |
| | | 226 | | |
| | | 227 | | var result = await pipeline.ExecuteAsync( |
| | | 228 | | async ct => |
| | | 229 | | { |
| | | 230 | | var completion = await completeResponseAsync(requestedServiceTier, ct); |
| | | 231 | | |
// Prefer the tier reported by the response; otherwise fall back to the tier that was requested.
// requestedServiceTier is only ever assigned the flex or default constant in this method, so the
// "standard" literal is a defensive last resort.
| | | 232 | | var finalServiceTier = string.IsNullOrWhiteSpace(completion.FinalServiceTier) |
| | | 233 | | ? requestedServiceTier ?? "standard" |
| | | 234 | | : completion.FinalServiceTier; |
| | | 235 | | |
| | | 236 | | return completion with |
| | | 237 | | { |
| | | 238 | | ExecutionTelemetry = new PredictionExecutionTelemetry( |
| | | 239 | | "flex-first-standard-fallback", |
| | | 240 | | usedFallback ? DefaultServiceTier : FlexServiceTier, |
| | | 241 | | finalServiceTier, |
| | | 242 | | usedFallback) |
| | | 243 | | }; |
| | | 244 | | }, |
| | | 245 | | cancellationToken); |
| | | 246 | | |
| | | 247 | | return result; |
| | | 248 | | } |
| | | 249 | | |
/// <summary>
/// Sends one request (with transient-failure retries) and converts the raw response into an
/// <see cref="OpenAiResponseResult"/>.
/// </summary>
/// <param name="options">Fully built request options.</param>
/// <param name="serviceTier">Service tier the request was built for; forwarded to the retry logic.</param>
/// <param name="cancellationToken">Token forwarded to the request.</param>
/// <returns>The output text, token usage and normalized service tier reported by the response.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the response contains no output text or no token usage.
/// </exception>
private async Task<OpenAiResponseResult> CompleteResponseAsync(
    CreateResponseOptions options,
    string? serviceTier,
    CancellationToken cancellationToken)
{
    var clientResult = await CreateResponseWithTransientRetryAsync(options, serviceTier, cancellationToken);
    var body = clientResult.Value;

    // Output text is validated before usage, so a response missing both reports the missing text.
    var outputText = body.GetOutputText()
        ?? throw new InvalidOperationException("OpenAI response did not contain output text.");

    var rawUsage = body.Usage;
    var tokenUsage = (rawUsage is null ? null : ToChatTokenUsage(rawUsage))
        ?? throw new InvalidOperationException("OpenAI response did not contain token usage.");

    var reportedTier = NormalizeResponseServiceTier(body.ServiceTier);

    // Execution telemetry is left null here.
    return new OpenAiResponseResult(outputText, tokenUsage, null, reportedTier);
}
| | | 277 | | |
/// <summary>
/// Calls the Responses API through a pipeline with two retry strategies: one for retryable 429
/// rate-limit failures and one for transient 5xx server failures.
/// </summary>
/// <param name="options">Request options passed to the client on every attempt.</param>
/// <param name="requestedServiceTier">
/// Tier the request was built for; rate-limit retries are skipped when it is the flex tier.
/// </param>
/// <param name="cancellationToken">Token forwarded to the pipeline and the client call.</param>
/// <returns>The client result of the first attempt that is not handled by either retry strategy.</returns>
| | | 278 | | private async Task<ClientResult<ResponseResult>> CreateResponseWithTransientRetryAsync( |
| | | 279 | | CreateResponseOptions options, |
| | | 280 | | string? requestedServiceTier, |
| | | 281 | | CancellationToken cancellationToken) |
| | | 282 | | { |
| | | 283 | | var pipeline = new ResiliencePipelineBuilder<ClientResult<ResponseResult>>() |
| | | 284 | | // OpenAI documents 429 rate-limit errors as pacing problems and recommends bounded |
| | | 285 | | // random exponential backoff. It also documents x-ratelimit-* response headers |
| | | 286 | | // for reset timing: |
| | | 287 | | // https://platform.openai.com/docs/guides/rate-limits |
| | | 288 | | // https://platform.openai.com/docs/guides/error-codes |
| | | 289 | | // https://platform.openai.com/docs/api-reference |
| | | 290 | | // These references document x-ratelimit-* reset headers, not Retry-After. |
| | | 291 | | .AddRetry(new RetryStrategyOptions<ClientResult<ResponseResult>> |
| | | 292 | | { |
| | | 293 | | MaxRetryAttempts = RateLimitedOpenAiMaxRetryAttempts, |
// The delay comes from the rate-limit reset headers when they can be parsed, otherwise from a
// capped, jittered exponential backoff (see ResolveOpenAiRateLimitDelay).
| | | 294 | | DelayGenerator = args => new ValueTask<TimeSpan?>( |
| | | 295 | | ResolveOpenAiRateLimitDelay(args.Outcome.Exception, args.AttemptNumber)), |
// Flex requests are excluded here, so a 429 on the flex tier is not retried by this strategy and
// propagates to the caller.
| | | 296 | | ShouldHandle = args => ValueTask.FromResult( |
| | | 297 | | !IsFlexProcessingRequest(requestedServiceTier) && |
| | | 298 | | IsRetryableOpenAiRateLimitFailure(args.Outcome.Exception, args.Context.CancellationToken)), |
| | | 299 | | OnRetry = args => |
| | | 300 | | { |
| | | 301 | | _logger.LogWarning( |
| | | 302 | | args.Outcome.Exception, |
| | | 303 | | "OpenAI request hit a rate limit; retrying prediction request ({RetryAttempt}/{MaxRetryAttempts} |
| | | 304 | | args.AttemptNumber + 1, |
| | | 305 | | RateLimitedOpenAiMaxRetryAttempts, |
| | | 306 | | args.RetryDelay); |
| | | 307 | | return default; |
| | | 308 | | } |
| | | 309 | | }) |
// Second strategy: transient 5xx failures, retried with exponential backoff and jitter.
// NOTE(review): presumably this strategy sits inside the rate-limit strategy because it is added
// second - confirm against Polly's strategy ordering.
| | | 310 | | .AddRetry(new RetryStrategyOptions<ClientResult<ResponseResult>> |
| | | 311 | | { |
| | | 312 | | MaxRetryAttempts = TransientOpenAiMaxRetryAttempts, |
| | | 313 | | Delay = TransientOpenAiRetryDelay, |
| | | 314 | | BackoffType = DelayBackoffType.Exponential, |
| | | 315 | | UseJitter = true, |
| | | 316 | | ShouldHandle = args => ValueTask.FromResult( |
| | | 317 | | IsTransientOpenAiServerFailure(args.Outcome.Exception, args.Context.CancellationToken)), |
| | | 318 | | OnRetry = args => |
| | | 319 | | { |
| | | 320 | | _logger.LogWarning( |
| | | 321 | | args.Outcome.Exception, |
| | | 322 | | "OpenAI request failed with a transient server error; retrying prediction request ({RetryAttempt |
| | | 323 | | args.AttemptNumber + 1, |
| | | 324 | | TransientOpenAiMaxRetryAttempts); |
| | | 325 | | return default; |
| | | 326 | | } |
| | | 327 | | }) |
| | | 328 | | .Build(); |
| | | 329 | | |
| | | 330 | | return await pipeline.ExecuteAsync( |
| | | 331 | | async ct => await _responsesClient.CreateResponseAsync(options, ct), |
| | | 332 | | cancellationToken); |
| | | 333 | | } |
| | | 334 | | |
/// <summary>
/// Builds the request options for a match prediction, using the "match_prediction" JSON schema.
/// </summary>
/// <param name="messages">Prompt messages to send.</param>
/// <param name="includeJustification">Selects the schema variant that includes a justification.</param>
/// <param name="serviceTier">Service tier to request, or <c>null</c>/blank for none.</param>
/// <param name="reasoningEffort">Reasoning effort to request, or <c>null</c>/blank for none.</param>
/// <returns>The populated request options.</returns>
private CreateResponseOptions CreateMatchResponseOptions(
    IReadOnlyList<PredictionRequestMessage> messages,
    bool includeJustification,
    string? serviceTier,
    string? reasoningEffort)
{
    var schemaBytes = BuildPredictionJsonSchema(includeJustification);
    var schema = BinaryData.FromBytes(schemaBytes);

    return CreateResponseOptions(messages, "match_prediction", schema, serviceTier, reasoningEffort);
}
| | | 348 | | |
/// <summary>
/// Builds the request options for a bonus-question prediction, using the "bonus_prediction" JSON schema.
/// </summary>
/// <param name="messages">Prompt messages to send.</param>
/// <param name="bonusQuestion">Question the response schema is generated for.</param>
/// <param name="serviceTier">Service tier to request, or <c>null</c>/blank for none.</param>
/// <param name="reasoningEffort">Reasoning effort to request, or <c>null</c>/blank for none.</param>
/// <returns>The populated request options.</returns>
private CreateResponseOptions CreateBonusResponseOptions(
    IReadOnlyList<PredictionRequestMessage> messages,
    BonusQuestion bonusQuestion,
    string? serviceTier,
    string? reasoningEffort)
{
    var schemaBytes = CreateSingleBonusPredictionJsonSchema(bonusQuestion);
    var schema = BinaryData.FromBytes(schemaBytes);

    return CreateResponseOptions(messages, "bonus_prediction", schema, serviceTier, reasoningEffort);
}
| | | 362 | | |
/// <summary>
/// Builds Responses API request options with a strict JSON-schema text format, the given messages
/// and, when provided, a service tier and reasoning effort.
/// </summary>
/// <param name="messages">Prompt messages, added as input items in order.</param>
/// <param name="schemaName">Name given to the JSON schema format.</param>
/// <param name="schema">JSON schema the response must follow.</param>
/// <param name="serviceTier">Service tier to request; omitted when <c>null</c> or blank.</param>
/// <param name="reasoningEffort">Reasoning effort to request; omitted when <c>null</c> or blank.</param>
/// <returns>The populated request options.</returns>
private CreateResponseOptions CreateResponseOptions(
    IReadOnlyList<PredictionRequestMessage> messages,
    string schemaName,
    BinaryData schema,
    string? serviceTier,
    string? reasoningEffort)
{
    var textFormat = ResponseTextFormat.CreateJsonSchemaFormat(
        jsonSchemaFormatName: schemaName,
        jsonSchema: schema,
        jsonSchemaIsStrict: true);

    var requestOptions = new CreateResponseOptions
    {
        Model = _model,
        // Safeguard against high costs
        MaxOutputTokenCount = _options.MaxOutputTokenCount,
        TextOptions = new ResponseTextOptions { TextFormat = textFormat }
    };

    for (var index = 0; index < messages.Count; index++)
    {
        requestOptions.InputItems.Add(CreateResponseMessage(messages[index]));
    }

    // Optional settings are only set when a non-blank value was supplied.
    if (NormalizeServiceTier(serviceTier) is { } tier)
    {
        requestOptions.ServiceTier = new ResponseServiceTier(tier);
    }

    if (NormalizeReasoningEffort(reasoningEffort) is { } effort)
    {
        requestOptions.ReasoningOptions = new ResponseReasoningOptions
        {
            ReasoningEffortLevel = new ResponseReasoningEffortLevel(effort)
        };
    }

    return requestOptions;
}
| | | 405 | | |
/// <summary>
/// Normalizes a reasoning-effort value: trims it and lower-cases it.
/// </summary>
/// <param name="reasoningEffort">Raw configured value.</param>
/// <returns>The normalized value, or <c>null</c> when the input is <c>null</c>, empty or whitespace.</returns>
private static string? NormalizeReasoningEffort(string? reasoningEffort)
{
    if (string.IsNullOrWhiteSpace(reasoningEffort))
    {
        return null;
    }

    var trimmed = reasoningEffort.Trim();
    return trimmed.ToLowerInvariant();
}
| | | 412 | | |
/// <summary>
/// Normalizes a service-tier value: trims it and lower-cases it.
/// </summary>
/// <param name="serviceTier">Raw tier name.</param>
/// <returns>The normalized tier, or <c>null</c> when the input is <c>null</c>, empty or whitespace.</returns>
private static string? NormalizeServiceTier(string? serviceTier)
{
    if (string.IsNullOrWhiteSpace(serviceTier))
    {
        return null;
    }

    var trimmed = serviceTier.Trim();
    return trimmed.ToLowerInvariant();
}
| | | 419 | | |
/// <summary>
/// Converts the service tier reported by a response into a trimmed, lower-cased string.
/// </summary>
/// <param name="serviceTier">Tier reported by the response, if any.</param>
/// <returns>The normalized tier text, or <c>null</c> when no tier (or only blank text) was reported.</returns>
private static string? NormalizeResponseServiceTier(ResponseServiceTier? serviceTier)
{
    var tierText = serviceTier?.ToString();
    if (string.IsNullOrWhiteSpace(tierText))
    {
        return null;
    }

    return tierText.Trim().ToLowerInvariant();
}
| | | 426 | | |
/// <summary>
/// Converts a prompt message into the matching Responses API input item.
/// </summary>
/// <param name="message">Message whose role is either "system" or "user".</param>
/// <returns>A system or user message item carrying the message content.</returns>
/// <exception cref="InvalidOperationException">Thrown for any other role.</exception>
private static ResponseItem CreateResponseMessage(PredictionRequestMessage message)
{
    switch (message.Role)
    {
        case "system":
            return ResponseItem.CreateSystemMessageItem(message.Content);
        case "user":
            return ResponseItem.CreateUserMessageItem(message.Content);
        default:
            throw new InvalidOperationException($"Unsupported response message role '{message.Role}'.");
    }
}
| | | 436 | | |
/// <summary>
/// Determines whether the requested service tier is the flex tier, ignoring case and surrounding whitespace.
/// </summary>
/// <param name="requestedServiceTier">Tier requested for the call, or <c>null</c>.</param>
/// <returns><c>true</c> when the normalized tier equals the flex tier.</returns>
private static bool IsFlexProcessingRequest(string? requestedServiceTier)
{
    var normalizedTier = NormalizeServiceTier(requestedServiceTier);
    return string.Equals(normalizedTier, FlexServiceTier, StringComparison.OrdinalIgnoreCase);
}
| | | 444 | | |
/// <summary>
/// Decides whether a failed flex request should be retried on the default service tier.
/// </summary>
/// <param name="exception">Exception produced by the attempt, or <c>null</c> when it succeeded.</param>
/// <param name="cancellationToken">Caller's token; a cancelled token never triggers the fallback.</param>
/// <returns>
/// <c>true</c> for HTTP 408, for HTTP 429 that is either a flex resource-unavailable failure or a
/// retryable rate limit, and for timeout / task-cancelled exceptions; otherwise <c>false</c>.
/// </returns>
private static bool IsFlexFallbackFailure(Exception? exception, CancellationToken cancellationToken)
{
    if (exception is null || cancellationToken.IsCancellationRequested)
    {
        return false;
    }

    if (exception is ClientResultException clientException)
    {
        return clientException.Status switch
        {
            408 => true,
            429 => IsFlexResourceUnavailableFailure(clientException)
                || IsRetryableOpenAiRateLimitFailure(clientException, cancellationToken),
            _ => false
        };
    }

    // Timeouts not caused by the caller's token (it was checked above) also qualify.
    return exception is TimeoutRejectedException or TimeoutException or TaskCanceledException;
}
| | | 468 | | |
/// <summary>
/// Determines whether a failure is an HTTP 5xx response that may be retried.
/// </summary>
/// <param name="exception">Exception produced by the attempt, or <c>null</c> when it succeeded.</param>
/// <param name="cancellationToken">Caller's token; nothing is retried once it is cancelled.</param>
/// <returns><c>true</c> when the exception carries a status between 500 and 599 and the token is not cancelled.</returns>
private static bool IsTransientOpenAiServerFailure(Exception? exception, CancellationToken cancellationToken)
{
    if (cancellationToken.IsCancellationRequested || exception is not ClientResultException serverError)
    {
        return false;
    }

    return serverError.Status >= 500 && serverError.Status <= 599;
}
| | | 478 | | |
/// <summary>
/// Determines whether an HTTP 429 failure is a pacing problem worth retrying, as opposed to a flex
/// resource-unavailable failure or an exhausted quota.
/// </summary>
/// <param name="exception">Exception produced by the attempt, or <c>null</c> when it succeeded.</param>
/// <param name="cancellationToken">Caller's token; nothing is retried once it is cancelled.</param>
/// <returns>
/// <c>true</c> when the exception is a 429 whose message, reason phrase and body carry neither a
/// flex resource-unavailable marker nor a quota-exhausted marker.
/// </returns>
private static bool IsRetryableOpenAiRateLimitFailure(Exception? exception, CancellationToken cancellationToken)
{
    if (exception is not ClientResultException { Status: 429 } clientException)
    {
        return false;
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return false;
    }

    if (IsFlexResourceUnavailableFailure(clientException))
    {
        return false;
    }

    if (ContainsQuotaExhaustedMarker(clientException.Message))
    {
        return false;
    }

    var rawResponse = clientException.GetRawResponse();
    if (ContainsQuotaExhaustedMarker(rawResponse?.ReasonPhrase))
    {
        return false;
    }

    return !ContainsQuotaExhaustedMarker(rawResponse?.Content.ToString());
}
| | | 492 | | |
/// <summary>
/// Chooses how long to wait before retrying a rate-limited request.
/// </summary>
/// <param name="exception">Exception produced by the failed attempt.</param>
/// <param name="attemptNumber">Attempt number supplied by the retry strategy; negative values are treated as 0.</param>
/// <returns>
/// The reset delay taken from the response headers when available, otherwise an exponential delay
/// with jitter in the upper half of its range. Either value is clamped to the configured maximum.
/// </returns>
private static TimeSpan ResolveOpenAiRateLimitDelay(Exception? exception, int attemptNumber)
{
    if (exception is ClientResultException clientException)
    {
        if (TryGetOpenAiRateLimitResetDelay(clientException.GetRawResponse(), out var headerDelay))
        {
            return ClampOpenAiRateLimitDelay(headerDelay);
        }
    }

    var exponent = Math.Max(0, attemptNumber);
    var uncappedMilliseconds = RateLimitedOpenAiRetryBaseDelay.TotalMilliseconds * Math.Pow(2, exponent);
    var cappedMilliseconds = Math.Min(RateLimitedOpenAiRetryMaxDelay.TotalMilliseconds, uncappedMilliseconds);

    // Jitter keeps the delay within [capped / 2, capped).
    var halfMilliseconds = cappedMilliseconds / 2;
    var delayMilliseconds = halfMilliseconds + Random.Shared.NextDouble() * halfMilliseconds;

    return ClampOpenAiRateLimitDelay(TimeSpan.FromMilliseconds(delayMilliseconds));
}
| | | 509 | | |
/// <summary>
/// Reads the request and token rate-limit reset headers and picks the delay to wait for.
/// </summary>
/// <param name="response">Raw response of the failed call, or <c>null</c>.</param>
/// <param name="delay">
/// The longest reset delay among exhausted dimensions; if none is exhausted, the longest reset delay
/// among the remaining dimensions. <c>default</c> when the method returns <c>false</c>.
/// </param>
/// <returns><c>true</c> when at least one reset header could be parsed.</returns>
private static bool TryGetOpenAiRateLimitResetDelay(PipelineResponse? response, out TimeSpan delay)
{
    delay = default;
    if (response is null)
    {
        return false;
    }

    var exhaustedDelays = new List<TimeSpan>();
    var availableDelays = new List<TimeSpan>();

    foreach (var dimension in new[] { "requests", "tokens" })
    {
        AddRateLimitResetDelay(response, dimension, exhaustedDelays, availableDelays);
    }

    // Exhausted dimensions take precedence over dimensions that still have budget left.
    var candidates = exhaustedDelays.Count > 0 ? exhaustedDelays : availableDelays;
    if (candidates.Count == 0)
    {
        return false;
    }

    delay = candidates.Max();
    return true;
}
| | | 538 | | |
/// <summary>
/// Parses the reset header of one rate-limit dimension and files the delay under "exhausted" or
/// "available" depending on the matching remaining header.
/// </summary>
/// <param name="response">Raw response carrying the rate-limit headers.</param>
/// <param name="dimension">Header suffix of the dimension, e.g. "requests" or "tokens".</param>
/// <param name="exhaustedDelays">Receives the delay when the remaining value parses to zero or less.</param>
/// <param name="availableDelays">Receives the delay in every other case.</param>
private static void AddRateLimitResetDelay(
    PipelineResponse response,
    string dimension,
    List<TimeSpan> exhaustedDelays,
    List<TimeSpan> availableDelays)
{
    var hasResetHeader = TryGetOpenAiRateLimitHeader(response, $"x-ratelimit-reset-{dimension}", out var resetText);
    if (!hasResetHeader || !TryParseOpenAiRateLimitReset(resetText, out var resetDelay))
    {
        // Without a parseable reset header this dimension contributes nothing.
        return;
    }

    var isExhausted =
        TryGetOpenAiRateLimitHeader(response, $"x-ratelimit-remaining-{dimension}", out var remainingText) &&
        decimal.TryParse(remainingText, NumberStyles.Number, CultureInfo.InvariantCulture, out var remaining) &&
        remaining <= 0;

    var target = isExhausted ? exhaustedDelays : availableDelays;
    target.Add(resetDelay);
}
| | | 561 | | |
/// <summary>
/// Looks up a response header by name.
/// </summary>
/// <param name="response">Raw response to read from.</param>
/// <param name="name">Header name.</param>
/// <param name="value">Header value (empty when the header value is <c>null</c> or the header is absent).</param>
/// <returns><c>true</c> when the header is present.</returns>
private static bool TryGetOpenAiRateLimitHeader(PipelineResponse response, string name, out string value)
{
    value = string.Empty;

    if (response.Headers is null || !response.Headers.TryGetValue(name, out var headerValue))
    {
        return false;
    }

    value = headerValue ?? string.Empty;
    return true;
}
| | | 573 | | |
/// <summary>
/// Parses a rate-limit reset duration such as <c>17ms</c>, <c>1s</c> or <c>6m0s</c> into a <see cref="TimeSpan"/>.
/// </summary>
/// <remarks>
/// Every <c>number + unit</c> segment found in the text is summed; supported units are
/// <c>ms</c>, <c>s</c>, <c>m</c> and <c>h</c> (case-insensitive).
/// </remarks>
/// <param name="text">Header value to parse.</param>
/// <param name="delay">The parsed duration, or <c>default</c> when the method returns <c>false</c>.</param>
/// <returns>
/// <c>true</c> when at least one segment was found and the total fits in a <see cref="TimeSpan"/>;
/// <c>false</c> for blank text, text without any segment, or a total too large to represent.
/// </returns>
private static bool TryParseOpenAiRateLimitReset(string text, out TimeSpan delay)
{
    delay = default;
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    var matches = Regex.Matches(
        text.Trim(),
        @"(?<value>\d+(?:\.\d+)?)(?<unit>ms|s|m|h)",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    if (matches.Count == 0)
    {
        return false;
    }

    var totalMilliseconds = 0.0;
    foreach (System.Text.RegularExpressions.Match match in matches)
    {
        if (!double.TryParse(match.Groups["value"].Value, NumberStyles.Float, CultureInfo.InvariantCulture, out var value))
        {
            return false;
        }

        totalMilliseconds += match.Groups["unit"].Value.ToLowerInvariant() switch
        {
            "ms" => value,
            "s" => value * 1_000,
            "m" => value * 60_000,
            "h" => value * 3_600_000,
            _ => 0
        };
    }

    // TimeSpan.FromMilliseconds throws OverflowException for values beyond TimeSpan.MaxValue.
    // A Try-method must not throw on malformed input, so an absurdly large header value is
    // reported as unparseable and the caller falls back to its computed backoff.
    if (!double.IsFinite(totalMilliseconds) || totalMilliseconds >= TimeSpan.MaxValue.TotalMilliseconds)
    {
        return false;
    }

    delay = TimeSpan.FromMilliseconds(Math.Max(0, totalMilliseconds));
    return true;
}
| | | 612 | | |
/// <summary>
/// Restricts a retry delay to the range from zero to the configured maximum rate-limit delay.
/// </summary>
/// <param name="delay">Delay to restrict.</param>
/// <returns>Zero for negative delays, the maximum for larger delays, otherwise the delay itself.</returns>
private static TimeSpan ClampOpenAiRateLimitDelay(TimeSpan delay)
{
    if (delay < TimeSpan.Zero)
    {
        return TimeSpan.Zero;
    }

    if (delay > RateLimitedOpenAiRetryMaxDelay)
    {
        return RateLimitedOpenAiRetryMaxDelay;
    }

    return delay;
}
| | | 624 | | |
/// <summary>
/// Determines whether a failed call reports that flex processing resources were unavailable.
/// </summary>
/// <param name="exception">Failure to inspect.</param>
/// <returns>
/// <c>true</c> when the exception message, the response reason phrase or the response body contains
/// a resource-unavailable marker.
/// </returns>
private static bool IsFlexResourceUnavailableFailure(ClientResultException exception)
{
    var rawResponse = exception.GetRawResponse();

    if (ContainsFlexResourceUnavailableMarker(exception.Message))
    {
        return true;
    }

    if (ContainsFlexResourceUnavailableMarker(rawResponse?.ReasonPhrase))
    {
        return true;
    }

    return ContainsFlexResourceUnavailableMarker(rawResponse?.Content.ToString());
}
| | | 632 | | |
/// <summary>
/// Checks whether a piece of error text contains one of the phrases treated as
/// "flex resources unavailable" (case-insensitive).
/// </summary>
/// <param name="text">Error text to inspect; may be <c>null</c>.</param>
/// <returns><c>true</c> when any marker phrase occurs in the text; <c>false</c> for blank text.</returns>
private static bool ContainsFlexResourceUnavailableMarker(string? text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    var markers = new[]
    {
        "resource_unavailable",
        "resource unavailable",
        "resources unavailable",
        "insufficient resources",
        "capacity"
    };

    foreach (var marker in markers)
    {
        if (text.Contains(marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
| | | 646 | | |
/// <summary>
/// Checks whether a piece of error text contains one of the phrases treated as
/// "quota exhausted" (case-insensitive).
/// </summary>
/// <param name="text">Error text to inspect; may be <c>null</c>.</param>
/// <returns><c>true</c> when any marker phrase occurs in the text; <c>false</c> for blank text.</returns>
private static bool ContainsQuotaExhaustedMarker(string? text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    var markers = new[]
    {
        "insufficient_quota",
        "exceeded your current quota",
        "check your plan and billing"
    };

    foreach (var marker in markers)
    {
        if (text.Contains(marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
| | | 658 | | |
/// <summary>
/// Generates a prediction for a single bonus question by sending the bonus instructions
/// (template plus context documents) and the question JSON to the OpenAI model.
/// </summary>
/// <param name="bonusQuestion">The question to answer; its options and <c>MaxSelections</c> constrain the response.</param>
/// <param name="contextDocuments">Context documents folded into the system prompt. Enumerated exactly once.</param>
/// <param name="telemetryMetadata">Optional metadata applied to the telemetry observation.</param>
/// <param name="cancellationToken">Token forwarded to the OpenAI call.</param>
/// <returns>
/// The parsed prediction, or <c>null</c> when the response fails validation or when any
/// exception is thrown while building, sending or parsing the request (the exception is logged).
/// </returns>
public async Task<BonusPrediction?> PredictBonusQuestionAsync(
    BonusQuestion bonusQuestion,
    IEnumerable<DocumentContext> contextDocuments,
    PredictionTelemetryMetadata? telemetryMetadata = null,
    CancellationToken cancellationToken = default)
{
    _logger.LogInformation("Generating prediction for bonus question: {QuestionText}", bonusQuestion.Text);

    try
    {
        // Materialize the context once: the sequence may be lazy, and it is needed both for
        // building the prompt and for the count logged below. Re-enumerating it could repeat
        // expensive work or report a count that differs from what went into the prompt.
        var contextList = contextDocuments as IReadOnlyCollection<DocumentContext>
            ?? contextDocuments.ToList();

        // Build the instructions by combining template with context
        var instructions = BuildBonusInstructions(contextList);

        // Create bonus question JSON
        var questionJson = PredictionPromptComposer.CreateBonusQuestionJson(bonusQuestion);

        _logger.LogDebug("Instructions length: {InstructionsLength} characters", instructions.Length);
        _logger.LogDebug("Context documents: {ContextCount}", contextList.Count);
        _logger.LogDebug("Question JSON: {QuestionJson}", questionJson);

        // Create input items for the response
        var messages = new List<PredictionRequestMessage>
        {
            new("system", instructions),
            new("user", questionJson)
        };

        _logger.LogDebug("Calling OpenAI API for bonus prediction");

        // Start an OTel activity for Langfuse generation tracking
        using var activity = Telemetry.Source.StartActivity("predict-bonus");

        // Call OpenAI with structured output format
        var completion = await CompleteBonusResponseAsync(messages, bonusQuestion, cancellationToken);

        // Parse the structured response
        var predictionJson = completion.PredictionJson;
        _logger.LogDebug("Received bonus prediction JSON: {PredictionJson}", predictionJson);

        var prediction = ParseSingleBonusPrediction(predictionJson, bonusQuestion);

        if (prediction != null)
        {
            _logger.LogInformation("Generated prediction for bonus question: {SelectedOptions}",
                string.Join(", ", prediction.SelectedOptionIds));
        }

        // Log token usage and cost breakdown
        var usage = completion.Usage;
        _logger.LogDebug("Token usage - Input: {InputTokens}, Output: {OutputTokens}, Total: {TotalTokens}",
            usage.InputTokenCount, usage.OutputTokenCount, usage.TotalTokenCount);

        // Set Langfuse generation attributes on the activity
        var executionTelemetry = completion.ExecutionTelemetry;
        SetLangfuseGenerationAttributes(activity, messages, predictionJson, usage, telemetryMetadata, executionTelemetry);

        // Track usage and log costs; pass the final service tier only when it is known.
        if (executionTelemetry is null)
        {
            _tokenUsageTracker.AddUsage(_model, usage);
            _costCalculationService.LogCostBreakdown(_model, usage);
        }
        else
        {
            _tokenUsageTracker.AddUsage(_model, usage, executionTelemetry.FinalServiceTier);
            _costCalculationService.LogCostBreakdown(_model, usage, executionTelemetry.FinalServiceTier);
        }

        return prediction;
    }
    catch (Exception ex)
    {
        // Best-effort contract: failures are logged and surfaced to the caller as null.
        _logger.LogError(ex, "Error generating bonus prediction for question: {QuestionText}", bonusQuestion.Text);
        return null;
    }
}
| | | 742 | | |
/// <summary>
/// Composes the system prompt for a match prediction from the selected instructions
/// template and the supplied context documents.
/// </summary>
/// <param name="contextDocuments">Context documents to include; copied into a list once.</param>
/// <param name="includeJustification">
/// When <c>true</c> the justification variant of the template is used, otherwise the plain one.
/// </param>
/// <returns>The prompt produced by <c>PredictionPromptComposer.BuildSystemPrompt</c>.</returns>
private string BuildInstructions(IEnumerable<DocumentContext> contextDocuments, bool includeJustification)
{
    // The template is resolved before the documents are enumerated.
    var selectedTemplate = includeJustification switch
    {
        true => _instructionsTemplateWithJustification.Value.Template,
        false => _instructionsTemplate.Value.Template
    };

    var documents = contextDocuments.ToList();

    if (documents.Count == 0)
    {
        _logger.LogDebug("No context documents provided");
    }
    else
    {
        _logger.LogDebug("Added {ContextCount} context documents to instructions", documents.Count);
    }

    return PredictionPromptComposer.BuildSystemPrompt(selectedTemplate, documents);
}
| | | 761 | | |
| | | 762 | | private static byte[] BuildPredictionJsonSchema(bool includeJustification) |
| | | 763 | | { |
| | | 764 | | var properties = new Dictionary<string, object?> |
| | | 765 | | { |
| | | 766 | | ["home"] = new Dictionary<string, object?> |
| | | 767 | | { |
| | | 768 | | ["type"] = "integer", |
| | | 769 | | ["description"] = "Predicted goals for the home team" |
| | | 770 | | }, |
| | | 771 | | ["away"] = new Dictionary<string, object?> |
| | | 772 | | { |
| | | 773 | | ["type"] = "integer", |
| | | 774 | | ["description"] = "Predicted goals for the away team" |
| | | 775 | | } |
| | | 776 | | }; |
| | | 777 | | |
| | | 778 | | var required = new List<string> { "home", "away" }; |
| | | 779 | | |
| | | 780 | | if (includeJustification) |
| | | 781 | | { |
| | | 782 | | var mostValuableContextSourceItem = new Dictionary<string, object?> |
| | | 783 | | { |
| | | 784 | | ["type"] = "object", |
| | | 785 | | ["properties"] = new Dictionary<string, object?> |
| | | 786 | | { |
| | | 787 | | ["documentName"] = new Dictionary<string, object?> |
| | | 788 | | { |
| | | 789 | | ["type"] = "string", |
| | | 790 | | ["description"] = "Name of the context document referenced" |
| | | 791 | | }, |
| | | 792 | | ["details"] = new Dictionary<string, object?> |
| | | 793 | | { |
| | | 794 | | ["type"] = "string", |
| | | 795 | | ["description"] = "Brief summary of why the document or parts of it were useful" |
| | | 796 | | } |
| | | 797 | | }, |
| | | 798 | | ["required"] = new[] { "documentName", "details" }, |
| | | 799 | | ["additionalProperties"] = false |
| | | 800 | | }; |
| | | 801 | | |
| | | 802 | | var leastValuableContextSourceItem = new Dictionary<string, object?> |
| | | 803 | | { |
| | | 804 | | ["type"] = "object", |
| | | 805 | | ["properties"] = new Dictionary<string, object?> |
| | | 806 | | { |
| | | 807 | | ["documentName"] = new Dictionary<string, object?> |
| | | 808 | | { |
| | | 809 | | ["type"] = "string", |
| | | 810 | | ["description"] = "Name of the context document referenced" |
| | | 811 | | }, |
| | | 812 | | ["details"] = new Dictionary<string, object?> |
| | | 813 | | { |
| | | 814 | | ["type"] = "string", |
| | | 815 | | ["description"] = "Brief summary explaining why the document or parts of it offered limited insi |
| | | 816 | | } |
| | | 817 | | }, |
| | | 818 | | ["required"] = new[] { "documentName", "details" }, |
| | | 819 | | ["additionalProperties"] = false |
| | | 820 | | }; |
| | | 821 | | |
| | | 822 | | var contextSources = new Dictionary<string, object?> |
| | | 823 | | { |
| | | 824 | | ["type"] = "object", |
| | | 825 | | ["properties"] = new Dictionary<string, object?> |
| | | 826 | | { |
| | | 827 | | ["mostValuable"] = new Dictionary<string, object?> |
| | | 828 | | { |
| | | 829 | | ["type"] = "array", |
| | | 830 | | ["items"] = mostValuableContextSourceItem, |
| | | 831 | | ["description"] = "Context documents that most influenced the prediction", |
| | | 832 | | ["minItems"] = 0 |
| | | 833 | | }, |
| | | 834 | | ["leastValuable"] = new Dictionary<string, object?> |
| | | 835 | | { |
| | | 836 | | ["type"] = "array", |
| | | 837 | | ["items"] = leastValuableContextSourceItem, |
| | | 838 | | ["description"] = "Context documents that provided limited or no valuable insight", |
| | | 839 | | ["minItems"] = 0 |
| | | 840 | | } |
| | | 841 | | }, |
| | | 842 | | ["required"] = new[] { "leastValuable", "mostValuable" }, |
| | | 843 | | ["additionalProperties"] = false |
| | | 844 | | }; |
| | | 845 | | |
| | | 846 | | properties["justification"] = new Dictionary<string, object?> |
| | | 847 | | { |
| | | 848 | | ["type"] = "object", |
| | | 849 | | ["properties"] = new Dictionary<string, object?> |
| | | 850 | | { |
| | | 851 | | ["keyReasoning"] = new Dictionary<string, object?> |
| | | 852 | | { |
| | | 853 | | ["type"] = "string", |
| | | 854 | | ["description"] = "Concise analytic summary motivating the predicted scoreline" |
| | | 855 | | }, |
| | | 856 | | ["contextSources"] = contextSources, |
| | | 857 | | ["uncertainties"] = new Dictionary<string, object?> |
| | | 858 | | { |
| | | 859 | | ["type"] = "array", |
| | | 860 | | ["items"] = new Dictionary<string, object?> |
| | | 861 | | { |
| | | 862 | | ["type"] = "string", |
| | | 863 | | ["description"] = "Single uncertainty or external factor affecting confidence" |
| | | 864 | | }, |
| | | 865 | | ["description"] = "Factors that could alter the predicted outcome", |
| | | 866 | | ["minItems"] = 0 |
| | | 867 | | } |
| | | 868 | | }, |
| | | 869 | | ["required"] = new[] { "contextSources", "keyReasoning", "uncertainties" }, |
| | | 870 | | ["additionalProperties"] = false |
| | | 871 | | }; |
| | | 872 | | required.Add("justification"); |
| | | 873 | | } |
| | | 874 | | |
| | | 875 | | var schema = new Dictionary<string, object?> |
| | | 876 | | { |
| | | 877 | | ["type"] = "object", |
| | | 878 | | ["properties"] = properties, |
| | | 879 | | ["required"] = required, |
| | | 880 | | ["additionalProperties"] = false |
| | | 881 | | }; |
| | | 882 | | |
| | | 883 | | return JsonSerializer.SerializeToUtf8Bytes(schema); |
| | | 884 | | } |
| | | 885 | | |
| | | 886 | | private Prediction ParsePrediction(string predictionJson) |
| | | 887 | | { |
| | | 888 | | try |
| | | 889 | | { |
| | | 890 | | _logger.LogDebug("Parsing prediction JSON: {PredictionJson}", predictionJson); |
| | | 891 | | |
| | | 892 | | var predictionResponse = JsonSerializer.Deserialize<PredictionResponse>(predictionJson); |
| | | 893 | | if (predictionResponse == null) |
| | | 894 | | { |
| | | 895 | | LogRawModelResponse(predictionJson); |
| | | 896 | | throw new InvalidOperationException("Failed to deserialize prediction response"); |
| | | 897 | | } |
| | | 898 | | |
| | | 899 | | _logger.LogDebug("Parsed prediction response - Home: {Home}, Away: {Away}", predictionResponse.Home, predict |
| | | 900 | | |
| | | 901 | | PredictionJustification? justification = null; |
| | | 902 | | |
| | | 903 | | if (predictionResponse.Justification != null) |
| | | 904 | | { |
| | | 905 | | var justificationResponse = predictionResponse.Justification; |
| | | 906 | | |
| | | 907 | | var mostValuable = justificationResponse.ContextSources?.MostValuable? |
| | | 908 | | .Where(entry => entry != null) |
| | | 909 | | .Select(entry => new PredictionJustificationContextSource( |
| | | 910 | | entry!.DocumentName?.Trim() ?? string.Empty, |
| | | 911 | | entry.Details?.Trim() ?? string.Empty)) |
| | | 912 | | .ToList() ?? new List<PredictionJustificationContextSource>(); |
| | | 913 | | |
| | | 914 | | var leastValuable = justificationResponse.ContextSources?.LeastValuable? |
| | | 915 | | .Where(entry => entry != null) |
| | | 916 | | .Select(entry => new PredictionJustificationContextSource( |
| | | 917 | | entry!.DocumentName?.Trim() ?? string.Empty, |
| | | 918 | | entry.Details?.Trim() ?? string.Empty)) |
| | | 919 | | .ToList() ?? new List<PredictionJustificationContextSource>(); |
| | | 920 | | |
| | | 921 | | var uncertainties = justificationResponse.Uncertainties? |
| | | 922 | | .Where(item => !string.IsNullOrWhiteSpace(item)) |
| | | 923 | | .Select(item => item.Trim()) |
| | | 924 | | .ToList() ?? new List<string>(); |
| | | 925 | | |
| | | 926 | | justification = new PredictionJustification( |
| | | 927 | | justificationResponse.KeyReasoning?.Trim() ?? string.Empty, |
| | | 928 | | new PredictionJustificationContextSources(mostValuable, leastValuable), |
| | | 929 | | uncertainties); |
| | | 930 | | |
| | | 931 | | _logger.LogDebug( |
| | | 932 | | "Parsed justification with key reasoning: {KeyReasoning}; Most valuable sources: {MostValuableCount} |
| | | 933 | | justification.KeyReasoning, |
| | | 934 | | justification.ContextSources.MostValuable.Count, |
| | | 935 | | justification.ContextSources.LeastValuable.Count, |
| | | 936 | | justification.Uncertainties.Count); |
| | | 937 | | } |
| | | 938 | | |
| | | 939 | | return new Prediction(predictionResponse.Home, predictionResponse.Away, justification); |
| | | 940 | | } |
| | | 941 | | catch (JsonException ex) |
| | | 942 | | { |
| | | 943 | | _logger.LogError(ex, "Failed to parse prediction JSON: {PredictionJson}", predictionJson); |
| | | 944 | | LogRawModelResponse(predictionJson); |
| | | 945 | | throw new InvalidOperationException($"Failed to parse prediction response: {ex.Message}", ex); |
| | | 946 | | } |
| | | 947 | | } |
| | | 948 | | |
/// <summary>
/// Writes the raw model response to the error log and to standard error.
/// A null, empty or whitespace-only response is reported with a fixed message instead.
/// </summary>
/// <param name="rawResponse">The unparsed text returned by the model.</param>
private void LogRawModelResponse(string rawResponse)
{
    var stderr = Console.Error;

    if (!string.IsNullOrWhiteSpace(rawResponse))
    {
        _logger.LogError("Raw model response from OpenAI: {RawResponse}", rawResponse);
        stderr.WriteLine("Raw model response from OpenAI:");
        stderr.WriteLine(rawResponse);
        return;
    }

    const string emptyResponseMessage = "Raw model response from OpenAI was empty or whitespace.";
    _logger.LogError(emptyResponseMessage);
    stderr.WriteLine(emptyResponseMessage);
}
| | | 963 | | |
/// <summary>
/// Composes the system prompt for a bonus-question prediction from the bonus instructions
/// template and the supplied context documents.
/// </summary>
/// <param name="contextDocuments">Context documents to include; copied into a list once.</param>
/// <returns>The prompt produced by <c>PredictionPromptComposer.BuildSystemPrompt</c>.</returns>
private string BuildBonusInstructions(IEnumerable<DocumentContext> contextDocuments)
{
    // The template is read before the documents are enumerated.
    var template = _bonusInstructionsTemplate.Value.Template;
    var documents = contextDocuments.ToList();

    if (documents.Count == 0)
    {
        _logger.LogDebug("No context documents provided for bonus predictions");
    }
    else
    {
        _logger.LogDebug("Added {ContextCount} context documents to bonus instructions", documents.Count);
    }

    return PredictionPromptComposer.BuildSystemPrompt(template, documents);
}
| | | 981 | | |
/// <summary>
/// Builds the UTF-8 encoded JSON schema for the response to a single bonus question.
/// </summary>
/// <param name="question">
/// The question whose option ids form the allowed values and whose <c>MaxSelections</c>
/// is used as both the minimum and the maximum number of selected options.
/// </param>
/// <returns>The serialized schema bytes.</returns>
private static byte[] CreateSingleBonusPredictionJsonSchema(BonusQuestion question)
{
    // Exactly MaxSelections answers are required (1 for single-selection questions).
    var selectionCount = question.MaxSelections;
    var allowedOptionIds = question.Options.Select(option => option.Id).ToArray();

    // Property declaration order is kept so the serialized document is unchanged.
    var optionItem = new { type = "string", @enum = allowedOptionIds };

    var selectedOptionIds = new
    {
        type = "array",
        items = optionItem,
        minItems = selectionCount,
        maxItems = selectionCount
    };

    var schema = new
    {
        type = "object",
        properties = new { selectedOptionIds },
        required = new[] { "selectedOptionIds" },
        additionalProperties = false
    };

    return JsonSerializer.SerializeToUtf8Bytes(schema);
}
| | | 1007 | | |
| | | 1008 | | private BonusPrediction? ParseSingleBonusPrediction(string predictionJson, BonusQuestion question) |
| | | 1009 | | { |
| | | 1010 | | try |
| | | 1011 | | { |
| | | 1012 | | _logger.LogDebug("Parsing single bonus prediction JSON: {PredictionJson}", predictionJson); |
| | | 1013 | | |
| | | 1014 | | var response = JsonSerializer.Deserialize<SingleBonusPredictionResponse>(predictionJson); |
| | | 1015 | | if (response?.SelectedOptionIds?.Any() != true) |
| | | 1016 | | { |
| | | 1017 | | throw new InvalidOperationException("Failed to deserialize bonus prediction response or no options selec |
| | | 1018 | | } |
| | | 1019 | | |
| | | 1020 | | // Validate that all selected options exist for this question |
| | | 1021 | | var validOptionIds = question.Options.Select(o => o.Id).ToHashSet(); |
| | | 1022 | | var invalidOptions = response.SelectedOptionIds.Where(id => !validOptionIds.Contains(id)).ToArray(); |
| | | 1023 | | |
| | | 1024 | | if (invalidOptions.Any()) |
| | | 1025 | | { |
| | | 1026 | | _logger.LogWarning("Invalid option IDs for question '{QuestionText}': {InvalidOptions}", |
| | | 1027 | | question.Text, string.Join(", ", invalidOptions)); |
| | | 1028 | | return null; |
| | | 1029 | | } |
| | | 1030 | | |
| | | 1031 | | // Validate no duplicate selections |
| | | 1032 | | var duplicateOptions = response.SelectedOptionIds |
| | | 1033 | | .GroupBy(id => id) |
| | | 1034 | | .Where(g => g.Count() > 1) |
| | | 1035 | | .Select(g => g.Key) |
| | | 1036 | | .ToArray(); |
| | | 1037 | | |
| | | 1038 | | if (duplicateOptions.Any()) |
| | | 1039 | | { |
| | | 1040 | | _logger.LogWarning("Duplicate option IDs for question '{QuestionText}': {DuplicateOptions}", |
| | | 1041 | | question.Text, string.Join(", ", duplicateOptions)); |
| | | 1042 | | return null; |
| | | 1043 | | } |
| | | 1044 | | |
| | | 1045 | | // Validate selection count - must match exactly MaxSelections for full predictions |
| | | 1046 | | if (response.SelectedOptionIds.Length != question.MaxSelections) |
| | | 1047 | | { |
| | | 1048 | | _logger.LogWarning("Invalid selection count for question '{QuestionText}': expected exactly {MaxSelectio |
| | | 1049 | | question.Text, question.MaxSelections, response.SelectedOptionIds.Length); |
| | | 1050 | | return null; |
| | | 1051 | | } |
| | | 1052 | | |
| | | 1053 | | var prediction = new BonusPrediction(response.SelectedOptionIds.ToList()); |
| | | 1054 | | |
| | | 1055 | | _logger.LogDebug("Parsed prediction: {SelectedOptions}", |
| | | 1056 | | string.Join(", ", response.SelectedOptionIds)); |
| | | 1057 | | |
| | | 1058 | | return prediction; |
| | | 1059 | | } |
| | | 1060 | | catch (JsonException ex) |
| | | 1061 | | { |
| | | 1062 | | _logger.LogError(ex, "Failed to parse bonus prediction JSON: {PredictionJson}", predictionJson); |
| | | 1063 | | return null; |
| | | 1064 | | } |
| | | 1065 | | } |
| | | 1066 | | |
/// <summary>
/// Returns the path of the match prediction prompt template used by this service.
/// </summary>
/// <param name="includeJustification">
/// <c>true</c> to return the path of the justification variant; <c>false</c> for the plain template.
/// </param>
/// <returns>The <c>Path</c> value of the selected template.</returns>
public string GetMatchPromptPath(bool includeJustification = false)
{
    if (includeJustification)
    {
        return _instructionsTemplateWithJustification.Value.Path;
    }

    return _instructionsTemplate.Value.Path;
}
| | | 1077 | | |
/// <summary>
/// Returns the path of the bonus question prompt template used by this service.
/// </summary>
/// <returns>The <c>Path</c> value of the bonus instructions template.</returns>
public string GetBonusPromptPath()
{
    var bonusTemplate = _bonusInstructionsTemplate.Value;
    return bonusTemplate.Path;
}
| | | 1083 | | |
/// <summary>
/// Deserialization target for the structured match prediction JSON
/// (<c>home</c>, <c>away</c> and an optional <c>justification</c> object).
/// </summary>
private sealed class PredictionResponse
{
    /// <summary>Value of the <c>home</c> JSON property.</summary>
    [JsonPropertyName("home")]
    public int Home { get; set; }

    /// <summary>Value of the <c>away</c> JSON property.</summary>
    [JsonPropertyName("away")]
    public int Away { get; set; }

    /// <summary>Value of the <c>justification</c> JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("justification")]
    public JustificationResponse? Justification { get; set; }
}
| | | 1098 | | |
/// <summary>
/// Deserialization target for the <c>justification</c> object of a match prediction.
/// Every property has a non-null default for the case where the JSON omits it.
/// </summary>
private sealed class JustificationResponse
{
    /// <summary>Value of the <c>keyReasoning</c> JSON property.</summary>
    [JsonPropertyName("keyReasoning")]
    public string KeyReasoning { get; set; } = string.Empty;

    /// <summary>Value of the <c>contextSources</c> JSON property.</summary>
    [JsonPropertyName("contextSources")]
    public JustificationContextSourcesResponse ContextSources { get; set; } = new();

    /// <summary>Value of the <c>uncertainties</c> JSON property.</summary>
    [JsonPropertyName("uncertainties")]
    public string[] Uncertainties { get; set; } = Array.Empty<string>();
}
| | | 1110 | | |
/// <summary>
/// Deserialization target for the <c>contextSources</c> object of a justification.
/// Both lists default to empty arrays.
/// </summary>
private sealed class JustificationContextSourcesResponse
{
    /// <summary>Entries of the <c>mostValuable</c> JSON array.</summary>
    [JsonPropertyName("mostValuable")]
    public JustificationContextSourceEntry[] MostValuable { get; set; } =
        Array.Empty<JustificationContextSourceEntry>();

    /// <summary>Entries of the <c>leastValuable</c> JSON array.</summary>
    [JsonPropertyName("leastValuable")]
    public JustificationContextSourceEntry[] LeastValuable { get; set; } =
        Array.Empty<JustificationContextSourceEntry>();
}
| | | 1119 | | |
/// <summary>
/// Deserialization target for one context-source entry of a justification.
/// </summary>
private sealed class JustificationContextSourceEntry
{
    /// <summary>Value of the <c>documentName</c> JSON property.</summary>
    [JsonPropertyName("documentName")]
    public string DocumentName { get; set; } = string.Empty;

    /// <summary>Value of the <c>details</c> JSON property.</summary>
    [JsonPropertyName("details")]
    public string Details { get; set; } = string.Empty;
}
| | | 1128 | | |
/// <summary>
/// Deserialization target for a bonus predictions response carrying a
/// <c>predictions</c> array.
/// </summary>
private sealed class BonusPredictionsResponse
{
    /// <summary>Entries of the <c>predictions</c> JSON array; <c>null</c> when absent.</summary>
    [JsonPropertyName("predictions")]
    public BonusPredictionEntry[]? Predictions { get; set; }
}
| | | 1137 | | |
/// <summary>
/// Deserialization target for one entry of a bonus predictions response.
/// </summary>
private sealed class BonusPredictionEntry
{
    /// <summary>Value of the <c>questionId</c> JSON property.</summary>
    [JsonPropertyName("questionId")]
    public string QuestionId { get; set; } = string.Empty;

    /// <summary>Values of the <c>selectedOptionIds</c> JSON array.</summary>
    [JsonPropertyName("selectedOptionIds")]
    public string[] SelectedOptionIds { get; set; } = Array.Empty<string>();
}
| | | 1149 | | |
/// <summary>
/// Deserialization target for the response to a single bonus question.
/// </summary>
private sealed class SingleBonusPredictionResponse
{
    /// <summary>Values of the <c>selectedOptionIds</c> JSON array; empty by default.</summary>
    [JsonPropertyName("selectedOptionIds")]
    public string[] SelectedOptionIds { get; set; } = Array.Empty<string>();
}
| | | 1158 | | |
/// <summary>
/// Writes the Langfuse-mapped OpenTelemetry tags for one model generation onto
/// <paramref name="activity"/>. Does nothing when <paramref name="activity"/> is <c>null</c>.
/// </summary>
/// <param name="activity">The activity to tag, or <c>null</c>.</param>
/// <param name="messages">Request messages; serialized as role/content pairs into the input tag.</param>
/// <param name="responseJson">Raw response text stored in the output tag.</param>
/// <param name="usage">Token usage written to the usage-details tag.</param>
/// <param name="telemetryMetadata">Optional metadata applied to the activity after the input/output tags.</param>
/// <param name="executionTelemetry">
/// Optional service-tier details; when present, tier tags are written and a cost breakdown
/// is requested for the final service tier.
/// </param>
private void SetLangfuseGenerationAttributes(
    Activity? activity,
    IReadOnlyList<PredictionRequestMessage> messages,
    string responseJson,
    ChatTokenUsage usage,
    PredictionTelemetryMetadata? telemetryMetadata,
    PredictionExecutionTelemetry? executionTelemetry = null)
{
    if (activity is null)
    {
        return;
    }

    activity.SetTag("langfuse.observation.type", "generation");
    activity.SetTag("gen_ai.request.model", _model);

    // Prompt metadata reported by the template provider (if it offers any) takes precedence
    // over the statically configured trace metadata.
    var providerPromptMetadata = (_templateProvider as IPromptTemplateTelemetryMetadataProvider)
        ?.GetPromptTemplateTelemetryMetadata();
    var configuredPromptMetadata = _options.LangfusePromptTraceMetadata;

    if (providerPromptMetadata?.LangfusePromptName is { } providerPromptName &&
        providerPromptMetadata.LangfusePromptVersion is { } providerPromptVersion)
    {
        activity.SetTag("langfuse.observation.prompt.name", providerPromptName);
        activity.SetTag("langfuse.observation.prompt.version", providerPromptVersion);
    }
    else if (configuredPromptMetadata is { } promptTraceMetadata)
    {
        activity.SetTag("langfuse.observation.prompt.name", promptTraceMetadata.Name);
        activity.SetTag("langfuse.observation.prompt.version", promptTraceMetadata.Version);
    }

    if (providerPromptMetadata is not null)
    {
        activity.SetTag("langfuse.observation.metadata.langfusePromptFallback", providerPromptMetadata.IsFallback);
        activity.SetTag("langfuse.observation.metadata.promptTemplatePath", providerPromptMetadata.PromptPath);
    }
    else if (configuredPromptMetadata is { IsFallback: true })
    {
        activity.SetTag("langfuse.observation.metadata.langfusePromptFallback", true);
    }

    var configuredEffort = _options.ReasoningEffort;
    if (!string.IsNullOrWhiteSpace(configuredEffort))
    {
        var normalizedEffort = configuredEffort.Trim().ToLowerInvariant();
        activity.SetTag("gen_ai.request.reasoning_effort", normalizedEffort);
        activity.SetTag("langfuse.observation.metadata.openaiReasoningEffort", normalizedEffort);
    }

    if (executionTelemetry is not null)
    {
        var requestedTier = executionTelemetry.RequestedServiceTier;
        var finalTier = executionTelemetry.FinalServiceTier;

        activity.SetTag("gen_ai.request.service_tier", requestedTier);
        activity.SetTag("gen_ai.response.service_tier", finalTier);
        activity.SetTag("langfuse.observation.metadata.openaiExecutionStrategy", executionTelemetry.Strategy);
        activity.SetTag("langfuse.observation.metadata.openaiRequestedServiceTier", requestedTier);
        activity.SetTag("langfuse.observation.metadata.openaiFinalServiceTier", finalTier);
        activity.SetTag("langfuse.observation.metadata.openaiServiceTierFallbackUsed", executionTelemetry.FallbackUsed);
    }

    // Input is the request messages (system prompt + user message); output is the raw response.
    var serializedInput = JsonSerializer.Serialize(messages.Select(message => new
    {
        role = message.Role,
        content = message.Content
    }));
    activity.SetTag("langfuse.observation.input", serializedInput);
    activity.SetTag("langfuse.observation.output", responseJson);

    // Applied after the input/output tags and before the usage tags.
    telemetryMetadata?.ApplyToObservation(activity);

    // Token usage details; "total" is computed as input + output.
    var inputTokens = usage.InputTokenCount;
    var outputTokens = usage.OutputTokenCount;
    var usageDetails = new
    {
        input = inputTokens,
        output = outputTokens,
        cache_read_input_tokens = usage.InputTokenDetails?.CachedTokenCount ?? 0,
        reasoning_tokens = usage.OutputTokenDetails?.ReasoningTokenCount ?? 0,
        total = inputTokens + outputTokens
    };
    activity.SetTag("langfuse.observation.usage_details", JsonSerializer.Serialize(usageDetails));

    // Cost details are only emitted when the service tier is known and a breakdown is returned.
    if (executionTelemetry is null)
    {
        return;
    }

    if (_costCalculationService.CalculateCostBreakdown(_model, usage, executionTelemetry.FinalServiceTier) is { } costBreakdown)
    {
        var costDetails = new
        {
            input = costBreakdown.Input,
            cache_read_input_tokens = costBreakdown.CachedInput,
            output = costBreakdown.Output,
            total = costBreakdown.Total
        };
        activity.SetTag("langfuse.observation.cost_details", JsonSerializer.Serialize(costDetails));
    }
}
| | | 1252 | | |
/// <summary>
/// One message of a prediction request.
/// </summary>
/// <param name="Role">Message role, e.g. <c>"system"</c> or <c>"user"</c>.</param>
/// <param name="Content">Message text.</param>
private sealed record PredictionRequestMessage(
    string Role,
    string Content);
| | | 1254 | | |
/// <summary>
/// Result of one OpenAI response call as consumed by the prediction methods.
/// </summary>
/// <param name="PredictionJson">The JSON text to be parsed into a prediction.</param>
/// <param name="Usage">Token usage reported for the call.</param>
/// <param name="ExecutionTelemetry">Service-tier execution details, or <c>null</c> when none were recorded.</param>
/// <param name="FinalServiceTier">Optional final service tier; defaults to <c>null</c>.</param>
private sealed record OpenAiResponseResult(
    string PredictionJson,
    ChatTokenUsage Usage,
    PredictionExecutionTelemetry? ExecutionTelemetry,
    string? FinalServiceTier = null);
| | | 1260 | | |
/// <summary>
/// Service-tier details of one model call, reported as telemetry tags and used to pick
/// the tier for usage tracking and cost calculation.
/// </summary>
/// <param name="Strategy">Execution strategy label written to the telemetry metadata.</param>
/// <param name="RequestedServiceTier">Service tier that was requested.</param>
/// <param name="FinalServiceTier">Service tier recorded as the final one for the call.</param>
/// <param name="FallbackUsed">Whether a service-tier fallback was used.</param>
private sealed record PredictionExecutionTelemetry(
    string Strategy,
    string RequestedServiceTier,
    string FinalServiceTier,
    bool FallbackUsed);
| | | 1266 | | |
/// <summary>
/// Converts a Responses-API token usage object into a <see cref="ChatTokenUsage"/>.
/// </summary>
/// <param name="usage">The usage reported by the Responses API.</param>
/// <returns>
/// A usage object carrying the input and output token counts. Input details are attached only
/// when the cached token count is positive, and output details only when the reasoning token
/// count is positive; otherwise the respective details are <c>null</c>.
/// </returns>
private static ChatTokenUsage ToChatTokenUsage(ResponseTokenUsage usage)
{
    var cachedTokens = usage.InputTokenDetails?.CachedTokenCount ?? 0;
    var reasoningTokens = usage.OutputTokenDetails?.ReasoningTokenCount ?? 0;

    // Missing details are treated as zero above, and zero yields no details object.
    var chatInputDetails = cachedTokens <= 0
        ? null
        : OpenAIChatModelFactory.ChatInputTokenUsageDetails(cachedTokenCount: cachedTokens);

    var chatOutputDetails = reasoningTokens <= 0
        ? null
        : OpenAIChatModelFactory.ChatOutputTokenUsageDetails(reasoningTokenCount: reasoningTokens);

    return OpenAIChatModelFactory.ChatTokenUsage(
        inputTokenCount: usage.InputTokenCount,
        outputTokenCount: usage.OutputTokenCount,
        inputTokenDetails: chatInputDetails,
        outputTokenDetails: chatOutputDetails);
}
| | | 1284 | | } |