< Summary

Information
Class: OpenAiIntegration.PredictionService.PredictionExecutionTelemetry
Assembly: OpenAiIntegration
File(s): /home/runner/work/KicktippAi/KicktippAi/src/OpenAiIntegration/PredictionService.cs
Line coverage
100%
Covered lines: 5
Uncovered lines: 0
Coverable lines: 5
Total lines: 1284
Line coverage: 100%
Branch coverage
N/A
Covered branches: 0
Total branches: 0
Branch coverage: N/A
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

Method | Branch coverage | Crap Score | Cyclomatic complexity | Line coverage
.ctor(...) | 100% | 1 | 1 | 100%

File(s)

/home/runner/work/KicktippAi/KicktippAi/src/OpenAiIntegration/PredictionService.cs

# | Line | Line coverage
 1using System.Collections.Generic;
 2using System.ClientModel;
 3using System.ClientModel.Primitives;
 4using System.Diagnostics;
 5using System.Globalization;
 6using System.Linq;
 7using System.Text.Json;
 8using System.Text.Json.Serialization;
 9using System.Text.RegularExpressions;
 10using EHonda.KicktippAi.Core;
 11using Microsoft.Extensions.Logging;
 12using OpenAI.Chat;
 13using OpenAI.Responses;
 14using Polly;
 15using Polly.Retry;
 16using Polly.Timeout;
 17
 18namespace OpenAiIntegration;
 19
 20/// <summary>
 21/// Service for predicting match outcomes using OpenAI models
 22/// </summary>
 23public class PredictionService : IPredictionService
 24{
 25    private const int TransientOpenAiMaxRetryAttempts = 3; // retry budget for HTTP 5xx responses
 26    private const int RateLimitedOpenAiMaxRetryAttempts = 8; // retry budget for retryable HTTP 429 responses
 27    private const string FlexServiceTier = "flex"; // tier requested first unless flex processing is disabled
 28    private const string DefaultServiceTier = "default"; // tier used for the single fallback attempt after a flex failure
 29    private static readonly TimeSpan TransientOpenAiRetryDelay = TimeSpan.FromSeconds(2); // base delay of the exponential 5xx backoff
 30    private static readonly TimeSpan RateLimitedOpenAiRetryBaseDelay = TimeSpan.FromSeconds(2); // base of the 429 backoff used when no reset header is usable
 31    private static readonly TimeSpan RateLimitedOpenAiRetryMaxDelay = TimeSpan.FromMinutes(2); // upper bound applied to every rate-limit wait
 32
 33    private readonly ResponsesClient _responsesClient;
 34    private readonly ILogger<PredictionService> _logger;
 35    private readonly ICostCalculationService _costCalculationService;
 36    private readonly ITokenUsageTracker _tokenUsageTracker;
 37    private readonly IInstructionsTemplateProvider _templateProvider;
 38    private readonly PredictionServiceOptions _options;
 39    private readonly string _model;
       // Instruction templates, each resolved through _templateProvider on first access to .Value.
 40    private readonly Lazy<(string Template, string Path)> _instructionsTemplate;
 41    private readonly Lazy<(string Template, string Path)> _instructionsTemplateWithJustification;
 42    private readonly Lazy<(string Template, string Path)> _bonusInstructionsTemplate;
 43
 44    public PredictionService(
 45        ResponsesClient responsesClient,
 46        ILogger<PredictionService> logger,
 47        ICostCalculationService costCalculationService,
 48        ITokenUsageTracker tokenUsageTracker,
 49        IInstructionsTemplateProvider templateProvider,
 50        string model,
 51        PredictionServiceOptions? options = null)
 52    {
           // Stores the injected collaborators. Every required argument is rejected with
           // ArgumentNullException when null; a null `options` falls back to
           // PredictionServiceOptions.Default instead of throwing.
 53        _responsesClient = responsesClient ?? throw new ArgumentNullException(nameof(responsesClient));
 54        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
 55        _costCalculationService = costCalculationService ?? throw new ArgumentNullException(nameof(costCalculationServic
 56        _tokenUsageTracker = tokenUsageTracker ?? throw new ArgumentNullException(nameof(tokenUsageTracker));
 57        _templateProvider = templateProvider ?? throw new ArgumentNullException(nameof(templateProvider));
 58        _options = options ?? PredictionServiceOptions.Default;
 59        _model = model ?? throw new ArgumentNullException(nameof(model));
 60
           // The templates are wrapped in Lazy<T>, so the provider is not called here; each
           // factory runs the first time the corresponding .Value is read.
 61        _instructionsTemplate = new Lazy<(string Template, string Path)>(
 62            () => _templateProvider.LoadMatchTemplate(_model, includeJustification: false));
 63        _instructionsTemplateWithJustification = new Lazy<(string Template, string Path)>(
 64            () => _templateProvider.LoadMatchTemplate(_model, includeJustification: true));
 65        _bonusInstructionsTemplate = new Lazy<(string Template, string Path)>(
 66            () => _templateProvider.LoadBonusTemplate(_model));
 67    }
 68
 69    public async Task<Prediction?> PredictMatchAsync(
 70        EHonda.KicktippAi.Core.Match match,
 71        IEnumerable<DocumentContext> contextDocuments,
 72        bool includeJustification = false,
 73        PredictionTelemetryMetadata? telemetryMetadata = null,
 74        CancellationToken cancellationToken = default)
 75    {
           // Builds the prompt (instructions as the system message, match JSON as the user
           // message), requests a structured prediction from OpenAI, parses it, and records
           // telemetry, token usage, and cost.
           // Returns the parsed prediction, or null when any step inside the try block throws;
           // the exception is logged and written to stderr, never propagated.
 76        _logger.LogInformation("Generating prediction for match: {HomeTeam} vs {AwayTeam} at {StartTime}",
 77            match.HomeTeam, match.AwayTeam, match.StartsAt);
 78
 79        try
 80        {
 81            // Build the instructions by combining template with context
 82            var instructions = BuildInstructions(contextDocuments, includeJustification);
 83
 84            // Create match JSON
 85            var matchJson = PredictionPromptComposer.CreateMatchJson(match);
 86
 87            _logger.LogDebug("Instructions length: {InstructionsLength} characters", instructions.Length);
               // NOTE(review): contextDocuments is enumerated here and again inside
               // BuildInstructions (ToList); a lazy or one-shot sequence would be evaluated twice.
 88            _logger.LogDebug("Context documents: {ContextCount}", contextDocuments.Count());
 89            _logger.LogDebug("Match JSON: {MatchJson}", matchJson);
 90
 91            // Create input items for the response
 92            var messages = new List<PredictionRequestMessage>
 93            {
 94                new("system", instructions),
 95                new("user", matchJson)
 96            };
 97
 98            _logger.LogDebug("Calling OpenAI API for prediction");
 99
 100            // Start an OTel activity for Langfuse generation tracking
 101            using var activity = Telemetry.Source.StartActivity("predict-match");
 102
 103            // Call OpenAI with structured output format
 104            var completion = await CompleteMatchResponseAsync(messages, includeJustification, cancellationToken);
 105
 106            // Parse the structured response
 107            var predictionJson = completion.PredictionJson;
 108            _logger.LogDebug("Received prediction JSON: {PredictionJson}", predictionJson);
 109
 110            var prediction = ParsePrediction(predictionJson);
 111
 112            _logger.LogInformation("Prediction generated: {HomeGoals}-{AwayGoals} for {HomeTeam} vs {AwayTeam}",
 113                prediction.HomeGoals, prediction.AwayGoals, match.HomeTeam, match.AwayTeam);
 114
 115            // Log token usage and cost breakdown
 116            var usage = completion.Usage;
 117            _logger.LogDebug("Token usage - Input: {InputTokens}, Output: {OutputTokens}, Total: {TotalTokens}",
 118                usage.InputTokenCount, usage.OutputTokenCount, usage.TotalTokenCount);
 119
 120            // Set Langfuse generation attributes on the activity
 121            SetLangfuseGenerationAttributes(activity, messages, predictionJson, usage, telemetryMetadata, completion.Exe
 122
 123            // Add usage to tracker
                // ExecutionTelemetry is only attached by the flex-first path of
                // CompleteStructuredResponseAsync; without it no service tier is passed along.
 124            if (completion.ExecutionTelemetry is null)
 125            {
 126                _tokenUsageTracker.AddUsage(_model, usage);
 127            }
 128            else
 129            {
 130                _tokenUsageTracker.AddUsage(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 131            }
 132
 133            // Calculate and log costs
 134            if (completion.ExecutionTelemetry is null)
 135            {
 136                _costCalculationService.LogCostBreakdown(_model, usage);
 137            }
 138            else
 139            {
 140                _costCalculationService.LogCostBreakdown(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 141            }
 142
 143            return prediction;
 144        }
 145        catch (Exception ex)
 146        {
                // NOTE(review): this catches every Exception, so a cancelled request
                // (OperationCanceledException) presumably also ends up as a null result —
                // confirm callers expect that.
 147            _logger.LogError(ex, "Error generating prediction for match: {HomeTeam} vs {AwayTeam}",
 148                match.HomeTeam, match.AwayTeam);
 149            Console.Error.WriteLine($"Prediction error for {match.HomeTeam} vs {match.AwayTeam}: {ex.Message}");
 150
 151            return null;
 152        }
 153    }
 154
 155    private Task<OpenAiResponseResult> CompleteMatchResponseAsync(
 156        IReadOnlyList<PredictionRequestMessage> messages,
 157        bool includeJustification,
 158        CancellationToken cancellationToken)
 159    {
            // Runs a match prediction through CompleteStructuredResponseAsync. The lambda
            // rebuilds the request options on every invocation, so each attempt carries the
            // service tier that CompleteStructuredResponseAsync passes in.
 160        return CompleteStructuredResponseAsync(
 161            completeResponseAsync: (serviceTier, ct) => CompleteResponseAsync(
 162                CreateMatchResponseOptions(
 163                    messages,
 164                    includeJustification,
 165                    serviceTier,
 166                    _options.ReasoningEffort),
 167                serviceTier,
 168                ct),
 169            cancellationToken);
 170    }
 171
 172    private Task<OpenAiResponseResult> CompleteBonusResponseAsync(
 173        IReadOnlyList<PredictionRequestMessage> messages,
 174        BonusQuestion bonusQuestion,
 175        CancellationToken cancellationToken)
 176    {
            // Runs a bonus-question prediction through CompleteStructuredResponseAsync. The
            // lambda rebuilds the request options on every invocation, so each attempt carries
            // the service tier that CompleteStructuredResponseAsync passes in.
 177        return CompleteStructuredResponseAsync(
 178            completeResponseAsync: (serviceTier, ct) => CompleteResponseAsync(
 179                CreateBonusResponseOptions(
 180                    messages,
 181                    bonusQuestion,
 182                    serviceTier,
 183                    _options.ReasoningEffort),
 184                serviceTier,
 185                ct),
 186            cancellationToken);
 187    }
 188
 189    private async Task<OpenAiResponseResult> CompleteStructuredResponseAsync(
 190        Func<string?, CancellationToken, Task<OpenAiResponseResult>> completeResponseAsync,
 191        CancellationToken cancellationToken)
 192    {
            // Runs the request with the "flex" service tier first and retries at most once with
            // the "default" tier when the flex attempt fails with a fallback-eligible error.
            // When flex processing is disabled the delegate is called once with a null tier and
            // its result is returned as-is, without execution telemetry being attached here.
 193        if (_options.DisableFlexProcessing)
 194        {
 195            return await completeResponseAsync(null, cancellationToken);
 196        }
 197
            // Both locals are captured by the retry callbacks and by the execute lambda below;
            // OnRetry mutates them before the second attempt runs.
 198        string? requestedServiceTier = FlexServiceTier;
 199        var usedFallback = false;
 200        var pipeline = new ResiliencePipelineBuilder<OpenAiResponseResult>()
 201            // OpenAI documents that Flex processing can return 429 Resource Unavailable
 202            // when resources are insufficient, and recommends retrying with standard
 203            // processing when occasional higher cost is acceptable:
 204            // https://developers.openai.com/api/docs/guides/flex-processing
 205            // The Responses API reference documents service_tier=default as standard
 206            // pricing/performance, so flex 429 retries switch to that tier:
 207            // https://platform.openai.com/docs/api-reference/responses/create
 208            .AddRetry(new RetryStrategyOptions<OpenAiResponseResult>
 209            {
 210                MaxRetryAttempts = 1,
 211                Delay = TimeSpan.Zero,
                    // Only a failure of the flex attempt is handled; once the tier has been
                    // switched to "default", IsFlexProcessingRequest is false.
 212                ShouldHandle = args => ValueTask.FromResult(
 213                    IsFlexProcessingRequest(requestedServiceTier) &&
 214                    IsFlexFallbackFailure(args.Outcome.Exception, args.Context.CancellationToken)),
 215                OnRetry = args =>
 216                {
 217                    usedFallback = true;
 218                    requestedServiceTier = DefaultServiceTier;
 219                    _logger.LogWarning(
 220                        args.Outcome.Exception,
 221                        "OpenAI flex processing failed with a retryable failure; retrying prediction with default proces
 222                    return default;
 223                }
 224            })
 225            .Build();
 226
 227        var result = await pipeline.ExecuteAsync(
 228            async ct =>
 229            {
 230                var completion = await completeResponseAsync(requestedServiceTier, ct);
 231
                    // Prefer the tier reported by the response; otherwise use the tier requested.
                    // NOTE(review): requestedServiceTier is only ever assigned FlexServiceTier or
                    // DefaultServiceTier in this method, so the "standard" fallback looks
                    // unreachable — confirm.
 232                var finalServiceTier = string.IsNullOrWhiteSpace(completion.FinalServiceTier)
 233                    ? requestedServiceTier ?? "standard"
 234                    : completion.FinalServiceTier;
 235
 236                return completion with
 237                {
 238                    ExecutionTelemetry = new PredictionExecutionTelemetry(
 239                        "flex-first-standard-fallback",
 240                        usedFallback ? DefaultServiceTier : FlexServiceTier,
 241                        finalServiceTier,
 242                        usedFallback)
 243                };
 244            },
 245            cancellationToken);
 246
 247        return result;
 248    }
 249
 250    private async Task<OpenAiResponseResult> CompleteResponseAsync(
 251        CreateResponseOptions options,
 252        string? serviceTier,
 253        CancellationToken cancellationToken)
 254    {
            // Sends one request (including the inner rate-limit / 5xx retries) and converts the
            // response into an OpenAiResponseResult.
            // Throws InvalidOperationException when the response has no output text or no
            // token usage.
 255        var response = await CreateResponseWithTransientRetryAsync(options, serviceTier, cancellationToken);
 256        var responseResult = response.Value;
 257        var predictionJson = responseResult.GetOutputText();
 258        if (predictionJson is null)
 259        {
 260            throw new InvalidOperationException("OpenAI response did not contain output text.");
 261        }
 262
 263        var usage = responseResult.Usage is null
 264                    ? null
 265                    : ToChatTokenUsage(responseResult.Usage);
 266        if (usage is null)
 267        {
 268            throw new InvalidOperationException("OpenAI response did not contain token usage.");
 269        }
 270
            // NOTE(review): the third argument is presumably ExecutionTelemetry, which
            // CompleteStructuredResponseAsync fills in afterwards — confirm against the
            // OpenAiResponseResult definition.
 271        return new OpenAiResponseResult(
 272            predictionJson,
 273            usage,
 274            null,
 275            NormalizeResponseServiceTier(responseResult.ServiceTier));
 276    }
 277
 278    private async Task<ClientResult<ResponseResult>> CreateResponseWithTransientRetryAsync(
 279        CreateResponseOptions options,
 280        string? requestedServiceTier,
 281        CancellationToken cancellationToken)
 282    {
            // Executes the API call through a Polly pipeline with two retry strategies: one for
            // retryable 429 rate limits (skipped for flex requests) and one for HTTP 5xx
            // responses. Any other failure propagates to the caller.
 283        var pipeline = new ResiliencePipelineBuilder<ClientResult<ResponseResult>>()
 284            // OpenAI documents 429 rate-limit errors as pacing problems and recommends bounded
 285            // random exponential backoff. It also documents x-ratelimit-* response headers
 286            // for reset timing:
 287            // https://platform.openai.com/docs/guides/rate-limits
 288            // https://platform.openai.com/docs/guides/error-codes
 289            // https://platform.openai.com/docs/api-reference
 290            // These references document x-ratelimit-* reset headers, not Retry-After.
 291            .AddRetry(new RetryStrategyOptions<ClientResult<ResponseResult>>
 292            {
 293                MaxRetryAttempts = RateLimitedOpenAiMaxRetryAttempts,
 294                DelayGenerator = args => new ValueTask<TimeSpan?>(
 295                    ResolveOpenAiRateLimitDelay(args.Outcome.Exception, args.AttemptNumber)),
                    // Flex requests are excluded so that a flex 429 surfaces immediately to the
                    // fallback-to-default retry in CompleteStructuredResponseAsync.
 296                ShouldHandle = args => ValueTask.FromResult(
 297                    !IsFlexProcessingRequest(requestedServiceTier) &&
 298                    IsRetryableOpenAiRateLimitFailure(args.Outcome.Exception, args.Context.CancellationToken)),
 299                OnRetry = args =>
 300                {
                        // AttemptNumber + 1 turns the attempt index into a 1-based retry counter
                        // for the log message (Polly documents AttemptNumber as zero-based).
 301                    _logger.LogWarning(
 302                        args.Outcome.Exception,
 303                        "OpenAI request hit a rate limit; retrying prediction request ({RetryAttempt}/{MaxRetryAttempts}
 304                        args.AttemptNumber + 1,
 305                        RateLimitedOpenAiMaxRetryAttempts,
 306                        args.RetryDelay);
 307                    return default;
 308                }
 309            })
 310            .AddRetry(new RetryStrategyOptions<ClientResult<ResponseResult>>
 311            {
                    // 5xx strategy: exponential backoff with jitter, starting at
                    // TransientOpenAiRetryDelay.
 312                MaxRetryAttempts = TransientOpenAiMaxRetryAttempts,
 313                Delay = TransientOpenAiRetryDelay,
 314                BackoffType = DelayBackoffType.Exponential,
 315                UseJitter = true,
 316                ShouldHandle = args => ValueTask.FromResult(
 317                    IsTransientOpenAiServerFailure(args.Outcome.Exception, args.Context.CancellationToken)),
 318                OnRetry = args =>
 319                {
 320                    _logger.LogWarning(
 321                        args.Outcome.Exception,
 322                        "OpenAI request failed with a transient server error; retrying prediction request ({RetryAttempt
 323                        args.AttemptNumber + 1,
 324                        TransientOpenAiMaxRetryAttempts);
 325                    return default;
 326                }
 327            })
 328            .Build();
 329
 330        return await pipeline.ExecuteAsync(
 331            async ct => await _responsesClient.CreateResponseAsync(options, ct),
 332            cancellationToken);
 333    }
 334
 335    private CreateResponseOptions CreateMatchResponseOptions(
 336        IReadOnlyList<PredictionRequestMessage> messages,
 337        bool includeJustification,
 338        string? serviceTier,
 339        string? reasoningEffort)
 340    {
            // Builds request options for a match prediction: schema name "match_prediction"
            // with the JSON schema produced by BuildPredictionJsonSchema(includeJustification).
 341        return CreateResponseOptions(
 342            messages,
 343            "match_prediction",
 344            BinaryData.FromBytes(BuildPredictionJsonSchema(includeJustification)),
 345            serviceTier,
 346            reasoningEffort);
 347    }
 348
 349    private CreateResponseOptions CreateBonusResponseOptions(
 350        IReadOnlyList<PredictionRequestMessage> messages,
 351        BonusQuestion bonusQuestion,
 352        string? serviceTier,
 353        string? reasoningEffort)
 354    {
            // Builds request options for a bonus-question prediction: schema name
            // "bonus_prediction" with the JSON schema produced by
            // CreateSingleBonusPredictionJsonSchema(bonusQuestion).
 355        return CreateResponseOptions(
 356            messages,
 357            "bonus_prediction",
 358            BinaryData.FromBytes(CreateSingleBonusPredictionJsonSchema(bonusQuestion)),
 359            serviceTier,
 360            reasoningEffort);
 361    }
 362
 363    private CreateResponseOptions CreateResponseOptions(
 364        IReadOnlyList<PredictionRequestMessage> messages,
 365        string schemaName,
 366        BinaryData schema,
 367        string? serviceTier,
 368        string? reasoningEffort)
 369    {
            // Assembles the request: model, output-token cap, strict JSON-schema output format,
            // the input messages in order, and — only when non-blank — the service tier and the
            // reasoning effort.
 370        var options = new CreateResponseOptions
 371        {
 372            Model = _model,
 373            MaxOutputTokenCount = _options.MaxOutputTokenCount, // Safeguard against high costs
 374            TextOptions = new ResponseTextOptions
 375            {
 376                TextFormat = ResponseTextFormat.CreateJsonSchemaFormat(
 377                    jsonSchemaFormatName: schemaName,
 378                    jsonSchema: schema,
 379                    jsonSchemaIsStrict: true)
 380            }
 381        };
 382
 383        foreach (var message in messages)
 384        {
 385            options.InputItems.Add(CreateResponseMessage(message));
 386        }
 387
            // A null or blank tier leaves ServiceTier unset on the request.
 388        var normalizedServiceTier = NormalizeServiceTier(serviceTier);
 389        if (normalizedServiceTier is not null)
 390        {
 391            options.ServiceTier = new ResponseServiceTier(normalizedServiceTier);
 392        }
 393
            // A null or blank effort leaves ReasoningOptions unset on the request.
 394        var normalizedReasoningEffort = NormalizeReasoningEffort(reasoningEffort);
 395        if (normalizedReasoningEffort is not null)
 396        {
 397            options.ReasoningOptions = new ResponseReasoningOptions
 398            {
 399                ReasoningEffortLevel = new ResponseReasoningEffortLevel(normalizedReasoningEffort)
 400            };
 401        }
 402
 403        return options;
 404    }
 405
 406    private static string? NormalizeReasoningEffort(string? reasoningEffort)
 407    {
            // Null, empty, or whitespace-only input yields null; anything else is trimmed and
            // lower-cased with the invariant culture.
 408        return string.IsNullOrWhiteSpace(reasoningEffort)
 409            ? null
 410            : reasoningEffort.Trim().ToLowerInvariant();
 411    }
 412
 413    private static string? NormalizeServiceTier(string? serviceTier)
 414    {
            // Null, empty, or whitespace-only input yields null; anything else is trimmed and
            // lower-cased with the invariant culture.
 415        return string.IsNullOrWhiteSpace(serviceTier)
 416            ? null
 417            : serviceTier.Trim().ToLowerInvariant();
 418    }
 419
 420    private static string? NormalizeResponseServiceTier(ResponseServiceTier? serviceTier)
 421    {
            // Applies the same normalization as NormalizeServiceTier to the tier reported by the
            // response: a missing tier or one whose text is blank yields null, otherwise the
            // text is trimmed and lower-cased.
 422        return string.IsNullOrWhiteSpace(serviceTier?.ToString())
 423            ? null
 424            : serviceTier.Value.ToString().Trim().ToLowerInvariant();
 425    }
 426
 427    private static ResponseItem CreateResponseMessage(PredictionRequestMessage message)
 428    {
            // Maps a role/content pair to the matching SDK input item. Only the exact roles
            // "system" and "user" are supported; any other role throws InvalidOperationException.
 429        return message.Role switch
 430        {
 431            "system" => ResponseItem.CreateSystemMessageItem(message.Content),
 432            "user" => ResponseItem.CreateUserMessageItem(message.Content),
 433            _ => throw new InvalidOperationException($"Unsupported response message role '{message.Role}'.")
 434        };
 435    }
 436
 437    private static bool IsFlexProcessingRequest(string? requestedServiceTier)
 438    {
            // True when the requested tier, after trimming and lower-casing, equals "flex".
            // A null or blank tier normalizes to null and therefore yields false.
 439        return string.Equals(
 440            NormalizeServiceTier(requestedServiceTier),
 441            FlexServiceTier,
 442            StringComparison.OrdinalIgnoreCase);
 443    }
 444
 445    private static bool IsFlexFallbackFailure(Exception? exception, CancellationToken cancellationToken)
 446    {
            // Decides whether a failed flex attempt should be retried on the default tier.
            // Always false when there is no exception or once cancellation has been requested.
 447        if (exception is null)
 448        {
 449            return false;
 450        }
 451
 452        if (cancellationToken.IsCancellationRequested)
 453        {
 454            return false;
 455        }
 456
 457        return exception switch
 458        {
 459            ClientResultException { Status: 408 } => true,
                // A 429 qualifies when it is a flex resource-unavailable response or a
                // retryable rate limit (i.e. not a quota-exhaustion error).
 460            ClientResultException { Status: 429 } clientException => IsFlexResourceUnavailableFailure(clientException)
 461                || IsRetryableOpenAiRateLimitFailure(clientException, cancellationToken),
 462            TimeoutRejectedException => true,
 463            TimeoutException => true,
                // Reached only while the supplied token is not cancelled, so this is presumably
                // a client-side timeout rather than a caller cancellation — confirm.
 464            TaskCanceledException => true,
 465            _ => false
 466        };
 467    }
 468
 469    private static bool IsTransientOpenAiServerFailure(Exception? exception, CancellationToken cancellationToken)
 470    {
            // True only for a ClientResultException whose status is in the HTTP 5xx range, and
            // only while cancellation has not been requested.
 471        if (exception is null || cancellationToken.IsCancellationRequested)
 472        {
 473            return false;
 474        }
 475
 476        return exception is ClientResultException { Status: >= 500 and <= 599 };
 477    }
 478
 479    private static bool IsRetryableOpenAiRateLimitFailure(Exception? exception, CancellationToken cancellationToken)
 480    {
            // True for an HTTP 429 that is neither a flex resource-unavailable response nor a
            // quota-exhaustion error. Always false once cancellation has been requested.
 481        if (exception is not ClientResultException { Status: 429 } clientException ||
 482            cancellationToken.IsCancellationRequested)
 483        {
 484            return false;
 485        }
 486
            // Quota markers are searched in the exception message, the reason phrase, and the
            // response body.
 487        return !IsFlexResourceUnavailableFailure(clientException)
 488               && !ContainsQuotaExhaustedMarker(clientException.Message)
 489               && !ContainsQuotaExhaustedMarker(clientException.GetRawResponse()?.ReasonPhrase)
 490               && !ContainsQuotaExhaustedMarker(clientException.GetRawResponse()?.Content.ToString());
 491    }
 492
 493    private static TimeSpan ResolveOpenAiRateLimitDelay(Exception? exception, int attemptNumber)
 494    {
            // Picks the wait before the next rate-limit retry: the reset time reported in the
            // x-ratelimit-* headers when one can be parsed, otherwise capped exponential backoff
            // with jitter. Both paths are clamped to [0, RateLimitedOpenAiRetryMaxDelay].
 495        if (exception is ClientResultException clientException &&
 496            TryGetOpenAiRateLimitResetDelay(clientException.GetRawResponse(), out var resetDelay))
 497        {
 498            return ClampOpenAiRateLimitDelay(resetDelay);
 499        }
 500
            // base * 2^attempt, capped at the maximum delay; negative attempt numbers count as 0.
 501        var cappedExponentialMilliseconds = Math.Min(
 502            RateLimitedOpenAiRetryMaxDelay.TotalMilliseconds,
 503            RateLimitedOpenAiRetryBaseDelay.TotalMilliseconds * Math.Pow(2, Math.Max(0, attemptNumber)));
            // Jitter: the result lies between 50% (inclusive) and 100% (exclusive) of the
            // capped exponential delay.
 504        var jitterFloorMilliseconds = cappedExponentialMilliseconds / 2;
 505        var jitteredMilliseconds = jitterFloorMilliseconds + Random.Shared.NextDouble() * jitterFloorMilliseconds;
 506
 507        return ClampOpenAiRateLimitDelay(TimeSpan.FromMilliseconds(jitteredMilliseconds));
 508    }
 509
 510    private static bool TryGetOpenAiRateLimitResetDelay(PipelineResponse? response, out TimeSpan delay)
 511    {
            // Reads the reset delays for the "requests" and "tokens" dimensions from the
            // response headers. Prefers the longest reset among dimensions whose remaining count
            // is <= 0; if none is exhausted, uses the longest reset that was reported.
            // Returns false (delay = default) when the response is null or no reset header
            // could be parsed.
 512        delay = default;
 513        if (response is null)
 514        {
 515            return false;
 516        }
 517
 518        var exhaustedDelays = new List<TimeSpan>();
 519        var availableDelays = new List<TimeSpan>();
 520
 521        AddRateLimitResetDelay(response, "requests", exhaustedDelays, availableDelays);
 522        AddRateLimitResetDelay(response, "tokens", exhaustedDelays, availableDelays);
 523
 524        if (exhaustedDelays.Count > 0)
 525        {
 526            delay = exhaustedDelays.Max();
 527            return true;
 528        }
 529
 530        if (availableDelays.Count > 0)
 531        {
 532            delay = availableDelays.Max();
 533            return true;
 534        }
 535
 536        return false;
 537    }
 538
 539    private static void AddRateLimitResetDelay(
 540        PipelineResponse response,
 541        string dimension,
 542        List<TimeSpan> exhaustedDelays,
 543        List<TimeSpan> availableDelays)
 544    {
            // Parses x-ratelimit-reset-{dimension} and files the delay into one of the two
            // lists. Nothing is added when the reset header is missing or cannot be parsed.
 545        if (!TryGetOpenAiRateLimitHeader(response, $"x-ratelimit-reset-{dimension}", out var resetText) ||
 546            !TryParseOpenAiRateLimitReset(resetText, out var resetDelay))
 547        {
 548            return;
 549        }
 550
            // "Exhausted" means x-ratelimit-remaining-{dimension} is present and parses to a
            // value <= 0.
 551        if (TryGetOpenAiRateLimitHeader(response, $"x-ratelimit-remaining-{dimension}", out var remainingText) &&
 552            decimal.TryParse(remainingText, NumberStyles.Number, CultureInfo.InvariantCulture, out var remaining) &&
 553            remaining <= 0)
 554        {
 555            exhaustedDelays.Add(resetDelay);
 556            return;
 557        }
 558
            // A missing or unparseable remaining header also lands here.
 559        availableDelays.Add(resetDelay);
 560    }
 561
 562    private static bool TryGetOpenAiRateLimitHeader(PipelineResponse response, string name, out string value)
 563    {
            // Looks up a response header by name. A header that is present with a null value is
            // reported as found with an empty string; a missing header (or a null Headers
            // collection) returns false with an empty string.
 564        if (response.Headers is not null && response.Headers.TryGetValue(name, out var headerValue))
 565        {
 566            value = headerValue ?? string.Empty;
 567            return true;
 568        }
 569
 570        value = string.Empty;
 571        return false;
 572    }
 573
 574    private static bool TryParseOpenAiRateLimitReset(string text, out TimeSpan delay)
 575    {
            // Parses duration text made of <number><unit> segments, where the unit is ms, s, m,
            // or h (case-insensitive), and sums all segments into one delay.
            // Returns false for blank text, when no segment matches, or when a matched number
            // fails to parse.
 576        delay = default;
 577        if (string.IsNullOrWhiteSpace(text))
 578        {
 579            return false;
 580        }
 581
            // NOTE(review): the pattern is not anchored, so characters outside the matched
            // segments are ignored rather than rejected.
 582        var matches = Regex.Matches(
 583            text.Trim(),
 584            @"(?<value>\d+(?:\.\d+)?)(?<unit>ms|s|m|h)",
 585            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
 586        if (matches.Count == 0)
 587        {
 588            return false;
 589        }
 590
 591        var totalMilliseconds = 0.0;
 592        foreach (System.Text.RegularExpressions.Match match in matches)
 593        {
 594            if (!double.TryParse(match.Groups["value"].Value, NumberStyles.Float, CultureInfo.InvariantCulture, out var 
 595            {
 596                return false;
 597            }
 598
                // Convert each segment to milliseconds. The default arm contributes 0; the
                // pattern only admits the four units listed, so it should not be reached.
 599            totalMilliseconds += match.Groups["unit"].Value.ToLowerInvariant() switch
 600            {
 601                "ms" => value,
 602                "s" => value * 1_000,
 603                "m" => value * 60_000,
 604                "h" => value * 3_600_000,
 605                _ => 0
 606            };
 607        }
 608
 609        delay = TimeSpan.FromMilliseconds(Math.Max(0, totalMilliseconds));
 610        return true;
 611    }
 612
 613    private static TimeSpan ClampOpenAiRateLimitDelay(TimeSpan delay)
 614    {
            // Clamps the delay to the range [TimeSpan.Zero, RateLimitedOpenAiRetryMaxDelay].
 615        if (delay < TimeSpan.Zero)
 616        {
 617            return TimeSpan.Zero;
 618        }
 619
 620        return delay > RateLimitedOpenAiRetryMaxDelay
 621            ? RateLimitedOpenAiRetryMaxDelay
 622            : delay;
 623    }
 624
 625    private static bool IsFlexResourceUnavailableFailure(ClientResultException exception)
 626    {
            // Checks the exception message, the response reason phrase, and the response body
            // for a resource-unavailable marker; a missing raw response simply contributes
            // nothing to the check.
 627        var rawResponse = exception.GetRawResponse();
 628        return ContainsFlexResourceUnavailableMarker(exception.Message)
 629               || ContainsFlexResourceUnavailableMarker(rawResponse?.ReasonPhrase)
 630               || ContainsFlexResourceUnavailableMarker(rawResponse?.Content.ToString());
 631    }
 632
 633    private static bool ContainsFlexResourceUnavailableMarker(string? text)
 634    {
            // Case-insensitive substring search for any of the markers below; null or blank
            // text yields false.
 635        if (string.IsNullOrWhiteSpace(text))
 636        {
 637            return false;
 638        }
 639
            // NOTE(review): "capacity" is a broad substring and could match 429 messages that
            // are unrelated to flex resource availability — confirm this is intended.
 640        return text.Contains("resource_unavailable", StringComparison.OrdinalIgnoreCase)
 641               || text.Contains("resource unavailable", StringComparison.OrdinalIgnoreCase)
 642               || text.Contains("resources unavailable", StringComparison.OrdinalIgnoreCase)
 643               || text.Contains("insufficient resources", StringComparison.OrdinalIgnoreCase)
 644               || text.Contains("capacity", StringComparison.OrdinalIgnoreCase);
 645    }
 646
 647    private static bool ContainsQuotaExhaustedMarker(string? text)
 648    {
            // Case-insensitive substring search for the quota-exhaustion markers below; null or
            // blank text yields false. Callers use a match to treat a 429 as non-retryable.
 649        if (string.IsNullOrWhiteSpace(text))
 650        {
 651            return false;
 652        }
 653
 654        return text.Contains("insufficient_quota", StringComparison.OrdinalIgnoreCase)
 655               || text.Contains("exceeded your current quota", StringComparison.OrdinalIgnoreCase)
 656               || text.Contains("check your plan and billing", StringComparison.OrdinalIgnoreCase);
 657    }
 658
 659    public async Task<BonusPrediction?> PredictBonusQuestionAsync(
 660        BonusQuestion bonusQuestion,
 661        IEnumerable<DocumentContext> contextDocuments,
 662        PredictionTelemetryMetadata? telemetryMetadata = null,
 663        CancellationToken cancellationToken = default)
 664    {
            // Builds the prompt (instructions as the system message, question JSON as the user
            // message), requests a structured bonus prediction from OpenAI, parses it, and
            // records telemetry, token usage, and cost.
            // Returns the parsed prediction, which may itself be null (see the null check after
            // parsing), or null when any step inside the try block throws; the exception is
            // logged, never propagated.
 665        _logger.LogInformation("Generating prediction for bonus question: {QuestionText}", bonusQuestion.Text);
 666
 667        try
 668        {
 669            // Build the instructions by combining template with context
 670            var instructions = BuildBonusInstructions(contextDocuments);
 671
 672            // Create bonus question JSON
 673            var questionJson = PredictionPromptComposer.CreateBonusQuestionJson(bonusQuestion);
 674
 675            _logger.LogDebug("Instructions length: {InstructionsLength} characters", instructions.Length);
                // NOTE(review): contextDocuments is enumerated here in addition to whatever
                // BuildBonusInstructions does with it; a lazy or one-shot sequence may be
                // evaluated more than once — confirm.
 676            _logger.LogDebug("Context documents: {ContextCount}", contextDocuments.Count());
 677            _logger.LogDebug("Question JSON: {QuestionJson}", questionJson);
 678
 679            // Create input items for the response
 680            var messages = new List<PredictionRequestMessage>
 681            {
 682                new("system", instructions),
 683                new("user", questionJson)
 684            };
 685
 686            _logger.LogDebug("Calling OpenAI API for bonus prediction");
 687
 688            // Start an OTel activity for Langfuse generation tracking
 689            using var activity = Telemetry.Source.StartActivity("predict-bonus");
 690
 691            // Call OpenAI with structured output format
 692            var completion = await CompleteBonusResponseAsync(messages, bonusQuestion, cancellationToken);
 693
 694            // Parse the structured response
 695            var predictionJson = completion.PredictionJson;
 696            _logger.LogDebug("Received bonus prediction JSON: {PredictionJson}", predictionJson);
 697
 698            var prediction = ParseSingleBonusPrediction(predictionJson, bonusQuestion);
 699
 700            if (prediction != null)
 701            {
 702                _logger.LogInformation("Generated prediction for bonus question: {SelectedOptions}",
 703                    string.Join(", ", prediction.SelectedOptionIds));
 704            }
 705
 706            // Log token usage and cost breakdown
 707            var usage = completion.Usage;
 708            _logger.LogDebug("Token usage - Input: {InputTokens}, Output: {OutputTokens}, Total: {TotalTokens}",
 709                usage.InputTokenCount, usage.OutputTokenCount, usage.TotalTokenCount);
 710
 711            // Set Langfuse generation attributes on the activity
 712            SetLangfuseGenerationAttributes(activity, messages, predictionJson, usage, telemetryMetadata, completion.Exe
 713
 714            // Add usage to tracker
                // ExecutionTelemetry is only attached by the flex-first path of
                // CompleteStructuredResponseAsync; without it no service tier is passed along.
 715            if (completion.ExecutionTelemetry is null)
 716            {
 717                _tokenUsageTracker.AddUsage(_model, usage);
 718            }
 719            else
 720            {
 721                _tokenUsageTracker.AddUsage(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 722            }
 723
 724            // Calculate and log costs
 725            if (completion.ExecutionTelemetry is null)
 726            {
 727                _costCalculationService.LogCostBreakdown(_model, usage);
 728            }
 729            else
 730            {
 731                _costCalculationService.LogCostBreakdown(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 732            }
 733
 734            return prediction;
 735        }
 736        catch (Exception ex)
 737        {
                // NOTE(review): this catches every Exception, so a cancelled request
                // (OperationCanceledException) presumably also ends up as a null result —
                // confirm callers expect that. Unlike PredictMatchAsync, nothing is written to
                // Console.Error here.
 738            _logger.LogError(ex, "Error generating bonus prediction for question: {QuestionText}", bonusQuestion.Text);
 739            return null;
 740        }
 741    }
 742
 743    private string BuildInstructions(IEnumerable<DocumentContext> contextDocuments, bool includeJustification)
 744    {
 745        var template = includeJustification
 746            ? _instructionsTemplateWithJustification.Value.Template
 747            : _instructionsTemplate.Value.Template;
 748
 749        var contextList = contextDocuments.ToList();
 750        if (contextList.Any())
 751        {
 752            _logger.LogDebug("Added {ContextCount} context documents to instructions", contextList.Count);
 753        }
 754        else
 755        {
 756            _logger.LogDebug("No context documents provided");
 757        }
 758
 759        return PredictionPromptComposer.BuildSystemPrompt(template, contextList);
 760    }
 761
        /// <summary>
        /// Builds the JSON schema describing the structured match prediction payload.
        /// The schema always requires integer <c>home</c> and <c>away</c> properties; when
        /// <paramref name="includeJustification"/> is set it additionally requires a
        /// <c>justification</c> object (<c>keyReasoning</c>, <c>contextSources</c>, <c>uncertainties</c>).
        /// Every object level sets <c>additionalProperties</c> to <c>false</c>.
        /// </summary>
        /// <param name="includeJustification">Whether to add the required <c>justification</c> property.</param>
        /// <returns>The schema serialized to UTF-8 JSON bytes.</returns>
 762    private static byte[] BuildPredictionJsonSchema(bool includeJustification)
 763    {
            // Base schema: the two score properties that are present in every variant.
 764        var properties = new Dictionary<string, object?>
 765        {
 766            ["home"] = new Dictionary<string, object?>
 767            {
 768                ["type"] = "integer",
 769                ["description"] = "Predicted goals for the home team"
 770            },
 771            ["away"] = new Dictionary<string, object?>
 772            {
 773                ["type"] = "integer",
 774                ["description"] = "Predicted goals for the away team"
 775            }
 776        };
 777
            // Mutable list because "justification" is appended below for the justification variant.
 778        var required = new List<string> { "home", "away" };
 779
 780        if (includeJustification)
 781        {
                // Item schema for entries of contextSources.mostValuable.
 782            var mostValuableContextSourceItem = new Dictionary<string, object?>
 783            {
 784                ["type"] = "object",
 785                ["properties"] = new Dictionary<string, object?>
 786                {
 787                    ["documentName"] = new Dictionary<string, object?>
 788                    {
 789                        ["type"] = "string",
 790                        ["description"] = "Name of the context document referenced"
 791                    },
 792                    ["details"] = new Dictionary<string, object?>
 793                    {
 794                        ["type"] = "string",
 795                        ["description"] = "Brief summary of why the document or parts of it were useful"
 796                    }
 797                },
 798                ["required"] = new[] { "documentName", "details" },
 799                ["additionalProperties"] = false
 800            };
 801
                // Item schema for entries of contextSources.leastValuable; same shape, different description text.
 802            var leastValuableContextSourceItem = new Dictionary<string, object?>
 803            {
 804                ["type"] = "object",
 805                ["properties"] = new Dictionary<string, object?>
 806                {
 807                    ["documentName"] = new Dictionary<string, object?>
 808                    {
 809                        ["type"] = "string",
 810                        ["description"] = "Name of the context document referenced"
 811                    },
 812                    ["details"] = new Dictionary<string, object?>
 813                    {
 814                        ["type"] = "string",
 815                        ["description"] = "Brief summary explaining why the document or parts of it offered limited insi
 816                    }
 817                },
 818                ["required"] = new[] { "documentName", "details" },
 819                ["additionalProperties"] = false
 820            };
 821
                // Both arrays are required but may be empty (minItems = 0).
 822            var contextSources = new Dictionary<string, object?>
 823            {
 824                ["type"] = "object",
 825                ["properties"] = new Dictionary<string, object?>
 826                {
 827                    ["mostValuable"] = new Dictionary<string, object?>
 828                    {
 829                        ["type"] = "array",
 830                        ["items"] = mostValuableContextSourceItem,
 831                        ["description"] = "Context documents that most influenced the prediction",
 832                        ["minItems"] = 0
 833                    },
 834                    ["leastValuable"] = new Dictionary<string, object?>
 835                    {
 836                        ["type"] = "array",
 837                        ["items"] = leastValuableContextSourceItem,
 838                        ["description"] = "Context documents that provided limited or no valuable insight",
 839                        ["minItems"] = 0
 840                    }
 841                },
 842                ["required"] = new[] { "leastValuable", "mostValuable" },
 843                ["additionalProperties"] = false
 844            };
 845
 846            properties["justification"] = new Dictionary<string, object?>
 847            {
 848                ["type"] = "object",
 849                ["properties"] = new Dictionary<string, object?>
 850                {
 851                    ["keyReasoning"] = new Dictionary<string, object?>
 852                    {
 853                        ["type"] = "string",
 854                        ["description"] = "Concise analytic summary motivating the predicted scoreline"
 855                    },
 856                    ["contextSources"] = contextSources,
 857                    ["uncertainties"] = new Dictionary<string, object?>
 858                    {
 859                        ["type"] = "array",
 860                        ["items"] = new Dictionary<string, object?>
 861                        {
 862                            ["type"] = "string",
 863                            ["description"] = "Single uncertainty or external factor affecting confidence"
 864                        },
 865                        ["description"] = "Factors that could alter the predicted outcome",
 866                        ["minItems"] = 0
 867                    }
 868                },
 869                ["required"] = new[] { "contextSources", "keyReasoning", "uncertainties" },
 870                ["additionalProperties"] = false
 871            };
 872            required.Add("justification");
 873        }
 874
            // Root object: only the listed properties are allowed and all of them are required.
 875        var schema = new Dictionary<string, object?>
 876        {
 877            ["type"] = "object",
 878            ["properties"] = properties,
 879            ["required"] = required,
 880            ["additionalProperties"] = false
 881        };
 882
 883        return JsonSerializer.SerializeToUtf8Bytes(schema);
 884    }
 885
        /// <summary>
        /// Deserializes the model's structured JSON into a <see cref="Prediction"/>, mapping the
        /// optional justification section when it is present.
        /// </summary>
        /// <param name="predictionJson">Raw JSON text returned by the model.</param>
        /// <returns>The parsed prediction; its justification is <c>null</c> when the JSON carries none.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when deserialization yields <c>null</c>, or when a <see cref="JsonException"/> occurs
        /// (the original exception is attached as the inner exception).
        /// </exception>
 886    private Prediction ParsePrediction(string predictionJson)
 887    {
 888        try
 889        {
 890            _logger.LogDebug("Parsing prediction JSON: {PredictionJson}", predictionJson);
 891
 892            var predictionResponse = JsonSerializer.Deserialize<PredictionResponse>(predictionJson);
 893            if (predictionResponse == null)
 894            {
                    // Dump the raw payload before failing so the offending response can be inspected.
 895                LogRawModelResponse(predictionJson);
 896                throw new InvalidOperationException("Failed to deserialize prediction response");
 897            }
 898
 899            _logger.LogDebug("Parsed prediction response - Home: {Home}, Away: {Away}", predictionResponse.Home, predict
 900
 901            PredictionJustification? justification = null;
 902
 903            if (predictionResponse.Justification != null)
 904            {
 905                var justificationResponse = predictionResponse.Justification;
 906
                    // Null entries are dropped, names/details are trimmed, and a missing list becomes an empty one.
 907                var mostValuable = justificationResponse.ContextSources?.MostValuable?
 908                    .Where(entry => entry != null)
 909                    .Select(entry => new PredictionJustificationContextSource(
 910                        entry!.DocumentName?.Trim() ?? string.Empty,
 911                        entry.Details?.Trim() ?? string.Empty))
 912                    .ToList() ?? new List<PredictionJustificationContextSource>();
 913
                    // Same normalization as for the most valuable sources.
 914                var leastValuable = justificationResponse.ContextSources?.LeastValuable?
 915                    .Where(entry => entry != null)
 916                    .Select(entry => new PredictionJustificationContextSource(
 917                        entry!.DocumentName?.Trim() ?? string.Empty,
 918                        entry.Details?.Trim() ?? string.Empty))
 919                    .ToList() ?? new List<PredictionJustificationContextSource>();
 920
                    // Blank uncertainty strings are discarded; the remaining ones are trimmed.
 921                var uncertainties = justificationResponse.Uncertainties?
 922                    .Where(item => !string.IsNullOrWhiteSpace(item))
 923                    .Select(item => item.Trim())
 924                    .ToList() ?? new List<string>();
 925
 926                justification = new PredictionJustification(
 927                    justificationResponse.KeyReasoning?.Trim() ?? string.Empty,
 928                    new PredictionJustificationContextSources(mostValuable, leastValuable),
 929                    uncertainties);
 930
 931                _logger.LogDebug(
 932                    "Parsed justification with key reasoning: {KeyReasoning}; Most valuable sources: {MostValuableCount}
 933                    justification.KeyReasoning,
 934                    justification.ContextSources.MostValuable.Count,
 935                    justification.ContextSources.LeastValuable.Count,
 936                    justification.Uncertainties.Count);
 937            }
 938
 939            return new Prediction(predictionResponse.Home, predictionResponse.Away, justification);
 940        }
 941        catch (JsonException ex)
 942        {
                // Malformed JSON: log it, dump the raw payload, and surface it as InvalidOperationException.
 943            _logger.LogError(ex, "Failed to parse prediction JSON: {PredictionJson}", predictionJson);
 944            LogRawModelResponse(predictionJson);
 945            throw new InvalidOperationException($"Failed to parse prediction response: {ex.Message}", ex);
 946        }
 947    }
 948
 949    private void LogRawModelResponse(string rawResponse)
 950    {
 951        if (string.IsNullOrWhiteSpace(rawResponse))
 952        {
 953            const string message = "Raw model response from OpenAI was empty or whitespace.";
 954            _logger.LogError(message);
 955            Console.Error.WriteLine(message);
 956            return;
 957        }
 958
 959        _logger.LogError("Raw model response from OpenAI: {RawResponse}", rawResponse);
 960        Console.Error.WriteLine("Raw model response from OpenAI:");
 961        Console.Error.WriteLine(rawResponse);
 962    }
 963
 964    private string BuildBonusInstructions(IEnumerable<DocumentContext> contextDocuments)
 965    {
 966        // Use the pre-loaded bonus instructions template
 967        var bonusInstructionsTemplate = _bonusInstructionsTemplate.Value.Template;
 968
 969        var contextList = contextDocuments.ToList();
 970        if (contextList.Any())
 971        {
 972            _logger.LogDebug("Added {ContextCount} context documents to bonus instructions", contextList.Count);
 973        }
 974        else
 975        {
 976            _logger.LogDebug("No context documents provided for bonus predictions");
 977        }
 978
 979        return PredictionPromptComposer.BuildSystemPrompt(bonusInstructionsTemplate, contextList);
 980    }
 981
 982    private static byte[] CreateSingleBonusPredictionJsonSchema(BonusQuestion question)
 983    {
 984        // For multi-selection questions, require exactly MaxSelections answers
 985        // For single-selection questions, require exactly 1 answer
 986        var requiredSelections = question.MaxSelections;
 987
 988        var schema = new
 989        {
 990            type = "object",
 991            properties = new
 992            {
 993                selectedOptionIds = new
 994                {
 995                    type = "array",
 996                    items = new { type = "string", @enum = question.Options.Select(o => o.Id).ToArray() },
 997                    minItems = requiredSelections,
 998                    maxItems = requiredSelections
 999                }
 1000            },
 1001            required = new[] { "selectedOptionIds" },
 1002            additionalProperties = false
 1003        };
 1004
 1005        return JsonSerializer.SerializeToUtf8Bytes(schema);
 1006    }
 1007
         /// <summary>
         /// Deserializes and validates the model's answer for one bonus question.
         /// </summary>
         /// <param name="predictionJson">Raw JSON text returned by the model.</param>
         /// <param name="question">The question whose options and <c>MaxSelections</c> the answer is validated against.</param>
         /// <returns>
         /// The parsed prediction, or <c>null</c> when the answer contains unknown option ids, duplicate ids,
         /// a selection count different from <c>MaxSelections</c>, or when the JSON cannot be parsed.
         /// </returns>
         /// <exception cref="InvalidOperationException">
         /// Thrown when the response deserializes to <c>null</c> or selects no options. Only
         /// <see cref="JsonException"/> is caught in this method, so this exception reaches the caller.
         /// </exception>
 1008    private BonusPrediction? ParseSingleBonusPrediction(string predictionJson, BonusQuestion question)
 1009    {
 1010        try
 1011        {
 1012            _logger.LogDebug("Parsing single bonus prediction JSON: {PredictionJson}", predictionJson);
 1013
 1014            var response = JsonSerializer.Deserialize<SingleBonusPredictionResponse>(predictionJson);
                 // Covers a null response, a null id array and an empty id array in one check.
 1015            if (response?.SelectedOptionIds?.Any() != true)
 1016            {
 1017                throw new InvalidOperationException("Failed to deserialize bonus prediction response or no options selec
 1018            }
 1019
 1020            // Validate that all selected options exist for this question
 1021            var validOptionIds = question.Options.Select(o => o.Id).ToHashSet();
 1022            var invalidOptions = response.SelectedOptionIds.Where(id => !validOptionIds.Contains(id)).ToArray();
 1023
 1024            if (invalidOptions.Any())
 1025            {
 1026                _logger.LogWarning("Invalid option IDs for question '{QuestionText}': {InvalidOptions}",
 1027                    question.Text, string.Join(", ", invalidOptions));
 1028                return null;
 1029            }
 1030
 1031            // Validate no duplicate selections
 1032            var duplicateOptions = response.SelectedOptionIds
 1033                .GroupBy(id => id)
 1034                .Where(g => g.Count() > 1)
 1035                .Select(g => g.Key)
 1036                .ToArray();
 1037
 1038            if (duplicateOptions.Any())
 1039            {
 1040                _logger.LogWarning("Duplicate option IDs for question '{QuestionText}': {DuplicateOptions}",
 1041                    question.Text, string.Join(", ", duplicateOptions));
 1042                return null;
 1043            }
 1044
 1045            // Validate selection count - must match exactly MaxSelections for full predictions
 1046            if (response.SelectedOptionIds.Length != question.MaxSelections)
 1047            {
 1048                _logger.LogWarning("Invalid selection count for question '{QuestionText}': expected exactly {MaxSelectio
 1049                    question.Text, question.MaxSelections, response.SelectedOptionIds.Length);
 1050                return null;
 1051            }
 1052
 1053            var prediction = new BonusPrediction(response.SelectedOptionIds.ToList());
 1054
 1055            _logger.LogDebug("Parsed prediction: {SelectedOptions}",
 1056                string.Join(", ", response.SelectedOptionIds));
 1057
 1058            return prediction;
 1059        }
 1060        catch (JsonException ex)
 1061        {
                 // Malformed JSON is treated as "no prediction" rather than as a failure.
 1062            _logger.LogError(ex, "Failed to parse bonus prediction JSON: {PredictionJson}", predictionJson);
 1063            return null;
 1064        }
 1065    }
 1066
 1067    /// <summary>
 1068    /// Gets the file path of the match prediction prompt being used by this service
 1069    /// </summary>
 1070    /// <returns>The absolute file path to the match prompt file</returns>
 1071    public string GetMatchPromptPath(bool includeJustification = false)
 1072    {
 1073        return includeJustification
 1074            ? _instructionsTemplateWithJustification.Value.Path
 1075            : _instructionsTemplate.Value.Path;
 1076    }
 1077
 1078    /// <summary>
 1079    /// Gets the file path of the bonus question prediction prompt being used by this service
 1080    /// </summary>
 1081    /// <returns>The absolute file path to the bonus prompt file</returns>
 1082    public string GetBonusPromptPath() => _bonusInstructionsTemplate.Value.Path;
 1083
 1084    /// <summary>
 1085    /// Deserialization target for the structured match prediction JSON
 1086    /// (<c>home</c>, <c>away</c> and an optional <c>justification</c> object).
         /// </summary>
 1087    private class PredictionResponse
 1088    {
             // Predicted goals for the home team ("home" in the JSON).
 1089        [JsonPropertyName("home")]
 1090        public int Home { get; set; }
 1091
             // Predicted goals for the away team ("away" in the JSON).
 1092        [JsonPropertyName("away")]
 1093        public int Away { get; set; }
 1094
             // Stays null when the JSON carries no "justification" property.
 1095        [JsonPropertyName("justification")]
 1096        public JustificationResponse? Justification { get; set; }
 1097    }
 1098
         /// <summary>
         /// Deserialization target for the <c>justification</c> object of a match prediction.
         /// </summary>
         // NOTE(review): the properties are declared non-nullable with defaults, yet ParsePrediction reads
         // them with null-conditional access - presumably to tolerate explicit JSON nulls; confirm.
 1099    private class JustificationResponse
 1100    {
 1101        [JsonPropertyName("keyReasoning")]
 1102        public string KeyReasoning { get; set; } = string.Empty;
 1103
 1104        [JsonPropertyName("contextSources")]
 1105        public JustificationContextSourcesResponse ContextSources { get; set; } = new();
 1106
 1107        [JsonPropertyName("uncertainties")]
 1108        public string[] Uncertainties { get; set; } = Array.Empty<string>();
 1109    }
 1110
         /// <summary>
         /// Deserialization target for <c>justification.contextSources</c>: the context documents the
         /// model reported as most and least valuable.
         /// </summary>
 1111    private class JustificationContextSourcesResponse
 1112    {
 1113        [JsonPropertyName("mostValuable")]
 1114        public JustificationContextSourceEntry[] MostValuable { get; set; } = Array.Empty<JustificationContextSourceEntr
 1115
 1116        [JsonPropertyName("leastValuable")]
 1117        public JustificationContextSourceEntry[] LeastValuable { get; set; } = Array.Empty<JustificationContextSourceEnt
 1118    }
 1119
         /// <summary>
         /// Deserialization target for one context source entry: the referenced document's name and
         /// the model's short explanation for it.
         /// </summary>
 1120    private class JustificationContextSourceEntry
 1121    {
 1122        [JsonPropertyName("documentName")]
 1123        public string DocumentName { get; set; } = string.Empty;
 1124
 1125        [JsonPropertyName("details")]
 1126        public string Details { get; set; } = string.Empty;
 1127    }
 1128
 1129    /// <summary>
 1130    /// Deserialization target for a response that carries several bonus predictions in a
         /// <c>predictions</c> array.
 1131    /// </summary>
         // NOTE(review): no use of this type is visible in this part of the file - confirm it is still needed.
 1132    private class BonusPredictionsResponse
 1133    {
 1134        [JsonPropertyName("predictions")]
 1135        public BonusPredictionEntry[]? Predictions { get; set; }
 1136    }
 1137
 1138    /// <summary>
 1139    /// Deserialization target for one entry of <see cref="BonusPredictionsResponse.Predictions"/>:
         /// a question id together with the option ids selected for it.
 1140    /// </summary>
 1141    private class BonusPredictionEntry
 1142    {
 1143        [JsonPropertyName("questionId")]
 1144        public string QuestionId { get; set; } = string.Empty;
 1145
 1146        [JsonPropertyName("selectedOptionIds")]
 1147        public string[] SelectedOptionIds { get; set; } = Array.Empty<string>();
 1148    }
 1149
 1150    /// <summary>
 1151    /// Deserialization target for the answer to a single bonus question; read by
         /// <c>ParseSingleBonusPrediction</c>.
 1152    /// </summary>
 1153    private class SingleBonusPredictionResponse
 1154    {
             // Defaults to an empty array, which ParseSingleBonusPrediction rejects as "no options selected".
 1155        [JsonPropertyName("selectedOptionIds")]
 1156        public string[] SelectedOptionIds { get; set; } = Array.Empty<string>();
 1157    }
 1158
 1159    /// <summary>
 1160    /// Sets Langfuse-mapped OpenTelemetry attributes on the given activity.
 1161    /// If <paramref name="activity"/> is <c>null</c> (no OTel listener registered), this is a no-op.
 1162    /// </summary>
         /// <param name="activity">Activity to tag; may be <c>null</c>.</param>
         /// <param name="messages">Request messages, serialized as role/content pairs into the observation input.</param>
         /// <param name="responseJson">Model output, stored unchanged as the observation output.</param>
         /// <param name="usage">Token usage reported for the call.</param>
         /// <param name="telemetryMetadata">Optional caller metadata applied to the activity via <c>ApplyToObservation</c>.</param>
         /// <param name="executionTelemetry">
         /// Optional service-tier execution details; when <c>null</c>, no service-tier tags and no cost details are set.
         /// </param>
 1163    private void SetLangfuseGenerationAttributes(
 1164        Activity? activity,
 1165        IReadOnlyList<PredictionRequestMessage> messages,
 1166        string responseJson,
 1167        ChatTokenUsage usage,
 1168        PredictionTelemetryMetadata? telemetryMetadata,
 1169        PredictionExecutionTelemetry? executionTelemetry = null)
 1170    {
 1171        if (activity is null)
 1172            return;
 1173
 1174        activity.SetTag("langfuse.observation.type", "generation");
 1175        activity.SetTag("gen_ai.request.model", _model);
             // Prompt metadata from the template provider (if it implements the telemetry interface)
             // takes precedence over the statically configured options below.
 1176        var providerPromptMetadata = (_templateProvider as IPromptTemplateTelemetryMetadataProvider)
 1177            ?.GetPromptTemplateTelemetryMetadata();
 1178
             // The provider's name/version are only used when both are non-null.
 1179        if (providerPromptMetadata?.LangfusePromptName is { } providerPromptName &&
 1180            providerPromptMetadata.LangfusePromptVersion is { } providerPromptVersion)
 1181        {
 1182            activity.SetTag("langfuse.observation.prompt.name", providerPromptName);
 1183            activity.SetTag("langfuse.observation.prompt.version", providerPromptVersion);
 1184        }
 1185        else if (_options.LangfusePromptTraceMetadata is { } promptTraceMetadata)
 1186        {
 1187            activity.SetTag("langfuse.observation.prompt.name", promptTraceMetadata.Name);
 1188            activity.SetTag("langfuse.observation.prompt.version", promptTraceMetadata.Version);
 1189        }
 1190
             // Fallback flag: always recorded when provider metadata exists, otherwise only when
             // the configured metadata marks a fallback.
 1191        if (providerPromptMetadata is not null)
 1192        {
 1193            activity.SetTag("langfuse.observation.metadata.langfusePromptFallback", providerPromptMetadata.IsFallback);
 1194            activity.SetTag("langfuse.observation.metadata.promptTemplatePath", providerPromptMetadata.PromptPath);
 1195        }
 1196        else if (_options.LangfusePromptTraceMetadata is { IsFallback: true })
 1197        {
 1198            activity.SetTag("langfuse.observation.metadata.langfusePromptFallback", true);
 1199        }
 1200
 1201        if (!string.IsNullOrWhiteSpace(_options.ReasoningEffort))
 1202        {
                 // Normalized (trimmed, lower-cased) before being written to both tags.
 1203            var reasoningEffort = _options.ReasoningEffort.Trim().ToLowerInvariant();
 1204            activity.SetTag("gen_ai.request.reasoning_effort", reasoningEffort);
 1205            activity.SetTag("langfuse.observation.metadata.openaiReasoningEffort", reasoningEffort);
 1206        }
 1207
 1208        if (executionTelemetry is not null)
 1209        {
 1210            activity.SetTag("gen_ai.request.service_tier", executionTelemetry.RequestedServiceTier);
 1211            activity.SetTag("gen_ai.response.service_tier", executionTelemetry.FinalServiceTier);
 1212            activity.SetTag("langfuse.observation.metadata.openaiExecutionStrategy", executionTelemetry.Strategy);
 1213            activity.SetTag("langfuse.observation.metadata.openaiRequestedServiceTier", executionTelemetry.RequestedServ
 1214            activity.SetTag("langfuse.observation.metadata.openaiFinalServiceTier", executionTelemetry.FinalServiceTier)
 1215            activity.SetTag("langfuse.observation.metadata.openaiServiceTierFallbackUsed", executionTelemetry.FallbackUs
 1216        }
 1217
 1218        // Serialize messages as input (system prompt + user message)
 1219        var inputMessages = messages.Select(m => new
 1220        {
 1221            role = m.Role,
 1222            content = m.Content
 1223        });
 1224        activity.SetTag("langfuse.observation.input", JsonSerializer.Serialize(inputMessages));
 1225        activity.SetTag("langfuse.observation.output", responseJson);
 1226        telemetryMetadata?.ApplyToObservation(activity);
 1227
 1228        // Token usage details
             // "total" is computed here as input + output rather than read from usage.TotalTokenCount.
 1229        var usageDetails = new
 1230        {
 1231            input = usage.InputTokenCount,
 1232            output = usage.OutputTokenCount,
 1233            cache_read_input_tokens = usage.InputTokenDetails?.CachedTokenCount ?? 0,
 1234            reasoning_tokens = usage.OutputTokenDetails?.ReasoningTokenCount ?? 0,
 1235            total = usage.InputTokenCount + usage.OutputTokenCount
 1236        };
 1237        activity.SetTag("langfuse.observation.usage_details", JsonSerializer.Serialize(usageDetails));
 1238
             // Cost details are attached only when execution telemetry is available and the cost
             // service returns a breakdown for the final service tier.
 1239        if (executionTelemetry is not null &&
 1240            _costCalculationService.CalculateCostBreakdown(_model, usage, executionTelemetry.FinalServiceTier) is { } co
 1241        {
 1242            var costDetails = new
 1243            {
 1244                input = costBreakdown.Input,
 1245                cache_read_input_tokens = costBreakdown.CachedInput,
 1246                output = costBreakdown.Output,
 1247                total = costBreakdown.Total
 1248            };
 1249            activity.SetTag("langfuse.observation.cost_details", JsonSerializer.Serialize(costDetails));
 1250        }
 1251    }
 1252
         /// <summary>
         /// One request message: a role (the visible callers pass "system" and "user") and its text content.
         /// </summary>
 1253    private sealed record PredictionRequestMessage(string Role, string Content);
 1254
         /// <summary>
         /// Result of one model call: the prediction JSON text, the token usage, and optional
         /// service-tier execution telemetry.
         /// </summary>
         // NOTE(review): FinalServiceTier is not read in the visible part of the file (callers use
         // ExecutionTelemetry.FinalServiceTier) - confirm where it is consumed.
 1255    private sealed record OpenAiResponseResult(
 1256        string PredictionJson,
 1257        ChatTokenUsage Usage,
 1258        PredictionExecutionTelemetry? ExecutionTelemetry,
 1259        string? FinalServiceTier = null);
 1260
          /// <summary>
          /// Service-tier execution details of a model call. <c>SetLangfuseGenerationAttributes</c>
          /// writes these values to activity tags, and <c>FinalServiceTier</c> is also passed to
          /// cost calculation.
          /// </summary>
 11261    private sealed record PredictionExecutionTelemetry(
 11262        string Strategy,
 11263        string RequestedServiceTier,
 11264        string FinalServiceTier,
 11265        bool FallbackUsed);
 1266
 1267    private static ChatTokenUsage ToChatTokenUsage(ResponseTokenUsage usage)
 1268    {
 1269        var cachedTokenCount = usage.InputTokenDetails?.CachedTokenCount ?? 0;
 1270        var reasoningTokenCount = usage.OutputTokenDetails?.ReasoningTokenCount ?? 0;
 1271        var inputDetails = cachedTokenCount > 0
 1272            ? OpenAIChatModelFactory.ChatInputTokenUsageDetails(cachedTokenCount: cachedTokenCount)
 1273            : null;
 1274        var outputDetails = reasoningTokenCount > 0
 1275            ? OpenAIChatModelFactory.ChatOutputTokenUsageDetails(reasoningTokenCount: reasoningTokenCount)
 1276            : null;
 1277
 1278        return OpenAIChatModelFactory.ChatTokenUsage(
 1279            inputTokenCount: usage.InputTokenCount,
 1280            outputTokenCount: usage.OutputTokenCount,
 1281            inputTokenDetails: inputDetails,
 1282            outputTokenDetails: outputDetails);
 1283    }
 1284}