< Summary

Information
Class: OpenAiIntegration.PredictionService
Assembly: OpenAiIntegration
File(s): /home/runner/work/KicktippAi/KicktippAi/src/OpenAiIntegration/PredictionService.cs
Line coverage
93%
Covered lines: 670
Uncovered lines: 44
Coverable lines: 714
Total lines: 1284
Line coverage: 93.8%
Branch coverage
74%
Covered branches: 226
Total branches: 304
Branch coverage: 74.3%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

MethodBranch coverage Crap Score Cyclomatic complexity Line coverage
.cctor()100%11100%
.ctor(...)100%1414100%
PredictMatchAsync()100%44100%
CompleteMatchResponseAsync(...)100%11100%
CompleteBonusResponseAsync(...)100%11100%
CompleteStructuredResponseAsync()100%22100%
<CompleteStructuredResponseAsync()50%66100%
CompleteResponseAsync()50%6687.5%
CreateResponseWithTransientRetryAsync()100%44100%
<CreateResponseWithTransientRetryAsync()100%11100%
CreateMatchResponseOptions(...)100%11100%
CreateBonusResponseOptions(...)100%11100%
CreateResponseOptions(...)100%66100%
NormalizeReasoningEffort(...)100%22100%
NormalizeServiceTier(...)100%22100%
NormalizeResponseServiceTier(...)50%44100%
CreateResponseMessage(...)75%4483.33%
IsFlexProcessingRequest(...)100%11100%
IsFlexFallbackFailure(...)72.22%331864.29%
IsTransientOpenAiServerFailure(...)100%88100%
IsRetryableOpenAiRateLimitFailure(...)68.75%1616100%
ResolveOpenAiRateLimitDelay(...)50%9433.33%
TryGetOpenAiRateLimitResetDelay(...)33.33%8664.29%
AddRateLimitResetDelay(...)70%111080%
TryGetOpenAiRateLimitHeader(...)83.33%66100%
TryParseOpenAiRateLimitReset(...)37.5%251666.67%
ClampOpenAiRateLimitDelay(...)50%4480%
IsFlexResourceUnavailableFailure(...)75%88100%
ContainsFlexResourceUnavailableMarker(...)90%101085.71%
ContainsQuotaExhaustedMarker(...)83.33%6680%
PredictBonusQuestionAsync()100%66100%
BuildInstructions(...)100%44100%
BuildPredictionJsonSchema(...)100%22100%
ParsePrediction(...)72.22%3636100%
LogRawModelResponse(...)100%22100%
BuildBonusInstructions(...)100%22100%
CreateSingleBonusPredictionJsonSchema(...)100%22100%
ParseSingleBonusPrediction(...)95%2020100%
GetMatchPromptPath(...)100%22100%
GetBonusPromptPath()100%11100%
.ctor()100%11100%
.ctor()100%11100%
.ctor()100%11100%
.ctor()100%210%
.ctor()100%11100%
SetLangfuseGenerationAttributes(...)76.47%353489.47%
.ctor(...)100%11100%
.ctor(...)100%11100%
.ctor(...)100%11100%
ToChatTokenUsage(...)75%88100%

File(s)

/home/runner/work/KicktippAi/KicktippAi/src/OpenAiIntegration/PredictionService.cs

#LineLine coverage
 1using System.Collections.Generic;
 2using System.ClientModel;
 3using System.ClientModel.Primitives;
 4using System.Diagnostics;
 5using System.Globalization;
 6using System.Linq;
 7using System.Text.Json;
 8using System.Text.Json.Serialization;
 9using System.Text.RegularExpressions;
 10using EHonda.KicktippAi.Core;
 11using Microsoft.Extensions.Logging;
 12using OpenAI.Chat;
 13using OpenAI.Responses;
 14using Polly;
 15using Polly.Retry;
 16using Polly.Timeout;
 17
 18namespace OpenAiIntegration;
 19
 20/// <summary>
 21/// Service for predicting match outcomes using OpenAI models
 22/// </summary>
 23public class PredictionService : IPredictionService
 24{
 25    private const int TransientOpenAiMaxRetryAttempts = 3;
 26    private const int RateLimitedOpenAiMaxRetryAttempts = 8;
 27    private const string FlexServiceTier = "flex";
 28    private const string DefaultServiceTier = "default";
 129    private static readonly TimeSpan TransientOpenAiRetryDelay = TimeSpan.FromSeconds(2);
 130    private static readonly TimeSpan RateLimitedOpenAiRetryBaseDelay = TimeSpan.FromSeconds(2);
 131    private static readonly TimeSpan RateLimitedOpenAiRetryMaxDelay = TimeSpan.FromMinutes(2);
 32
 33    private readonly ResponsesClient _responsesClient;
 34    private readonly ILogger<PredictionService> _logger;
 35    private readonly ICostCalculationService _costCalculationService;
 36    private readonly ITokenUsageTracker _tokenUsageTracker;
 37    private readonly IInstructionsTemplateProvider _templateProvider;
 38    private readonly PredictionServiceOptions _options;
 39    private readonly string _model;
 40    private readonly Lazy<(string Template, string Path)> _instructionsTemplate;
 41    private readonly Lazy<(string Template, string Path)> _instructionsTemplateWithJustification;
 42    private readonly Lazy<(string Template, string Path)> _bonusInstructionsTemplate;
 43
 144    public PredictionService(
 145        ResponsesClient responsesClient,
 146        ILogger<PredictionService> logger,
 147        ICostCalculationService costCalculationService,
 148        ITokenUsageTracker tokenUsageTracker,
 149        IInstructionsTemplateProvider templateProvider,
 150        string model,
 151        PredictionServiceOptions? options = null)
 52    {
 153        _responsesClient = responsesClient ?? throw new ArgumentNullException(nameof(responsesClient));
 154        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
 155        _costCalculationService = costCalculationService ?? throw new ArgumentNullException(nameof(costCalculationServic
 156        _tokenUsageTracker = tokenUsageTracker ?? throw new ArgumentNullException(nameof(tokenUsageTracker));
 157        _templateProvider = templateProvider ?? throw new ArgumentNullException(nameof(templateProvider));
 158        _options = options ?? PredictionServiceOptions.Default;
 159        _model = model ?? throw new ArgumentNullException(nameof(model));
 60
 161        _instructionsTemplate = new Lazy<(string Template, string Path)>(
 162            () => _templateProvider.LoadMatchTemplate(_model, includeJustification: false));
 163        _instructionsTemplateWithJustification = new Lazy<(string Template, string Path)>(
 164            () => _templateProvider.LoadMatchTemplate(_model, includeJustification: true));
 165        _bonusInstructionsTemplate = new Lazy<(string Template, string Path)>(
 166            () => _templateProvider.LoadBonusTemplate(_model));
 167    }
 68
 69    public async Task<Prediction?> PredictMatchAsync(
 70        EHonda.KicktippAi.Core.Match match,
 71        IEnumerable<DocumentContext> contextDocuments,
 72        bool includeJustification = false,
 73        PredictionTelemetryMetadata? telemetryMetadata = null,
 74        CancellationToken cancellationToken = default)
 75    {
 176        _logger.LogInformation("Generating prediction for match: {HomeTeam} vs {AwayTeam} at {StartTime}",
 177            match.HomeTeam, match.AwayTeam, match.StartsAt);
 78
 79        try
 80        {
 81            // Build the instructions by combining template with context
 182            var instructions = BuildInstructions(contextDocuments, includeJustification);
 83
 84            // Create match JSON
 185            var matchJson = PredictionPromptComposer.CreateMatchJson(match);
 86
 187            _logger.LogDebug("Instructions length: {InstructionsLength} characters", instructions.Length);
 188            _logger.LogDebug("Context documents: {ContextCount}", contextDocuments.Count());
 189            _logger.LogDebug("Match JSON: {MatchJson}", matchJson);
 90
 91            // Create input items for the response
 192            var messages = new List<PredictionRequestMessage>
 193            {
 194                new("system", instructions),
 195                new("user", matchJson)
 196            };
 97
 198            _logger.LogDebug("Calling OpenAI API for prediction");
 99
 100            // Start an OTel activity for Langfuse generation tracking
 1101            using var activity = Telemetry.Source.StartActivity("predict-match");
 102
 103            // Call OpenAI with structured output format
 1104            var completion = await CompleteMatchResponseAsync(messages, includeJustification, cancellationToken);
 105
 106            // Parse the structured response
 1107            var predictionJson = completion.PredictionJson;
 1108            _logger.LogDebug("Received prediction JSON: {PredictionJson}", predictionJson);
 109
 1110            var prediction = ParsePrediction(predictionJson);
 111
 1112            _logger.LogInformation("Prediction generated: {HomeGoals}-{AwayGoals} for {HomeTeam} vs {AwayTeam}",
 1113                prediction.HomeGoals, prediction.AwayGoals, match.HomeTeam, match.AwayTeam);
 114
 115            // Log token usage and cost breakdown
 1116            var usage = completion.Usage;
 1117            _logger.LogDebug("Token usage - Input: {InputTokens}, Output: {OutputTokens}, Total: {TotalTokens}",
 1118                usage.InputTokenCount, usage.OutputTokenCount, usage.TotalTokenCount);
 119
 120            // Set Langfuse generation attributes on the activity
 1121            SetLangfuseGenerationAttributes(activity, messages, predictionJson, usage, telemetryMetadata, completion.Exe
 122
 123            // Add usage to tracker
 1124            if (completion.ExecutionTelemetry is null)
 125            {
 1126                _tokenUsageTracker.AddUsage(_model, usage);
 127            }
 128            else
 129            {
 1130                _tokenUsageTracker.AddUsage(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 131            }
 132
 133            // Calculate and log costs
 1134            if (completion.ExecutionTelemetry is null)
 135            {
 1136                _costCalculationService.LogCostBreakdown(_model, usage);
 137            }
 138            else
 139            {
 1140                _costCalculationService.LogCostBreakdown(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 141            }
 142
 1143            return prediction;
 144        }
 1145        catch (Exception ex)
 146        {
 1147            _logger.LogError(ex, "Error generating prediction for match: {HomeTeam} vs {AwayTeam}",
 1148                match.HomeTeam, match.AwayTeam);
 1149            Console.Error.WriteLine($"Prediction error for {match.HomeTeam} vs {match.AwayTeam}: {ex.Message}");
 150
 1151            return null;
 152        }
 1153    }
 154
 155    private Task<OpenAiResponseResult> CompleteMatchResponseAsync(
 156        IReadOnlyList<PredictionRequestMessage> messages,
 157        bool includeJustification,
 158        CancellationToken cancellationToken)
 159    {
 1160        return CompleteStructuredResponseAsync(
 1161            completeResponseAsync: (serviceTier, ct) => CompleteResponseAsync(
 1162                CreateMatchResponseOptions(
 1163                    messages,
 1164                    includeJustification,
 1165                    serviceTier,
 1166                    _options.ReasoningEffort),
 1167                serviceTier,
 1168                ct),
 1169            cancellationToken);
 170    }
 171
 172    private Task<OpenAiResponseResult> CompleteBonusResponseAsync(
 173        IReadOnlyList<PredictionRequestMessage> messages,
 174        BonusQuestion bonusQuestion,
 175        CancellationToken cancellationToken)
 176    {
 1177        return CompleteStructuredResponseAsync(
 1178            completeResponseAsync: (serviceTier, ct) => CompleteResponseAsync(
 1179                CreateBonusResponseOptions(
 1180                    messages,
 1181                    bonusQuestion,
 1182                    serviceTier,
 1183                    _options.ReasoningEffort),
 1184                serviceTier,
 1185                ct),
 1186            cancellationToken);
 187    }
 188
 189    private async Task<OpenAiResponseResult> CompleteStructuredResponseAsync(
 190        Func<string?, CancellationToken, Task<OpenAiResponseResult>> completeResponseAsync,
 191        CancellationToken cancellationToken)
 192    {
 1193        if (_options.DisableFlexProcessing)
 194        {
 1195            return await completeResponseAsync(null, cancellationToken);
 196        }
 197
 1198        string? requestedServiceTier = FlexServiceTier;
 1199        var usedFallback = false;
 1200        var pipeline = new ResiliencePipelineBuilder<OpenAiResponseResult>()
 1201            // OpenAI documents that Flex processing can return 429 Resource Unavailable
 1202            // when resources are insufficient, and recommends retrying with standard
 1203            // processing when occasional higher cost is acceptable:
 1204            // https://developers.openai.com/api/docs/guides/flex-processing
 1205            // The Responses API reference documents service_tier=default as standard
 1206            // pricing/performance, so flex 429 retries switch to that tier:
 1207            // https://platform.openai.com/docs/api-reference/responses/create
 1208            .AddRetry(new RetryStrategyOptions<OpenAiResponseResult>
 1209            {
 1210                MaxRetryAttempts = 1,
 1211                Delay = TimeSpan.Zero,
 1212                ShouldHandle = args => ValueTask.FromResult(
 1213                    IsFlexProcessingRequest(requestedServiceTier) &&
 1214                    IsFlexFallbackFailure(args.Outcome.Exception, args.Context.CancellationToken)),
 1215                OnRetry = args =>
 1216                {
 1217                    usedFallback = true;
 1218                    requestedServiceTier = DefaultServiceTier;
 1219                    _logger.LogWarning(
 1220                        args.Outcome.Exception,
 1221                        "OpenAI flex processing failed with a retryable failure; retrying prediction with default proces
 1222                    return default;
 1223                }
 1224            })
 1225            .Build();
 226
 1227        var result = await pipeline.ExecuteAsync(
 1228            async ct =>
 1229            {
 1230                var completion = await completeResponseAsync(requestedServiceTier, ct);
 1231
 1232                var finalServiceTier = string.IsNullOrWhiteSpace(completion.FinalServiceTier)
 1233                    ? requestedServiceTier ?? "standard"
 1234                    : completion.FinalServiceTier;
 1235
 1236                return completion with
 1237                {
 1238                    ExecutionTelemetry = new PredictionExecutionTelemetry(
 1239                        "flex-first-standard-fallback",
 1240                        usedFallback ? DefaultServiceTier : FlexServiceTier,
 1241                        finalServiceTier,
 1242                        usedFallback)
 1243                };
 1244            },
 1245            cancellationToken);
 246
 1247        return result;
 1248    }
 249
 250    private async Task<OpenAiResponseResult> CompleteResponseAsync(
 251        CreateResponseOptions options,
 252        string? serviceTier,
 253        CancellationToken cancellationToken)
 254    {
 1255        var response = await CreateResponseWithTransientRetryAsync(options, serviceTier, cancellationToken);
 1256        var responseResult = response.Value;
 1257        var predictionJson = responseResult.GetOutputText();
 1258        if (predictionJson is null)
 259        {
 0260            throw new InvalidOperationException("OpenAI response did not contain output text.");
 261        }
 262
 1263        var usage = responseResult.Usage is null
 1264                    ? null
 1265                    : ToChatTokenUsage(responseResult.Usage);
 1266        if (usage is null)
 267        {
 0268            throw new InvalidOperationException("OpenAI response did not contain token usage.");
 269        }
 270
 1271        return new OpenAiResponseResult(
 1272            predictionJson,
 1273            usage,
 1274            null,
 1275            NormalizeResponseServiceTier(responseResult.ServiceTier));
 1276    }
 277
 278    private async Task<ClientResult<ResponseResult>> CreateResponseWithTransientRetryAsync(
 279        CreateResponseOptions options,
 280        string? requestedServiceTier,
 281        CancellationToken cancellationToken)
 282    {
 1283        var pipeline = new ResiliencePipelineBuilder<ClientResult<ResponseResult>>()
 1284            // OpenAI documents 429 rate-limit errors as pacing problems and recommends bounded
 1285            // random exponential backoff. It also documents x-ratelimit-* response headers
 1286            // for reset timing:
 1287            // https://platform.openai.com/docs/guides/rate-limits
 1288            // https://platform.openai.com/docs/guides/error-codes
 1289            // https://platform.openai.com/docs/api-reference
 1290            // These references document x-ratelimit-* reset headers, not Retry-After.
 1291            .AddRetry(new RetryStrategyOptions<ClientResult<ResponseResult>>
 1292            {
 1293                MaxRetryAttempts = RateLimitedOpenAiMaxRetryAttempts,
 1294                DelayGenerator = args => new ValueTask<TimeSpan?>(
 1295                    ResolveOpenAiRateLimitDelay(args.Outcome.Exception, args.AttemptNumber)),
 1296                ShouldHandle = args => ValueTask.FromResult(
 1297                    !IsFlexProcessingRequest(requestedServiceTier) &&
 1298                    IsRetryableOpenAiRateLimitFailure(args.Outcome.Exception, args.Context.CancellationToken)),
 1299                OnRetry = args =>
 1300                {
 1301                    _logger.LogWarning(
 1302                        args.Outcome.Exception,
 1303                        "OpenAI request hit a rate limit; retrying prediction request ({RetryAttempt}/{MaxRetryAttempts}
 1304                        args.AttemptNumber + 1,
 1305                        RateLimitedOpenAiMaxRetryAttempts,
 1306                        args.RetryDelay);
 1307                    return default;
 1308                }
 1309            })
 1310            .AddRetry(new RetryStrategyOptions<ClientResult<ResponseResult>>
 1311            {
 1312                MaxRetryAttempts = TransientOpenAiMaxRetryAttempts,
 1313                Delay = TransientOpenAiRetryDelay,
 1314                BackoffType = DelayBackoffType.Exponential,
 1315                UseJitter = true,
 1316                ShouldHandle = args => ValueTask.FromResult(
 1317                    IsTransientOpenAiServerFailure(args.Outcome.Exception, args.Context.CancellationToken)),
 1318                OnRetry = args =>
 1319                {
 1320                    _logger.LogWarning(
 1321                        args.Outcome.Exception,
 1322                        "OpenAI request failed with a transient server error; retrying prediction request ({RetryAttempt
 1323                        args.AttemptNumber + 1,
 1324                        TransientOpenAiMaxRetryAttempts);
 1325                    return default;
 1326                }
 1327            })
 1328            .Build();
 329
 1330        return await pipeline.ExecuteAsync(
 1331            async ct => await _responsesClient.CreateResponseAsync(options, ct),
 1332            cancellationToken);
 1333    }
 334
 335    private CreateResponseOptions CreateMatchResponseOptions(
 336        IReadOnlyList<PredictionRequestMessage> messages,
 337        bool includeJustification,
 338        string? serviceTier,
 339        string? reasoningEffort)
 340    {
 1341        return CreateResponseOptions(
 1342            messages,
 1343            "match_prediction",
 1344            BinaryData.FromBytes(BuildPredictionJsonSchema(includeJustification)),
 1345            serviceTier,
 1346            reasoningEffort);
 347    }
 348
 349    private CreateResponseOptions CreateBonusResponseOptions(
 350        IReadOnlyList<PredictionRequestMessage> messages,
 351        BonusQuestion bonusQuestion,
 352        string? serviceTier,
 353        string? reasoningEffort)
 354    {
 1355        return CreateResponseOptions(
 1356            messages,
 1357            "bonus_prediction",
 1358            BinaryData.FromBytes(CreateSingleBonusPredictionJsonSchema(bonusQuestion)),
 1359            serviceTier,
 1360            reasoningEffort);
 361    }
 362
 363    private CreateResponseOptions CreateResponseOptions(
 364        IReadOnlyList<PredictionRequestMessage> messages,
 365        string schemaName,
 366        BinaryData schema,
 367        string? serviceTier,
 368        string? reasoningEffort)
 369    {
 1370        var options = new CreateResponseOptions
 1371        {
 1372            Model = _model,
 1373            MaxOutputTokenCount = _options.MaxOutputTokenCount, // Safeguard against high costs
 1374            TextOptions = new ResponseTextOptions
 1375            {
 1376                TextFormat = ResponseTextFormat.CreateJsonSchemaFormat(
 1377                    jsonSchemaFormatName: schemaName,
 1378                    jsonSchema: schema,
 1379                    jsonSchemaIsStrict: true)
 1380            }
 1381        };
 382
 1383        foreach (var message in messages)
 384        {
 1385            options.InputItems.Add(CreateResponseMessage(message));
 386        }
 387
 1388        var normalizedServiceTier = NormalizeServiceTier(serviceTier);
 1389        if (normalizedServiceTier is not null)
 390        {
 1391            options.ServiceTier = new ResponseServiceTier(normalizedServiceTier);
 392        }
 393
 1394        var normalizedReasoningEffort = NormalizeReasoningEffort(reasoningEffort);
 1395        if (normalizedReasoningEffort is not null)
 396        {
 1397            options.ReasoningOptions = new ResponseReasoningOptions
 1398            {
 1399                ReasoningEffortLevel = new ResponseReasoningEffortLevel(normalizedReasoningEffort)
 1400            };
 401        }
 402
 1403        return options;
 404    }
 405
 406    private static string? NormalizeReasoningEffort(string? reasoningEffort)
 407    {
 1408        return string.IsNullOrWhiteSpace(reasoningEffort)
 1409            ? null
 1410            : reasoningEffort.Trim().ToLowerInvariant();
 411    }
 412
 413    private static string? NormalizeServiceTier(string? serviceTier)
 414    {
 1415        return string.IsNullOrWhiteSpace(serviceTier)
 1416            ? null
 1417            : serviceTier.Trim().ToLowerInvariant();
 418    }
 419
 420    private static string? NormalizeResponseServiceTier(ResponseServiceTier? serviceTier)
 421    {
 1422        return string.IsNullOrWhiteSpace(serviceTier?.ToString())
 1423            ? null
 1424            : serviceTier.Value.ToString().Trim().ToLowerInvariant();
 425    }
 426
 427    private static ResponseItem CreateResponseMessage(PredictionRequestMessage message)
 428    {
 1429        return message.Role switch
 1430        {
 1431            "system" => ResponseItem.CreateSystemMessageItem(message.Content),
 1432            "user" => ResponseItem.CreateUserMessageItem(message.Content),
 0433            _ => throw new InvalidOperationException($"Unsupported response message role '{message.Role}'.")
 1434        };
 435    }
 436
 437    private static bool IsFlexProcessingRequest(string? requestedServiceTier)
 438    {
 1439        return string.Equals(
 1440            NormalizeServiceTier(requestedServiceTier),
 1441            FlexServiceTier,
 1442            StringComparison.OrdinalIgnoreCase);
 443    }
 444
 445    private static bool IsFlexFallbackFailure(Exception? exception, CancellationToken cancellationToken)
 446    {
 1447        if (exception is null)
 448        {
 1449            return false;
 450        }
 451
 1452        if (cancellationToken.IsCancellationRequested)
 453        {
 0454            return false;
 455        }
 456
 1457        return exception switch
 1458        {
 0459            ClientResultException { Status: 408 } => true,
 1460            ClientResultException { Status: 429 } clientException => IsFlexResourceUnavailableFailure(clientException)
 1461                || IsRetryableOpenAiRateLimitFailure(clientException, cancellationToken),
 0462            TimeoutRejectedException => true,
 0463            TimeoutException => true,
 0464            TaskCanceledException => true,
 1465            _ => false
 1466        };
 467    }
 468
 469    private static bool IsTransientOpenAiServerFailure(Exception? exception, CancellationToken cancellationToken)
 470    {
 1471        if (exception is null || cancellationToken.IsCancellationRequested)
 472        {
 1473            return false;
 474        }
 475
 1476        return exception is ClientResultException { Status: >= 500 and <= 599 };
 477    }
 478
 479    private static bool IsRetryableOpenAiRateLimitFailure(Exception? exception, CancellationToken cancellationToken)
 480    {
 1481        if (exception is not ClientResultException { Status: 429 } clientException ||
 1482            cancellationToken.IsCancellationRequested)
 483        {
 1484            return false;
 485        }
 486
 1487        return !IsFlexResourceUnavailableFailure(clientException)
 1488               && !ContainsQuotaExhaustedMarker(clientException.Message)
 1489               && !ContainsQuotaExhaustedMarker(clientException.GetRawResponse()?.ReasonPhrase)
 1490               && !ContainsQuotaExhaustedMarker(clientException.GetRawResponse()?.Content.ToString());
 491    }
 492
 493    private static TimeSpan ResolveOpenAiRateLimitDelay(Exception? exception, int attemptNumber)
 494    {
 1495        if (exception is ClientResultException clientException &&
 1496            TryGetOpenAiRateLimitResetDelay(clientException.GetRawResponse(), out var resetDelay))
 497        {
 1498            return ClampOpenAiRateLimitDelay(resetDelay);
 499        }
 500
 0501        var cappedExponentialMilliseconds = Math.Min(
 0502            RateLimitedOpenAiRetryMaxDelay.TotalMilliseconds,
 0503            RateLimitedOpenAiRetryBaseDelay.TotalMilliseconds * Math.Pow(2, Math.Max(0, attemptNumber)));
 0504        var jitterFloorMilliseconds = cappedExponentialMilliseconds / 2;
 0505        var jitteredMilliseconds = jitterFloorMilliseconds + Random.Shared.NextDouble() * jitterFloorMilliseconds;
 506
 0507        return ClampOpenAiRateLimitDelay(TimeSpan.FromMilliseconds(jitteredMilliseconds));
 508    }
 509
 510    private static bool TryGetOpenAiRateLimitResetDelay(PipelineResponse? response, out TimeSpan delay)
 511    {
 1512        delay = default;
 1513        if (response is null)
 514        {
 0515            return false;
 516        }
 517
 1518        var exhaustedDelays = new List<TimeSpan>();
 1519        var availableDelays = new List<TimeSpan>();
 520
 1521        AddRateLimitResetDelay(response, "requests", exhaustedDelays, availableDelays);
 1522        AddRateLimitResetDelay(response, "tokens", exhaustedDelays, availableDelays);
 523
 1524        if (exhaustedDelays.Count > 0)
 525        {
 1526            delay = exhaustedDelays.Max();
 1527            return true;
 528        }
 529
 0530        if (availableDelays.Count > 0)
 531        {
 0532            delay = availableDelays.Max();
 0533            return true;
 534        }
 535
 0536        return false;
 537    }
 538
 539    private static void AddRateLimitResetDelay(
 540        PipelineResponse response,
 541        string dimension,
 542        List<TimeSpan> exhaustedDelays,
 543        List<TimeSpan> availableDelays)
 544    {
 1545        if (!TryGetOpenAiRateLimitHeader(response, $"x-ratelimit-reset-{dimension}", out var resetText) ||
 1546            !TryParseOpenAiRateLimitReset(resetText, out var resetDelay))
 547        {
 1548            return;
 549        }
 550
 1551        if (TryGetOpenAiRateLimitHeader(response, $"x-ratelimit-remaining-{dimension}", out var remainingText) &&
 1552            decimal.TryParse(remainingText, NumberStyles.Number, CultureInfo.InvariantCulture, out var remaining) &&
 1553            remaining <= 0)
 554        {
 1555            exhaustedDelays.Add(resetDelay);
 1556            return;
 557        }
 558
 0559        availableDelays.Add(resetDelay);
 0560    }
 561
 562    private static bool TryGetOpenAiRateLimitHeader(PipelineResponse response, string name, out string value)
 563    {
 1564        if (response.Headers is not null && response.Headers.TryGetValue(name, out var headerValue))
 565        {
 1566            value = headerValue ?? string.Empty;
 1567            return true;
 568        }
 569
 1570        value = string.Empty;
 1571        return false;
 572    }
 573
 574    private static bool TryParseOpenAiRateLimitReset(string text, out TimeSpan delay)
 575    {
 1576        delay = default;
 1577        if (string.IsNullOrWhiteSpace(text))
 578        {
 0579            return false;
 580        }
 581
 1582        var matches = Regex.Matches(
 1583            text.Trim(),
 1584            @"(?<value>\d+(?:\.\d+)?)(?<unit>ms|s|m|h)",
 1585            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
 1586        if (matches.Count == 0)
 587        {
 0588            return false;
 589        }
 590
 1591        var totalMilliseconds = 0.0;
 1592        foreach (System.Text.RegularExpressions.Match match in matches)
 593        {
 1594            if (!double.TryParse(match.Groups["value"].Value, NumberStyles.Float, CultureInfo.InvariantCulture, out var 
 595            {
 0596                return false;
 597            }
 598
 1599            totalMilliseconds += match.Groups["unit"].Value.ToLowerInvariant() switch
 1600            {
 1601                "ms" => value,
 0602                "s" => value * 1_000,
 0603                "m" => value * 60_000,
 0604                "h" => value * 3_600_000,
 0605                _ => 0
 1606            };
 607        }
 608
 1609        delay = TimeSpan.FromMilliseconds(Math.Max(0, totalMilliseconds));
 1610        return true;
 0611    }
 612
 613    private static TimeSpan ClampOpenAiRateLimitDelay(TimeSpan delay)
 614    {
 1615        if (delay < TimeSpan.Zero)
 616        {
 0617            return TimeSpan.Zero;
 618        }
 619
 1620        return delay > RateLimitedOpenAiRetryMaxDelay
 1621            ? RateLimitedOpenAiRetryMaxDelay
 1622            : delay;
 623    }
 624
 625    private static bool IsFlexResourceUnavailableFailure(ClientResultException exception)
 626    {
 1627        var rawResponse = exception.GetRawResponse();
 1628        return ContainsFlexResourceUnavailableMarker(exception.Message)
 1629               || ContainsFlexResourceUnavailableMarker(rawResponse?.ReasonPhrase)
 1630               || ContainsFlexResourceUnavailableMarker(rawResponse?.Content.ToString());
 631    }
 632
 633    private static bool ContainsFlexResourceUnavailableMarker(string? text)
 634    {
 1635        if (string.IsNullOrWhiteSpace(text))
 636        {
 0637            return false;
 638        }
 639
 1640        return text.Contains("resource_unavailable", StringComparison.OrdinalIgnoreCase)
 1641               || text.Contains("resource unavailable", StringComparison.OrdinalIgnoreCase)
 1642               || text.Contains("resources unavailable", StringComparison.OrdinalIgnoreCase)
 1643               || text.Contains("insufficient resources", StringComparison.OrdinalIgnoreCase)
 1644               || text.Contains("capacity", StringComparison.OrdinalIgnoreCase);
 645    }
 646
 647    private static bool ContainsQuotaExhaustedMarker(string? text)
 648    {
 1649        if (string.IsNullOrWhiteSpace(text))
 650        {
 0651            return false;
 652        }
 653
 1654        return text.Contains("insufficient_quota", StringComparison.OrdinalIgnoreCase)
 1655               || text.Contains("exceeded your current quota", StringComparison.OrdinalIgnoreCase)
 1656               || text.Contains("check your plan and billing", StringComparison.OrdinalIgnoreCase);
 657    }
 658
 659    public async Task<BonusPrediction?> PredictBonusQuestionAsync(
 660        BonusQuestion bonusQuestion,
 661        IEnumerable<DocumentContext> contextDocuments,
 662        PredictionTelemetryMetadata? telemetryMetadata = null,
 663        CancellationToken cancellationToken = default)
 664    {
 1665        _logger.LogInformation("Generating prediction for bonus question: {QuestionText}", bonusQuestion.Text);
 666
 667        try
 668        {
 669            // Build the instructions by combining template with context
 1670            var instructions = BuildBonusInstructions(contextDocuments);
 671
 672            // Create bonus question JSON
 1673            var questionJson = PredictionPromptComposer.CreateBonusQuestionJson(bonusQuestion);
 674
 1675            _logger.LogDebug("Instructions length: {InstructionsLength} characters", instructions.Length);
 1676            _logger.LogDebug("Context documents: {ContextCount}", contextDocuments.Count());
 1677            _logger.LogDebug("Question JSON: {QuestionJson}", questionJson);
 678
 679            // Create input items for the response
 1680            var messages = new List<PredictionRequestMessage>
 1681            {
 1682                new("system", instructions),
 1683                new("user", questionJson)
 1684            };
 685
 1686            _logger.LogDebug("Calling OpenAI API for bonus prediction");
 687
 688            // Start an OTel activity for Langfuse generation tracking
 1689            using var activity = Telemetry.Source.StartActivity("predict-bonus");
 690
 691            // Call OpenAI with structured output format
 1692            var completion = await CompleteBonusResponseAsync(messages, bonusQuestion, cancellationToken);
 693
 694            // Parse the structured response
 1695            var predictionJson = completion.PredictionJson;
 1696            _logger.LogDebug("Received bonus prediction JSON: {PredictionJson}", predictionJson);
 697
 1698            var prediction = ParseSingleBonusPrediction(predictionJson, bonusQuestion);
 699
 1700            if (prediction != null)
 701            {
 1702                _logger.LogInformation("Generated prediction for bonus question: {SelectedOptions}",
 1703                    string.Join(", ", prediction.SelectedOptionIds));
 704            }
 705
 706            // Log token usage and cost breakdown
 1707            var usage = completion.Usage;
 1708            _logger.LogDebug("Token usage - Input: {InputTokens}, Output: {OutputTokens}, Total: {TotalTokens}",
 1709                usage.InputTokenCount, usage.OutputTokenCount, usage.TotalTokenCount);
 710
 711            // Set Langfuse generation attributes on the activity
 1712            SetLangfuseGenerationAttributes(activity, messages, predictionJson, usage, telemetryMetadata, completion.Exe
 713
 714            // Add usage to tracker
 1715            if (completion.ExecutionTelemetry is null)
 716            {
 1717                _tokenUsageTracker.AddUsage(_model, usage);
 718            }
 719            else
 720            {
 1721                _tokenUsageTracker.AddUsage(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 722            }
 723
 724            // Calculate and log costs
 1725            if (completion.ExecutionTelemetry is null)
 726            {
 1727                _costCalculationService.LogCostBreakdown(_model, usage);
 728            }
 729            else
 730            {
 1731                _costCalculationService.LogCostBreakdown(_model, usage, completion.ExecutionTelemetry.FinalServiceTier);
 732            }
 733
 1734            return prediction;
 735        }
 1736        catch (Exception ex)
 737        {
 1738            _logger.LogError(ex, "Error generating bonus prediction for question: {QuestionText}", bonusQuestion.Text);
 1739            return null;
 740        }
 1741    }
 742
 743    private string BuildInstructions(IEnumerable<DocumentContext> contextDocuments, bool includeJustification)
 744    {
 1745        var template = includeJustification
 1746            ? _instructionsTemplateWithJustification.Value.Template
 1747            : _instructionsTemplate.Value.Template;
 748
 1749        var contextList = contextDocuments.ToList();
 1750        if (contextList.Any())
 751        {
 1752            _logger.LogDebug("Added {ContextCount} context documents to instructions", contextList.Count);
 753        }
 754        else
 755        {
 1756            _logger.LogDebug("No context documents provided");
 757        }
 758
 1759        return PredictionPromptComposer.BuildSystemPrompt(template, contextList);
 760    }
 761
        /// <summary>
        /// Builds the JSON schema describing the structured match prediction and serializes it to UTF-8 bytes.
        /// </summary>
        /// <remarks>
        /// The root object always requires the integer properties home and away. When
        /// <paramref name="includeJustification"/> is true, a required justification object is added with
        /// keyReasoning (string), contextSources (object holding the mostValuable and leastValuable arrays,
        /// whose items are objects with documentName and details strings) and uncertainties (array of strings).
        /// Every object level sets additionalProperties to false and lists all of its properties as required.
        /// </remarks>
        /// <param name="includeJustification">Whether the justification sub-schema is part of the result.</param>
        /// <returns>The schema serialized with JsonSerializer.SerializeToUtf8Bytes.</returns>
 762    private static byte[] BuildPredictionJsonSchema(bool includeJustification)
 763    {
             // Score properties that are present in every variant of the schema.
 1764        var properties = new Dictionary<string, object?>
 1765        {
 1766            ["home"] = new Dictionary<string, object?>
 1767            {
 1768                ["type"] = "integer",
 1769                ["description"] = "Predicted goals for the home team"
 1770            },
 1771            ["away"] = new Dictionary<string, object?>
 1772            {
 1773                ["type"] = "integer",
 1774                ["description"] = "Predicted goals for the away team"
 1775            }
 1776        };
 777
 1778        var required = new List<string> { "home", "away" };
 779
 1780        if (includeJustification)
 781        {
                 // Item shape for the mostValuable array; the leastValuable item below has the same
                 // structure and differs in the wording of the details description.
 1782            var mostValuableContextSourceItem = new Dictionary<string, object?>
 1783            {
 1784                ["type"] = "object",
 1785                ["properties"] = new Dictionary<string, object?>
 1786                {
 1787                    ["documentName"] = new Dictionary<string, object?>
 1788                    {
 1789                        ["type"] = "string",
 1790                        ["description"] = "Name of the context document referenced"
 1791                    },
 1792                    ["details"] = new Dictionary<string, object?>
 1793                    {
 1794                        ["type"] = "string",
 1795                        ["description"] = "Brief summary of why the document or parts of it were useful"
 1796                    }
 1797                },
 1798                ["required"] = new[] { "documentName", "details" },
 1799                ["additionalProperties"] = false
 1800            };
 801
 1802            var leastValuableContextSourceItem = new Dictionary<string, object?>
 1803            {
 1804                ["type"] = "object",
 1805                ["properties"] = new Dictionary<string, object?>
 1806                {
 1807                    ["documentName"] = new Dictionary<string, object?>
 1808                    {
 1809                        ["type"] = "string",
 1810                        ["description"] = "Name of the context document referenced"
 1811                    },
 1812                    ["details"] = new Dictionary<string, object?>
 1813                    {
 1814                        ["type"] = "string",
 1815                        ["description"] = "Brief summary explaining why the document or parts of it offered limited insi
 1816                    }
 1817                },
 1818                ["required"] = new[] { "documentName", "details" },
 1819                ["additionalProperties"] = false
 1820            };
 821
 1822            var contextSources = new Dictionary<string, object?>
 1823            {
 1824                ["type"] = "object",
 1825                ["properties"] = new Dictionary<string, object?>
 1826                {
 1827                    ["mostValuable"] = new Dictionary<string, object?>
 1828                    {
 1829                        ["type"] = "array",
 1830                        ["items"] = mostValuableContextSourceItem,
 1831                        ["description"] = "Context documents that most influenced the prediction",
 1832                        ["minItems"] = 0
 1833                    },
 1834                    ["leastValuable"] = new Dictionary<string, object?>
 1835                    {
 1836                        ["type"] = "array",
 1837                        ["items"] = leastValuableContextSourceItem,
 1838                        ["description"] = "Context documents that provided limited or no valuable insight",
 1839                        ["minItems"] = 0
 1840                    }
 1841                },
 1842                ["required"] = new[] { "leastValuable", "mostValuable" },
 1843                ["additionalProperties"] = false
 1844            };
 845
 1846            properties["justification"] = new Dictionary<string, object?>
 1847            {
 1848                ["type"] = "object",
 1849                ["properties"] = new Dictionary<string, object?>
 1850                {
 1851                    ["keyReasoning"] = new Dictionary<string, object?>
 1852                    {
 1853                        ["type"] = "string",
 1854                        ["description"] = "Concise analytic summary motivating the predicted scoreline"
 1855                    },
 1856                    ["contextSources"] = contextSources,
 1857                    ["uncertainties"] = new Dictionary<string, object?>
 1858                    {
 1859                        ["type"] = "array",
 1860                        ["items"] = new Dictionary<string, object?>
 1861                        {
 1862                            ["type"] = "string",
 1863                            ["description"] = "Single uncertainty or external factor affecting confidence"
 1864                        },
 1865                        ["description"] = "Factors that could alter the predicted outcome",
 1866                        ["minItems"] = 0
 1867                    }
 1868                },
 1869                ["required"] = new[] { "contextSources", "keyReasoning", "uncertainties" },
 1870                ["additionalProperties"] = false
 1871            };
 1872            required.Add("justification");
 873        }
 874
 1875        var schema = new Dictionary<string, object?>
 1876        {
 1877            ["type"] = "object",
 1878            ["properties"] = properties,
 1879            ["required"] = required,
 1880            ["additionalProperties"] = false
 1881        };
 882
 1883        return JsonSerializer.SerializeToUtf8Bytes(schema);
 884    }
 885
        /// <summary>
        /// Deserializes the structured model output into a Prediction, including the justification when one is present.
        /// </summary>
        /// <param name="predictionJson">Raw JSON text returned by the model.</param>
        /// <returns>A Prediction built from the home and away values and the optional justification.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the deserializer returns null, or when it raises a JsonException (which becomes the
        /// inner exception). In both cases the raw response is passed to LogRawModelResponse first.
        /// </exception>
 886    private Prediction ParsePrediction(string predictionJson)
 887    {
 888        try
 889        {
 1890            _logger.LogDebug("Parsing prediction JSON: {PredictionJson}", predictionJson);
 891
 1892            var predictionResponse = JsonSerializer.Deserialize<PredictionResponse>(predictionJson);
 1893            if (predictionResponse == null)
 894            {
 1895                LogRawModelResponse(predictionJson);
 1896                throw new InvalidOperationException("Failed to deserialize prediction response");
 897            }
 898
 1899            _logger.LogDebug("Parsed prediction response - Home: {Home}, Away: {Away}", predictionResponse.Home, predict
 900
 1901            PredictionJustification? justification = null;
 902
 1903            if (predictionResponse.Justification != null)
 904            {
 1905                var justificationResponse = predictionResponse.Justification;
 906
                     // Null-conditional access plus the null filter tolerate missing members and null entries;
                     // names and details are trimmed, with null mapped to the empty string.
 1907                var mostValuable = justificationResponse.ContextSources?.MostValuable?
 1908                    .Where(entry => entry != null)
 1909                    .Select(entry => new PredictionJustificationContextSource(
 1910                        entry!.DocumentName?.Trim() ?? string.Empty,
 1911                        entry.Details?.Trim() ?? string.Empty))
 1912                    .ToList() ?? new List<PredictionJustificationContextSource>();
 913
 1914                var leastValuable = justificationResponse.ContextSources?.LeastValuable?
 0915                    .Where(entry => entry != null)
 0916                    .Select(entry => new PredictionJustificationContextSource(
 0917                        entry!.DocumentName?.Trim() ?? string.Empty,
 0918                        entry.Details?.Trim() ?? string.Empty))
 1919                    .ToList() ?? new List<PredictionJustificationContextSource>();
 920
                     // Blank uncertainty entries are dropped and the remaining ones trimmed.
 1921                var uncertainties = justificationResponse.Uncertainties?
 1922                    .Where(item => !string.IsNullOrWhiteSpace(item))
 1923                    .Select(item => item.Trim())
 1924                    .ToList() ?? new List<string>();
 925
 1926                justification = new PredictionJustification(
 1927                    justificationResponse.KeyReasoning?.Trim() ?? string.Empty,
 1928                    new PredictionJustificationContextSources(mostValuable, leastValuable),
 1929                    uncertainties);
 930
 1931                _logger.LogDebug(
 1932                    "Parsed justification with key reasoning: {KeyReasoning}; Most valuable sources: {MostValuableCount}
 1933                    justification.KeyReasoning,
 1934                    justification.ContextSources.MostValuable.Count,
 1935                    justification.ContextSources.LeastValuable.Count,
 1936                    justification.Uncertainties.Count);
 937            }
 938
 1939            return new Prediction(predictionResponse.Home, predictionResponse.Away, justification);
 940        }
 1941        catch (JsonException ex)
 942        {
 1943            _logger.LogError(ex, "Failed to parse prediction JSON: {PredictionJson}", predictionJson);
 1944            LogRawModelResponse(predictionJson);
 1945            throw new InvalidOperationException($"Failed to parse prediction response: {ex.Message}", ex);
 946        }
 1947    }
 948
 949    private void LogRawModelResponse(string rawResponse)
 950    {
 1951        if (string.IsNullOrWhiteSpace(rawResponse))
 952        {
 953            const string message = "Raw model response from OpenAI was empty or whitespace.";
 1954            _logger.LogError(message);
 1955            Console.Error.WriteLine(message);
 1956            return;
 957        }
 958
 1959        _logger.LogError("Raw model response from OpenAI: {RawResponse}", rawResponse);
 1960        Console.Error.WriteLine("Raw model response from OpenAI:");
 1961        Console.Error.WriteLine(rawResponse);
 1962    }
 963
 964    private string BuildBonusInstructions(IEnumerable<DocumentContext> contextDocuments)
 965    {
 966        // Use the pre-loaded bonus instructions template
 1967        var bonusInstructionsTemplate = _bonusInstructionsTemplate.Value.Template;
 968
 1969        var contextList = contextDocuments.ToList();
 1970        if (contextList.Any())
 971        {
 1972            _logger.LogDebug("Added {ContextCount} context documents to bonus instructions", contextList.Count);
 973        }
 974        else
 975        {
 1976            _logger.LogDebug("No context documents provided for bonus predictions");
 977        }
 978
 1979        return PredictionPromptComposer.BuildSystemPrompt(bonusInstructionsTemplate, contextList);
 980    }
 981
 982    private static byte[] CreateSingleBonusPredictionJsonSchema(BonusQuestion question)
 983    {
 984        // For multi-selection questions, require exactly MaxSelections answers
 985        // For single-selection questions, require exactly 1 answer
 1986        var requiredSelections = question.MaxSelections;
 987
 1988        var schema = new
 1989        {
 1990            type = "object",
 1991            properties = new
 1992            {
 1993                selectedOptionIds = new
 1994                {
 1995                    type = "array",
 1996                    items = new { type = "string", @enum = question.Options.Select(o => o.Id).ToArray() },
 1997                    minItems = requiredSelections,
 1998                    maxItems = requiredSelections
 1999                }
 11000            },
 11001            required = new[] { "selectedOptionIds" },
 11002            additionalProperties = false
 11003        };
 1004
 11005        return JsonSerializer.SerializeToUtf8Bytes(schema);
 1006    }
 1007
         /// <summary>
         /// Deserializes and validates the model answer for one bonus question.
         /// </summary>
         /// <param name="predictionJson">Raw JSON text returned by the model.</param>
         /// <param name="question">The question whose options and MaxSelections the answer is checked against.</param>
         /// <returns>
         /// A BonusPrediction holding the selected option IDs, or null when the JSON is malformed, an
         /// unknown or duplicate option ID is present, or the selection count differs from MaxSelections.
         /// </returns>
         /// <exception cref="InvalidOperationException">
         /// Thrown when the response or its selection list is null or empty. Only JsonException is caught
         /// in this method, so this exception propagates to the caller.
         /// </exception>
 1008    private BonusPrediction? ParseSingleBonusPrediction(string predictionJson, BonusQuestion question)
 1009    {
 1010        try
 1011        {
 11012            _logger.LogDebug("Parsing single bonus prediction JSON: {PredictionJson}", predictionJson);
 1013
 11014            var response = JsonSerializer.Deserialize<SingleBonusPredictionResponse>(predictionJson);
 11015            if (response?.SelectedOptionIds?.Any() != true)
 1016            {
 11017                throw new InvalidOperationException("Failed to deserialize bonus prediction response or no options selec
 1018            }
 1019
 1020            // Validate that all selected options exist for this question
 11021            var validOptionIds = question.Options.Select(o => o.Id).ToHashSet();
 11022            var invalidOptions = response.SelectedOptionIds.Where(id => !validOptionIds.Contains(id)).ToArray();
 1023
 11024            if (invalidOptions.Any())
 1025            {
 11026                _logger.LogWarning("Invalid option IDs for question '{QuestionText}': {InvalidOptions}",
 11027                    question.Text, string.Join(", ", invalidOptions));
 11028                return null;
 1029            }
 1030
 1031            // Validate no duplicate selections
 11032            var duplicateOptions = response.SelectedOptionIds
 11033                .GroupBy(id => id)
 11034                .Where(g => g.Count() > 1)
 11035                .Select(g => g.Key)
 11036                .ToArray();
 1037
 11038            if (duplicateOptions.Any())
 1039            {
 11040                _logger.LogWarning("Duplicate option IDs for question '{QuestionText}': {DuplicateOptions}",
 11041                    question.Text, string.Join(", ", duplicateOptions));
 11042                return null;
 1043            }
 1044
 1045            // Validate selection count - must match exactly MaxSelections for full predictions
 11046            if (response.SelectedOptionIds.Length != question.MaxSelections)
 1047            {
 11048                _logger.LogWarning("Invalid selection count for question '{QuestionText}': expected exactly {MaxSelectio
 11049                    question.Text, question.MaxSelections, response.SelectedOptionIds.Length);
 11050                return null;
 1051            }
 1052
 11053            var prediction = new BonusPrediction(response.SelectedOptionIds.ToList());
 1054
 11055            _logger.LogDebug("Parsed prediction: {SelectedOptions}",
 11056                string.Join(", ", response.SelectedOptionIds));
 1057
 11058            return prediction;
 1059        }
 11060        catch (JsonException ex)
 1061        {
                  // Malformed JSON is reported as a missing prediction rather than rethrown.
 11062            _logger.LogError(ex, "Failed to parse bonus prediction JSON: {PredictionJson}", predictionJson);
 11063            return null;
 1064        }
 11065    }
 1066
 1067    /// <summary>
 1068    /// Gets the file path of the match prediction prompt being used by this service
 1069    /// </summary>
 1070    /// <returns>The absolute file path to the match prompt file</returns>
 1071    public string GetMatchPromptPath(bool includeJustification = false)
 1072    {
 11073        return includeJustification
 11074            ? _instructionsTemplateWithJustification.Value.Path
 11075            : _instructionsTemplate.Value.Path;
 1076    }
 1077
 1078    /// <summary>
 1079    /// Gets the file path of the bonus question prediction prompt being used by this service
 1080    /// </summary>
 1081    /// <returns>The absolute file path to the bonus prompt file</returns>
 11082    public string GetBonusPromptPath() => _bonusInstructionsTemplate.Value.Path;
 1083
 1084    /// <summary>
 1085    /// Deserialization target for the structured match prediction JSON: the two score values and an optional justification.
 1086    /// </summary>
 1087    private class PredictionResponse
 1088    {
             // Bound to the JSON property home (described in the schema as the predicted home team goals).
 1089        [JsonPropertyName("home")]
 1090        public int Home { get; set; }
 1091
             // Bound to the JSON property away (described in the schema as the predicted away team goals).
 1092        [JsonPropertyName("away")]
 1093        public int Away { get; set; }
 1094
             // Stays null when the response carries no justification object.
 1095        [JsonPropertyName("justification")]
 1096        public JustificationResponse? Justification { get; set; }
 1097    }
 1098
         /// <summary>
         /// Deserialization target for the justification object of a match prediction.
         /// Each member has a non-null default that applies when the JSON omits the property.
         /// </summary>
 1099    private class JustificationResponse
 1100    {
 1101        [JsonPropertyName("keyReasoning")]
 11102        public string KeyReasoning { get; set; } = string.Empty;
 1103
 1104        [JsonPropertyName("contextSources")]
 11105        public JustificationContextSourcesResponse ContextSources { get; set; } = new();
 1106
 1107        [JsonPropertyName("uncertainties")]
 11108        public string[] Uncertainties { get; set; } = Array.Empty<string>();
 1109    }
 1110
         /// <summary>
         /// Deserialization target for the contextSources object: the most and least valuable context documents.
         /// </summary>
 1111    private class JustificationContextSourcesResponse
 1112    {
 1113        [JsonPropertyName("mostValuable")]
 11114        public JustificationContextSourceEntry[] MostValuable { get; set; } = Array.Empty<JustificationContextSourceEntr
 1115
 1116        [JsonPropertyName("leastValuable")]
 11117        public JustificationContextSourceEntry[] LeastValuable { get; set; } = Array.Empty<JustificationContextSourceEnt
 1118    }
 1119
         /// <summary>
         /// Deserialization target for one context source entry: a document name and a details text.
         /// Both members default to the empty string.
         /// </summary>
 1120    private class JustificationContextSourceEntry
 1121    {
 1122        [JsonPropertyName("documentName")]
 11123        public string DocumentName { get; set; } = string.Empty;
 1124
 1125        [JsonPropertyName("details")]
 11126        public string Details { get; set; } = string.Empty;
 1127    }
 1128
 1129    /// <summary>
 1130    /// Deserialization target for a response holding a predictions array of bonus prediction entries.
 1131    /// NOTE(review): no use of this type is visible in this part of the file - confirm it is still needed.
 1132    /// </summary>
 1133    private class BonusPredictionsResponse
 1134    {
 1135        [JsonPropertyName("predictions")]
 1136        public BonusPredictionEntry[]? Predictions { get; set; }
 1137    }
 1137
 1138    /// <summary>
 1139    /// Deserialization target for one bonus prediction entry: a question ID and the option IDs selected for it.
 1140    /// Both members have non-null defaults (empty string and empty array).
 1141    /// </summary>
 1142    private class BonusPredictionEntry
 1143    {
 1144        [JsonPropertyName("questionId")]
 01145        public string QuestionId { get; set; } = string.Empty;
 1146
 1147        [JsonPropertyName("selectedOptionIds")]
 01148        public string[] SelectedOptionIds { get; set; } = Array.Empty<string>();
 1149    }
 1149
 1150    /// <summary>
 1151    /// Deserialization target for the answer to a single bonus question: the selected option IDs,
 1152    /// defaulting to an empty array when the JSON omits the property.
 1153    /// </summary>
 1154    private class SingleBonusPredictionResponse
 1155    {
 1156        [JsonPropertyName("selectedOptionIds")]
 11157        public string[] SelectedOptionIds { get; set; } = Array.Empty<string>();
 1158    }
 1158
 1159    /// <summary>
 1160    /// Sets Langfuse-mapped OpenTelemetry attributes on the given activity.
 1161    /// If <paramref name="activity"/> is <c>null</c> (no OTel listener registered), this is a no-op.
 1162    /// </summary>
         /// <param name="activity">The activity to tag; nothing happens when it is null.</param>
         /// <param name="messages">Request messages, serialized as role and content pairs into the input tag.</param>
         /// <param name="responseJson">The model response text, stored unchanged in the output tag.</param>
         /// <param name="usage">Token counts used for the usage details tag and the cost calculation.</param>
         /// <param name="telemetryMetadata">Optional metadata that applies itself to the activity.</param>
         /// <param name="executionTelemetry">
         /// Optional service tier and strategy information. Service tier tags and cost details are only
         /// written when this is not null.
         /// </param>
 1163    private void SetLangfuseGenerationAttributes(
 1164        Activity? activity,
 1165        IReadOnlyList<PredictionRequestMessage> messages,
 1166        string responseJson,
 1167        ChatTokenUsage usage,
 1168        PredictionTelemetryMetadata? telemetryMetadata,
 1169        PredictionExecutionTelemetry? executionTelemetry = null)
 1170    {
 11171        if (activity is null)
 01172            return;
 1173
 11174        activity.SetTag("langfuse.observation.type", "generation");
 11175        activity.SetTag("gen_ai.request.model", _model);
 11176        var providerPromptMetadata = (_templateProvider as IPromptTemplateTelemetryMetadataProvider)
 11177            ?.GetPromptTemplateTelemetryMetadata();
 1178
              // Prompt name and version come from the template provider when it supplies both;
              // otherwise the values configured in the options are used, if any.
 11179        if (providerPromptMetadata?.LangfusePromptName is { } providerPromptName &&
 11180            providerPromptMetadata.LangfusePromptVersion is { } providerPromptVersion)
 1181        {
 01182            activity.SetTag("langfuse.observation.prompt.name", providerPromptName);
 01183            activity.SetTag("langfuse.observation.prompt.version", providerPromptVersion);
 1184        }
 11185        else if (_options.LangfusePromptTraceMetadata is { } promptTraceMetadata)
 1186        {
 11187            activity.SetTag("langfuse.observation.prompt.name", promptTraceMetadata.Name);
 11188            activity.SetTag("langfuse.observation.prompt.version", promptTraceMetadata.Version);
 1189        }
 1190
              // Fallback metadata: provider values win; without a provider only a true fallback flag is recorded.
 11191        if (providerPromptMetadata is not null)
 1192        {
 01193            activity.SetTag("langfuse.observation.metadata.langfusePromptFallback", providerPromptMetadata.IsFallback);
 01194            activity.SetTag("langfuse.observation.metadata.promptTemplatePath", providerPromptMetadata.PromptPath);
 1195        }
 11196        else if (_options.LangfusePromptTraceMetadata is { IsFallback: true })
 1197        {
 01198            activity.SetTag("langfuse.observation.metadata.langfusePromptFallback", true);
 1199        }
 1200
 11201        if (!string.IsNullOrWhiteSpace(_options.ReasoningEffort))
 1202        {
                  // The configured value is trimmed and lower-cased before being written to both tags.
 11203            var reasoningEffort = _options.ReasoningEffort.Trim().ToLowerInvariant();
 11204            activity.SetTag("gen_ai.request.reasoning_effort", reasoningEffort);
 11205            activity.SetTag("langfuse.observation.metadata.openaiReasoningEffort", reasoningEffort);
 1206        }
 1207
 11208        if (executionTelemetry is not null)
 1209        {
 11210            activity.SetTag("gen_ai.request.service_tier", executionTelemetry.RequestedServiceTier);
 11211            activity.SetTag("gen_ai.response.service_tier", executionTelemetry.FinalServiceTier);
 11212            activity.SetTag("langfuse.observation.metadata.openaiExecutionStrategy", executionTelemetry.Strategy);
 11213            activity.SetTag("langfuse.observation.metadata.openaiRequestedServiceTier", executionTelemetry.RequestedServ
 11214            activity.SetTag("langfuse.observation.metadata.openaiFinalServiceTier", executionTelemetry.FinalServiceTier)
 11215            activity.SetTag("langfuse.observation.metadata.openaiServiceTierFallbackUsed", executionTelemetry.FallbackUs
 1216        }
 1217
 1218        // Serialize messages as input (system prompt + user message)
 11219        var inputMessages = messages.Select(m => new
 11220        {
 11221            role = m.Role,
 11222            content = m.Content
 11223        });
 11224        activity.SetTag("langfuse.observation.input", JsonSerializer.Serialize(inputMessages));
 11225        activity.SetTag("langfuse.observation.output", responseJson);
 11226        telemetryMetadata?.ApplyToObservation(activity);
 1227
 1228        // Token usage details
              // The total is computed here as input plus output instead of reading usage.TotalTokenCount;
              // missing cached or reasoning details are reported as 0.
 11229        var usageDetails = new
 11230        {
 11231            input = usage.InputTokenCount,
 11232            output = usage.OutputTokenCount,
 11233            cache_read_input_tokens = usage.InputTokenDetails?.CachedTokenCount ?? 0,
 11234            reasoning_tokens = usage.OutputTokenDetails?.ReasoningTokenCount ?? 0,
 11235            total = usage.InputTokenCount + usage.OutputTokenCount
 11236        };
 11237        activity.SetTag("langfuse.observation.usage_details", JsonSerializer.Serialize(usageDetails));
 1238
              // Cost details need the final service tier, so they are skipped without execution telemetry
              // or when the cost service yields no breakdown.
 11239        if (executionTelemetry is not null &&
 11240            _costCalculationService.CalculateCostBreakdown(_model, usage, executionTelemetry.FinalServiceTier) is { } co
 1241        {
 11242            var costDetails = new
 11243            {
 11244                input = costBreakdown.Input,
 11245                cache_read_input_tokens = costBreakdown.CachedInput,
 11246                output = costBreakdown.Output,
 11247                total = costBreakdown.Total
 11248            };
 11249            activity.SetTag("langfuse.observation.cost_details", JsonSerializer.Serialize(costDetails));
 1250        }
 11251    }
 1252
         /// <summary>
         /// One request message: a role (this file creates system and user messages) and its text content.
         /// </summary>
 11253    private sealed record PredictionRequestMessage(string Role, string Content);
 1254
         /// <summary>
         /// Result of a completion call: the prediction JSON text, the token usage, optional execution
         /// telemetry, and an optional final service tier that defaults to null.
         /// </summary>
 11255    private sealed record OpenAiResponseResult(
 11256        string PredictionJson,
 11257        ChatTokenUsage Usage,
 11258        PredictionExecutionTelemetry? ExecutionTelemetry,
 11259        string? FinalServiceTier = null);
 1260
         /// <summary>
         /// Describes how a request was executed: the strategy name, the requested and final service
         /// tiers, and whether a fallback was used. The final tier is what usage tracking and cost
         /// calculation receive in this file.
         /// </summary>
 11261    private sealed record PredictionExecutionTelemetry(
 11262        string Strategy,
 11263        string RequestedServiceTier,
 11264        string FinalServiceTier,
 11265        bool FallbackUsed);
 1266
 1267    private static ChatTokenUsage ToChatTokenUsage(ResponseTokenUsage usage)
 1268    {
 11269        var cachedTokenCount = usage.InputTokenDetails?.CachedTokenCount ?? 0;
 11270        var reasoningTokenCount = usage.OutputTokenDetails?.ReasoningTokenCount ?? 0;
 11271        var inputDetails = cachedTokenCount > 0
 11272            ? OpenAIChatModelFactory.ChatInputTokenUsageDetails(cachedTokenCount: cachedTokenCount)
 11273            : null;
 11274        var outputDetails = reasoningTokenCount > 0
 11275            ? OpenAIChatModelFactory.ChatOutputTokenUsageDetails(reasoningTokenCount: reasoningTokenCount)
 11276            : null;
 1277
 11278        return OpenAIChatModelFactory.ChatTokenUsage(
 11279            inputTokenCount: usage.InputTokenCount,
 11280            outputTokenCount: usage.OutputTokenCount,
 11281            inputTokenDetails: inputDetails,
 11282            outputTokenDetails: outputDetails);
 1283    }
 1284}

Methods/Properties

.cctor()
.ctor(OpenAI.Responses.ResponsesClient, Microsoft.Extensions.Logging.ILogger<OpenAiIntegration.PredictionService>, OpenAiIntegration.ICostCalculationService, OpenAiIntegration.ITokenUsageTracker, OpenAiIntegration.IInstructionsTemplateProvider, string, OpenAiIntegration.PredictionServiceOptions)
PredictMatchAsync()
CompleteMatchResponseAsync(System.Collections.Generic.IReadOnlyList<OpenAiIntegration.PredictionService.PredictionRequestMessage>, bool, System.Threading.CancellationToken)
CompleteBonusResponseAsync(System.Collections.Generic.IReadOnlyList<OpenAiIntegration.PredictionService.PredictionRequestMessage>, EHonda.KicktippAi.Core.BonusQuestion, System.Threading.CancellationToken)
CompleteStructuredResponseAsync()
<CompleteStructuredResponseAsync()
CompleteResponseAsync()
CreateResponseWithTransientRetryAsync()
<CreateResponseWithTransientRetryAsync()
CreateMatchResponseOptions(System.Collections.Generic.IReadOnlyList<OpenAiIntegration.PredictionService.PredictionRequestMessage>, bool, string, string)
CreateBonusResponseOptions(System.Collections.Generic.IReadOnlyList<OpenAiIntegration.PredictionService.PredictionRequestMessage>, EHonda.KicktippAi.Core.BonusQuestion, string, string)
CreateResponseOptions(System.Collections.Generic.IReadOnlyList<OpenAiIntegration.PredictionService.PredictionRequestMessage>, string, System.BinaryData, string, string)
NormalizeReasoningEffort(string)
NormalizeServiceTier(string)
NormalizeResponseServiceTier(System.Nullable<OpenAI.Responses.ResponseServiceTier>)
CreateResponseMessage(OpenAiIntegration.PredictionService.PredictionRequestMessage)
IsFlexProcessingRequest(string)
IsFlexFallbackFailure(System.Exception, System.Threading.CancellationToken)
IsTransientOpenAiServerFailure(System.Exception, System.Threading.CancellationToken)
IsRetryableOpenAiRateLimitFailure(System.Exception, System.Threading.CancellationToken)
ResolveOpenAiRateLimitDelay(System.Exception, int)
TryGetOpenAiRateLimitResetDelay(System.ClientModel.Primitives.PipelineResponse, out System.TimeSpan)
AddRateLimitResetDelay(System.ClientModel.Primitives.PipelineResponse, string, System.Collections.Generic.List<System.TimeSpan>, System.Collections.Generic.List<System.TimeSpan>)
TryGetOpenAiRateLimitHeader(System.ClientModel.Primitives.PipelineResponse, string, out string)
TryParseOpenAiRateLimitReset(string, out System.TimeSpan)
ClampOpenAiRateLimitDelay(System.TimeSpan)
IsFlexResourceUnavailableFailure(System.ClientModel.ClientResultException)
ContainsFlexResourceUnavailableMarker(string)
ContainsQuotaExhaustedMarker(string)
PredictBonusQuestionAsync()
BuildInstructions(System.Collections.Generic.IEnumerable<EHonda.KicktippAi.Core.DocumentContext>, bool)
BuildPredictionJsonSchema(bool)
ParsePrediction(string)
LogRawModelResponse(string)
BuildBonusInstructions(System.Collections.Generic.IEnumerable<EHonda.KicktippAi.Core.DocumentContext>)
CreateSingleBonusPredictionJsonSchema(EHonda.KicktippAi.Core.BonusQuestion)
ParseSingleBonusPrediction(string, EHonda.KicktippAi.Core.BonusQuestion)
GetMatchPromptPath(bool)
GetBonusPromptPath()
.ctor()
.ctor()
.ctor()
.ctor()
.ctor()
SetLangfuseGenerationAttributes(System.Diagnostics.Activity, System.Collections.Generic.IReadOnlyList<OpenAiIntegration.PredictionService.PredictionRequestMessage>, string, OpenAI.Chat.ChatTokenUsage, OpenAiIntegration.PredictionTelemetryMetadata, OpenAiIntegration.PredictionService.PredictionExecutionTelemetry)
.ctor(string, string)
.ctor(string, OpenAI.Chat.ChatTokenUsage, OpenAiIntegration.PredictionService.PredictionExecutionTelemetry, string)
.ctor(string, string, string, bool)
ToChatTokenUsage(OpenAI.Responses.ResponseTokenUsage)