| | | 1 | | using Microsoft.Extensions.Logging; |
| | | 2 | | using OpenAI.Chat; |
| | | 3 | | |
| | | 4 | | namespace OpenAiIntegration; |
| | | 5 | | |
| | | 6 | | /// <summary> |
| | | 7 | | /// Service for calculating and logging OpenAI API costs |
| | | 8 | | /// </summary> |
| | | 9 | | public class CostCalculationService : ICostCalculationService |
| | | 10 | | { |
| | 0 | 11 | | private const decimal FlexPriceMultiplier = 0.5m; |
| | | 12 | | |
| | | 13 | | private readonly ILogger<CostCalculationService> _logger; |
| | | 14 | | |
/// <summary>
/// Creates the service with the logger that receives cost breakdown messages.
/// </summary>
/// <param name="logger">Logger used by this service; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="logger"/> is null.</exception>
public CostCalculationService(ILogger<CostCalculationService> logger)
{
    if (logger is null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _logger = logger;
}
| | | 19 | | |
/// <summary>
/// Logs the cost breakdown for a request without specifying a service tier.
/// </summary>
/// <param name="model">Model name used to look up pricing.</param>
/// <param name="usage">Token usage reported for the request.</param>
public void LogCostBreakdown(string model, ChatTokenUsage usage)
    => LogCostBreakdown(model, usage, serviceTier: null);
| | | 24 | | |
| | | 25 | | public void LogCostBreakdown(string model, ChatTokenUsage usage, string? serviceTier) |
| | | 26 | | { |
| | 1 | 27 | | if (ModelPricingData.Pricing.TryGetValue(model, out var pricing)) |
| | | 28 | | { |
| | 1 | 29 | | pricing = ApplyServiceTier(pricing, serviceTier); |
| | | 30 | | |
| | | 31 | | // Get exact token counts from usage details |
| | 1 | 32 | | var cachedInputTokens = usage.InputTokenDetails?.CachedTokenCount ?? 0; |
| | 1 | 33 | | var uncachedInputTokens = usage.InputTokenCount - cachedInputTokens; |
| | 1 | 34 | | var reasoningOutputTokens = usage.OutputTokenDetails?.ReasoningTokenCount ?? 0; |
| | 1 | 35 | | var textOutputTokens = usage.OutputTokenCount - reasoningOutputTokens; |
| | | 36 | | |
| | | 37 | | // Calculate costs for each component |
| | 1 | 38 | | var uncachedInputCost = (uncachedInputTokens / 1_000_000m) * pricing.InputPrice; |
| | 1 | 39 | | var cachedInputCost = pricing.CachedInputPrice.HasValue |
| | 1 | 40 | | ? (cachedInputTokens / 1_000_000m) * pricing.CachedInputPrice.Value |
| | 1 | 41 | | : 0m; |
| | 1 | 42 | | var outputCost = (usage.OutputTokenCount / 1_000_000m) * pricing.OutputPrice; |
| | 1 | 43 | | var totalCost = uncachedInputCost + cachedInputCost + outputCost; |
| | | 44 | | |
| | | 45 | | // Log the cost breakdown |
| | 1 | 46 | | _logger.LogInformation("Uncached Input Tokens: {UncachedInputTokens:N0} × ${InputPrice:F2}/1M = ${UncachedIn |
| | 1 | 47 | | uncachedInputTokens, pricing.InputPrice, uncachedInputCost); |
| | | 48 | | |
| | 1 | 49 | | if (pricing.CachedInputPrice.HasValue) |
| | | 50 | | { |
| | 1 | 51 | | _logger.LogInformation("Cached Input Tokens: {CachedInputTokens:N0} × ${CachedInputPrice:F3}/1M = ${Cach |
| | 1 | 52 | | cachedInputTokens, pricing.CachedInputPrice.Value, cachedInputCost); |
| | | 53 | | } |
| | | 54 | | |
| | 1 | 55 | | _logger.LogInformation("Reasoning Output Tokens: {ReasoningOutputTokens:N0}", |
| | 1 | 56 | | reasoningOutputTokens); |
| | | 57 | | |
| | 1 | 58 | | _logger.LogInformation("Text Output Tokens: {TextOutputTokens:N0}", |
| | 1 | 59 | | textOutputTokens); |
| | | 60 | | |
| | 1 | 61 | | _logger.LogInformation("Total Output Tokens: {TotalOutputTokens:N0} × ${OutputPrice:F2}/1M = ${OutputCost:F6 |
| | 1 | 62 | | usage.OutputTokenCount, pricing.OutputPrice, outputCost); |
| | | 63 | | |
| | 1 | 64 | | _logger.LogInformation("Total Cost: ${TotalCost:F6}", totalCost); |
| | | 65 | | } |
| | | 66 | | else |
| | | 67 | | { |
| | 1 | 68 | | _logger.LogWarning("Cost calculation not available: Pricing information not found for model '{Model}'", mode |
| | | 69 | | } |
| | 1 | 70 | | } |
| | | 71 | | |
/// <summary>
/// Calculates the total cost of a request without specifying a service tier.
/// </summary>
/// <param name="model">Model name used to look up pricing.</param>
/// <param name="usage">Token usage reported for the request.</param>
/// <returns>The result of the service-tier overload called with a null tier.</returns>
public decimal? CalculateCost(string model, ChatTokenUsage usage)
    => CalculateCost(model, usage, serviceTier: null);
| | | 76 | | |
/// <summary>
/// Calculates the total cost of a request for the given service tier.
/// </summary>
/// <param name="model">Model name used to look up pricing.</param>
/// <param name="usage">Token usage reported for the request.</param>
/// <param name="serviceTier">Optional service tier name; may be null.</param>
/// <returns>
/// The <c>Total</c> of the computed cost breakdown, or null when no breakdown is produced.
/// </returns>
public decimal? CalculateCost(string model, ChatTokenUsage usage, string? serviceTier)
{
    var breakdown = CalculateCostBreakdown(model, usage, serviceTier);

    // Null-conditional access propagates a missing breakdown as a null cost.
    return breakdown?.Total;
}
| | | 81 | | |
/// <summary>
/// Calculates the per-component cost breakdown without specifying a service tier.
/// </summary>
/// <param name="model">Model name used to look up pricing.</param>
/// <param name="usage">Token usage reported for the request.</param>
/// <returns>The result of the service-tier overload called with a null tier.</returns>
public CostBreakdown? CalculateCostBreakdown(string model, ChatTokenUsage usage)
    => CalculateCostBreakdown(model, usage, serviceTier: null);
| | | 86 | | |
| | | 87 | | public CostBreakdown? CalculateCostBreakdown(string model, ChatTokenUsage usage, string? serviceTier) |
| | | 88 | | { |
| | 1 | 89 | | if (ModelPricingData.Pricing.TryGetValue(model, out var pricing)) |
| | | 90 | | { |
| | 1 | 91 | | pricing = ApplyServiceTier(pricing, serviceTier); |
| | | 92 | | |
| | | 93 | | // Get exact token counts from usage details |
| | 1 | 94 | | var cachedInputTokens = usage.InputTokenDetails?.CachedTokenCount ?? 0; |
| | 1 | 95 | | var uncachedInputTokens = usage.InputTokenCount - cachedInputTokens; |
| | 1 | 96 | | var outputTokens = usage.OutputTokenCount; |
| | | 97 | | |
| | | 98 | | // Calculate costs for each component |
| | 1 | 99 | | var uncachedInputCost = (uncachedInputTokens / 1_000_000m) * pricing.InputPrice; |
| | 1 | 100 | | var cachedInputCost = pricing.CachedInputPrice.HasValue |
| | 1 | 101 | | ? (cachedInputTokens / 1_000_000m) * pricing.CachedInputPrice.Value |
| | 1 | 102 | | : 0m; |
| | 1 | 103 | | var outputCost = (outputTokens / 1_000_000m) * pricing.OutputPrice; |
| | | 104 | | |
| | 1 | 105 | | return new CostBreakdown(uncachedInputCost, cachedInputCost, outputCost, uncachedInputCost + cachedInputCost |
| | | 106 | | } |
| | | 107 | | |
| | 1 | 108 | | return null; |
| | | 109 | | } |
| | | 110 | | |
/// <summary>
/// Returns the pricing to use for the given service tier.
/// </summary>
/// <remarks>
/// When the tier, after trimming and ignoring case, equals "flex", every price is
/// multiplied by <see cref="FlexPriceMultiplier"/>; a null cached-input price stays null.
/// Any other tier, including null, returns <paramref name="pricing"/> unchanged.
/// </remarks>
/// <param name="pricing">Base pricing for the model.</param>
/// <param name="serviceTier">Optional service tier name; may be null.</param>
/// <returns>The original pricing instance, or a new instance with scaled prices.</returns>
private static ModelPricing ApplyServiceTier(ModelPricing pricing, string? serviceTier)
{
    var normalizedTier = serviceTier?.Trim();
    var isFlexTier = string.Equals(normalizedTier, "flex", StringComparison.OrdinalIgnoreCase);

    return isFlexTier
        ? new ModelPricing(
            InputPrice: pricing.InputPrice * FlexPriceMultiplier,
            OutputPrice: pricing.OutputPrice * FlexPriceMultiplier,
            CachedInputPrice: pricing.CachedInputPrice * FlexPriceMultiplier)
        : pricing;
}
| | | 123 | | } |
| | | 124 | | |
/// <summary>
/// Static lookup table of short-context standard pricing for OpenAI models, keyed by model name.
/// </summary>
/// <remarks>
/// Source: <see href="https://developers.openai.com/api/docs/pricing.md">OpenAI API pricing</see>.
/// Flex processing applies the same short-context rates as Batch pricing for supported models.
/// </remarks>
internal static class ModelPricingData
{
    // Constructor arguments are (input, output, cached input); entries with two arguments
    // leave the cached-input price null.
    public static readonly Dictionary<string, ModelPricing> Pricing = new Dictionary<string, ModelPricing>
    {
        { "gpt-4.1", new ModelPricing(2.00m, 8.00m, 0.50m) },
        { "gpt-4.1-mini", new ModelPricing(0.40m, 1.60m, 0.10m) },
        { "gpt-4.1-nano", new ModelPricing(0.10m, 0.40m, 0.025m) },
        { "gpt-4.5-preview", new ModelPricing(75.00m, 150.00m, 37.50m) },
        { "gpt-4o", new ModelPricing(2.50m, 10.00m, 1.25m) },
        { "gpt-4o-mini", new ModelPricing(0.15m, 0.60m, 0.075m) },
        { "gpt-5.5", new ModelPricing(5.00m, 30.00m, 0.50m) },
        { "gpt-5.4", new ModelPricing(2.50m, 15.00m, 0.25m) },
        { "gpt-5.4-mini", new ModelPricing(0.75m, 4.50m, 0.075m) },
        { "gpt-5.4-nano", new ModelPricing(0.20m, 1.25m, 0.02m) },
        { "gpt-5", new ModelPricing(1.25m, 10.00m, 0.125m) },
        { "gpt-5-mini", new ModelPricing(0.25m, 2.00m, 0.025m) },
        { "gpt-5-nano", new ModelPricing(0.05m, 0.40m, 0.005m) },
        { "o1", new ModelPricing(15.00m, 60.00m, 7.50m) },
        { "o1-pro", new ModelPricing(150.00m, 600.00m) },
        { "o3", new ModelPricing(2.00m, 8.00m, 0.50m) },
        { "o3-pro", new ModelPricing(20.00m, 80.00m) },
        { "o4-mini", new ModelPricing(1.10m, 4.40m, 0.275m) },
        { "o3-mini", new ModelPricing(1.10m, 4.40m, 0.55m) },
        { "o1-mini", new ModelPricing(1.10m, 4.40m, 0.55m) },
    };
}
| | | 158 | | |
/// <summary>
/// Token prices for a single OpenAI model, each expressed per 1M tokens.
/// </summary>
/// <param name="InputPrice">Price per 1M input tokens</param>
/// <param name="OutputPrice">Price per 1M output tokens</param>
/// <param name="CachedInputPrice">Price per 1M cached input tokens; null when not supplied</param>
internal record ModelPricing(
    decimal InputPrice,
    decimal OutputPrice,
    decimal? CachedInputPrice = null);