| | | 1 | | using System.Globalization; |
| | | 2 | | using CsvHelper; |
| | | 3 | | |
| | | 4 | | namespace EHonda.KicktippAi.Core; |
| | | 5 | | |
/// <summary>
/// One row of a history date map: the fields identifying a match, its date value,
/// and the provenance columns stored alongside it.
/// </summary>
/// <param name="DocumentName">Name of the history document the row belongs to.</param>
/// <param name="Competition">Value of the <c>Competition</c> column.</param>
/// <param name="HomeTeam">Value of the <c>Home_Team</c> column.</param>
/// <param name="AwayTeam">Value of the <c>Away_Team</c> column.</param>
/// <param name="Score">Value of the <c>Score</c> column.</param>
/// <param name="Annotation">Value of the <c>Annotation</c> column.</param>
/// <param name="PlayedAt">Date or timestamp text associated with the row.</param>
/// <param name="SourceName">Value of the <c>Source_Name</c> column.</param>
/// <param name="SourceUrl">Value of the <c>Source_Url</c> column.</param>
/// <param name="VerifiedAt">Value of the <c>Verified_At</c> column.</param>
/// <param name="Notes">Value of the <c>Notes</c> column.</param>
public sealed record HistoryDateMapEntry(
    string DocumentName,
    string Competition,
    string HomeTeam,
    string AwayTeam,
    string Score,
    string Annotation,
    string PlayedAt,
    string SourceName,
    string SourceUrl,
    string VerifiedAt,
    string Notes);
| | | 18 | | |
/// <summary>
/// Outcome of applying a date map to a history CSV document: the resulting content,
/// row counters, and the rows that could not be resolved.
/// </summary>
public sealed record HistoryDateMapApplyResult
{
    /// <summary>
    /// Creates a result. The optional counters default to zero and
    /// <paramref name="MissingPredictionEntries"/> defaults to an empty list when null.
    /// </summary>
    public HistoryDateMapApplyResult(
        string Content,
        int RowCount,
        int UpdatedRowCount,
        IReadOnlyList<HistoryDateMapEntry> MissingEntries,
        int PreservedRowCount = 0,
        int SkippedRowCount = 0,
        IReadOnlyList<HistoryDateMapEntry>? MissingPredictionEntries = null)
    {
        // Parameter names intentionally mirror the property names, so "this." is required.
        (this.Content, this.RowCount, this.UpdatedRowCount, this.MissingEntries) =
            (Content, RowCount, UpdatedRowCount, MissingEntries);
        (this.PreservedRowCount, this.SkippedRowCount) = (PreservedRowCount, SkippedRowCount);

        if (MissingPredictionEntries is null)
        {
            this.MissingPredictionEntries = Array.Empty<HistoryDateMapEntry>();
        }
        else
        {
            this.MissingPredictionEntries = MissingPredictionEntries;
        }
    }

    /// <summary>CSV content carried by the result.</summary>
    public string Content { get; init; }

    /// <summary>Number of data rows reported.</summary>
    public int RowCount { get; init; }

    /// <summary>Number of rows whose date value was changed.</summary>
    public int UpdatedRowCount { get; init; }

    /// <summary>Entries for which no usable date was available.</summary>
    public IReadOnlyList<HistoryDateMapEntry> MissingEntries { get; init; }

    /// <summary>Number of rows whose existing timestamp was kept as-is.</summary>
    public int PreservedRowCount { get; init; }

    /// <summary>Number of unresolved rows that were skipped rather than reported as missing.</summary>
    public int SkippedRowCount { get; init; }

    /// <summary>Rows that needed a prediction-derived date but had none; never null.</summary>
    public IReadOnlyList<HistoryDateMapEntry> MissingPredictionEntries { get; init; }
}
| | | 47 | | |
/// <summary>
/// Options controlling how <c>HistoryCsvUtility.ApplyDateMap</c> treats rows.
/// </summary>
/// <param name="ApplyKnownOnly">
/// When true, rows without a usable date-map entry are counted as skipped instead of being
/// reported as missing.
/// </param>
/// <param name="PreserveCollectedOnOrAfter">
/// Optional cut-off date used to classify a row's existing date value; null disables that classification.
/// </param>
/// <param name="PredictionDateEntries">
/// Optional entries whose <c>PlayedAt</c> values are consulted for rows that must be replaced from predictions.
/// </param>
public sealed record HistoryDateMapApplyOptions(
    bool ApplyKnownOnly = false,
    DateOnly? PreserveCollectedOnOrAfter = null,
    IReadOnlyList<HistoryDateMapEntry>? PredictionDateEntries = null)
{
    /// <summary>Shared instance with every option left at its default value.</summary>
    public static HistoryDateMapApplyOptions Strict { get; } = new HistoryDateMapApplyOptions();
}
| | | 55 | | |
| | | 56 | | /// <summary> |
| | | 57 | | /// Utility class for handling date columns in history CSV documents. |
| | | 58 | | /// </summary> |
| | | 59 | | public static class HistoryCsvUtility |
| | | 60 | | { |
/// <summary>Header name of the collection-date column.</summary>
public const string DataCollectedAtColumnName = "Data_Collected_At";

/// <summary>Header name of the played-at column; it is read in preference to the collection-date column.</summary>
public const string PlayedAtColumnName = "Played_At";

// Column order used when writing a date-map CSV.
private static readonly string[] DateMapHeaders =
{
    "DocumentName",
    "Competition",
    "Home_Team",
    "Away_Team",
    "Score",
    "Annotation",
    PlayedAtColumnName,
    "Source_Name",
    "Source_Url",
    "Verified_At",
    "Notes",
};

// Timestamp layouts accepted as exact played-at timestamps (with and without fractional seconds).
private static readonly string[] PlayedAtTimestampFormats =
{
    "yyyy-MM-dd'T'HH:mm:sszzz",
    "yyyy-MM-dd'T'HH:mm:ss.FFFFFFFzzz",
};

// How a row's existing date value is handled relative to the preserve cut-off.
private enum ExistingDateTreatment
{
    // No special handling; the regular date-map lookup applies.
    None,

    // The value is a plain date on or after the cut-off.
    ReplaceFromPrediction,

    // The value is a timestamp whose date is on or after the cut-off.
    PreserveExistingTimestamp
}
| | | 91 | | |
/// <summary>
/// Adds the Data_Collected_At column to a history CSV document that has no history date column yet.
/// </summary>
/// <param name="csvContent">The original CSV content.</param>
/// <param name="previousCsvContent">The previous version of the CSV content (null if this is the first version).</param>
/// <param name="collectedDate">The date when the data was collected (e.g., "2025-08-30").</param>
/// <returns>
/// <paramref name="csvContent"/> unchanged when it already has a history date column;
/// <paramref name="previousCsvContent"/> when it uses Played_At and the rebuilt document is otherwise
/// identical to it; otherwise the rebuilt CSV content with the Data_Collected_At column.
/// </returns>
public static string AddDataCollectedAtColumn(string csvContent, string? previousCsvContent, string collectedDate)
{
    // Nothing to add when a date column is already present.
    if (HasHistoryDateColumn(csvContent))
    {
        return csvContent;
    }

    // Dates already recorded for matches in the previous version, if there is one.
    Dictionary<string, string> knownDates;
    if (previousCsvContent != null)
    {
        knownDates = ExtractMatchesWithCollectionDates(previousCsvContent);
    }
    else
    {
        knownDates = new Dictionary<string, string>();
    }

    var rebuilt = BuildCsvWithDataCollectedAt(
        csvContent,
        ExtractMatches(csvContent),
        knownDates,
        collectedDate);

    if (!HasPlayedAtColumn(previousCsvContent))
    {
        return rebuilt;
    }

    // The previous version already used Played_At: if the rebuilt rows and dates match it
    // apart from the header name and line endings, hand back the previous payload untouched
    // to avoid transient version churn in WM26 collect-context -> date-map workflows.
    var rebuiltAsPlayedAt = NormalizeLineEndings(ReplaceDataCollectedAtHeaderWithPlayedAt(rebuilt));
    var previousNormalized = NormalizeLineEndings(previousCsvContent!);

    return string.Equals(rebuiltAsPlayedAt, previousNormalized, StringComparison.Ordinal)
        ? previousCsvContent!
        : rebuilt;
}
| | | 135 | | |
/// <summary>
/// Parses a date-map CSV into entries. Columns that are absent yield empty strings.
/// </summary>
/// <param name="csvContent">Date-map CSV text including its header row.</param>
/// <returns>
/// The parsed entries; an empty list when the content is blank or when parsing throws.
/// </returns>
public static IReadOnlyList<HistoryDateMapEntry> ReadDateMapEntries(string csvContent)
{
    var parsed = new List<HistoryDateMapEntry>();

    if (!string.IsNullOrWhiteSpace(csvContent))
    {
        using var text = new StringReader(csvContent);
        using var csv = new CsvReader(text, CultureInfo.InvariantCulture);

        string Cell(string column) => GetOptionalField(csv, column);

        try
        {
            csv.Read();
            csv.ReadHeader();

            while (csv.Read())
            {
                var entry = new HistoryDateMapEntry(
                    DocumentName: Cell("DocumentName"),
                    Competition: Cell("Competition"),
                    HomeTeam: Cell("Home_Team"),
                    AwayTeam: Cell("Away_Team"),
                    Score: Cell("Score"),
                    Annotation: Cell("Annotation"),
                    PlayedAt: Cell(PlayedAtColumnName),
                    SourceName: Cell("Source_Name"),
                    SourceUrl: Cell("Source_Url"),
                    VerifiedAt: Cell("Verified_At"),
                    Notes: Cell("Notes"));
                parsed.Add(entry);
            }
        }
        catch (Exception)
        {
            // Best effort: a malformed document is reported as having no entries at all.
            return Array.Empty<HistoryDateMapEntry>();
        }
    }

    return parsed.AsReadOnly();
}
| | | 175 | | |
/// <summary>
/// Serializes date-map entries to CSV: a header row in <c>DateMapHeaders</c> order,
/// followed by one record per entry.
/// </summary>
/// <param name="entries">Entries to write, in output order.</param>
/// <returns>The CSV text.</returns>
public static string WriteDateMapEntries(IEnumerable<HistoryDateMapEntry> entries)
{
    using var output = new StringWriter();
    using var csvWriter = new CsvWriter(output, CultureInfo.InvariantCulture);

    foreach (var columnName in DateMapHeaders)
    {
        csvWriter.WriteField(columnName);
    }

    csvWriter.NextRecord();

    foreach (var entry in entries)
    {
        // Cell order must match DateMapHeaders.
        string[] cells =
        {
            entry.DocumentName,
            entry.Competition,
            entry.HomeTeam,
            entry.AwayTeam,
            entry.Score,
            entry.Annotation,
            entry.PlayedAt,
            entry.SourceName,
            entry.SourceUrl,
            entry.VerifiedAt,
            entry.Notes,
        };

        foreach (var cell in cells)
        {
            csvWriter.WriteField(cell);
        }

        csvWriter.NextRecord();
    }

    return output.ToString();
}
| | | 206 | | |
/// <summary>
/// Converts every row of a history CSV document into a date-map entry with empty provenance fields.
/// </summary>
/// <param name="documentName">Document name stamped on every produced entry.</param>
/// <param name="csvContent">History CSV text including its header row.</param>
/// <param name="includeExistingDataCollectedAt">
/// When true, <c>PlayedAt</c> is filled from the row's history date column; otherwise it is left empty.
/// </param>
/// <returns>
/// One entry per row; an empty list when the content is blank or when parsing throws.
/// </returns>
public static IReadOnlyList<HistoryDateMapEntry> ExtractDateMapEntries(
    string documentName,
    string csvContent,
    bool includeExistingDataCollectedAt = false)
{
    var extracted = new List<HistoryDateMapEntry>();

    if (!string.IsNullOrWhiteSpace(csvContent))
    {
        using var text = new StringReader(csvContent);
        using var csv = new CsvReader(text, CultureInfo.InvariantCulture);

        try
        {
            csv.Read();
            csv.ReadHeader();

            while (csv.Read())
            {
                string existingDate;
                if (includeExistingDataCollectedAt)
                {
                    existingDate = GetHistoryDateField(csv);
                }
                else
                {
                    existingDate = "";
                }

                extracted.Add(new HistoryDateMapEntry(
                    DocumentName: documentName,
                    Competition: GetOptionalField(csv, "Competition"),
                    HomeTeam: GetOptionalField(csv, "Home_Team"),
                    AwayTeam: GetOptionalField(csv, "Away_Team"),
                    Score: GetOptionalField(csv, "Score"),
                    Annotation: GetOptionalField(csv, "Annotation"),
                    PlayedAt: existingDate,
                    SourceName: "",
                    SourceUrl: "",
                    VerifiedAt: "",
                    Notes: ""));
            }
        }
        catch (Exception)
        {
            // Best effort: a malformed document is reported as having no entries at all.
            return Array.Empty<HistoryDateMapEntry>();
        }
    }

    return extracted.AsReadOnly();
}
| | | 253 | | |
/// <summary>
/// Lists the "WM" competition rows whose existing date value is classified as
/// <c>ReplaceFromPrediction</c> for the given cut-off.
/// </summary>
/// <param name="documentName">Document name stamped on every produced entry.</param>
/// <param name="csvContent">History CSV text including its header row.</param>
/// <param name="preserveCollectedOnOrAfter">Cut-off date passed to the date classification.</param>
/// <returns>
/// The matching rows; an empty list when the content is blank or when parsing throws.
/// </returns>
public static IReadOnlyList<HistoryDateMapEntry> ExtractRowsRequiringPredictionPlayedAt(
    string documentName,
    string csvContent,
    DateOnly preserveCollectedOnOrAfter)
{
    var selected = new List<HistoryDateMapEntry>();

    if (!string.IsNullOrWhiteSpace(csvContent))
    {
        using var text = new StringReader(csvContent);
        using var csv = new CsvReader(text, CultureInfo.InvariantCulture);

        try
        {
            csv.Read();
            csv.ReadHeader();

            while (csv.Read())
            {
                var candidate = new HistoryDateMapEntry(
                    documentName,
                    GetOptionalField(csv, "Competition"),
                    GetOptionalField(csv, "Home_Team"),
                    GetOptionalField(csv, "Away_Team"),
                    GetOptionalField(csv, "Score"),
                    GetOptionalField(csv, "Annotation"),
                    PlayedAt: GetHistoryDateField(csv),
                    SourceName: "",
                    SourceUrl: "",
                    VerifiedAt: "",
                    Notes: "");

                if (!IsWorldCupTournamentRow(candidate))
                {
                    continue;
                }

                var treatment = GetExistingDateTreatment(candidate.PlayedAt, preserveCollectedOnOrAfter);
                if (treatment == ExistingDateTreatment.ReplaceFromPrediction)
                {
                    selected.Add(candidate);
                }
            }
        }
        catch (Exception)
        {
            // Best effort: a malformed document is reported as having no rows at all.
            return Array.Empty<HistoryDateMapEntry>();
        }
    }

    return selected.AsReadOnly();
}
| | | 302 | | |
/// <summary>
/// Applies date-map entries to a history CSV document and rewrites it with a Played_At column.
/// </summary>
/// <param name="documentName">Only map entries with this document name (case-insensitive) are used.</param>
/// <param name="csvContent">History CSV text including its header row.</param>
/// <param name="dateMapEntries">Date-map entries; entries sharing a key are consumed in order, one per row.</param>
/// <param name="options">Apply options; <see cref="HistoryDateMapApplyOptions.Strict"/> when null.</param>
/// <returns>
/// When any row is reported missing (regular or prediction), the original content with
/// <c>UpdatedRowCount</c> 0 and the missing lists. When the content is blank or parsing throws,
/// the original content with zero counts. Otherwise the rewritten CSV (columns Competition,
/// Played_At, Home_Team, Away_Team, Score, Annotation) with all counters.
/// </returns>
public static HistoryDateMapApplyResult ApplyDateMap(
    string documentName,
    string csvContent,
    IReadOnlyList<HistoryDateMapEntry> dateMapEntries,
    HistoryDateMapApplyOptions? options = null)
{
    var effectiveOptions = options ?? HistoryDateMapApplyOptions.Strict;
    var keyComparer = StringComparer.OrdinalIgnoreCase;

    // Entries for this document, grouped by their normalized match key.
    IEnumerable<IGrouping<string, HistoryDateMapEntry>> GroupForDocument(IEnumerable<HistoryDateMapEntry> source) =>
        source
            .Where(candidate => string.Equals(candidate.DocumentName, documentName, StringComparison.OrdinalIgnoreCase))
            .GroupBy(CreateDateMapKey, keyComparer);

    // Regular map: a FIFO queue per key so duplicate rows consume entries in file order.
    var datesByKey = GroupForDocument(dateMapEntries)
        .ToDictionary(
            grouping => grouping.Key,
            grouping => new Queue<HistoryDateMapEntry>(grouping),
            keyComparer);

    // Prediction map: the last entry per key wins.
    var timestampsByKey = GroupForDocument(effectiveOptions.PredictionDateEntries ?? Array.Empty<HistoryDateMapEntry>())
        .ToDictionary(
            grouping => grouping.Key,
            grouping => grouping.Last().PlayedAt.Trim(),
            keyComparer);

    var unresolved = new List<HistoryDateMapEntry>();
    var unresolvedPredictions = new List<HistoryDateMapEntry>();
    var outputRows = new List<HistoryDateMapEntry>();
    var changedCount = 0;
    var keptCount = 0;
    var skippedCount = 0;

    // Emits the row with a new date value and counts it when the value actually changed.
    void EmitWithPlayedAt(HistoryDateMapEntry source, string playedAt)
    {
        outputRows.Add(source with { PlayedAt = playedAt });
        if (!string.Equals(source.PlayedAt, playedAt, StringComparison.Ordinal))
        {
            changedCount++;
        }
    }

    // Emits the row unchanged and either skips it or reports the given entry as missing.
    void EmitUnresolved(HistoryDateMapEntry source, HistoryDateMapEntry reported)
    {
        if (effectiveOptions.ApplyKnownOnly)
        {
            skippedCount++;
        }
        else
        {
            unresolved.Add(reported);
        }

        outputRows.Add(source);
    }

    if (string.IsNullOrWhiteSpace(csvContent))
    {
        return new HistoryDateMapApplyResult(csvContent, RowCount: 0, UpdatedRowCount: 0, unresolved);
    }

    using var text = new StringReader(csvContent);
    using var csv = new CsvReader(text, CultureInfo.InvariantCulture);

    try
    {
        csv.Read();
        csv.ReadHeader();

        while (csv.Read())
        {
            var current = new HistoryDateMapEntry(
                documentName,
                GetOptionalField(csv, "Competition"),
                GetOptionalField(csv, "Home_Team"),
                GetOptionalField(csv, "Away_Team"),
                GetOptionalField(csv, "Score"),
                GetOptionalField(csv, "Annotation"),
                PlayedAt: GetHistoryDateField(csv),
                SourceName: "",
                SourceUrl: "",
                VerifiedAt: "",
                Notes: "");
            var key = CreateDateMapKey(current);

            if (effectiveOptions.PreserveCollectedOnOrAfter is { } cutoff)
            {
                var treatment = GetExistingDateTreatment(current.PlayedAt, cutoff);

                if (treatment == ExistingDateTreatment.PreserveExistingTimestamp)
                {
                    keptCount++;
                    outputRows.Add(current);
                    continue;
                }

                if (treatment == ExistingDateTreatment.ReplaceFromPrediction && IsWorldCupTournamentRow(current))
                {
                    // First choice: an exact timestamp from the prediction entries.
                    if (timestampsByKey.TryGetValue(key, out var predicted) && IsExactTimestamp(predicted))
                    {
                        EmitWithPlayedAt(current, predicted);
                        continue;
                    }

                    // Fallback: the next regular map entry, but only when it holds an exact date.
                    if (datesByKey.TryGetValue(key, out var fallbackQueue) &&
                        fallbackQueue.Count > 0 &&
                        IsExactDate(fallbackQueue.Peek().PlayedAt))
                    {
                        EmitWithPlayedAt(current, fallbackQueue.Dequeue().PlayedAt.Trim());
                        continue;
                    }

                    unresolvedPredictions.Add(current);
                    outputRows.Add(current);
                    continue;
                }
            }

            if (!datesByKey.TryGetValue(key, out var queue) || queue.Count == 0)
            {
                // No map entry left for this row: the row itself is what gets reported.
                EmitUnresolved(current, current);
                continue;
            }

            // The entry is consumed whether or not its date is usable.
            var mapEntry = queue.Dequeue();
            if (IsExactDate(mapEntry.PlayedAt))
            {
                EmitWithPlayedAt(current, mapEntry.PlayedAt.Trim());
            }
            else
            {
                EmitUnresolved(current, mapEntry);
            }
        }
    }
    catch (Exception)
    {
        return new HistoryDateMapApplyResult(csvContent, RowCount: 0, UpdatedRowCount: 0, unresolved);
    }

    // Any missing entry leaves the document untouched.
    if (unresolved.Count > 0 || unresolvedPredictions.Count > 0)
    {
        return new HistoryDateMapApplyResult(
            csvContent,
            outputRows.Count,
            UpdatedRowCount: 0,
            unresolved,
            MissingPredictionEntries: unresolvedPredictions);
    }

    using var output = new StringWriter();
    using var csvWriter = new CsvWriter(output, CultureInfo.InvariantCulture);

    string[] outputHeader = { "Competition", PlayedAtColumnName, "Home_Team", "Away_Team", "Score", "Annotation" };
    foreach (var columnName in outputHeader)
    {
        csvWriter.WriteField(columnName);
    }

    csvWriter.NextRecord();

    foreach (var outputRow in outputRows)
    {
        string[] cells =
        {
            outputRow.Competition,
            outputRow.PlayedAt,
            outputRow.HomeTeam,
            outputRow.AwayTeam,
            outputRow.Score,
            outputRow.Annotation,
        };

        foreach (var cell in cells)
        {
            csvWriter.WriteField(cell);
        }

        csvWriter.NextRecord();
    }

    return new HistoryDateMapApplyResult(
        Content: output.ToString(),
        RowCount: outputRows.Count,
        UpdatedRowCount: changedCount,
        MissingEntries: unresolved,
        PreservedRowCount: keptCount,
        SkippedRowCount: skippedCount,
        MissingPredictionEntries: unresolvedPredictions);
}
| | | 497 | | |
/// <summary>
/// Checks if the CSV content already has a recognized history date column
/// (Data_Collected_At or Played_At) in its header line.
/// </summary>
/// <param name="csvContent">CSV text; null or empty content has no header.</param>
/// <returns>True when the header line contains either column name (case-insensitive).</returns>
private static bool HasHistoryDateColumn(string csvContent)
{
    var header = ReadHeaderLine(csvContent);
    if (header is null)
    {
        return false;
    }

    return header.Contains(DataCollectedAtColumnName, StringComparison.OrdinalIgnoreCase)
        || header.Contains(PlayedAtColumnName, StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Checks if the CSV content has a Played_At column in its header line.
/// </summary>
/// <param name="csvContent">CSV text; null or blank content has no header.</param>
/// <returns>True when the header line contains the Played_At column name (case-insensitive).</returns>
private static bool HasPlayedAtColumn(string? csvContent)
{
    if (string.IsNullOrWhiteSpace(csvContent))
    {
        return false;
    }

    var header = ReadHeaderLine(csvContent);
    return header is not null
        && header.Contains(PlayedAtColumnName, StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Returns the first non-empty line of the content, or null when there is none.
/// </summary>
/// <remarks>
/// Lines are split on "\n", "\r\n" and "\r". Splitting on '\n' alone treated the "\r" left by a
/// leading CRLF blank line as the header, and treated CR-only documents as one giant header line.
/// Only zero-length lines are skipped, so a whitespace-only first line still counts as the header.
/// Reading line by line also avoids splitting the whole document just to inspect its first line.
/// </remarks>
private static string? ReadHeaderLine(string? csvContent)
{
    if (string.IsNullOrEmpty(csvContent))
    {
        return null;
    }

    using var reader = new StringReader(csvContent);
    while (reader.ReadLine() is { } line)
    {
        if (line.Length > 0)
        {
            return line;
        }
    }

    return null;
}
| | | 529 | | |
/// <summary>
/// Renames the Data_Collected_At column to Played_At in the header line only.
/// </summary>
/// <remarks>
/// Only the first non-empty line is rewritten. Replacing across the whole document would also
/// alter data cells that happen to contain the column name, which makes the caller's
/// "identical to previous version" comparison fail for content that did not actually change.
/// </remarks>
/// <param name="csvContent">CSV text whose header should be renamed.</param>
/// <returns>The content with the header renamed and every data row untouched.</returns>
private static string ReplaceDataCollectedAtHeaderWithPlayedAt(string csvContent)
{
    // Skip any leading line breaks so the header is the first line with content.
    var headerStart = 0;
    while (headerStart < csvContent.Length && (csvContent[headerStart] is '\r' or '\n'))
    {
        headerStart++;
    }

    var headerEnd = csvContent.IndexOfAny(new[] { '\r', '\n' }, headerStart);
    if (headerEnd < 0)
    {
        headerEnd = csvContent.Length;
    }

    var renamedHeader = csvContent[headerStart..headerEnd].Replace(
        DataCollectedAtColumnName,
        PlayedAtColumnName,
        StringComparison.Ordinal);

    return csvContent[..headerStart] + renamedHeader + csvContent[headerEnd..];
}
| | | 537 | | |
/// <summary>
/// Converts every "\r\n" and every remaining lone "\r" to "\n".
/// </summary>
private static string NormalizeLineEndings(string value)
{
    // CRLF must be collapsed first so its "\r" is not turned into a second line break.
    var withoutCrLf = value.Replace("\r\n", "\n", StringComparison.Ordinal);
    return withoutCrLf.Replace("\r", "\n", StringComparison.Ordinal);
}
| | | 544 | | |
/// <summary>
/// Collects the match keys of all rows in CSV content without a history date column.
/// </summary>
/// <param name="csvContent">CSV text including its header row.</param>
/// <returns>
/// The set of match keys; when parsing throws, whatever was collected up to that point (possibly empty).
/// </returns>
private static HashSet<string> ExtractMatches(string csvContent)
{
    var keys = new HashSet<string>();

    using var text = new StringReader(csvContent);
    using var csv = new CsvReader(text, CultureInfo.InvariantCulture);

    try
    {
        csv.Read();
        csv.ReadHeader();

        while (csv.Read())
        {
            var competition = csv.GetField("Competition") ?? "";
            var homeTeam = csv.GetField("Home_Team") ?? "";
            var awayTeam = csv.GetField("Away_Team") ?? "";
            var score = csv.GetField("Score") ?? "";
            // Annotation is optional: a missing column yields an empty string instead of throwing.
            var annotation = GetOptionalField(csv, "Annotation");

            keys.Add(CreateMatchKey(competition, homeTeam, awayTeam, score, annotation));
        }
    }
    catch (Exception)
    {
        // Parsing failed: fall through and return what has been gathered so far.
    }

    return keys;
}
| | | 579 | | |
/// <summary>
/// Maps each match key to its history date for CSV content that has a history date column.
/// </summary>
/// <param name="csvContent">CSV text including its header row.</param>
/// <returns>
/// Match key to date text; empty when the content has no history date column. When parsing throws,
/// whatever was collected up to that point. For duplicate keys the last row wins.
/// </returns>
private static Dictionary<string, string> ExtractMatchesWithCollectionDates(string csvContent)
{
    var datesByKey = new Dictionary<string, string>();

    if (HasHistoryDateColumn(csvContent))
    {
        using var text = new StringReader(csvContent);
        using var csv = new CsvReader(text, CultureInfo.InvariantCulture);

        try
        {
            csv.Read();
            csv.ReadHeader();

            while (csv.Read())
            {
                var competition = csv.GetField("Competition") ?? "";
                var recordedDate = GetHistoryDateField(csv);
                var homeTeam = csv.GetField("Home_Team") ?? "";
                var awayTeam = csv.GetField("Away_Team") ?? "";
                var score = csv.GetField("Score") ?? "";
                // Annotation is optional: a missing column yields an empty string instead of throwing.
                var annotation = GetOptionalField(csv, "Annotation");

                var key = CreateMatchKey(competition, homeTeam, awayTeam, score, annotation);
                datesByKey[key] = recordedDate;
            }
        }
        catch (Exception)
        {
            // Parsing failed: fall through and return what has been gathered so far.
        }
    }

    return datesByKey;
}
| | | 620 | | |
/// <summary>
/// Builds a new CSV with the Data_Collected_At column inserted after Competition.
/// </summary>
/// <param name="originalCsvContent">CSV text without a history date column.</param>
/// <param name="currentMatches">Match keys of the current content; not read by this method.</param>
/// <param name="previousMatches">Match key to date text from the previous version.</param>
/// <param name="collectedDate">Date used for matches that are absent from <paramref name="previousMatches"/>.</param>
/// <returns>
/// The rebuilt CSV (columns Competition, Data_Collected_At, Home_Team, Away_Team, Score, Annotation),
/// or <paramref name="originalCsvContent"/> when parsing throws.
/// </returns>
private static string BuildCsvWithDataCollectedAt(
    string originalCsvContent,
    HashSet<string> currentMatches,
    Dictionary<string, string> previousMatches,
    string collectedDate)
{
    using var text = new StringReader(originalCsvContent);
    using var csv = new CsvReader(text, CultureInfo.InvariantCulture);

    using var output = new StringWriter();
    using var csvWriter = new CsvWriter(output, CultureInfo.InvariantCulture);

    // Writes one output record, in the fixed column order of the new header.
    void WriteRecord(params string[] cells)
    {
        foreach (var cell in cells)
        {
            csvWriter.WriteField(cell);
        }

        csvWriter.NextRecord();
    }

    try
    {
        csv.Read();
        csv.ReadHeader();

        WriteRecord("Competition", DataCollectedAtColumnName, "Home_Team", "Away_Team", "Score", "Annotation");

        while (csv.Read())
        {
            var competition = csv.GetField("Competition") ?? "";
            var homeTeam = csv.GetField("Home_Team") ?? "";
            var awayTeam = csv.GetField("Away_Team") ?? "";
            var score = csv.GetField("Score") ?? "";
            // Annotation is optional: a missing column yields an empty string instead of throwing.
            var annotation = GetOptionalField(csv, "Annotation");

            var key = CreateMatchKey(competition, homeTeam, awayTeam, score, annotation);

            // Matches known from the previous version keep their date; new ones get the current date.
            var dataCollectedAt = previousMatches.TryGetValue(key, out var knownDate)
                ? knownDate
                : collectedDate;

            WriteRecord(competition, dataCollectedAt, homeTeam, awayTeam, score, annotation);
        }
    }
    catch (Exception)
    {
        // Parsing failed: hand back the input untouched.
        return originalCsvContent;
    }

    return output.ToString();
}
| | | 690 | | |
/// <summary>
/// Builds the lookup key for a match by joining its five identifying fields with '|'.
/// </summary>
private static string CreateMatchKey(string competition, string homeTeam, string awayTeam, string score, string annotation)
{
    return string.Join("|", competition, homeTeam, awayTeam, score, annotation);
}
| | | 698 | | |
/// <summary>
/// Builds the match key for a date-map entry from its trimmed identifying fields.
/// </summary>
private static string CreateDateMapKey(HistoryDateMapEntry entry)
{
    var competition = NormalizeKeyPart(entry.Competition);
    var homeTeam = NormalizeKeyPart(entry.HomeTeam);
    var awayTeam = NormalizeKeyPart(entry.AwayTeam);
    var score = NormalizeKeyPart(entry.Score);
    var annotation = NormalizeKeyPart(entry.Annotation);

    return CreateMatchKey(competition, homeTeam, awayTeam, score, annotation);
}
| | | 708 | | |
/// <summary>
/// Returns true when the entry's trimmed competition equals "WM", ignoring case.
/// </summary>
private static bool IsWorldCupTournamentRow(HistoryDateMapEntry entry)
{
    var competition = NormalizeKeyPart(entry.Competition);
    return competition.Equals("WM", StringComparison.OrdinalIgnoreCase);
}
| | | 716 | | |
/// <summary>
/// Trims surrounding whitespace from a key component; null becomes the empty string.
/// </summary>
private static string NormalizeKeyPart(string? value)
{
    if (value is null)
    {
        return "";
    }

    return value.Trim();
}
| | | 721 | | |
/// <summary>
/// Returns true when the trimmed value is a date in exactly the <c>yyyy-MM-dd</c> format.
/// </summary>
private static bool IsExactDate(string value)
{
    var candidate = value.Trim();
    return DateOnly.TryParseExact(
        candidate,
        "yyyy-MM-dd",
        CultureInfo.InvariantCulture,
        DateTimeStyles.None,
        out DateOnly _);
}
| | | 731 | | |
/// <summary>
/// Returns true when the trimmed value matches one of the <c>PlayedAtTimestampFormats</c>.
/// </summary>
private static bool IsExactTimestamp(string value)
{
    var candidate = value.Trim();
    return DateTimeOffset.TryParseExact(
        candidate,
        PlayedAtTimestampFormats,
        CultureInfo.InvariantCulture,
        DateTimeStyles.None,
        out DateTimeOffset _);
}
| | | 741 | | |
/// <summary>
/// Classifies a row's existing date value relative to the cut-off date.
/// </summary>
/// <param name="value">Existing date or timestamp text of the row.</param>
/// <param name="preserveCollectedOnOrAfter">Cut-off date (inclusive).</param>
/// <returns>
/// <c>ReplaceFromPrediction</c> for a plain date on or after the cut-off;
/// <c>PreserveExistingTimestamp</c> for a timestamp whose date is on or after the cut-off;
/// <c>None</c> otherwise.
/// </returns>
private static ExistingDateTreatment GetExistingDateTreatment(string value, DateOnly preserveCollectedOnOrAfter)
{
    // A plain date is decided here and never falls through to the timestamp check.
    if (TryParseExactDate(value, out var plainDate))
    {
        if (plainDate < preserveCollectedOnOrAfter)
        {
            return ExistingDateTreatment.None;
        }

        return ExistingDateTreatment.ReplaceFromPrediction;
    }

    var isRecentTimestamp =
        TryParseExactTimestampDate(value, out var timestampDate) &&
        timestampDate >= preserveCollectedOnOrAfter;

    return isRecentTimestamp
        ? ExistingDateTreatment.PreserveExistingTimestamp
        : ExistingDateTreatment.None;
}
| | | 758 | | |
/// <summary>
/// Tries to parse the trimmed value as a date in exactly the <c>yyyy-MM-dd</c> format.
/// </summary>
/// <param name="value">Text to parse.</param>
/// <param name="date">The parsed date on success.</param>
/// <returns>True when parsing succeeded.</returns>
private static bool TryParseExactDate(string value, out DateOnly date)
{
    var candidate = value.Trim();
    var parsed = DateOnly.TryParseExact(
        candidate,
        "yyyy-MM-dd",
        CultureInfo.InvariantCulture,
        DateTimeStyles.None,
        out date);
    return parsed;
}
| | | 768 | | |
/// <summary>
/// Tries to parse the trimmed value with one of the <c>PlayedAtTimestampFormats</c> and
/// extracts the date part of the parsed timestamp's <c>DateTime</c>.
/// </summary>
/// <param name="value">Text to parse.</param>
/// <param name="date">The extracted date on success; <c>default</c> otherwise.</param>
/// <returns>True when parsing succeeded.</returns>
private static bool TryParseExactTimestampDate(string value, out DateOnly date)
{
    var parsed = DateTimeOffset.TryParseExact(
        value.Trim(),
        PlayedAtTimestampFormats,
        CultureInfo.InvariantCulture,
        DateTimeStyles.None,
        out var timestamp);

    date = parsed ? DateOnly.FromDateTime(timestamp.DateTime) : default;
    return parsed;
}
| | | 785 | | |
/// <summary>
/// Reads the current row's history date: the Played_At value when it is not blank,
/// otherwise the Data_Collected_At value.
/// </summary>
private static string GetHistoryDateField(CsvReader csv)
{
    var playedAt = GetOptionalField(csv, PlayedAtColumnName);
    if (!string.IsNullOrWhiteSpace(playedAt))
    {
        return playedAt;
    }

    return GetOptionalField(csv, DataCollectedAtColumnName);
}
| | | 793 | | |
/// <summary>
/// Reads a field from the current row, returning an empty string when the field
/// cannot be read or its value is null.
/// </summary>
private static string GetOptionalField(CsvReader csv, string fieldName)
{
    if (csv.TryGetField<string>(fieldName, out var value) && value is not null)
    {
        return value;
    }

    return "";
}
| | | 798 | | } |