|   |  | 1 |  | using System.Collections.Concurrent; | 
|   |  | 2 |  | using System.Diagnostics.CodeAnalysis; | 
|   |  | 3 |  | using System.Text.RegularExpressions; | 
|   |  | 4 |  |  | 
|   |  | 5 |  | namespace Pozitron.QuerySpecification; | 
|   |  | 6 |  |  | 
/// <summary>
/// In-memory emulation of the SQL <c>LIKE</c> operator. The LIKE pattern is translated into a
/// regular expression, and the most recently built expressions are kept in a small bounded cache.
/// </summary>
internal static class LikeExtension
{
    private static readonly RegexCache _regexCache = new();

    /// <summary>
    /// Translates a SQL LIKE pattern into a case-insensitive, compiled <see cref="Regex"/>
    /// that must match the whole input.
    /// </summary>
    /// <param name="pattern">
    /// The LIKE pattern. <c>%</c> matches any run of characters, <c>_</c> matches exactly one character,
    /// and <c>[...]</c> / <c>[^...]</c> are passed through as regex character classes.
    /// </param>
    /// <returns>The regular expression equivalent to <paramref name="pattern"/>.</returns>
    /// <remarks>
    /// The wildcard translation is applied to the entire escaped pattern, so <c>%</c> and <c>_</c>
    /// that appear inside a <c>[...]</c> set are translated as well.
    /// A pattern that does not form a valid regex (e.g. an unterminated <c>[</c>) makes the
    /// <see cref="Regex"/> constructor throw; the caller is responsible for handling that.
    /// </remarks>
    private static Regex BuildRegex(string pattern)
    {
        // Escape special regex characters, then undo/translate the ones that carry LIKE semantics.
        var regexPattern = Regex
            .Escape(pattern)
            .Replace("%", ".*")     // Translate SQL LIKE wildcard '%' to regex '.*'
            .Replace("_", ".")      // Translate SQL LIKE wildcard '_' to regex '.'
            .Replace(@"\[", "[")    // Unescape '[' as it's used for character classes/ranges
            .Replace(@"\^", "^");   // Unescape '^' as it can be used for negation in character classes

        // Ensure the pattern matches the entire string.
        // '\z' is used instead of '$' because '$' also matches just before a trailing newline,
        // which would let "abc\n" match the pattern "abc".
        regexPattern = "^" + regexPattern + @"\z";

        // Singleline makes '.' match '\n' too, so the LIKE wildcards span line breaks
        // the same way they match "any character" in SQL.
        return new Regex(
            regexPattern,
            RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
    }

    /// <summary>
    /// Determines whether <paramref name="input"/> matches the SQL LIKE <paramref name="pattern"/>
    /// (case-insensitive, whole-string match).
    /// </summary>
    /// <param name="input">The string to test.</param>
    /// <param name="pattern">The SQL LIKE pattern.</param>
    /// <returns><see langword="true"/> if the input matches the pattern; otherwise <see langword="false"/>.</returns>
    /// <exception cref="InvalidLikePatternException">
    /// Thrown when building or evaluating the regular expression fails for any reason;
    /// the original exception is passed along as the inner exception.
    /// </exception>
    public static bool Like(this string input, string pattern)
    {
        try
        {
            // The pattern is dynamic and arbitrary, the consumer might even compose it from end-user input.
            // We can not cache all Regex objects, but at least we can try to reuse the most "recent" ones,
            // so we keep a small, bounded cache of them.
            // This might improve the performance within the same closed loop for the in-memory evaluator and validator.
            var regex = _regexCache.GetOrAdd(pattern, BuildRegex);
            return regex.IsMatch(input);
        }
        catch (Exception ex)
        {
            throw new InvalidLikePatternException(pattern, ex);
        }
    }

    /// <summary>
    /// A small, size-bounded cache of <see cref="Regex"/> instances keyed by the LIKE pattern.
    /// The bound is best-effort: under concurrency the cache may briefly exceed it.
    /// </summary>
    private sealed class RegexCache
    {
        private const int MaxSize = 10;
        private readonly ConcurrentDictionary<string, Regex> _dictionary = new();

        /// <summary>
        /// Returns the cached regex for <paramref name="key"/>, or builds it with
        /// <paramref name="valueFactory"/>, stores it and returns it.
        /// </summary>
        /// <param name="key">The LIKE pattern used as the cache key.</param>
        /// <param name="valueFactory">Factory invoked on a cache miss to build the regex.</param>
        /// <returns>The cached or newly built regex.</returns>
        public Regex GetOrAdd(string key, Func<string, Regex> valueFactory)
        {
            if (_dictionary.TryGetValue(key, out var regex))
            {
                return regex;
            }

            // It might happen we end up with more items than max (concurrency), but we won't be too strict.
            // We're just trying to avoid indefinite growth.
            // Evict enough entries to leave room for the one we're about to add.
            var excess = _dictionary.Count - MaxSize + 1;
            for (var i = 0; i < excess; i++)
            {
                EvictOne();
            }

            var newRegex = valueFactory(key);

            // If another thread added the same key in the meantime, TryAdd is a no-op;
            // the regex we just built is still a valid result for this call.
            _dictionary.TryAdd(key, newRegex);
            return newRegex;
        }

        /// <summary>
        /// Removes one entry, if there is any. Avoid being smart: just drop whatever the dictionary
        /// yields first.
        /// </summary>
        private void EvictOne()
        {
            // Enumerate the dictionary itself rather than its Keys property: Keys materializes a
            // snapshot of every key, which is wasted work when only a single key is needed.
            using var enumerator = _dictionary.GetEnumerator();
            if (enumerator.MoveNext())
            {
                _dictionary.TryRemove(enumerator.Current.Key, out _);
            }
        }
    }

#pragma warning disable IDE0051 // Remove unused private members
    // This C# implementation of SQL Like operator is based on the following SO post https://stackoverflow.com/a/8583383
    // It covers almost all of the scenarios, and it's faster than regex based implementations.
    // It may fail/throw in some very specific and edge cases, hence, wrap it in try/catch.
    // UPDATE: it returns incorrect results for some obvious cases.
    //
    // The method walks the input one character at a time while advancing an index into the pattern.
    // It is not called from anywhere in this class and is kept purely as a reference.
    [ExcludeFromCodeCoverage(Justification = "Dead code. Keeping it just as a reference")]
    private static bool SqlLikeOption2(string str, string pattern)
    {
        var isMatch = true;
        var isWildCardOn = false;       // a '%' is currently being consumed
        var isCharWildCardOn = false;   // the current pattern char was '_'
        var isCharSetOn = false;        // the current pattern token was '[...]'
        var isNotCharSetOn = false;     // the current pattern token was '[^...]'
        var lastWildCard = -1;          // pattern index of the last '%', used to rewind on a mismatch
        var patternIndex = 0;
        var set = new List<char>();     // characters of the current '[...]' set
        var p = '\0';
        bool endOfPattern;

        for (var i = 0; i < str.Length; i++)
        {
            var c = str[i];
            endOfPattern = (patternIndex >= pattern.Length);
            if (!endOfPattern)
            {
                p = pattern[patternIndex];

                if (!isWildCardOn && p == '%')
                {
                    lastWildCard = patternIndex;
                    isWildCardOn = true;
                    // Collapse a run of consecutive '%' and look at the character that follows it.
                    while (patternIndex < pattern.Length &&
                        pattern[patternIndex] == '%')
                    {
                        patternIndex++;
                    }
                    p = patternIndex >= pattern.Length ? '\0' : pattern[patternIndex];
                }
                else if (p == '_')
                {
                    isCharWildCardOn = true;
                    patternIndex++;
                }
                else if (p == '[')
                {
                    // NOTE: the pattern indexers below are not bounds-checked, so a truncated set throws.
                    if (pattern[++patternIndex] == '^')
                    {
                        isNotCharSetOn = true;
                        patternIndex++;
                    }
                    else isCharSetOn = true;

                    set.Clear();
                    // A range in the form "x-y]" is expanded into its individual (upper-cased) characters.
                    if (pattern[patternIndex + 1] == '-' && pattern[patternIndex + 3] == ']')
                    {
                        var start = char.ToUpper(pattern[patternIndex]);
                        patternIndex += 2;
                        var end = char.ToUpper(pattern[patternIndex]);
                        if (start <= end)
                        {
                            for (var ci = start; ci <= end; ci++)
                            {
                                set.Add(ci);
                            }
                        }
                        patternIndex++;
                    }

                    // Any remaining characters up to ']' are added to the set as they are.
                    while (patternIndex < pattern.Length &&
                        pattern[patternIndex] != ']')
                    {
                        set.Add(pattern[patternIndex]);
                        patternIndex++;
                    }
                    patternIndex++;
                }
            }

            if (isWildCardOn)
            {
                // Stay in wildcard mode until the input character equals the character after the '%'.
                if (char.ToUpper(c) == char.ToUpper(p))
                {
                    isWildCardOn = false;
                    patternIndex++;
                }
            }
            else if (isCharWildCardOn)
            {
                isCharWildCardOn = false;
            }
            else if (isCharSetOn || isNotCharSetOn)
            {
                var charMatch = (set.Contains(char.ToUpper(c)));
                if ((isNotCharSetOn && charMatch) || (isCharSetOn && !charMatch))
                {
                    // Mismatch: rewind to the last '%' if there was one, otherwise fail.
                    if (lastWildCard >= 0) patternIndex = lastWildCard;
                    else
                    {
                        isMatch = false;
                        break;
                    }
                }
                isNotCharSetOn = isCharSetOn = false;
            }
            else
            {
                if (char.ToUpper(c) == char.ToUpper(p))
                {
                    patternIndex++;
                }
                else
                {
                    // Mismatch: rewind to the last '%' if there was one, otherwise fail.
                    if (lastWildCard >= 0) patternIndex = lastWildCard;
                    else
                    {
                        isMatch = false;
                        break;
                    }
                }
            }
        }
        endOfPattern = (patternIndex >= pattern.Length);

        // The input is exhausted; a leftover pattern tail is acceptable only if it consists of '%' alone.
        if (isMatch && !endOfPattern)
        {
            var isOnlyWildCards = true;
            for (var i = patternIndex; i < pattern.Length; i++)
            {
                if (pattern[i] != '%')
                {
                    isOnlyWildCards = false;
                    break;
                }
            }
            if (isOnlyWildCards) endOfPattern = true;
        }
        return isMatch && endOfPattern;
    }
#pragma warning restore IDE0051 // Remove unused private members
}