| | 1 | | using System.Collections.Concurrent; |
| | 2 | | using System.Diagnostics.CodeAnalysis; |
| | 3 | | using System.Text.RegularExpressions; |
| | 4 | |
|
| | 5 | | namespace Pozitron.QuerySpecification; |
| | 6 | |
|
| | 7 | | internal static class LikeExtension |
| | 8 | | { |
| 1 | 9 | | private static readonly RegexCache _regexCache = new(); |
| | 10 | |
|
/// <summary>
/// Translates a SQL LIKE pattern into an anchored, case-insensitive, compiled <see cref="Regex"/>.
/// </summary>
/// <remarks>
/// Outside a bracket set, '%' becomes ".*", '_' becomes "." and '[' opens a regex character class;
/// every other character is escaped and matched literally. Inside a bracket set, '%' and '_' are
/// ordinary members (so "[%]" and "[_]" match a literal '%' and '_'), while '^' and '[' are passed
/// through unescaped so that "[^...]" negates the set.
/// </remarks>
/// <param name="pattern">The LIKE pattern to translate.</param>
/// <returns>A regex that matches only when the entire input satisfies the pattern.</returns>
/// <exception cref="ArgumentException">
/// The pattern has an unterminated '[' set, or the translated expression is rejected by the regex parser.
/// </exception>
private static Regex BuildRegex(string pattern)
{
    var regexPattern = new System.Text.StringBuilder(pattern.Length + 4);

    // '^' ... '\z' force the match to span the whole input. '\z' is used instead of '$'
    // because '$' also matches just before a trailing newline.
    regexPattern.Append('^');

    // Index of the '[' that opened the set currently being read, or -1 while outside any set.
    var setStart = -1;

    for (var i = 0; i < pattern.Length; i++)
    {
        var current = pattern[i];

        if (setStart >= 0)
        {
            // Directly after '[' or '[^' the regex parser treats ']' as a literal member,
            // not as the terminator, so the set stays open in that position.
            var isFirstMember = i == setStart + 1
                || (i == setStart + 2 && pattern[setStart + 1] == '^');

            if (current == ']' && !isFirstMember)
            {
                regexPattern.Append(']');
                setStart = -1;
            }
            else if (current is '^' or '[')
            {
                // Passed through unescaped: a leading '^' negates the set, elsewhere both are literals.
                regexPattern.Append(current);
            }
            else
            {
                // '%' and '_' are not regex metacharacters, so they stay literal members here.
                regexPattern.Append(Regex.Escape(current.ToString()));
            }

            continue;
        }

        switch (current)
        {
            case '%':
                regexPattern.Append(".*"); // LIKE '%': any run of characters, including none
                break;
            case '_':
                regexPattern.Append('.'); // LIKE '_': exactly one character
                break;
            case '[':
                regexPattern.Append('[');
                setStart = i;
                break;
            default:
                // Everything else is literal text; this includes '^', which is only special inside a set.
                regexPattern.Append(Regex.Escape(current.ToString()));
                break;
        }
    }

    if (setStart >= 0)
        throw new ArgumentException("The LIKE pattern contains an unterminated '[' character set.", nameof(pattern));

    regexPattern.Append(@"\z");

    // Singleline lets '.' match '\n', so '%' and '_' can span line breaks.
    return new Regex(
        regexPattern.ToString(),
        RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
}
| | 26 | |
|
/// <summary>
/// Tests whether <paramref name="input"/> satisfies the SQL LIKE <paramref name="pattern"/>.
/// </summary>
/// <param name="input">The text to test.</param>
/// <param name="pattern">The LIKE pattern to test against.</param>
/// <returns><c>true</c> when the regex built for the pattern matches the input; otherwise <c>false</c>.</returns>
/// <exception cref="InvalidLikePatternException">
/// Wraps any exception thrown while obtaining the regex or evaluating the match.
/// </exception>
public static bool Like(this string input, string pattern)
{
    try
    {
        // Patterns are dynamic and arbitrary; the consumer may even compose them from end-user input.
        // Caching every Regex is not feasible, so only a small number of them is kept for reuse.
        // This may improve performance within the same closed loop of the in-memory evaluator and validator.
        return _regexCache.GetOrAdd(pattern, BuildRegex).IsMatch(input);
    }
    catch (Exception cause)
    {
        throw new InvalidLikePatternException(pattern, cause);
    }
}
| | 43 | |
|
/// <summary>
/// A small, size-capped cache of <see cref="Regex"/> instances keyed by the pattern they were built from.
/// </summary>
private sealed class RegexCache
{
    private const int _maxSize = 10;
    private readonly ConcurrentDictionary<string, Regex> _dictionary = new();

    /// <summary>
    /// Returns the cached regex for <paramref name="key"/>, or builds one with
    /// <paramref name="valueFactory"/>, stores it and returns it.
    /// </summary>
    /// <param name="key">The pattern used as the cache key.</param>
    /// <param name="valueFactory">Builds the regex on a cache miss; its exceptions propagate to the caller.</param>
    /// <returns>The cached or newly built regex.</returns>
    public Regex GetOrAdd(string key, Func<string, Regex> valueFactory)
    {
        if (_dictionary.TryGetValue(key, out var regex))
            return regex;

        // Build before evicting: if the factory throws (e.g. an invalid pattern),
        // no existing entry is sacrificed for a value that never gets stored.
        var newRegex = valueFactory(key);

        // It might happen we end up with more items than max (concurrency), but we won't be too strict.
        // We're just trying to avoid indefinite growth.
        for (var excess = _dictionary.Count - _maxSize; excess >= 0; excess--)
        {
            RemoveAnyEntry();
        }

        _dictionary.TryAdd(key, newRegex);
        return newRegex;
    }

    /// <summary>
    /// Removes the first entry the dictionary enumerates, if there is one.
    /// </summary>
    private void RemoveAnyEntry()
    {
        // Avoid being smart, just drop whatever comes first. Enumerating the dictionary
        // directly avoids materializing the full key collection for every removal.
        foreach (var entry in _dictionary)
        {
            _dictionary.TryRemove(entry.Key, out _);
            return;
        }
    }
}
| | 72 | |
|
#pragma warning disable IDE0051 // Remove unused private members
// This C# implementation of SQL Like operator is based on the following SO post https://stackoverflow.com/a/8583383
// It covers almost all of the scenarios, and it's faster than regex based implementations.
// It may fail/throw in some very specific and edge cases, hence, wrap it in try/catch.
// UPDATE: it returns incorrect results for some obvious cases.
//
// How it works: a single pass over `str`, one character per iteration, with `patternIndex` as the
// cursor into `pattern`. Each iteration first decodes the pattern token at the cursor ('%', '_' or a
// '[...]' set) into the state flags below, then checks the current input character against that state.
// Returns true only if no mismatch was recorded and the pattern was fully consumed (trailing '%' allowed).
[ExcludeFromCodeCoverage(Justification = "Dead code. Keeping it just as a reference")]
private static bool SqlLikeOption2(string str, string pattern)
{
    var isMatch = true;
    var isWildCardOn = false;      // a '%' is active and is still swallowing input characters
    var isCharWildCardOn = false;  // a '_' was read and applies to the current input character
    var isCharSetOn = false;       // a '[...]' set was read and applies to the current input character
    var isNotCharSetOn = false;    // a '[^...]' set was read and applies to the current input character
    var lastWildCard = -1;         // pattern index of the most recent '%', used to rewind on a mismatch
    var patternIndex = 0;
    var set = new List<char>();    // members of the set currently being matched
    var p = '\0';                  // pattern character the current input character is compared with
    bool endOfPattern;

    for (var i = 0; i < str.Length; i++)
    {
        var c = str[i];
        endOfPattern = (patternIndex >= pattern.Length);
        // NOTE(review): once the pattern is exhausted `p` is not refreshed, so the comparisons below
        // use the last pattern character that was read.
        if (!endOfPattern)
        {
            p = pattern[patternIndex];

            if (!isWildCardOn && p == '%')
            {
                lastWildCard = patternIndex;
                isWildCardOn = true;
                // Collapse a run of consecutive '%' into one wildcard.
                while (patternIndex < pattern.Length &&
                    pattern[patternIndex] == '%')
                {
                    patternIndex++;
                }
                // `p` becomes the character that ends the wildcard, or '\0' if '%' closes the pattern.
                p = patternIndex >= pattern.Length ? '\0' : pattern[patternIndex];
            }
            else if (p == '_')
            {
                isCharWildCardOn = true;
                patternIndex++;
            }
            else if (p == '[')
            {
                // NOTE(review): the indexers in this branch are not bounds-checked; a pattern that
                // ends shortly after '[' makes them throw.
                if (pattern[++patternIndex] == '^')
                {
                    isNotCharSetOn = true;
                    patternIndex++;
                }
                else isCharSetOn = true;

                set.Clear();
                // Range form "x-y]": add every character from x to y, both upper-cased.
                if (pattern[patternIndex + 1] == '-' && pattern[patternIndex + 3] == ']')
                {
                    var start = char.ToUpper(pattern[patternIndex]);
                    patternIndex += 2;
                    var end = char.ToUpper(pattern[patternIndex]);
                    if (start <= end)
                    {
                        for (var ci = start; ci <= end; ci++)
                        {
                            set.Add(ci);
                        }
                    }
                    patternIndex++;
                }

                // List form: add the remaining members up to ']' as written.
                // NOTE(review): these members are not upper-cased, while the lookup below upper-cases
                // the input character, so lower-case members listed here cannot match.
                while (patternIndex < pattern.Length &&
                    pattern[patternIndex] != ']')
                {
                    set.Add(pattern[patternIndex]);
                    patternIndex++;
                }
                // Step over the closing ']'.
                patternIndex++;
            }
        }

        if (isWildCardOn)
        {
            // The wildcard keeps consuming input until a character equal to `p` (ignoring case) shows up.
            if (char.ToUpper(c) == char.ToUpper(p))
            {
                isWildCardOn = false;
                patternIndex++;
            }
        }
        else if (isCharWildCardOn)
        {
            // '_' accepts whatever the current character is.
            isCharWildCardOn = false;
        }
        else if (isCharSetOn || isNotCharSetOn)
        {
            var charMatch = (set.Contains(char.ToUpper(c)));
            if ((isNotCharSetOn && charMatch) || (isCharSetOn && !charMatch))
            {
                // Mismatch: rewind to the last '%' if there was one, otherwise fail.
                if (lastWildCard >= 0) patternIndex = lastWildCard;
                else
                {
                    isMatch = false;
                    break;
                }
            }
            isNotCharSetOn = isCharSetOn = false;
        }
        else
        {
            // Plain literal comparison, ignoring case.
            if (char.ToUpper(c) == char.ToUpper(p))
            {
                patternIndex++;
            }
            else
            {
                // Mismatch: rewind to the last '%' if there was one, otherwise fail.
                if (lastWildCard >= 0) patternIndex = lastWildCard;
                else
                {
                    isMatch = false;
                    break;
                }
            }
        }
    }
    endOfPattern = (patternIndex >= pattern.Length);

    // Input is exhausted; any pattern that is left over may only consist of '%'.
    if (isMatch && !endOfPattern)
    {
        var isOnlyWildCards = true;
        for (var i = patternIndex; i < pattern.Length; i++)
        {
            if (pattern[i] != '%')
            {
                isOnlyWildCards = false;
                break;
            }
        }
        if (isOnlyWildCards) endOfPattern = true;
    }
    return isMatch && endOfPattern;
}
| | 211 | | } |