using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using System.Text;
using System.Text.RegularExpressions;
| | 4 | |
|
| | 5 | | namespace Pozitron.QuerySpecification; |
| | 6 | |
|
/// <summary>
/// In-memory evaluation of SQL <c>LIKE</c> patterns against strings.
/// </summary>
/// <remarks>
/// Supported pattern syntax: <c>%</c> (any run of characters), <c>_</c> (exactly one character),
/// <c>[...]</c> (one character from a set or range) and <c>[^...]</c> (one character not in the set).
/// Matching is case-insensitive.
/// </remarks>
internal static class LikeExtension
{
    // Compiled regexes keyed by the raw LIKE pattern. Entries are never evicted by this class.
    private static readonly ConcurrentDictionary<string, Regex> _regexCache = new();

    /// <summary>
    /// Translates a SQL LIKE pattern into an anchored, case-insensitive <see cref="Regex"/>.
    /// </summary>
    /// <param name="pattern">The LIKE pattern to translate.</param>
    /// <returns>A compiled regex that matches only when the whole input satisfies the pattern.</returns>
    /// <exception cref="ArgumentException">
    /// The pattern has an unterminated <c>[</c> set, or the resulting set is rejected by the regex
    /// engine (for example a reversed range such as <c>[z-a]</c>).
    /// </exception>
    private static Regex BuildRegex(string pattern)
    {
        var builder = new StringBuilder(pattern.Length + 8);
        builder.Append('^');

        // The pattern is scanned character by character (instead of chained string replacements)
        // because the meaning of '%', '_' and '^' depends on whether we are inside a '[...]' set.
        var insideSet = false;
        for (var i = 0; i < pattern.Length; i++)
        {
            var ch = pattern[i];

            if (insideSet)
            {
                if (ch == ']')
                {
                    builder.Append(']');
                    insideSet = false;
                }
                else
                {
                    // Inside a set everything is literal, so "[%]" and "[_]" match the wildcard
                    // characters themselves. Regex.Escape leaves '-' untouched, which keeps
                    // ranges such as "a-c" working.
                    builder.Append(Regex.Escape(ch.ToString()));
                }

                continue;
            }

            switch (ch)
            {
                case '%':
                    // Consecutive '%' are equivalent to a single one; emitting only one ".*"
                    // avoids needless backtracking.
                    if (i == 0 || pattern[i - 1] != '%')
                    {
                        builder.Append(".*");
                    }
                    break;

                case '_':
                    builder.Append('.');
                    break;

                case '[':
                    builder.Append('[');
                    insideSet = true;

                    // '^' negates the set only when it immediately follows '['.
                    if (i + 1 < pattern.Length && pattern[i + 1] == '^')
                    {
                        builder.Append('^');
                        i++;
                    }
                    break;

                default:
                    // Any other character (including a '^' outside of a set) is a literal.
                    builder.Append(Regex.Escape(ch.ToString()));
                    break;
            }
        }

        if (insideSet)
        {
            throw new ArgumentException("The LIKE pattern contains an unterminated '[' character set.", nameof(pattern));
        }

        // "\z" anchors at the very end of the input; "$" would also match before a trailing newline.
        builder.Append(@"\z");

        // Singleline: '%' and '_' must also match newline characters.
        // CultureInvariant: the regex is cached, so its casing rules must not depend on the culture
        // of whichever thread happened to build it first.
        var options = RegexOptions.IgnoreCase
            | RegexOptions.CultureInvariant
            | RegexOptions.Singleline
            | RegexOptions.Compiled;

        return new Regex(builder.ToString(), options);
    }

    /// <summary>
    /// Determines whether <paramref name="input"/> matches the SQL LIKE <paramref name="pattern"/>.
    /// </summary>
    /// <param name="input">The string to test.</param>
    /// <param name="pattern">The LIKE pattern.</param>
    /// <returns><c>true</c> when the whole input matches the pattern; otherwise <c>false</c>.</returns>
    /// <exception cref="InvalidLikePatternException">
    /// Thrown for any failure while building or evaluating the pattern; the original exception
    /// is passed along to the thrown exception.
    /// </exception>
    public static bool Like(this string input, string pattern)
    {
        try
        {
            var regex = _regexCache.GetOrAdd(pattern, BuildRegex);
            return regex.IsMatch(input);
        }
        catch (Exception ex)
        {
            throw new InvalidLikePatternException(pattern, ex);
        }
    }

#pragma warning disable IDE0051 // Remove unused private members
    // This C# implementation of SQL Like operator is based on the following SO post https://stackoverflow.com/a/8583383
    // It covers almost all of the scenarios, and it's faster than regex based implementations.
    // It may fail/throw in some very specific and edge cases, hence, wrap it in try/catch.
    // UPDATE: it returns incorrect results for some obvious cases.
    //
    // Kept as an unused reference implementation; the logic is intentionally left as-is.
    // Limitations visible in the code below:
    //  - The '[' branch indexes the pattern without bounds checks, so a pattern that ends right
    //    after '[' (or inside a short set) throws IndexOutOfRangeException.
    //  - Characters listed explicitly in a set are stored as written, while the input character is
    //    upper-cased before the lookup, so lower-case letters in a list such as "[abc]" never match.
    /// <summary>
    /// Single-pass, regex-free evaluation of a SQL LIKE pattern (unused, see the notes above).
    /// </summary>
    /// <param name="str">The string to test.</param>
    /// <param name="pattern">The LIKE pattern.</param>
    /// <returns><c>true</c> when the matcher considers the input to satisfy the pattern.</returns>
    [ExcludeFromCodeCoverage]
    private static bool SqlLikeOption2(string str, string pattern)
    {
        var isMatch = true;
        var isWildCardOn = false;
        var isCharWildCardOn = false;
        var isCharSetOn = false;
        var isNotCharSetOn = false;
        var lastWildCard = -1;
        var patternIndex = 0;
        var set = new List<char>();
        var p = '\0';
        bool endOfPattern;

        for (var i = 0; i < str.Length; i++)
        {
            var c = str[i];
            endOfPattern = (patternIndex >= pattern.Length);
            if (!endOfPattern)
            {
                p = pattern[patternIndex];

                if (!isWildCardOn && p == '%')
                {
                    // Remember where the wildcard started so a later mismatch can rewind to it.
                    lastWildCard = patternIndex;
                    isWildCardOn = true;
                    while (patternIndex < pattern.Length &&
                        pattern[patternIndex] == '%')
                    {
                        patternIndex++;
                    }
                    p = patternIndex >= pattern.Length ? '\0' : pattern[patternIndex];
                }
                else if (p == '_')
                {
                    isCharWildCardOn = true;
                    patternIndex++;
                }
                else if (p == '[')
                {
                    if (pattern[++patternIndex] == '^')
                    {
                        isNotCharSetOn = true;
                        patternIndex++;
                    }
                    else
                    {
                        isCharSetOn = true;
                    }

                    set.Clear();

                    // A set of the exact form "x-y]" is expanded into every character of the range.
                    if (pattern[patternIndex + 1] == '-' && pattern[patternIndex + 3] == ']')
                    {
                        var start = char.ToUpper(pattern[patternIndex]);
                        patternIndex += 2;
                        var end = char.ToUpper(pattern[patternIndex]);
                        if (start <= end)
                        {
                            for (var ci = start; ci <= end; ci++)
                            {
                                set.Add(ci);
                            }
                        }
                        patternIndex++;
                    }

                    // Any remaining characters up to ']' are added to the set individually.
                    while (patternIndex < pattern.Length &&
                        pattern[patternIndex] != ']')
                    {
                        set.Add(pattern[patternIndex]);
                        patternIndex++;
                    }
                    patternIndex++;
                }
            }

            if (isWildCardOn)
            {
                // The wildcard keeps consuming input until the character that follows it is seen.
                if (char.ToUpper(c) == char.ToUpper(p))
                {
                    isWildCardOn = false;
                    patternIndex++;
                }
            }
            else if (isCharWildCardOn)
            {
                isCharWildCardOn = false;
            }
            else if (isCharSetOn || isNotCharSetOn)
            {
                var charMatch = (set.Contains(char.ToUpper(c)));
                if ((isNotCharSetOn && charMatch) || (isCharSetOn && !charMatch))
                {
                    if (lastWildCard >= 0)
                    {
                        patternIndex = lastWildCard;
                    }
                    else
                    {
                        isMatch = false;
                        break;
                    }
                }
                isNotCharSetOn = isCharSetOn = false;
            }
            else
            {
                if (char.ToUpper(c) == char.ToUpper(p))
                {
                    patternIndex++;
                }
                else
                {
                    if (lastWildCard >= 0)
                    {
                        patternIndex = lastWildCard;
                    }
                    else
                    {
                        isMatch = false;
                        break;
                    }
                }
            }
        }
        endOfPattern = (patternIndex >= pattern.Length);

        // Input is exhausted: leftover pattern is acceptable only if it consists solely of '%'.
        if (isMatch && !endOfPattern)
        {
            var isOnlyWildCards = true;
            for (var i = patternIndex; i < pattern.Length; i++)
            {
                if (pattern[i] != '%')
                {
                    isOnlyWildCards = false;
                    break;
                }
            }
            if (isOnlyWildCards)
            {
                endOfPattern = true;
            }
        }
        return isMatch && endOfPattern;
    }
}