< Summary

Information
Class: Pozitron.QuerySpecification.LikeExtension
Assembly: Pozitron.QuerySpecification
File(s): /home/runner/work/QuerySpecification/QuerySpecification/src/QuerySpecification/Evaluators/LikeExtension.cs
Tag: 52_11740816328
Line coverage
100%
Covered lines: 25
Uncovered lines: 0
Coverable lines: 25
Total lines: 211
Line coverage: 100%
Branch coverage
100%
Covered branches: 6
Total branches: 6
Branch coverage: 100%
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

Method | Branch coverage | Crap Score | Cyclomatic complexity | Line coverage
.cctor() | 100% | 1 | 1 | 100%
BuildRegex(...) | 100% | 1 | 1 | 100%
Like(...) | 100% | 1 | 1 | 100%
.ctor() | 100% | 1 | 1 | 100%
GetOrAdd(...) | 100% | 6 | 6 | 100%

File(s)

/home/runner/work/QuerySpecification/QuerySpecification/src/QuerySpecification/Evaluators/LikeExtension.cs

#LineLine coverage
 1using System.Collections.Concurrent;
 2using System.Diagnostics.CodeAnalysis;
 3using System.Text.RegularExpressions;
 4
 5namespace Pozitron.QuerySpecification;
 6
/// <summary>
/// In-memory approximation of the SQL <c>LIKE</c> operator, implemented by translating the LIKE
/// pattern into a <see cref="Regex"/> and caching a small number of recently built expressions.
/// </summary>
internal static class LikeExtension
{
    private static readonly RegexCache _regexCache = new();

    /// <summary>
    /// Translates a SQL LIKE pattern into an anchored, case-insensitive, compiled <see cref="Regex"/>.
    /// </summary>
    /// <param name="pattern">The LIKE pattern; supports <c>%</c>, <c>_</c>, <c>[...]</c> and <c>[^...]</c>.</param>
    /// <returns>A regex that must match the whole input string.</returns>
    /// <remarks>
    /// The wildcard replacements are applied to the whole pattern, so <c>%</c> and <c>_</c> placed
    /// inside a bracket expression are also translated (known limitation).
    /// The <see cref="Regex"/> constructor throws if the translated pattern is invalid
    /// (for example an unterminated <c>[</c>).
    /// </remarks>
    private static Regex BuildRegex(string pattern)
    {
        // Escape all regex metacharacters first, then selectively re-enable the LIKE constructs.
        var regexPattern = Regex
            .Escape(pattern)
            .Replace("%", ".*")      // Translate SQL LIKE wildcard '%' to regex '.*'
            .Replace("_", ".")       // Translate SQL LIKE wildcard '_' to regex '.'
            .Replace(@"\[", "[")     // Unescape '[' as it's used for character classes/ranges
            .Replace(@"[\^", "[^");  // Unescape '^' only right after '[' (negation); elsewhere it stays a literal

        // Ensure the pattern matches the entire string.
        // '\z' is used instead of '$' because '$' also matches just before a trailing newline.
        regexPattern = @"\A" + regexPattern + @"\z";

        // Singleline lets '.' match '\n', so '%' and '_' also span line breaks.
        return new Regex(
            regexPattern,
            RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
    }

    /// <summary>
    /// Determines whether <paramref name="input"/> matches the SQL LIKE <paramref name="pattern"/>.
    /// </summary>
    /// <param name="input">The string to test.</param>
    /// <param name="pattern">The SQL LIKE pattern.</param>
    /// <returns><c>true</c> if the whole input matches the pattern; otherwise <c>false</c>.</returns>
    /// <exception cref="InvalidLikePatternException">
    /// Thrown when building or evaluating the regex throws; the original exception is passed along.
    /// </exception>
    public static bool Like(this string input, string pattern)
    {
        try
        {
            // The pattern is dynamic and arbitrary, the consumer might even compose it by an end-user input.
            // We can not cache all Regex objects, but at least we can try to reuse the most "recent" ones.
            // We'll cache a small, bounded number of them.
            // This might improve the performance within the same closed loop for the in-memory evaluator and validator.

            var regex = _regexCache.GetOrAdd(pattern, BuildRegex);
            return regex.IsMatch(input);
        }
        catch (Exception ex)
        {
            throw new InvalidLikePatternException(pattern, ex);
        }
    }

    /// <summary>
    /// A small, size-capped cache of <see cref="Regex"/> instances keyed by the LIKE pattern.
    /// </summary>
    private sealed class RegexCache
    {
        private const int MaxSize = 10;
        private readonly ConcurrentDictionary<string, Regex> _dictionary = new();

        /// <summary>
        /// Returns the cached regex for <paramref name="key"/>, or builds one with
        /// <paramref name="valueFactory"/>, stores it and returns it.
        /// </summary>
        /// <param name="key">The LIKE pattern used as the cache key.</param>
        /// <param name="valueFactory">Factory invoked on a cache miss; its exceptions propagate to the caller.</param>
        /// <returns>The cached or newly created regex.</returns>
        public Regex GetOrAdd(string key, Func<string, Regex> valueFactory)
        {
            if (_dictionary.TryGetValue(key, out var regex))
            {
                return regex;
            }

            // It might happen we end up with more items than max (concurrency), but we won't be too strict.
            // We're just trying to avoid indefinite growth.
            // The loop evicts enough entries to leave room for the one about to be added.
            for (var excess = _dictionary.Count - MaxSize; excess >= 0; excess--)
            {
                // Avoid being smart, just remove whichever entry is enumerated first.
                // Enumerate the dictionary itself instead of its Keys property: Keys copies all keys
                // into a new collection on every access, while a single entry is all we need.
                foreach (var entry in _dictionary)
                {
                    _dictionary.TryRemove(entry.Key, out _);
                    break;
                }
            }

            var newRegex = valueFactory(key);
            _dictionary.TryAdd(key, newRegex);
            return newRegex;
        }
    }

#pragma warning disable IDE0051 // Remove unused private members
    // This C# implementation of SQL Like operator is based on the following SO post https://stackoverflow.com/a/8583383
    // It covers almost all of the scenarios, and it's faster than regex based implementations.
    // It may fail/throw in some very specific and edge cases, hence, wrap it in try/catch.
    // UPDATE: it returns incorrect results for some obvious cases.
    [ExcludeFromCodeCoverage(Justification = "Dead code. Keeping it just as a reference")]
    private static bool SqlLikeOption2(string str, string pattern)
    {
        var isMatch = true;
        var isWildCardOn = false;
        var isCharWildCardOn = false;
        var isCharSetOn = false;
        var isNotCharSetOn = false;
        var lastWildCard = -1;
        var patternIndex = 0;
        var set = new List<char>();
        var p = '\0';
        bool endOfPattern;

        for (var i = 0; i < str.Length; i++)
        {
            var c = str[i];
            endOfPattern = (patternIndex >= pattern.Length);
            if (!endOfPattern)
            {
                p = pattern[patternIndex];

                if (!isWildCardOn && p == '%')
                {
                    lastWildCard = patternIndex;
                    isWildCardOn = true;
                    while (patternIndex < pattern.Length &&
                        pattern[patternIndex] == '%')
                    {
                        patternIndex++;
                    }
                    p = patternIndex >= pattern.Length ? '\0' : pattern[patternIndex];
                }
                else if (p == '_')
                {
                    isCharWildCardOn = true;
                    patternIndex++;
                }
                else if (p == '[')
                {
                    if (pattern[++patternIndex] == '^')
                    {
                        isNotCharSetOn = true;
                        patternIndex++;
                    }
                    else isCharSetOn = true;

                    set.Clear();
                    if (pattern[patternIndex + 1] == '-' && pattern[patternIndex + 3] == ']')
                    {
                        var start = char.ToUpper(pattern[patternIndex]);
                        patternIndex += 2;
                        var end = char.ToUpper(pattern[patternIndex]);
                        if (start <= end)
                        {
                            for (var ci = start; ci <= end; ci++)
                            {
                                set.Add(ci);
                            }
                        }
                        patternIndex++;
                    }

                    while (patternIndex < pattern.Length &&
                        pattern[patternIndex] != ']')
                    {
                        set.Add(pattern[patternIndex]);
                        patternIndex++;
                    }
                    patternIndex++;
                }
            }

            if (isWildCardOn)
            {
                if (char.ToUpper(c) == char.ToUpper(p))
                {
                    isWildCardOn = false;
                    patternIndex++;
                }
            }
            else if (isCharWildCardOn)
            {
                isCharWildCardOn = false;
            }
            else if (isCharSetOn || isNotCharSetOn)
            {
                var charMatch = (set.Contains(char.ToUpper(c)));
                if ((isNotCharSetOn && charMatch) || (isCharSetOn && !charMatch))
                {
                    if (lastWildCard >= 0) patternIndex = lastWildCard;
                    else
                    {
                        isMatch = false;
                        break;
                    }
                }
                isNotCharSetOn = isCharSetOn = false;
            }
            else
            {
                if (char.ToUpper(c) == char.ToUpper(p))
                {
                    patternIndex++;
                }
                else
                {
                    if (lastWildCard >= 0) patternIndex = lastWildCard;
                    else
                    {
                        isMatch = false;
                        break;
                    }
                }
            }
        }
        endOfPattern = (patternIndex >= pattern.Length);

        if (isMatch && !endOfPattern)
        {
            var isOnlyWildCards = true;
            for (var i = patternIndex; i < pattern.Length; i++)
            {
                if (pattern[i] != '%')
                {
                    isOnlyWildCards = false;
                    break;
                }
            }
            if (isOnlyWildCards) endOfPattern = true;
        }
        return isMatch && endOfPattern;
    }
}