lib/utils/patterns.rb
module Utils # A module that provides pattern matching functionality for file searching # and text processing. # # It includes classes for different types of pattern matching including fuzzy # matching and regular expression matching. module Patterns # Base class for pattern matching implementations. # # This class serves as the foundation for various pattern matching # strategies, providing common functionality for initializing patterns with # character set filtering and case sensitivity options. It handles the core # configuration and delegates specific matching behavior to subclasses. # # @abstract class Pattern # Initializes a new Pattern instance with the specified options. # # This method sets up the pattern configuration by storing the character # set, case sensitivity flag, and pattern string. It validates that a # pattern is provided and optionally filters the pattern characters based # on the specified character set. # # @param opts [ Hash ] a hash containing the pattern configuration options # @option opts [ String ] :cset the character set to filter pattern characters against # @option opts [ TrueClass, FalseClass ] :icase whether the pattern matching should be case sensitive # @option opts [ String ] :pattern the pattern string to be used for matching # # @raise [ ArgumentError ] if the pattern option is not provided def initialize(opts = {}) @cset = opts[:cset] @icase = opts[:icase] @pattern = opts[:pattern] or raise ArgumentError, "pattern option required" @pattern = @pattern.gsub(/[^#{@cset}]/, '') if @cset end # Returns the matcher object used for pattern matching. # # @return [ Object ] the matcher object that handles pattern matching operations attr_reader :matcher # The method_missing method delegates calls to the matcher object while # handling UTF-8 encoding errors. # # This method acts as a fallback handler for undefined method calls, # forwarding them to the internal matcher object. It specifically catches # ArgumentError exceptions related to invalid byte sequences in UTF-8 and # re-raises them unless they match the expected error pattern. # # @param a [ Array ] the arguments passed to the missing method # @param b [ Proc ] the block passed to the missing method # # @return [ Object ] the result of the delegated method call on the matcher def method_missing(*a, &b) @matcher.__send__(*a, &b) rescue ArgumentError => e raise e unless e.message.include?('invalid byte sequence in UTF-8') end end # A fuzzy pattern matcher that performs partial string matching while # preserving character order. # # This class implements a pattern matching strategy that allows for # flexible matching of strings where the characters of the search pattern # appear in sequence within the target string, but not necessarily # consecutively. It is particularly useful for finding text patterns with # potential typos or # when only partial information about the target is available. # # @example # fuzzy_pattern = FuzzyPattern.new(pattern: 'abc') # fuzzy_pattern.match('a1b2c3') # => matches because 'a', 'b', and 'c' appear in order class FuzzyPattern < Pattern # Initializes a fuzzy pattern matcher by processing the pattern string # and compiling it into a regular expression. # # This method takes the configured pattern string and converts it into a # regular expression that can match strings in a fuzzy manner, allowing # for partial matches while preserving the order of characters. It # handles case sensitivity based on the configuration. # # @param opts [ Hash ] a hash containing the pattern configuration options # @option opts [ String ] :cset the character set to filter pattern characters against # @option opts [ TrueClass, FalseClass ] :icase whether the pattern matching should be case sensitive # @option opts [ String ] :pattern the pattern string to be used for matching def initialize(opts = {}) super r = @pattern.split(//).grep(/[[:print:]]/).map { |x| "(#{Regexp.quote(x)})" } * '.*?' @matcher = Regexp.new( "\\A(?:.*/.*?#{r}|.*#{r})", @icase ? Regexp::IGNORECASE : 0 ) end end # A regular expression pattern matcher that performs exact string matching # with optional case sensitivity. # # This class extends the base Pattern class to provide functionality for # creating and using regular expression patterns. It compiles the provided # pattern into a Regexp object that can be used for matching operations # throughout the application. The pattern matching behavior is influenced # by the case sensitivity configuration inherited from the parent class. # # @example # regexp_pattern = RegexpPattern.new(pattern: 'foo', icase: true) # regexp_pattern.match('FOO') # => matches because case insensitive class RegexpPattern < Pattern # Initializes a regular expression pattern matcher with the specified # options. # # This method sets up a regular expression object based on the pattern # string and case sensitivity configuration that was previously # initialized in the parent class. It compiles the pattern into a Regexp # object that can be used for matching operations throughout the pattern # matching process. # # @param opts [ Hash ] a hash containing the pattern configuration options # @option opts [ String ] :cset the character set to filter pattern characters against # @option opts [ TrueClass, FalseClass ] :icase whether the pattern matching should be case sensitive # @option opts [ String ] :pattern the pattern string to be used for matching # # @return [ Regexp ] a compiled regular expression object ready for pattern matching operations def initialize(opts = {}) super @matcher = Regexp.new( @pattern, @icase ? Regexp::IGNORECASE : 0 ) end end # Chooses and initializes a pattern matcher based on the provided argument # and options. # # This method selects between a regular expression pattern matcher and a # fuzzy pattern matcher depending on the value of the argument parameter # and the default configuration. # It validates that the argument is either 'r' (regexp) or 'f' (fuzzy) and # raises an error if an invalid value is provided. # # @param argument [ String ] the argument string that determines the pattern type # @param pattern_opts [ Hash ] the options to be passed to the pattern matcher constructor # @param default [ String ] the default pattern type to use when argument is nil or empty # # @return [ Utils::Patterns::Pattern ] a new instance of either RegexpPattern or FuzzyPattern # # @raise [ ArgumentError ] if the argument does not match 'r' or 'f' patterns and is not nil # @raise [ ArgumentError ] if the pattern option is not provided to the pattern matcher constructor def choose(argument, pattern_opts, default: ?f) case argument when /^r/, (default == ?r ? nil : :not) RegexpPattern.new(pattern_opts) when /^f/, (default == ?f ? nil : :not) FuzzyPattern.new(pattern_opts) else raise ArgumentError, 'argument -p has to be f=fuzzy or r=regexp' end end end end