lib/ffaker/string.rb



# encoding: utf-8

module FFaker
  module String
    extend ModuleUtils
    extend self

    BACKSLASH = '\\'.freeze
    DASH      = '-'.freeze

    LOWERS     = [*'a'..'z'].freeze
    UPPERS     = [*'A'..'Z'].freeze
    LETTERS    = LOWERS + UPPERS
    NUMBERS    = [*'0'..'9'].freeze
    WORD_CHARS = LETTERS + NUMBERS + ['_']
    SPACES     = [' ', "\t"].freeze
    ESCAPEABLE_CHARS = ['\\', '/', '.', '(', ')', '[', ']', '{', '}'].freeze

    def from_regexp(exp)
      result = ''
      @last_token = nil

      # Drop surrounding /'s and split into characters
      tokens = exp.inspect[1...-1].split(//)
      result << process_token(tokens) until tokens.empty?

      result
    end

    private

    def generate_range(tokens)
      result = []
      while tokens.any?
        token = tokens.shift
        if token == DASH && tokens.first && result.last
          result += [*result.pop..tokens.shift]
        elsif token == BACKSLASH
          result << special(tokens.shift)
        else
          result << token
        end
      end
      result
    end

    def process_token(tokens)
      return '' if tokens.empty?

      token = tokens.shift

      case token
      when '?' then
        # TODO: Let ? generate nothing
        '' # We already printed its target
      when '+' then
        tokens.unshift(token) if rand(0..1) == 1 # Leave the `+` on to run again
        process_token(@last_token) # Run the last one at least once
      when '*' then
        tokens.unshift(token) if rand(0..1) == 1 # Leave the `*` on to run again
        return '' if rand(0..1) == 1 # Or maybe do nothing
        process_token(@last_token) # Else run the last one again
      when '{' then
        number = ''
        while (ch = tokens.shift) != '}'
          number << ch
        end
        number = number.to_i - 1
        t = @last_token.dup
        Array.new(number) { process_token(t.dup) }.join
      else
        generate_token token, tokens
      end
    end

    def generate_token(token, tokens)
      case token
      when /\w/ then
        @last_token = [token]
        token
      when BACKSLASH then
        token = tokens.shift
        @last_token = ['\\', token]
        special(token)
      when '[' then
        set = []
        while (ch = tokens.shift) != ']'
          set << ch
        end
        @last_token = ['['] + set + [']']

        process_token([fetch_sample(generate_range(set))])
      else
        token
      end
    end

    def special(token)
      case token
      when 'w' then fetch_sample(WORD_CHARS)
      when 'd' then fetch_sample(NUMBERS)
      when 's' then fetch_sample(SPACES)
      when *ESCAPEABLE_CHARS then token
      else
        ''
      end
    end
  end
end