module Hizuke::DateKeywordMatcher
# Looks for a compound (multi-word) date expression in the text and, if one
# is present, converts it into a result.
#
# When several keywords match, the one that starts earliest in the text is
# used. If two keywords start at the same position, the longer one wins, so
# a keyword that is merely a prefix of another cannot shadow it.
#
# @param clean_text [String] the text to check
# @return [Hizuke::Result, nil] the result or nil if no match
def check_compound_date_expressions(clean_text)
  compound_matches = find_compound_matches(clean_text)
  return nil if compound_matches.empty?

  # Order by start index first; negate the end index so that, on a tie,
  # the match reaching furthest (the longest keyword) sorts first.
  match_key, indices = compound_matches.min_by do |_, (start_idx, end_idx)|
    [start_idx, -end_idx]
  end

  process_compound_match(clean_text, match_key, indices)
end
# Scans the whitespace-separated words of the text for a single-word date
# keyword and, when one is found, builds a result from the computed date and
# the text with that word removed.
#
# @param clean_text [String] the text to check
# @return [Hizuke::Result, nil] the parsing result or nil if no keyword found
def check_single_word_date_references(clean_text)
  tokens = clean_text.split

  keyword_hit = find_date_keyword_match(tokens)
  return nil unless keyword_hit

  # Resolve the date before rebuilding the text, mirroring the lookup order.
  resolved_date = calculate_date(keyword_hit[:value])
  remaining_text = remove_date_keyword_from_text(tokens, keyword_hit[:index])

  Result.new(remaining_text, resolved_date)
end
# Collects the date keywords that consist of more than one word, i.e. the
# keys of DATE_KEYWORDS that contain a space.
#
# @return [Array] array of compound keywords
def find_compound_keywords
  DATE_KEYWORDS.each_key.select do |keyword|
    keyword.include?(' ')
  end
end
# Finds every compound keyword that occurs in the text (case-insensitively)
# and records where its first occurrence sits.
#
# The indices are inclusive character offsets `[start, end]` into the
# lowercased text.
#
# @param clean_text [String] the text to check
# @return [Hash] a hash of matches and their indices
def find_compound_matches(clean_text)
  # Lowercase once up front instead of once (or twice) per keyword.
  lowered_text = clean_text.downcase

  find_compound_keywords.each_with_object({}) do |compound_key, compound_matches|
    # String#index returns nil when the keyword is absent, so a separate
    # include? scan is unnecessary.
    start_idx = lowered_text.index(compound_key)
    next unless start_idx

    compound_matches[compound_key] = [start_idx, start_idx + compound_key.length - 1]
  end
end
# Walks the words in order and returns the first one that, once lowercased
# and stripped of every character outside a-z, is a key of DATE_KEYWORDS.
#
# @param words [Array] the words to check
# @return [Hash, nil] a hash with the index and value of the match or nil if
#   no match
def find_date_keyword_match(words)
  words.each_with_index do |raw_word, position|
    normalized = raw_word.downcase.gsub(/[^a-z]/, '')
    return { index: position, value: DATE_KEYWORDS[normalized] } if DATE_KEYWORDS.key?(normalized)
  end

  nil
end
# Builds the result for a matched compound keyword: computes the date for
# the keyword and removes the matched expression from the text.
#
# The text on either side of the removed expression is stripped and re-joined
# with a single space, so cutting an expression out of the middle of a
# sentence does not leave a double space behind.
#
# @param clean_text [String] the text to check
# @param match_key [String] the matched keyword
# @param indices [Array] the start and end indices of the match (inclusive)
# @return [Hizuke::Result] the result
def process_compound_match(clean_text, match_key, indices)
  date = calculate_date(DATE_KEYWORDS[match_key])

  start_idx, end_idx = indices
  before = clean_text[0...start_idx].to_s.strip
  # Slicing past the end of the string yields nil, hence the to_s.
  after = clean_text[(end_idx + 1)..-1].to_s.strip

  # Drop an empty side so a leading/trailing expression adds no stray space.
  final_text = [before, after].reject(&:empty?).join(' ')

  Result.new(final_text, date)
end
# Rebuilds the text from the words with the word at the given index left out.
# The words array passed in is not modified; a copy is edited instead.
#
# @param words [Array] the words array
# @param index [Integer] the index of the keyword to remove
# @return [String] the text without the keyword
def remove_date_keyword_from_text(words, index)
  words
    .dup
    .tap { |remaining| remaining.delete_at(index) }
    .join(' ')
    .strip
end
# Tries each parsing strategy in a fixed order and returns the first result
# produced:
#
# 1. dynamic patterns (in X days, X days ago)
# 2. day of week patterns (this Monday, next Tuesday, etc.)
# 3. compound date expressions (like "next week")
# 4. single-word date references
#
# If no strategy finds a date reference, nil is returned rather than
# today's date.
#
# @param clean_text [String] the text without time references
# @return [Hizuke::Result, nil] the parsing result or nil if no date
#   reference is found
def try_parsing_strategies(clean_text)
  check_dynamic_patterns(clean_text) ||
    check_day_of_week_patterns(clean_text) ||
    check_compound_date_expressions(clean_text) ||
    check_single_word_date_references(clean_text) ||
    nil
end