module Jekyll::Algolia::Extractor
def self.add_unique_object_id(record)
Public: Adds a unique :objectID field to the hash, computed from the record by AlgoliaHTMLExtractor.uuid
# Public: Adds a unique :objectID field to the record.
#
# record - The Hash of a record. It is modified in place.
#
# The value stored is whatever AlgoliaHTMLExtractor.uuid returns when given
# the record.
#
# Returns the same record Hash, now carrying the :objectID key.
def self.add_unique_object_id(record)
  record[:objectID] = AlgoliaHTMLExtractor.uuid(record)
  record
end
def self.extract_raw_records(content)
Public: Extract raw records from the file, including content for each
node and its headings
# Public: Extract raw records from HTML content.
#
# content - The content handed to AlgoliaHTMLExtractor.run (the HTML of
#           the file)
#
# The CSS selector of the nodes to extract is read from the
# 'nodes_to_index' Algolia configuration option; script, style and iframe
# tags are excluded.
#
# Returns the records produced by AlgoliaHTMLExtractor.run, each with its
# :objectID key removed.
def self.extract_raw_records(content)
  records = AlgoliaHTMLExtractor.run(
    content,
    options: {
      css_selector: Configurator.algolia('nodes_to_index'),
      tags_to_exclude: 'script,style,iframe'
    }
  )

  # We remove objectIDs, as they will be added at the very end, after all
  # the hooks and shrinkage
  records.each { |record| record.delete(:objectID) }

  records
end
def self.run(file)
Public: Extract records from the file
# Public: Extract records from the file
#
# file - The file to extract records from. Its #content is passed to
#        extract_raw_records and the file itself to FileBrowser.metadata.
#
# Returns an Array of records. Each one is a raw record merged with the
# file metadata, stripped of empty values by Utils.compact_empty and passed
# through the user-defined hooks. Records for which the hook returned nil
# are left out.
def self.run(file)
  # Getting all nodes from the HTML input
  raw_records = extract_raw_records(file.content)
  # Getting file metadata
  shared_metadata = FileBrowser.metadata(file)
  # If no content, we still index the metadata
  raw_records = [shared_metadata] if raw_records.empty?

  # Building the list of records
  raw_records.each_with_object([]) do |raw_record, records|
    # We do not need to pass the HTML node element to the final record, but
    # the hooks still receive it
    node = raw_record.delete(:node)

    # Merging each record info with file info
    record = Utils.compact_empty(raw_record.merge(shared_metadata))

    # Apply custom user-defined hooks
    # Users can return `nil` from the hook to signal we should not index
    # such a record
    record = Hooks.apply_each(record, node, Jekyll::Algolia.site)

    records << record unless record.nil?
  end
end