class CapabilityGenerator
# Reduces each HTML document in +docs_html+ to a whitespace-normalised
# snippet of its primary content container.
#
# For every value, the first container found wins, tried in this order:
# a <main> element, an <article> element, then a <div> whose class
# attribute starts with "content". Inside the selected snippet, <script>
# elements, <style> elements and HTML comments are removed, every run of
# whitespace is collapsed to a single space, and the result is stripped.
#
# NOTE: the container patterns are non-greedy, so a match ends at the first
# closing tag of that kind; content after a nested closing tag (e.g. an
# inner </div>) is not included.
#
# @param docs_html [Hash{Object => String, nil}] raw HTML keyed by any
#   identifier; keys are passed through unchanged
# @return [Hash{Object => String}] same keys, each mapped to the cleaned
#   snippet, or '' when the value is nil, empty, or has no recognised
#   container
def process_html(docs_html) # rubocop:disable Metrics/MethodLength
  docs_html.transform_values do |html|
    next '' if html.nil? || html.empty?

    # Extract just the main content areas, skip scripts, styles, etc.
    # String#[] returns only the first match (or nil), which avoids
    # collecting every match the way scan(...).first does.
    main_content = html[%r{<main.*?>.*?</main>}m] ||
                   html[%r{<article.*?>.*?</article>}m] ||
                   html[%r{<div class="content.*?>.*?</div>}m]
    next '' unless main_content

    # Further clean up the content
    main_content.gsub(%r{<script.*?>.*?</script>}m, '')
                .gsub(%r{<style.*?>.*?</style>}m, '')
                .gsub(/<!--.*?-->/m, '')
                .gsub(/\s+/, ' ')
                .strip
  end
end