class CapabilityGenerator

def process_html(docs_html) # rubocop:disable Metrics/MethodLength

rubocop:disable Metrics/MethodLength
# Reduces each HTML document in +docs_html+ to its primary content markup.
#
# For every value, the first container found is kept, tried in this order:
# +<main>+, then +<article>+, then a +<div>+ whose class attribute starts
# with +content+. Inside that container, +<script>+ blocks, +<style>+ blocks
# and HTML comments are removed, every run of whitespace is collapsed to a
# single space, and the result is stripped. Other tags are left in place.
#
# Tag names are matched case-insensitively (HTML tag names are not
# case-sensitive), so +<MAIN>+ or +<SCRIPT>+ are handled like their
# lowercase forms. The +content+ class value itself is matched exactly.
#
# Known limitation: the patterns are non-greedy, so a container ends at the
# first matching closing tag. A content +<div>+ that holds nested +<div>+
# elements is therefore cut at the first inner +</div>+.
#
# @param docs_html [Hash] maps arbitrary keys to an HTML string or nil
# @return [Hash] same keys; each value is the cleaned content markup, or
#   '' when the input was nil/empty or no content container was found
def process_html(docs_html) # rubocop:disable Metrics/MethodLength
  docs_html.transform_values do |html|
    next '' if html.nil? || html.empty?

    # String#[] returns only the first match, so the document is not
    # scanned for every occurrence just to discard all but the first.
    main_content = html[%r{<main.*?>.*?</main>}mi] ||
                   html[%r{<article.*?>.*?</article>}mi] ||
                   # Only the tag and attribute names ignore case here;
                   # the class value stays case-sensitive.
                   html[%r{<(?i:div class)="content.*?>.*?</(?i:div)>}m]
    next '' unless main_content

    main_content.gsub(%r{<script.*?>.*?</script>}mi, '')
                .gsub(%r{<style.*?>.*?</style>}mi, '')
                .gsub(/<!--.*?-->/m, '')
                .gsub(/\s+/, ' ')
                .strip
  end
end