lib/swot.rb
require "public_suffix" require "naughty_or_nice" require_relative "swot/academic_tlds" require_relative "swot/collection_methods" class Swot VERSION = "1.0.5" # These are domains that snuck into the edu registry, # but don't pass the education sniff test # Note: validated domain must not end with the blacklisted string BLACKLIST = File.readlines(File.join(__dir__, '../academic_data/stoplist.txt')).map(&:chomp).freeze include NaughtyOrNice extend SwotCollectionMethods class << self alias_method :is_academic?, :valid? alias_method :academic?, :valid? def get_institution_name(text) Swot.new(text).institution_name end alias_method :school_name, :get_institution_name def domains_path @domains_path ||= File.expand_path "../academic_data", File.dirname(__FILE__) end # Returns a new Swot instance for the domain file at the given path. # Note that the path must be absolute. # # Returns a Swot instance or false is no domain is found at the given path. def from_path(path_string_or_path) path = Pathname.new(path_string_or_path) return false unless path.exist? path_dir, file = path.relative_path_from(Pathname.new(domains_path)).split backwards_path = path_dir.to_s.split('/').push(file.basename('.txt').to_s) domain = backwards_path.reverse.join('.') Swot.new(domain) end end # Figure out if an email or domain belongs to academic institution. # # Returns true if the domain name belongs to an academic institution; # false otherwise. def valid? if domain.nil? false elsif BLACKLIST.any? { |d| to_s =~ /(\A|\.)#{Regexp.escape(d)}\z/ } false elsif ACADEMIC_TLDS.include?(domain.tld) true elsif academic_domain? true else false end end # Figure out the institution name based on the email address/domain. # # Returns a string with the institution name; nil if nothing is found. def institution_name @institution_name ||= File.read(file_path, :mode => "rb", :external_encoding => "UTF-8").strip rescue nil end alias_method :school_name, :institution_name alias_method :name, :institution_name # Figure out if a domain name is a know academic institution. # # Returns true if the domain name belongs to a known academic institution; # false otherwise. def academic_domain? @academic_domain ||= File.exist?(file_path) || File.exist?(file_extended_path) end private def file_path @file_path ||= File.join(Swot::domains_path, domain.domain.to_s.split(".").reverse) + ".txt" end def file_extended_path @file_extended_path ||= File.join([Swot::domains_path, domain.to_s.split(".").reverse].flatten) + ".txt" end end