# Parses the MIME database XML obtained from open_mime_database and fills the
# EXTENSIONS, TYPES and MAGIC tables.
#
# For every `mime-info/mime-type` element:
# * file extensions come from `glob` patterns of the form `*.ext` (patterns
#   containing `[` or `]` are ignored) and are lower-cased;
# * parent types come from the `sub-class-of` children;
# * the comment is the `comment` child that has no `xml:lang` attribute;
# * each `magic` child contributes a [priority, type, matches] entry, where
#   matches is whatever get_matches returns for that node.
#
# Resulting tables:
# * EXTENSIONS[ext] = type; the first type in document order that claims an
#   extension keeps it. Keys are inserted in sorted order.
# * TYPES[type] = [extensions, sorted parent types, comment], only for types
#   with at least one usable extension. Keys are inserted in sorted order.
# * MAGIC receives [type, matches] pairs: first one entry per common type
#   (see common_types below), then everything ordered by descending priority
#   and ascending type name, with exact duplicates removed.
#
# Returns the ordered list of [priority, type, matches] entries (the receiver
# of the final `each`); callers presumably rely only on the side effects.
def self.parse_database
  file = open_mime_database
  begin
    doc = Nokogiri::XML(file)
  ensure
    # The source is only needed while building the document, so release the
    # handle instead of leaking it. NOTE(review): assumes open_mime_database
    # hands over ownership of the object it returns - confirm.
    file.close if file.respond_to?(:close)
  end

  extensions = {}
  types = {}
  magics = []

  (doc / 'mime-info/mime-type').each do |mime|
    type = mime['type']
    comments = (mime / 'comment').map { |comment| [comment['xml:lang'], comment.inner_text] }.to_h
    subclass = (mime / 'sub-class-of').map { |parent| parent['type'] }

    # Only simple "*.ext" globs yield an extension; anything else maps to nil.
    patterns = (mime / 'glob').map { |glob| glob['pattern'] }
    exts = patterns.map do |pattern|
      Regexp.last_match(1).downcase if pattern =~ /^\*\.([^\[\]]+)$/
    end.compact

    (mime / 'magic').each do |magic|
      magics << [magic['priority'].to_i, type, get_matches(magic)]
    end

    # Types without a usable extension still contribute magic entries above,
    # but are not recorded in the extension/type tables.
    next if exts.empty?

    exts.each { |ext| extensions[ext] = type unless extensions.key?(ext) }
    types[type] = [exts, subclass, comments[nil]]
  end

  # Highest priority first; entries with equal priority are ordered by type.
  magics = magics.sort { |a, b| [-a[0], a[1]] <=> [-b[0], b[1]] }

  common_types = [
    "image/jpeg",                                                              # .jpg
    "image/png",                                                               # .png
    "image/gif",                                                               # .gif
    "image/tiff",                                                              # .tiff
    "image/bmp",                                                               # .bmp
    "image/vnd.adobe.photoshop",                                               # .psd
    "image/webp",                                                              # .webp
    "image/svg+xml",                                                           # .svg
    "video/x-msvideo",                                                         # .avi
    "video/x-ms-wmv",                                                          # .wmv
    "video/mp4",                                                               # .mp4, .m4v
    "video/quicktime",                                                         # .mov
    "video/mpeg",                                                              # .mpeg
    "video/ogg",                                                               # .ogv
    "video/webm",                                                              # .webm
    "video/x-matroska",                                                        # .mkv
    "video/x-flv",                                                             # .flv
    "audio/mpeg",                                                              # .mp3
    "audio/x-wav",                                                             # .wav
    "audio/aac",                                                               # .aac
    "audio/flac",                                                              # .flac
    "audio/mp4",                                                               # .m4a
    "audio/ogg",                                                               # .ogg
    "application/pdf",                                                         # .pdf
    "application/msword",                                                      # .doc
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx
    "application/vnd.ms-powerpoint",                                           # .pps
    "application/vnd.openxmlformats-officedocument.presentationml.slideshow",  # .ppsx
    "application/vnd.ms-excel",                                                # .xls
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",       # .xlsx
  ]

  # Move the first (best-ranked) magic entry of each common type to the
  # front of the list; uniq then drops the copy left at its sorted position.
  common_magics = common_types.map do |common_type|
    magics.find { |entry| entry[1] == common_type }
  end
  magics = (common_magics.compact + magics).uniq

  extensions.keys.sort.each { |ext| EXTENSIONS[ext] = extensions[ext] }

  types.keys.sort.each do |key|
    exts, parents, comment = types[key]
    TYPES[key] = [exts, parents.sort, comment]
  end

  magics.each { |_priority, type, matches| MAGIC << [type, matches] }
end