require 'base64'
require 'bigdecimal'
require 'date'
require 'stringio'
require 'time'
require 'yaml'
module MultiXml
# Raised by MultiXml.parse in place of the active parser's own error class.
class ParseError < StandardError
end
# Candidate libraries as [require path, parser name] pairs, listed in the
# order default_parser tries to load them.
unless defined?(REQUIREMENT_MAP)
  REQUIREMENT_MAP = [
    ['ox', :ox],
    ['libxml', :libxml],
    ['nokogiri', :nokogiri],
    ['rexml/document', :rexml]
  ]
end
# Hash key under which typecast_xml_value looks for an element's content.
unless defined?(CONTENT_ROOT)
  CONTENT_ROOT = '__content__'.freeze
end
# TODO: use Time.xmlschema instead of Time.parse;
# use regexp instead of Date.parse
unless defined?(PARSING)
  # Converters keyed by the value of an element's "type" attribute.
  # typecast_xml_value calls one-argument converters with the element's
  # content, and two-argument converters with the content plus the whole
  # element hash (so they can read sibling attributes such as 'encoding').
  PARSING = {
    # NOTE(review): to_sym on document-supplied text; on Rubies that never
    # garbage-collect symbols this lets input grow the symbol table.
    'symbol'       => Proc.new { |symbol| symbol.to_sym },
    'date'         => Proc.new { |date| Date.parse(date) },
    # Time.parse is tried first. When it raises, fall back to DateTime.parse
    # and convert the result to a UTC Time, so both paths return the same
    # kind of object. (DateTime#utc is not provided by the stdlib 'date'
    # library, hence to_time.utc.)
    'datetime'     => Proc.new { |time| Time.parse(time).utc rescue DateTime.parse(time).to_time.utc },
    'integer'      => Proc.new { |integer| integer.to_i },
    'float'        => Proc.new { |float| float.to_f },
    'decimal'      => Proc.new { |number| BigDecimal(number) },
    # Everything except "0" and "false" (ignoring surrounding whitespace) is true.
    'boolean'      => Proc.new { |boolean| !%w(0 false).include?(boolean.strip) },
    'string'       => Proc.new { |string| string.to_s },
    # SECURITY NOTE(review): YAML::load is applied to document content. On
    # Ruby/Psych versions where `load` is not safe by default this can
    # instantiate arbitrary objects; do not feed untrusted XML through this
    # converter without switching to a safe loader. Text that fails to load
    # is returned unchanged.
    'yaml'         => Proc.new { |yaml| YAML::load(yaml) rescue yaml },
    'base64Binary' => Proc.new { |binary| binary.unpack('m').first },
    'binary'       => Proc.new { |binary, entity| parse_binary(binary, entity) },
    'file'         => Proc.new { |file, entity| parse_file(file, entity) }
  }
  # Alternate spellings that reuse the converters above.
  PARSING.update(
    'double'   => PARSING['float'],
    'dateTime' => PARSING['datetime']
  )
end
class << self
# Get the current parser class. When none has been set yet, the result of
# default_parser is installed through parser= first.
def parser
  self.parser = default_parser unless @parser
  @parser
end
# The default parser based on what you currently
# have loaded and installed. First checks to see
# if any parsers are already loaded, then tries to
# require each library in REQUIREMENT_MAP in order
# and returns the name paired with the first one
# that loads.
#
# Raises a RuntimeError when no library can be loaded.
# Previously the method fell through and returned
# REQUIREMENT_MAP itself, which parser= then rejected
# with an unrelated message.
def default_parser
  return :ox if defined?(::Ox)
  return :libxml if defined?(::LibXML)
  return :nokogiri if defined?(::Nokogiri)
  REQUIREMENT_MAP.each do |(library, parser_name)|
    begin
      require library
      return parser_name
    rescue LoadError
      next
    end
  end
  tried = REQUIREMENT_MAP.map { |(library, _)| library }.join(', ')
  raise "No XML parser could be loaded (tried: #{tried}). Install one of them or set the parser explicitly."
end
# Set the XML parser utilizing a symbol, string, or class.
# Supported by default are:
#
# * <tt>:libxml</tt>
# * <tt>:nokogiri</tt>
# * <tt>:ox</tt>
# * <tt>:rexml</tt>
#
# A String or Symbol is required from <tt>multi_xml/parsers/</tt> (lower-cased)
# and then looked up in MultiXml::Parsers under its camel-cased name.
# A Class or Module is stored as given. Anything else raises a RuntimeError.
def parser=(new_parser)
  case new_parser
  when String, Symbol
    name = new_parser.to_s
    require "multi_xml/parsers/#{name.downcase}"
    # "foo_bar" -> "FooBar"
    constant_name = name.split('_').map { |part| part.capitalize }.join('')
    @parser = MultiXml::Parsers.const_get(constant_name)
  when Module
    # Class is a Module, so this covers both.
    @parser = new_parser
  else
    raise "Did not recognize your parser specification. Please specify either a symbol or a class."
  end
end
# Parse an XML string or IO into Ruby.
#
# +xml+ may be nil (treated as an empty document), a String, or any object
# responding to +read+. Blank input yields an empty Hash.
#
# <b>Options</b>
#
# <tt>:symbolize_keys</tt> :: If true, will use symbols instead of strings for the keys.
#
# Raises MultiXml::ParseError when the active parser raises its own
# parse_error class; the original message and backtrace are preserved.
def parse(xml, options={})
  xml ||= ''
  # Strip a copy rather than using strip!: the caller's string must not be
  # modified, and strip! raises on frozen strings.
  xml = xml.strip if xml.respond_to?(:strip)
  begin
    xml = StringIO.new(xml) unless xml.respond_to?(:read)
    # Peek at the first character to detect empty input without consuming it.
    char = xml.getc
    return {} if char.nil?
    xml.ungetc(char)
    hash = typecast_xml_value(undasherize_keys(parser.parse(xml))) || {}
  rescue parser.parse_error => error
    raise ParseError, error.to_s, error.backtrace
  end
  hash = symbolize_keys(hash) if options[:symbolize_keys]
  hash
end
# Mixin that gives an object <tt>original_filename</tt> and
# <tt>content_type</tt> accessors. Each reader falls back to a fixed
# default while its value is unset (or nil/false).
module FileLike #:nodoc:
  attr_writer :content_type
  attr_writer :original_filename

  def content_type
    return @content_type if @content_type
    'application/octet-stream'
  end

  def original_filename
    return @original_filename if @original_filename
    'untitled'
  end
end
private
# Decode +binary+ according to the element's 'encoding' entry: base64 content
# is decoded, anything else is returned as given.
# TODO: Add support for other encodings
def parse_binary(binary, entity) #:nodoc:
  return Base64.decode64(binary) if 'base64' == entity['encoding']
  binary
end
# Build a StringIO from base64-encoded +file+ content and decorate it with
# FileLike, taking the filename and content type from the element's 'name'
# and 'content_type' entries.
def parse_file(file, entity)
  StringIO.new(Base64.decode64(file)).tap do |io|
    io.extend(FileLike)
    io.original_filename = entity['name']
    io.content_type = entity['content_type']
  end
end
# Return a copy of +hash+ in which every String key is replaced by the
# corresponding Symbol; keys of any other kind are kept as they are.
#
# The conversion is recursive through both nested Hashes and Arrays, so
# hashes that sit inside arrays (e.g. repeated elements) are converted too.
# Any other value is returned unchanged.
def symbolize_keys(hash)
  case hash
  when Hash
    result = {}
    hash.each do |key, value|
      new_key = key.is_a?(String) ? key.to_sym : key
      result[new_key] = symbolize_keys(value)
    end
    result
  when Array
    hash.map { |item| symbolize_keys(item) }
  else
    hash
  end
end
# Return a copy of +params+ in which every Hash key is converted to a String
# with '-' replaced by '_'. Recurses through nested Hashes and Arrays; any
# other value is returned unchanged.
def undasherize_keys(params)
  case params
  when Array
    params.map { |element| undasherize_keys(element) }
  when Hash
    renamed = {}
    params.each do |key, value|
      renamed[key.to_s.tr('-', '_')] = undasherize_keys(value)
    end
    renamed
  else
    params
  end
end
# Recursively convert raw parser output into plain Ruby values.
#
# * String - returned unchanged.
# * Array  - every element is converted in place; an array with fewer than
#   two elements collapses to its first element (nil when empty).
# * Hash   - treated as an XML element and interpreted through its 'type',
#   'nil' and CONTENT_ROOT entries, as described on each branch below.
#
# Raises a RuntimeError for any other kind of value.
def typecast_xml_value(value)
  case value
  when String
    value
  when Array
    value.map! { |item| typecast_xml_value(item) }
    value.length > 1 ? value : value.first
  when Hash
    if value['type'] == 'array'
      # Use the first entry, other than 'type' itself, that holds an Array or
      # a Hash. Entries with any other kind of value are skipped, which lets
      # plain attributes sitting next to the items be ignored.
      # Background: https://github.com/jnunemaker/httparty/issues/102
      _, entries = value.detect do |key, candidate|
        key != 'type' && (candidate.is_a?(Array) || candidate.is_a?(Hash))
      end
      return [] if entries.nil? || (entries.is_a?(String) && entries.strip.empty?)
      case entries
      when Array
        entries.map { |entry| typecast_xml_value(entry) }
      when Hash
        # A single item is wrapped so the result is still an Array.
        [typecast_xml_value(entries)]
      else
        raise "can't typecast #{entries.class.name}: #{entries.inspect}"
      end
    elsif value.has_key?(CONTENT_ROOT)
      content = value[CONTENT_ROOT]
      converter = PARSING[value['type']]
      # Unknown or missing types leave the content as it is.
      return content unless converter
      # One-argument converters receive only the content; the others also
      # receive the whole element hash.
      converter.arity == 1 ? converter.call(content) : converter.call(content, value)
    elsif value['type'] == 'string' && value['nil'] != 'true'
      # A string element without content is the empty string.
      ''
    elsif value.empty? || value['nil'] == 'true' ||
          (value['type'] && value.size == 1 && !value['type'].is_a?(Hash))
      # Blank elements, nil="true" elements, and elements whose only entry is
      # a non-Hash 'type' are all represented by nil. A Hash under 'type' is
      # a child node named "type" and is handled as a child below.
      nil
    else
      converted = {}
      value.each { |key, child| converted[key] = typecast_xml_value(child) }
      # Turn {'files' => {'file' => #<StringIO>}} into {'files' => #<StringIO>}
      # so it is compatible with how multipart uploaded files from HTML appear.
      file = converted['file']
      file.is_a?(StringIO) ? file : converted
    end
  else
    raise "can't typecast #{value.class.name}: #{value.inspect}"
  end
end
end
end