class SimplePoParser::Parser
the next newline is hit (unless multi-line mode is explicitly enabled)
* Ruby's regex is by default in single-line mode, therefore scan will only match until
* the start of line anchor ^ is obsolete as scan will only match start of line.
a match of the regex (zero times). We make use of this “trick”
let scan return an empty string if there is “no match” as the empty string qualifies as
* scan will return nil if there is no match. Using the regex * (zero or more) quantifier will
Important notes about StringScanner.scan:
Fast parser directly using Ruby's powerful StringScanner (strscan)
def add_result(key, text)
adds text to the given key in results
# Stores +text+ under +key+ in @result.
#
# The first value seen for a key is stored as-is. If the key already holds a
# value, the entry becomes an Array (or is appended to, when it already is
# one), so repeated keys keep every value in the order they were added.
def add_result(key, text)
  current = @result[key]
  case current
  when nil, false
    @result[key] = text
  when Array
    current.push(text)
  else
    @result[key] = [current, text]
  end
end
def comment
match a comment line. called on lines starting with '#'.
# Parses one comment line. Expects the leading '#' to have been consumed
# already (see `lines`), reads the next character to decide the comment type,
# stores the comment text and continues with the following line.
#
# Recognised markers: ' ' translator comment, '.' extracted comment,
# ':' reference, ',' flag, '|' previous-entry comment, "\n" empty translator
# comment, '~' obsolete entry (after which every remaining line must be
# obsolete as well).
#
# Raises PoSyntaxError for any other marker, including end of input directly
# after the '#'.
def comment
  marker = @scanner.getch
  case marker
  when ' '
    skip_whitespace
    add_result(:translator_comment, comment_text)
    lines
  when '.'
    skip_whitespace
    add_result(:extracted_comment, comment_text)
    lines
  when ':'
    skip_whitespace
    add_result(:reference, comment_text)
    lines
  when ','
    skip_whitespace
    add_result(:flag, comment_text)
    lines
  when '|'
    skip_whitespace
    previous_comments
    lines
  when "\n"
    add_result(:translator_comment, "") # empty comment line
    lines
  when '~'
    if @result[:previous_msgctxt] || @result[:previous_msgid] || @result[:previous_msgid_plural]
      raise PoSyntaxError, "Previous comment entries need to be marked obsolete too in obsolete message entries. But already got: #{@result}"
    end
    skip_whitespace
    add_result(:obsolete, comment_text)
    obsoletes
  else
    # Rewind to the '#' so the error shows the whole offending line start.
    # getch returns nil at end of input and may return a multi-byte
    # character, so step back by what was really consumed instead of a fixed
    # two bytes (which could move before the start of the string).
    consumed = 1 + (marker ? marker.bytesize : 0)
    @scanner.pos = @scanner.pos - consumed
    raise PoSyntaxError, "Unknown comment type #{@scanner.peek(10).inspect}"
  end
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in comment\n" + pe.message, pe.backtrace
end
def comment_text
-
(String)
- text
# Reads the remainder of the current line as comment text and moves the
# scanner to the start of the next line (or to end of input).
#
# Returns the text (String) with trailing whitespace removed.
# Raises PoSyntaxError if the scanner is not at a line start or at end of
# input afterwards.
def comment_text
  text = @scanner.scan(/.*/) # everything until newline
  text.rstrip! # benchmarked faster to rstrip the string in place
  raise PoSyntaxError, "Comment text should advance to next line or stop at eos" unless end_of_line
  text
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in comment_text\n" + pe.message, pe.backtrace
end
def end_of_line
-
(Boolean)
- true if scanner at beginning of line or eos
# Consumes a single newline if the scanner is positioned on one.
#
# Returns true when the scanner then sits at the beginning of a line or at
# end of input, false otherwise.
def end_of_line
  @scanner.skip(/\n/)
  @scanner.bol? || @scanner.eos?
end
def lines
message parsing is always started with checking for msgctxt as content is expected in
arbitrary line of a PO message. Can be comment or message
# Entry point for a single line of a PO message: a line starting with '#' is
# handed to `comment`, anything else to `msgctxt`.
#
# A PoSyntaxError raised further down is converted into a ParserError here,
# which breaks out of the recursive descent.
def lines
  @scanner.scan(/#/) ? comment : msgctxt
rescue PoSyntaxError => pe
  # throw a normal ParserError to break the recursion
  raise ParserError, "Syntax error in lines\n" + pe.message, pe.backtrace
end
def message_line
-
(String)
- message_text
# Parses one double-quoted message text up to the end of its line.
#
# Expects the scanner to be positioned on the opening double quote. After the
# closing quote only blanks may follow until the line ends.
#
# Returns the text between the quotes (String).
# Raises PoSyntaxError when the opening or closing quote is missing or when
# other content follows the closing quote on the same line.
def message_line
  opening = @scanner.getch
  unless opening == '"'
    # Undo exactly what getch consumed so the error shows the offending text:
    # nothing at end of input, and the full byte length for a multi-byte
    # character (a fixed one-byte rewind would land inside the character or
    # on the preceding token).
    @scanner.pos = @scanner.pos - opening.bytesize if opening
    err_msg = "A message text needs to start with the double quote character '\"',"
    err_msg += " but this was found: #{@scanner.peek(10).inspect}"
    raise PoSyntaxError, err_msg
  end

  text = message_text
  unless @scanner.getch == '"'
    err_msg = "The message text '#{text}' must be finished with the double quote character '\"'."
    raise PoSyntaxError, err_msg
  end

  skip_whitespace
  unless end_of_line
    err_msg = "There should be only whitespace until the end of line"
    err_msg += " after the double quote character of a message text."
    raise PoSyntaxError, err_msg
  end
  text
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in message_line\n" + pe.message, pe.backtrace
end
def message_multiline(key)
followed by more lines starting with the double quote character.
Multiline messages are usually indicated by an empty string as the first line,
parses a multiline message
# Collects continuation lines of a multiline message.
#
# After skipping leading blanks, every line that starts with a double quote
# is parsed with `message_line` and added to the result under +key+. Stops at
# the first line that does not start with a double quote.
#
# Raises PoSyntaxError (prefixed once per continuation level) when a
# continuation line is malformed.
def message_multiline(key)
  skip_whitespace
  return unless @scanner.check(/"/)

  add_result(key, message_line)
  message_multiline(key)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in message_multiline with key '#{key}'\n" + pe.message, pe.backtrace
end
def message_text
-
(String)
- text
# Reads the body of a quoted message text.
#
# Consumes everything up to, but not including, the next unescaped double
# quote; the escape pairs \\ and \" are treated as part of the text.
#
# Returns the consumed text (String), which is empty for an empty message.
def message_text
  until_unescaped_quote = /(\\(\\|")|[^"])*/
  @scanner.scan_until(until_unescaped_quote)
end
def msgctxt
matches the msgctxt line and will continue to check for msgid afterwards
# Parses an optional msgctxt entry (including continuation lines) and then
# always continues with `msgid`.
#
# Raises PoSyntaxError, prefixed with this method's name, when the context
# line or anything parsed after it is malformed.
def msgctxt
  context_present = @scanner.scan(/msgctxt/)
  if context_present
    skip_whitespace
    add_result(:msgctxt, message_line)
    message_multiline(:msgctxt) if @scanner.peek(1) == '"'
  end
  msgid
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgctxt\n" + pe.message, pe.backtrace
end
def msgid
Will advance to msgstr or msgstr_plural based on msgid_plural
matches the msgid line. Will check for optional msgid_plural.
# Parses the mandatory msgid entry (including continuation lines), then an
# optional msgid_plural, and continues with `msgstr_plural` when a plural id
# was present or with `msgstr` otherwise.
#
# Raises PoSyntaxError when the line does not start with "msgid" or when
# anything parsed from here on is malformed.
def msgid
  unless @scanner.scan(/msgid/)
    err_msg = "Message without msgid is not allowed."
    # leading space keeps the two sentences apart in the final message
    err_msg += " The Line started unexpectedly with #{@scanner.peek(10).inspect}."
    raise PoSyntaxError, err_msg
  end

  skip_whitespace
  add_result(:msgid, message_line)
  message_multiline(:msgid) if @scanner.peek(1) == '"'
  msgid_plural ? msgstr_plural : msgstr
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgid\n" + pe.message, pe.backtrace
end
def msgid_plural
-
(boolean)
- true if msgid_plural is present, false otherwise
# Parses an optional msgid_plural entry (including continuation lines).
#
# Returns true when a msgid_plural entry was present and parsed, false when
# the line does not start with "msgid_plural".
# Raises PoSyntaxError, prefixed with this method's name, when the entry is
# malformed.
def msgid_plural
  return false unless @scanner.scan(/msgid_plural/)

  skip_whitespace
  add_result(:msgid_plural, message_line)
  message_multiline(:msgid_plural) if @scanner.peek(1) == '"'
  true
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgid_plural\n" + pe.message, pe.backtrace
end
def msgstr
parses the msgstr singular line
# Parses the singular msgstr entry (including continuation lines). This is
# the last entry of a singular message, so only blanks may follow it.
#
# Raises PoSyntaxError when the line does not start with "msgstr", when the
# entry is malformed, or when content remains after it.
def msgstr
  unless @scanner.scan(/msgstr/)
    raise PoSyntaxError, "Singular message without msgstr is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
  end

  skip_whitespace
  add_result(:msgstr, message_line)
  message_multiline(:msgstr) if @scanner.peek(1) == '"'
  skip_whitespace
  return if @scanner.eos?

  raise PoSyntaxError, "Unexpected content after expected message end #{@scanner.peek(10).inspect}"
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgstr\n" + pe.message, pe.backtrace
end
def msgstr_plural(num = 0)
English language, only have two plural forms (singular and plural),
the plural number as specified in the headers "Plural-Forms" entry. Most languages, like the
They have the format msgstr[N] where N is incremental number starting from zero representing
msgstr plural lines are used when there is msgid_plural.
parses the msgstr plural lines
# Parses the msgstr[N] entries of a plural message, one per call, recursing
# with the next expected index.
#
# num - index the next msgstr[N] entry must carry (starts at 0).
#
# Each entry is stored in the result under its literal key, e.g.
# "msgstr[0]". Only single-digit indices are matched.
#
# Raises PoSyntaxError when msgstr[0] is missing, when indices are not
# consecutive starting from zero, when an entry is malformed, or when
# content other than a msgstr[N] entry follows the last one.
def msgstr_plural(num = 0)
  plural_key = @scanner.scan(/msgstr\[\d\]/) # matches 'msgstr[0]' to 'msgstr[9]'

  unless plural_key
    if num == 0
      raise PoSyntaxError, "Plural message without msgstr[0] is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
    end
    return if @scanner.eos?

    raise PoSyntaxError, "End of message was expected, but line started unexpectedly with #{@scanner.peek(10).inspect}"
  end

  # msgstr plurals must come in 0-based index in order
  index = plural_key[/\d/].to_i
  raise PoSyntaxError, "Bad 'msgstr[index]' index." unless index == num

  skip_whitespace
  add_result(plural_key, message_line)
  message_multiline(plural_key) if @scanner.peek(1) == '"'
  msgstr_plural(num + 1)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgstr_plural\n" + pe.message, pe.backtrace
end
def obsoletes
parses all obsolete lines.
# Parses all remaining obsolete lines. Every line starting with "#~" has its
# text added to the result under :obsolete.
#
# Raises PoSyntaxError when a line that is not marked obsolete is found
# before end of input.
def obsoletes
  while @scanner.scan(/#~/)
    skip_whitespace
    add_result(:obsolete, comment_text)
  end
  return if @scanner.eos?

  raise PoSyntaxError, "All lines must be obsolete after the first obsolete line, but got #{@scanner.peek(10).inspect}."
end
def parse(message)
-
(Hash)
- parsed PO message information in Hash format
Parameters:
-
message
() -- a single PO message in String format without leading or trailing whitespace
# Parses a single PO message.
#
# message - one PO message as a String; leading and trailing whitespace is
#           stripped before parsing.
#
# Returns the Hash of parsed entries collected in @result.
# Raises ParserError when the message is malformed. The error message
# carries the nested syntax error description and the partial result parsed
# so far; its backtrace is reduced to frames from lib/simple_po_parser.
def parse(message)
  @result = {}
  @scanner = StringScanner.new(message.strip)
  begin
    lines
  rescue ParserError => pe
    # ": " keeps the class-name prefix from running into the nested message
    error_msg = "SimplePoParser::ParserError: "
    error_msg += pe.message
    error_msg += "\nParsing result before error: '#{@result}'"
    error_msg += "\nSimplePoParser filtered backtrace: SimplePoParser::ParserError"
    backtrace = pe.backtrace.grep(/lib\/simple_po_parser/).join("\n\tfrom ")
    raise ParserError, error_msg, backtrace
  end
  @result
end
def previous_comments
* #| msgid
* #| msgctxt
previous comments are:
parses previous comments, which provide additional information on fuzzy matching
# Parses a previous-entry comment ("#| msgctxt", "#| msgid" or
# "#| msgid_plural"). Expects the "#|" marker and following blanks to have
# been consumed already.
#
# The text is stored under :previous_msgctxt, :previous_msgid or
# :previous_msgid_plural; following "#| "..." continuation lines are handed
# to `previous_multiline`.
#
# Raises PoSyntaxError when the entry type is unknown or the text is
# malformed.
def previous_comments
  # next part must be msgctxt, msgid or msgid_plural
  unless @scanner.scan(/msg/)
    raise PoSyntaxError, "Previous comments must start with '#| msg'. #{@scanner.peek(10).inspect} unknown."
  end

  key =
    if @scanner.scan(/id/)
      @scanner.scan(/_plural/) ? :previous_msgid_plural : :previous_msgid
    elsif @scanner.scan(/ctxt/)
      :previous_msgctxt
    else
      raise PoSyntaxError, "Previous comment type #{("msg" + @scanner.peek(10)).inspect} unknown."
    end

  skip_whitespace
  add_result(key, message_line)
  previous_multiline(key) if @scanner.match?(/#\|\p{Blank}*"/)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in previous_comments\n" + pe.message, pe.backtrace
end
def previous_multiline(key)
# Collects continuation lines of a previous-entry comment, i.e. lines of the
# form `#| "..."`, adding each text to the result under +key+. Stops at the
# first line that does not have that form.
#
# Raises PoSyntaxError (prefixed once per continuation level) when a
# continuation line is malformed.
def previous_multiline(key)
  # The pattern has to include the double quote to make sure this really is
  # a continuation line and not another "#|" line type.
  return unless @scanner.scan(/#\|\p{Blank}*"/)

  @scanner.pos -= 1 # step back onto the quote so message_line can be reused
  add_result(key, message_line)
  previous_multiline(key) # go on until no further continuation line follows
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in previous_multiline\n" + pe.message, pe.backtrace
end
def skip_whitespace
advances the scanner until the next non whitespace position.
# Moves the scanner past any run of blank characters at the current
# position. Newlines are not skipped.
#
# Returns the length of the skipped run, or nil when there was nothing to
# skip.
def skip_whitespace
  blanks = /\p{Blank}+/
  @scanner.skip(blanks)
end