lib/rouge/lexers/nasm.rb



# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class Nasm < RegexLexer
      tag 'nasm'
      filenames '*.asm'
      #mimetypes 'text/x-chdr', 'text/x-csrc'

      title "Nasm"
      desc "Netwide Assembler"

      ws = %r((?:\s|;.*?\n/)+)
      id = /[a-zA-Z_][a-zA-Z0-9_]*/

      #todo: pull more instructions from: http://www.nasm.us/doc/nasmdocb.html
      #so far, we have sections 1.1 and 1.2

      def self.keywords
        @keywords ||= Set.new %w(
           aaa aad aam aas adc add and arpl bb0_reset bb1_reset bound bsf bsr bswap bt btc btr bts
           call cbw cdq cdqe clc cld cli clts cmc cmp cmpsb cmpsd cmpsq cmpsw cmpxchg
           cmpxchg16b cmpxchg486 cmpxchg8b cpuid cpu_read cpu_write cqo cwd cwde daa das dec div
           dmint emms enter equ f2xm1 fabs fadd faddp fbld fbstp fchs fclex fcmovb fcmovbe fcmove
           fcmovnb fcmovnbe fcmovne fcmovnu fcmovu fcom fcomi fcomip fcomp fcompp fcos fdecstp
           fdisi fdiv fdivp fdivr fdivrp femms feni ffree ffreep fiadd ficom ficomp fidiv fidivr
           fild fimul fincstp finit fist fistp fisttp fisub fisubr fld fld1 fldcw fldenv fldl2e
           fldl2t fldlg2 fldln2 fldpi fldz fmul fmulp fnclex fndisi fneni fninit fnop fnsave fnstcw
           fnstenv fnstsw fpatan fprem fprem1 fptan frndint frstor fsave fscale fsetpm fsin fsincos
           fsqrt fst fstcw fstenv fstp fstsw fsub fsubp fsubr fsubrp ftst fucom fucomi fucomip
           fucomp fucompp fwait fxam fxch fxtract fyl2x fyl2xp1 hlt ibts icebp idiv imul in inc
           incbin insb insd insw int int01 int03 int1 int3 into invd invlpg invlpga invpcid iret
           iretd iretq iretw jcxz jecxz jmp jmpe jrcxz lahf lar lds lea leave les lfence lfs
           lgdt lgs lidt lldt lmsw loadall loadall286 lodsb lodsd lodsq lodsw loop loope loopne
           loopnz loopz lsl lss ltr mfence monitor monitorx mov movd movq movsb movsd movsq movsw
           movsx movsxd movzx mul mwait mwaitx neg nop not or out outsb outsd outsw packssdw
           packsswb packuswb paddb paddd paddsb paddsiw paddsw paddusb paddusw paddw pand pandn
           pause paveb pavgusb pcmpeqb pcmpeqd pcmpeqw pcmpgtb pcmpgtd pcmpgtw pdistib pf2id pfacc
           pfadd pfcmpeq pfcmpge pfcmpgt pfmax pfmin pfmul pfrcp pfrcpit1 pfrcpit2 pfrsqit1 pfrsqrt
           pfsub pfsubr pi2fd pmachriw pmaddwd pmagw pmulhriw pmulhrwa pmulhrwc pmulhw pmullw
           pmvgezb pmvlzb pmvnzb pmvzb pop popa popad popaw popf popfd popfq popfw por prefetch
           prefetchw pslld psllq psllw psrad psraw psrld psrlq psrlw psubb psubd psubsb psubsiw
           psubsw psubusb psubusw psubw punpckhbw punpckhdq punpckhwd punpcklbw punpckldq punpcklwd
           push pusha pushad pushaw pushf pushfd pushfq pushfw pxor rcl rcr rdm rdmsr rdpmc rdshr
           rdtsc rdtscp ret retf retn rol ror rsdc rsldt rsm rsts sahf sal salc sar sbb scasb scasd
           scasq scasw sfence sgdt shl shld shr shrd sidt skinit sldt smi smint smintold smsw
           stc std sti stosb stosd stosq stosw str sub svdc svldt svts swapgs syscall sysenter
           sysexit sysret test ud0 ud1 ud2 ud2a ud2b umov verr verw wbinvd wrmsr wrshr xadd xbts
           xchg xlat xlatb xor

           cmova cmovae cmovb cmovbe cmovc cmove cmovg cmovge cmovl cmovle cmovna cmovnae cmovnb cmovnbe cmovnc cmovne cmovng cmovnge cmovnl cmovnle cmovno cmovnp cmovns cmovnz cmovo cmovp cmovpe cmovpo cmovs cmovz

           ja jae jb jbe jc jcxz jecxz je jg jge jl jle jna jnae jnb jnbe jnc jne jng jnge jnl jnle jno jnp jns jnz jo jp jpe jpo js jz

           seta setae setb setbe setc sete setg setge setl setle setna setnae setnb setnbe setnc setne setng setnge setnl setnle setno setnp setns setnz seto setp setpe setpo sets setz

           AAA AAD AAM AAS ADC ADD AND ARPL BB0_RESET BB1_RESET BOUND BSF BSR BSWAP BT BTC BTR BTS
           CALL CBW CDQ CDQE CLC CLD CLI CLTS CMC  CMP CMPSB CMPSD CMPSQ CMPSW CMPXCHG
           CMPXCHG16B CMPXCHG486 CMPXCHG8B CPUID CPU_READ CPU_WRITE CQO CWD CWDE DAA DAS DEC DIV
           DMINT EMMS ENTER EQU F2XM1 FABS FADD FADDP FBLD FBSTP FCHS FCLEX FCMOVB FCMOVBE FCMOVE
           FCMOVNB FCMOVNBE FCMOVNE FCMOVNU FCMOVU FCOM FCOMI FCOMIP FCOMP FCOMPP FCOS FDECSTP
           FDISI FDIV FDIVP FDIVR FDIVRP FEMMS FENI FFREE FFREEP FIADD FICOM FICOMP FIDIV FIDIVR
           FILD FIMUL FINCSTP FINIT FIST FISTP FISTTP FISUB FISUBR FLD FLD1 FLDCW FLDENV FLDL2E
           FLDL2T FLDLG2 FLDLN2 FLDPI FLDZ FMUL FMULP FNCLEX FNDISI FNENI FNINIT FNOP FNSAVE FNSTCW
           FNSTENV FNSTSW FPATAN FPREM FPREM1 FPTAN FRNDINT FRSTOR FSAVE FSCALE FSETPM FSIN FSINCOS
           FSQRT FST FSTCW FSTENV FSTP FSTSW FSUB FSUBP FSUBR FSUBRP FTST FUCOM FUCOMI FUCOMIP
           FUCOMP FUCOMPP FWAIT FXAM FXCH FXTRACT FYL2X FYL2XP1 HLT IBTS ICEBP IDIV IMUL IN INC
           INCBIN INSB INSD INSW INT INT01 INT03 INT1 INT3 INTO INVD INVLPG INVLPGA INVPCID IRET
           IRETD IRETQ IRETW JCXZ JECXZ JMP JMPE JRCXZ LAHF LAR LDS LEA LEAVE LES LFENCE LFS
           LGDT LGS LIDT LLDT LMSW LOADALL LOADALL286 LODSB LODSD LODSQ LODSW LOOP LOOPE LOOPNE
           LOOPNZ LOOPZ LSL LSS LTR MFENCE MONITOR MONITORX MOV MOVD MOVQ MOVSB MOVSD MOVSQ MOVSW
           MOVSX MOVSXD MOVZX MUL MWAIT MWAITX NEG NOP NOT OR OUT OUTSB OUTSD OUTSW PACKSSDW
           PACKSSWB PACKUSWB PADDB PADDD PADDSB PADDSIW PADDSW PADDUSB PADDUSW PADDW PAND PANDN
           PAUSE PAVEB PAVGUSB PCMPEQB PCMPEQD PCMPEQW PCMPGTB PCMPGTD PCMPGTW PDISTIB PF2ID PFACC
           PFADD PFCMPEQ PFCMPGE PFCMPGT PFMAX PFMIN PFMUL PFRCP PFRCPIT1 PFRCPIT2 PFRSQIT1 PFRSQRT
           PFSUB PFSUBR PI2FD PMACHRIW PMADDWD PMAGW PMULHRIW PMULHRWA PMULHRWC PMULHW PMULLW
           PMVGEZB PMVLZB PMVNZB PMVZB POP POPA POPAD POPAW POPF POPFD POPFQ POPFW POR PREFETCH
           PREFETCHW PSLLD PSLLQ PSLLW PSRAD PSRAW PSRLD PSRLQ PSRLW PSUBB PSUBD PSUBSB PSUBSIW
           PSUBSW PSUBUSB PSUBUSW PSUBW PUNPCKHBW PUNPCKHDQ PUNPCKHWD PUNPCKLBW PUNPCKLDQ PUNPCKLWD
           PUSH PUSHA PUSHAD PUSHAW PUSHF PUSHFD PUSHFQ PUSHFW PXOR RCL RCR RDM RDMSR RDPMC RDSHR
           RDTSC RDTSCP RET RETF RETN ROL ROR RSDC RSLDT RSM RSTS SAHF SAL SALC SAR SBB SCASB SCASD
           SCASQ SCASW  SFENCE SGDT SHL SHLD SHR SHRD SIDT SKINIT SLDT SMI SMINT SMINTOLD SMSW
           STC STD STI STOSB STOSD STOSQ STOSW STR SUB SVDC SVLDT SVTS SWAPGS SYSCALL SYSENTER
           SYSEXIT SYSRET TEST UD0 UD1 UD2 UD2A UD2B UMOV VERR VERW WBINVD WRMSR WRSHR XADD XBTS
           XCHG XLAT XLATB XOR

           CMOVA CMOVAE CMOVB CMOVBE CMOVC CMOVE CMOVG CMOVGE CMOVL CMOVLE CMOVNA CMOVNAE CMOVNB CMOVNBE CMOVNC CMOVNE CMOVNG CMOVNGE CMOVNL CMOVNLE CMOVNO CMOVNP CMOVNS CMOVNZ CMOVO CMOVP CMOVPE CMOVPO CMOVS CMOVZ

           JA JAE JB JBE JC JCXZ JECXZ JE JG JGE JL JLE JNA JNAE JNB JNBE JNC JNE JNG JNGE JNL JNLE JNO JNP JNS JNZ JO JP JPE JPO JS JZ

           SETA SETAE SETB SETBE SETC SETE SETG SETGE SETL SETLE SETNA SETNAE SETNB SETNBE SETNC SETNE SETNG SETNGE SETNL SETNLE SETNO SETNP SETNS SETNZ SETO SETP SETPE SETPO SETS SETZ
        )
      end

      def self.keywords_type
        @keywords_type ||= Set.new %w(
          DB DW DD DQ DT DO DY DZ RESB RESW RESD RESQ REST RESO RESY RESZ
          db dq dd dq dt do dy dz resb resw resd resq rest reso resy resz
        )
      end

      def self.reserved
        @reserved ||= Set.new %w(
          global extern macro endmacro assign rep endrep section
          GLOBAL EXTERN MACRO ENDMACRO ASSIGN REP ENDREP SECTION
        )
      end

      def self.builtins
        @builtins ||= []
      end

      start { push :expr_bol }

      state :expr_bol do
        mixin :inline_whitespace
        rule(//) { pop! }
      end

      state :inline_whitespace do
        rule /[ \t\r]+/, Text
      end

      state :whitespace do
        rule /\n+/m, Text, :expr_bol
        rule %r(//(\\.|.)*?\n), Comment::Single, :expr_bol
        mixin :inline_whitespace
      end

      state :expr_whitespace do
        rule /\n+/m, Text, :expr_bol
        mixin :whitespace
      end

      state :root do
        mixin :expr_whitespace
        rule(//) { push :statement }
        rule /^%[a-zA-Z0-9]+/, Comment::Preproc, :statement

        rule(
          %r(&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|<=|>=|<>|[-&*/\\^+=<>.]),
          Operator
        )
        rule /;.*/, Comment, :statement
        rule /^[a-zA-Z]+[a-zA-Z0-9]*:/, Name::Function
        rule /;.*/, Comment
      end

      state :statement do
        mixin :expr_whitespace
        mixin :statements
        rule /;.*/, Comment
        rule /^%[a-zA-Z0-9]+/, Comment::Preproc
        rule /[a-zA-Z]+%[0-9]+:/, Name::Function
      end

      state :statements do
        mixin :whitespace
        rule /L?"/, Str, :string
        rule /[a-zA-Z]+%[0-9]+:/, Name::Function  #labels/subroutines/functions
        rule %r(L?'(\\.|\\[0-7]{1,3}|\\x[a-f0-9]{1,2}|[^\\'\n])')i, Str::Char
        rule /0x[0-9a-f]+[lu]*/i, Num::Hex
        rule /\d+[lu]*/i, Num::Integer
        rule %r(\*/), Error
        rule %r([~&*+=\|?:<>/-]), Operator
        rule /[(),.]/, Punctuation
        rule /\[[a-zA-Z0-9]*\]/, Punctuation
        rule /%[0-9]+/, Keyword::Reserved
        rule /[a-zA-Z]+%[0-9]+/, Name::Function  #labels/subroutines/functions

        #rule /(?<!\.)#{id}/ do |m|
        rule id do |m|
          name = m[0]

          if self.class.keywords.include? name
            token Keyword
          elsif self.class.keywords_type.include? name
            token Keyword::Type
          elsif self.class.reserved.include? name
            token Keyword::Reserved
          elsif self.class.builtins.include? name
            token Name::Builtin
          else
            token Name
          end
        end
      end

      state :string do
        rule /"/, Str, :pop!
        rule /\\([\\abfnrtv"']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})/, Str::Escape
        rule /[^\\"\n]+/, Str
        rule /\\\n/, Str
        rule /\\/, Str # stray backslash
      end
    end
  end
end