• Bug#1067890: luajit: Add support for LoongArch64 (9/10)

    From Xiaolin Zhao@21:1/5 to All on Thu Mar 28 14:10:01 2024
    [continued from previous message]

    + | fld.d FARG3, CCSTATE->fpr[2]
    + | fld.d FARG4, CCSTATE->fpr[3]
    + | fld.d FARG5, CCSTATE->fpr[4]
    + | fld.d FARG6, CCSTATE->fpr[5]
    + | fld.d FARG7, CCSTATE->fpr[6]
    + | fld.d FARG8, CCSTATE->fpr[7]
    + |3:
    + | ld.d TMP3, CCSTATE->func
    + | ld.d CARG2, CCSTATE->gpr[1]
    + | ld.d CARG3, CCSTATE->gpr[2]
    + | ld.d CARG4, CCSTATE->gpr[3]
    + | ld.d CARG5, CCSTATE->gpr[4]
    + | ld.d CARG6, CCSTATE->gpr[5]
    + | ld.d CARG7, CCSTATE->gpr[6]
    + | ld.d CARG8, CCSTATE->gpr[7]
    + | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
    + | jirl r1, TMP3, 0
    + | ld.d CCSTATE:TMP1, -24(r23)
    + | ld.d TMP2, -16(r23)
    + | ld.d ra, -8(r23)
    + | st.d CRET1, CCSTATE:TMP1->gpr[0]
    + | st.d CRET2, CCSTATE:TMP1->gpr[1]
    + | fst.d FRET1, CCSTATE:TMP1->fpr[0]
    + | fst.d FRET2, CCSTATE:TMP1->fpr[1]
    + | or sp, r23, r0
    + | or r23, TMP2, r0
    + | jirl r0, ra, 0
    + |.endif
    + |// Note: vm_ffi_call must be the last function in this object file!
    + |
    + |//-----------------------------------------------------------------------
    +}
    +
    +/* Generate the code for a single instruction. */
    +static void build_ins(BuildCtx *ctx, BCOp op, int defop)
    +{
    + int vk = 0;
    + |=>defop:
    +
    + switch (op) {
    +
    + /* -- Comparison ops ---------------------------------------------------- */
    +
    + /* Remember: all ops branch for a true comparison, fall through otherwise. */
    +
    + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
    + | // RA = src1*8, RD = src2*8, JMP with RD = target
    + | add.d RA, BASE, RA
    + | add.d RD, BASE, RD
    + if (op == BC_ISLT || op == BC_ISGE) {
    + | ld.d CARG1, 0(RA)
    + | ld.d CARG2, 0(RD)
    + | gettp CARG3, CARG1
    + | gettp CARG4, CARG2
    + } else {
    + | ld.d CARG2, 0(RA)
    + | ld.d CARG1, 0(RD)
    + | gettp CARG3, CARG2
    + | gettp CARG4, CARG1
    + }
    + | ld.hu TMP2, OFS_RD(PC) // TMP2=jump
    + | addi.d PC, PC, 4
    + | bne CARG3, TISNUM, >2
    + | decode_BC4b TMP2
    + | bne CARG4, TISNUM, >5
    + | slli.w CARG1, CARG1, 0
    + | slli.w CARG2, CARG2, 0
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + | slt TMP1, CARG1, CARG2
    + | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
    + if (op == BC_ISLT || op == BC_ISGT) {
    + | maskeqz TMP2, TMP2, TMP1
    + } else {
    + | masknez TMP2, TMP2,TMP1
    + }
    + |1:
    + | add.d PC, PC, TMP2
    + | ins_next
    + |
    + |2: // RA is not an integer.
    + | sltui TMP1, CARG3, LJ_TISNUM
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + | beqz TMP1, ->vmeta_comp
    + | sltui TMP1, CARG4, LJ_TISNUM
    + | decode_BC4b TMP2
    + | beqz TMP1, >4
    + | movgr2fr.d FTMP0, CARG1
    + | movgr2fr.d FTMP2, CARG2
    + |3: // RA and RD are both numbers.
    + if (op == BC_ISLT || op == BC_ISGE) {
    + | fcmp.clt.d FCC0, FTMP0, FTMP2
    + } else {
    + | fcmp.cult.d FCC0, FTMP0, FTMP2
    + }
    + | add.w TMP2, TMP2, TMP3
    + | movcf2gr TMP3, FCC0
    + if (op == BC_ISLT || op == BC_ISGT) {
    + | maskeqz TMP2, TMP2, TMP3
    + } else {
    + | masknez TMP2, TMP2, TMP3
    + }
    + | b <1
    + |
    + |4: // RA is a number, RD is not a number.
    + | // RA is a number, RD is an integer. Convert RD to a number.
    + | bne CARG4, TISNUM, ->vmeta_comp
    + if (op == BC_ISLT || op == BC_ISGE) {
    + | movgr2fr.w FTMP2, CARG2
    + | movgr2fr.d FTMP0, CARG1
    + | ffint.d.w FTMP2, FTMP2
    + } else {
    + | movgr2fr.w FTMP0, CARG1
    + | movgr2fr.d FTMP2, CARG2
    + | ffint.d.w FTMP0, FTMP0
    + }
    + | b <3
    + |
    + |5: // RA is an integer, RD is not an integer
    + | sltui TMP1, CARG4, LJ_TISNUM
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + | beqz TMP1, ->vmeta_comp
    + | // RA is an integer, RD is a number. Convert RA to a number.
    + if (op == BC_ISLT || op == BC_ISGE) {
    + | movgr2fr.w FTMP0, CARG1
    + | movgr2fr.d FTMP2, CARG2
    + | ffint.d.w FTMP0, FTMP0
    + } else {
    + | movgr2fr.w FTMP2, CARG2
    + | movgr2fr.d FTMP0, CARG1
    + | ffint.d.w FTMP2, FTMP2
    + }
    + | b <3
    + break;
    +
    + case BC_ISEQV: case BC_ISNEV:
    + vk = op == BC_ISEQV;
    + | // RA = src1*8, RD = src2*8, JMP with RD = target
    + | add.d RA, BASE, RA
    + | add.d RD, BASE, RD
    + | addi.d PC, PC, 4
    + | ld.d CARG1, 0(RA)
    + | ld.d CARG2, 0(RD)
    + | ld.hu TMP2, -4+OFS_RD(PC)
    + | gettp CARG3, CARG1
    + | gettp CARG4, CARG2
    + | sltu TMP0, TISNUM, CARG3
    + | sltu TMP1, TISNUM, CARG4
    + | or TMP0, TMP0, TMP1
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + if (vk) {
    + | beqz TMP0, ->BC_ISEQN_Z
    + } else {
    + | beqz TMP0, ->BC_ISNEN_Z
    + }
    + |// Either or both types are not numbers.
    + |.if FFI
    + | // Check if RA or RD is a cdata.
    + | addi.w TMP0, r0, LJ_TCDATA
    + | beq CARG3, TMP0, ->vmeta_equal_cd
    + | beq CARG4, TMP0, ->vmeta_equal_cd
    + |.endif
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + | decode_BC4b TMP2
    + | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
    + | bne CARG1, CARG2, >2
    + | // Tag and value are equal.
    + if (vk) {
    + |->BC_ISEQV_Z:
    + | add.d PC, PC, TMP2
    + }
    + |1:
    + | ins_next
    + |
    + |2: // Check if the tags are the same and it's a table or userdata.
    + | xor TMP3, CARG3, CARG4 // Same type?
    + | sltui TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1
    + | masknez TMP0, TMP0, TMP3 // TMP0=0: not same type, or same type table/userdata
    + | cleartp TAB:TMP1, CARG1
    + if (vk) {
    + | beqz TMP0, <1
    + } else {
    + | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
    + }
    + | // Different tables or userdatas. Need to check __eq metamethod.
    + | // Field metatable must be at same offset for GCtab and GCudata!
    + | ld.d TAB:TMP3, TAB:TMP1->metatable
    + if (vk) {
    + | beqz TAB:TMP3, <1 // No metatable?
    + | ld.bu TMP3, TAB:TMP3->nomm
    + | andi TMP3, TMP3, 1<<MM_eq
    + | addi.w TMP0, r0, 0 // ne = 0
    + | bnez TMP3, <1 // Or 'no __eq' flag set?
    + } else {
    + | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
    + | ld.bu TMP3, TAB:TMP3->nomm
    + | andi TMP3, TMP3, 1<<MM_eq
    + | addi.w TMP0, r0, 1 // ne = 1
    + | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
    + }
    + | b ->vmeta_equal // Handle __eq metamethod.
    + break;
    +
    + case BC_ISEQS: case BC_ISNES:
    + vk = op == BC_ISEQS;
    + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
    + | add.d RA, BASE, RA
    + | addi.d PC, PC, 4
    + | ld.d CARG1, 0(RA)
    + | sub.d RD, KBASE, RD
    + | ld.hu TMP2, -4+OFS_RD(PC)
    + | ld.d CARG2, -8(RD) // KBASE-8-str_const*8
    + |.if FFI
    + | gettp CARG3, CARG1
    + | addi.w TMP1, r0, LJ_TCDATA
    + |.endif
    + | addi.w TMP0, r0, LJ_TSTR
    + | decode_BC4b TMP2
    + | settp CARG2, TMP0
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + |.if FFI
    + | beq CARG3, TMP1, ->vmeta_equal_cd
    + |.endif
    + | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
    + | add.w TMP2, TMP2, TMP3
    + if (vk) {
    + | masknez TMP2, TMP2, TMP0
    + } else {
    + | maskeqz TMP2, TMP2, TMP0
    + }
    + | add.d PC, PC, TMP2
    + | ins_next
    + break;
    +
    + case BC_ISEQN: case BC_ISNEN:
    + vk = op == BC_ISEQN;
    + | // RA = src*8, RD = num_const*8, JMP with RD = target
    + | add.d RA, BASE, RA
    + | add.d RD, KBASE, RD
    + | ld.d CARG1, 0(RA)
    + | ld.d CARG2, 0(RD)
    + | ld.hu TMP2, OFS_RD(PC)
    + | addi.d PC, PC, 4
    + | gettp CARG3, CARG1
    + | gettp CARG4, CARG2
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + if (vk) {
    + |->BC_ISEQN_Z:
    + } else {
    + |->BC_ISNEN_Z:
    + }
    + | decode_BC4b TMP2
    + | bne CARG3, TISNUM, >4
    + | add.w TMP2, TMP2, TMP3
    + | bne CARG4, TISNUM, >6
    + | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
    + |1:
    + if (vk) {
    + | masknez TMP2, TMP2, TMP0
    + | add.d PC, PC, TMP2
    + |2:
    + } else {
    + | maskeqz TMP2, TMP2, TMP0
    + |2:
    + | add.d PC, PC, TMP2
    + }
    + |3:
    + | ins_next
    + |
    + |4: // RA is not an integer.
    + | sltu TMP0, CARG3, TISNUM
    + | add.w TMP2, TMP2, TMP3
    + |.if FFI
    + | beqz TMP0, >7
    + |.else
    + | beqz TMP0, <2
    + |.endif
    + | movgr2fr.d FTMP0, CARG1
    + | movgr2fr.d FTMP2, CARG2
    + | bne CARG4, TISNUM, >5
    + |// RA is a number, RD is an integer.
    + | ffint.d.w FTMP2, FTMP2
    + |
    + |5: // RA and RD are both numbers.
    + | fcmp.cune.d FCC0, FTMP0, FTMP2
    + | movcf2gr TMP0, FCC0
    + | b <1
    + |
    + |6: // RA is an integer, RD is a number.
    + | sltu TMP0, CARG4, TISNUM
    + |.if FFI
    + | beqz TMP0, >8
    + |.else
    + | beqz TMP0, <2
    + |.endif
    + | movgr2fr.w FTMP0, CARG1
    + | movgr2fr.d FTMP2, CARG2
    + | ffint.d.w FTMP0, FTMP0
    + | b <5
    + |
    + |.if FFI
    + |7: // RA not int, not number
    + | addi.w TMP0, r0, LJ_TCDATA
    + | bne CARG3, TMP0, <2
    + | b ->vmeta_equal_cd
    + |
    + |8: // RD not int, not number
    + | addi.w TMP0, r0, LJ_TCDATA
    + | bne CARG4, TMP0, <2
    + | b ->vmeta_equal_cd
    + |.endif
    + break;
    +
    + case BC_ISEQP: case BC_ISNEP:
    + vk = op == BC_ISEQP;
    + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
    + | add.d RA, BASE, RA
    + | srli.w TMP0, RD, 3
    + | ld.d TMP1, 0(RA)
    + | nor TMP0, TMP0, r0 // ~TMP0: ~0 ~1 ~2
    + | ld.hu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target
    + | gettp TMP1, TMP1
    + | addi.d PC, PC, 4
    + | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
    + |.if FFI
    + | addi.w TMP3, r0, LJ_TCDATA
    + | beq TMP1, TMP3, ->vmeta_equal_cd
    + |.endif
    + | decode_BC4b TMP2
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
    + if (vk) {
    + | masknez TMP2, TMP2, TMP0
    + } else {
    + | maskeqz TMP2, TMP2, TMP0
    + }
    + | add.d PC, PC, TMP2
    + | ins_next
    + break;
    +
    + /* -- Unary test and copy ops ------------------------------------------- */
    +
    + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
    + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
    + | add.d RD, BASE, RD
    + | ld.hu TMP2, OFS_RD(PC)
    + | ld.d TMP0, 0(RD)
    + | addi.d PC, PC, 4
    + | gettp TMP0, TMP0
    + | add.d RA, BASE, RA
    + | sltui TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false
    + | decode_BC4b TMP2
    + | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
    + | ld.d CRET1, 0(RD)
    + | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
    + if (op == BC_IST || op == BC_ISTC) {
    + | beqz TMP0, >1
    + if (op == BC_ISTC) {
    + | st.d CRET1, 0(RA)
    + }
    + } else {
    + | bnez TMP0, >1
    + if (op == BC_ISFC) {
    + | st.d CRET1, 0(RA)
    + }
    + }
    + | add.d PC, PC, TMP2
    + |1:
    + | ins_next
    + break;
    +
    + case BC_ISTYPE:
    + | // RA = src*8, RD = -type*8
    + | add.d TMP0, BASE, RA
    + | srli.w TMP1, RD, 3
    + | ld.d TMP0, 0(TMP0)
    + | gettp TMP0, TMP0
    + | add.d TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0
    + | bnez TMP0, ->vmeta_istype
    + | ins_next
    + break;
    + case BC_ISNUM:
    + | // RA = src*8, RD = -(TISNUM-1)*8
    + | add.d TMP0, BASE, RA
    + | ld.d TMP0, 0(TMP0)
    + | checknum TMP0, ->vmeta_istype
    + | ins_next
    + break;
    +
    + /* -- Unary ops --------------------------------------------------------- */
    +
    + case BC_MOV:
    + | // RA = dst*8, RD = src*8
    + | add.d RD, BASE, RD
    + | add.d RA, BASE, RA
    + | ld.d TMP0, 0(RD)
    + | ins_next1
    + | st.d TMP0, 0(RA)
    + | ins_next2
    + break;
    + case BC_NOT:
    + | // RA = dst*8, RD = src*8
    + | add.d RD, BASE, RD
    + | add.d RA, BASE, RA
    + | ld.d TMP0, 0(RD)
    + | addi.d TMP1, r0, LJ_TTRUE
    + | ins_next1
    + | gettp TMP0, TMP0
    + | sltu TMP0, TMP1, TMP0
    + | addi.w TMP0, TMP0, 1
    + | slli.d TMP0, TMP0, 47
    + | nor TMP0, TMP0, r0
    + | st.d TMP0, 0(RA)
    + | ins_next2
    + break;
    + case BC_UNM:
    + | // RA = dst*8, RD = src*8
    + | add.d RB, BASE, RD
    + | add.d RA, BASE, RA
    + | ld.d TMP0, 0(RB)
    + | addu16i.d TMP1, r0, 0x8000
    + | gettp CARG3, TMP0
    + | bne CARG3, TISNUM, >1
    + | sub.w TMP0, r0, TMP0
    + | beq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
    + | bstrpick.d TMP0, TMP0, 31, 0
    + | settp TMP0, TISNUM
    + | b >2
    + |1:
    + | sltui TMP3, CARG3, LJ_TISNUM
    + | slli.d TMP1, TMP1, 32
    + | beqz TMP3, ->vmeta_unm
    + | xor TMP0, TMP0, TMP1 // sign => ~sign
    + |2:
    + | st.d TMP0, 0(RA)
    + | ins_next
    + break;
    + case BC_LEN:
    + | // RA = dst*8, RD = src*8
    + | add.d CARG2, BASE, RD
    + | ld.d TMP0, 0(CARG2)
    + | add.d RA, BASE, RA
    + | gettp TMP1, TMP0
    + | addi.d TMP2, TMP1, -LJ_TSTR
    + | cleartp STR:CARG1, TMP0
    + | bnez TMP2, >2
    + | ld.w CARG1, STR:CARG1->len
    + |1:
    + | settp CARG1, TISNUM
    + | st.d CARG1, 0(RA)
    + | ins_next
    + |2:
    + | addi.d TMP2, TMP1, -LJ_TTAB
    + | bnez TMP2, ->vmeta_len
    +#if LJ_52
    + | ld.d TAB:TMP2, TAB:CARG1->metatable
    + | bnez TAB:TMP2, >9
    + |3:
    +#endif
    + |->BC_LEN_Z:
    + | bl extern lj_tab_len // (GCtab *t)
    + | // Returns uint32_t (but less than 2^31).
    + | b <1
    +#if LJ_52
    + |9:
    + | ld.bu TMP0, TAB:TMP2->nomm
    + | andi TMP0, TMP0, 1<<MM_len
    + | bnez TMP0, <3 // 'no __len' flag set: done.
    + | b ->vmeta_len
    +#endif
    + break;
    +
    + /* -- Binary ops -------------------------------------------------------- */
    +
    + |.macro fpmod, a, b, c
    + | fdiv.d FARG1, b, c
    + | bl ->vm_floor // floor(b/c)
    + | fmul.d a, FRET1, c
    + | fsub.d a, b, a // b - floor(b/c)*c
    + |.endmacro
    + |
    + |.macro ins_arithpre
    + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
    + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
    + ||if (vk == 1) {
    + | // RA = dst*8, RB = num_const*8, RC = src1*8
    + | decode_RB RC, INS
    + | decode_RDtoRC8 RB, RD
    + ||} else {
    + | // RA = dst*8, RB = src1*8, RC = num_const*8
    + | decode_RB RB, INS
    + | decode_RDtoRC8 RC, RD
    + ||}
    + ||switch (vk) {
    + ||case 0: // suffix is VN
    + | add.d RB, BASE, RB
    + | add.d RC, KBASE, RC
    + || break;
    + ||case 1: // suffix is NV
    + | add.d RC, BASE, RC
    + | add.d RB, KBASE, RB
    + || break;
    + ||default: // CAT or suffix is VV
    + | add.d RB, BASE, RB
    + | add.d RC, BASE, RC
    + || break;
    + ||}
    + |.endmacro
    + |
    + |.macro ins_arithfp, fpins, itype1, itype2
    + | fld.d FTMP0, 0(RB)
    + | sltu itype1, itype1, TISNUM
    + | sltu itype2, itype2, TISNUM
    + | fld.d FTMP2, 0(RC)
    + | and itype1, itype1, itype2
    + | add.d RA, BASE, RA
    + | beqz itype1, ->vmeta_arith
    + | fpins FRET1, FTMP0, FTMP2
    + | ins_next1
    + | fst.d FRET1, 0(RA)
    + | ins_next2
    + |.endmacro
    + |
    + |.macro ins_arithead, itype1, itype2, tval1, tval2
    + | ld.d tval1, 0(RB)
    + | ld.d tval2, 0(RC)
    + | // Check for two integers.
    + | gettp itype1, tval1
    + | gettp itype2, tval2
    + |.endmacro
    + |
    + |.macro ins_arithdn, intins, fpins
    + | ins_arithpre
    + | ins_arithead TMP0, TMP1, CARG1, CARG2
    + | bne TMP0, TISNUM, >1
    + | bne TMP1, TISNUM, >1
    + | slli.w CARG3, CARG1, 0
    + | slli.w CARG4, CARG2, 0
    + |.if "intins" == "add.w"
    + | intins CRET1, CARG3, CARG4
    + | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
    + | xor TMP2, CRET1, CARG4
    + | and TMP1, TMP1, TMP2
    + | add.d RA, BASE, RA
    + | blt TMP1, r0, ->vmeta_arith
    + |.elif "intins" == "sub.w"
    + | intins CRET1, CARG3, CARG4
    + | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
    + | xor TMP2, CARG3, CARG4
    + | and TMP1, TMP1, TMP2
    + | add.d RA, BASE, RA
    + | blt TMP1, r0, ->vmeta_arith
    + |.elif "intins" == "mulw.d.w"
    + | mul.w CRET1, CARG3, CARG4
    + | mulh.w TMP2, CARG3, CARG4
    + | srai.w TMP1, CRET1, 31 // 63-32bit not all 0 or 1: overflow.
    + | add.d RA, BASE, RA
    + | bne TMP1, TMP2, ->vmeta_arith
    + |.endif
    + | bstrpick.d CRET1, CRET1, 31, 0
    + | settp CRET1, TISNUM
    + | st.d CRET1, 0(RA)
    + | ins_next
    + |1: // Check for two numbers.
    + | ins_arithfp, fpins, TMP0, TMP1
    + |.endmacro
    + |
    + |.macro ins_arithdiv, fpins
    + | ins_arithpre
    + | ins_arithead TMP0, TMP1, CARG1, CARG2
    + | ins_arithfp, fpins, TMP0, TMP1
    + |.endmacro
    + |
    + |.macro ins_arithmod, fpins
    + | ins_arithpre
    + | ins_arithead TMP0, TMP1, CARG1, CARG2
    + | bne TMP0, TISNUM, >1
    + | bne TMP1, TISNUM, >1
    + | slli.w CARG1, CARG1, 0
    + | slli.w CARG2, CARG2, 0
    + | add.d RA, BASE, RA
    + | beqz CARG2, ->vmeta_arith
    + | bl extern lj_vm_modi
    + | bstrpick.d CRET1, CRET1, 31, 0
    + | settp CRET1, TISNUM
    + | st.d CRET1, 0(RA)
    + | ins_next
    + |1: // Check for two numbers.
    + | ins_arithfp, fpins, TMP0, TMP1
    + |.endmacro
    +
    + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
    + | ins_arithdn add.w, fadd.d
    + break;
    + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
    + | ins_arithdn sub.w, fsub.d
    + break;
    + case BC_MULVN: case BC_MULNV: case BC_MULVV:
    + | ins_arithdn mulw.d.w, fmul.d
    + break;
    + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
    + | ins_arithdiv fdiv.d
    + break;
    + case BC_MODVN: case BC_MODNV: case BC_MODVV:
    + | ins_arithmod fpmod
    + break;
    + case BC_POW:
    + | ins_arithpre
    + | ld.d CARG1, 0(RB)
    + | ld.d CARG2, 0(RC)
    + | gettp TMP0, CARG1
    + | gettp TMP1, CARG2
    + | sltui TMP0, TMP0, LJ_TISNUM
    + | sltui TMP1, TMP1, LJ_TISNUM
    + | and TMP0, TMP0, TMP1
    + | add.d RA, BASE, RA
    + | beqz TMP0, ->vmeta_arith
    + | fld.d FARG1, 0(RB)
    + | fld.d FARG2, 0(RC)
    + | bl extern pow
    + | ins_next1
    + | fst.d FRET1, 0(RA)
    + | ins_next2
    + break;
    +
    + case BC_CAT:
    + | // RA = dst*8, RB = src_start*8, RC = src_end*8
    + | decode_RB RB, INS
    + | decode_RDtoRC8 RC, RD
    + | sub.d CARG3, RC, RB
    + | st.d BASE, L->base
    + | add.d CARG2, BASE, RC
    + | or MULTRES, RB, r0
    + |->BC_CAT_Z:
    + | srli.w CARG3, CARG3, 3
    + | st.d PC, SAVE_PC(sp)
    + | or CARG1, L, r0
    + | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
    + | // Returns NULL (finished) or TValue * (metamethod).
    + | ld.d BASE, L->base
    + | bnez CRET1, ->vmeta_binop
    + | add.d RB, BASE, MULTRES
    + | ld.d TMP0, 0(RB)
    + | add.d RA, BASE, RA
    + | st.d TMP0, 0(RA)
    + | ins_next
    + break;
    +
    + /* -- Constant ops ------------------------------------------------------ */
    +
    + case BC_KSTR:
    + | // RA = dst*8, RD = str_const*8 (~)
    + | sub.d TMP1, KBASE, RD
    + | addi.w TMP2, r0, LJ_TSTR
    + | ld.d TMP0, -8(TMP1) // KBASE-8-str_const*8
    + | add.d RA, BASE, RA
    + | settp TMP0, TMP2
    + | st.d TMP0, 0(RA)
    + | ins_next
    + break;
    + case BC_KCDATA:
    + |.if FFI
    + | // RA = dst*8, RD = cdata_const*8 (~)
    + | sub.d TMP1, KBASE, RD
    + | ld.d TMP0, -8(TMP1) // KBASE-8-cdata_const*8
    + | addi.w TMP2, r0, LJ_TCDATA
    + | add.d RA, BASE, RA
    + | settp TMP0, TMP2
    + | st.d TMP0, 0(RA)
    + | ins_next
    + |.endif
    + break;
    + case BC_KSHORT:
    + | // RA = dst*8, RD = int16_literal*8
    + | srai.w RD, INS, 16
    + | add.d RA, BASE, RA
    + | bstrpick.d RD, RD, 31, 0
    + | settp RD, TISNUM
    + | st.d RD, 0(RA)
    + | ins_next
    + break;
    + case BC_KNUM:
    + | // RA = dst*8, RD = num_const*8
    + | add.d RD, KBASE, RD
    + | add.d RA, BASE, RA
    + | ld.d TMP0, 0(RD)
    + | st.d TMP0, 0(RA)
    + | ins_next
    + break;
    + case BC_KPRI:
    + | // RA = dst*8, RD = primitive_type*8 (~)
    + | add.d RA, BASE, RA
    + | slli.d TMP0, RD, 44 // 44+3
    + | nor TMP0, TMP0, r0
    + | st.d TMP0, 0(RA)
    + | ins_next
    + break;
    + case BC_KNIL:
    + | // RA = base*8, RD = end*8
    + | add.d RA, BASE, RA
    + | st.d TISNIL, 0(RA)
    + | addi.d RA, RA, 8
    + | add.d RD, BASE, RD
    + |1:
    + | st.d TISNIL, 0(RA)
    + | slt TMP0, RA, RD
    + | addi.d RA, RA, 8
    + | bnez TMP0, <1
    + | ins_next
    + break;
    +
    + /* -- Upvalue and function ops ------------------------------------------ */
    +
    + case BC_UGET:
    + | // RA = dst*8, RD = uvnum*8
    + | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
    + | add.d RA, BASE, RA
    + | cleartp LFUNC:TMP0
    + | add.d RD, RD, LFUNC:TMP0
    + | ld.d UPVAL:TMP0, LFUNC:RD->uvptr
    + | ld.d TMP1, UPVAL:TMP0->v
    + | ld.d TMP2, 0(TMP1)
    + | ins_next1
    + | st.d TMP2, 0(RA)
    + | ins_next2
    + break;
    + case BC_USETV:
    + | // RA = uvnum*8, RD = src*8
    + | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
    + | add.d RD, BASE, RD
    + | cleartp LFUNC:TMP0
    + | add.d RA, RA, LFUNC:TMP0
    + | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
    + | ld.d CRET1, 0(RD)
    + | ld.bu TMP3, UPVAL:TMP0->marked
    + | ld.d CARG2, UPVAL:TMP0->v
    + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
    + | ld.bu TMP0, UPVAL:TMP0->closed
    + | gettp TMP2, CRET1
    + | st.d CRET1, 0(CARG2)
    + | or TMP3, TMP3, TMP0
    + | addi.d TMP0, r0, LJ_GC_BLACK|1
    + | addi.d TMP2, TMP2, -(LJ_TNUMX+1)
    + | beq TMP3, TMP0, >2 // Upvalue is closed and black?
    + |1:
    + | ins_next
    + |
    + |2: // Check if new value is collectable.
    + | sltui TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
    + | cleartp GCOBJ:CRET1, CRET1
    + | beqz TMP0, <1 // tvisgcv(v)
    + | ld.bu TMP3, GCOBJ:CRET1->gch.marked
    + | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
    + | beqz TMP3, <1
    + | // Crossed a write barrier. Move the barrier forward.
    + | .ADD16I CARG1, DISPATCH, GG_DISP2G
    + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
    + | b <1
    + break;
    + case BC_USETS:
    + | // RA = uvnum*8, RD = str_const*8 (~)
    + | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
    + | sub.d TMP1, KBASE, RD
    + | cleartp LFUNC:TMP0
    + | add.d RA, RA, LFUNC:TMP0
    + | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
    + | ld.d STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
    + | ld.bu TMP2, UPVAL:TMP0->marked
    + | ld.d CARG2, UPVAL:TMP0->v
    + | ld.bu TMP3, STR:TMP1->marked
    + | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv)
    + | ld.bu TMP2, UPVAL:TMP0->closed
    + | addi.d TMP0, r0, LJ_TSTR
    + | settp TMP1, TMP0
    + | st.d TMP1, 0(CARG2)
    + | bnez TMP4, >2
    + |1:
    + | ins_next
    + |
    + |2: // Check if string is white and ensure upvalue is closed.
    + | beqz TMP2, <1
    + | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str)
    + | beqz TMP0, <1
    + | // Crossed a write barrier. Move the barrier forward.
    + | .ADD16I CARG1, DISPATCH, GG_DISP2G
    + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
    + | b <1
    + break;
    + case BC_USETN:
    + | // RA = uvnum*8, RD = num_const*8
    + | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
    + | add.d RD, KBASE, RD
    + | cleartp LFUNC:TMP0
    + | add.d TMP0, RA, LFUNC:TMP0
    + | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
    + | ld.d TMP1, 0(RD)
    + | ld.d TMP0, UPVAL:TMP0->v
    + | st.d TMP1, 0(TMP0)
    + | ins_next
    + break;
    + case BC_USETP:
    + | // RA = uvnum*8, RD = primitive_type*8 (~)
    + | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
    + | slli.d TMP2, RD, 44
    + | cleartp LFUNC:TMP0
    + | add.d TMP0, RA, LFUNC:TMP0
    + | nor TMP2, TMP2, r0
    + | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
    + | ld.d TMP1, UPVAL:TMP0->v
    + | st.d TMP2, 0(TMP1)
    + | ins_next
    + break;
    +
    + case BC_UCLO:
    + | // RA = level*8, RD = target
    + | ld.d TMP2, L->openupval
    + | branch_RD // Do this first since RD is not saved.
    + | st.d BASE, L->base
    + | or CARG1, L, r0
    + | beqz TMP2, >1
    + | add.d CARG2, BASE, RA
    + | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
    + | ld.d BASE, L->base
    + |1:
    + | ins_next
    + break;
    +
    + case BC_FNEW:
    + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
    + | sub.d TMP1, KBASE, RD
    + | ld.d CARG3, FRAME_FUNC(BASE)
    + | ld.d CARG2, -8(TMP1) // KBASE-8-proto_const*8
    + | st.d BASE, L->base
    + | st.d PC, SAVE_PC(sp)
    + | cleartp CARG3
    + | or CARG1, L, r0
    + | // (lua_State *L, GCproto *pt, GCfuncL *parent)
    + | bl extern lj_func_newL_gc
    + | // Returns GCfuncL *.
    + | addi.d TMP0, r0, LJ_TFUNC
    + | ld.d BASE, L->base
    + | settp CRET1, TMP0
    + | add.d RA, BASE, RA
    + | st.d CRET1, 0(RA)
    + | ins_next
    + break;
    +
    + /* -- Table ops --------------------------------------------------------- */
    +
    + case BC_TNEW:
    + case BC_TDUP:
    + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
    + | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total)
    + | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold)
    + | st.d BASE, L->base
    + | sltu TMP2, TMP0, TMP1
    + | st.d PC, SAVE_PC(sp)
    + | beqz TMP2, >5
    + |1:
    + if (op == BC_TNEW) {
    + | srli.w CARG2, RD, 3
    + | andi CARG2, CARG2, 0x7ff
    + | ori TMP0, r0, 0x801
    + | addi.w TMP2, CARG2, -0x7ff
    + | srli.w CARG3, RD, 14
    + | masknez TMP0, TMP0, TMP2
    + | maskeqz CARG2, CARG2, TMP2
    + | or CARG2, CARG2, TMP0
    + | // (lua_State *L, int32_t asize, uint32_t hbits)
    + | or CARG1, L, r0
    + | bl extern lj_tab_new
    + | // Returns Table *.
    + } else {
    + | sub.d TMP1, KBASE, RD
    + | or CARG1, L, r0
    + | ld.d CARG2, -8(TMP1) // KBASE-8-tab_const*8
    + | bl extern lj_tab_dup // (lua_State *L, Table *kt)
    + | // Returns Table *.
    + }
    + | addi.d TMP0, r0, LJ_TTAB
    + | ld.d BASE, L->base
    + | ins_next1
    + | settp CRET1, TMP0
    + | add.d RA, BASE, RA
    + | st.d CRET1, 0(RA)
    + | ins_next2
    + |5:
    + | or MULTRES, RD, r0
    + | or CARG1, L, r0
    + | bl extern lj_gc_step_fixtop // (lua_State *L)
    + | or RD, MULTRES, r0
    + | b <1
    + break;
    +
    + case BC_GGET:
    + | // RA = dst*8, RD = str_const*8 (~)
    + case BC_GSET:
    + | // RA = src*8, RD = str_const*8 (~)
    + | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
    + | sub.d TMP1, KBASE, RD
    + | ld.d STR:RC, -8(TMP1) // KBASE-8-str_const*8
    + | cleartp LFUNC:TMP0
    + | ld.d TAB:RB, LFUNC:TMP0->env
    + | add.d RA, BASE, RA
    + if (op == BC_GGET) {
    + | b ->BC_TGETS_Z
    + } else {
    + | b ->BC_TSETS_Z
    + }
    + break;
    +
    + case BC_TGETV:
    + | // RA = dst*8, RB = table*8, RC = key*8
    + | decode_RB RB, INS
    + | decode_RDtoRC8 RC, RD
    + | add.d CARG2, BASE, RB
    + | add.d CARG3, BASE, RC
    + | ld.d TAB:RB, 0(CARG2)
    + | ld.d TMP2, 0(CARG3)
    + | add.d RA, BASE, RA
    + | checktab TAB:RB, ->vmeta_tgetv
    + | gettp TMP3, TMP2
    + | ld.w TMP0, TAB:RB->asize
    + | bne TMP3, TISNUM, >5 // Integer key?
    + | slli.w TMP2, TMP2, 0
    + | ld.d TMP1, TAB:RB->array
    + | sltu TMP3, TMP2, TMP0 //array part (keys = [0, asize-1])
    + | slli.w TMP2, TMP2, 3
    + | beqz TMP3, ->vmeta_tgetv // Integer key and in array part?
    + | add.d TMP2, TMP1, TMP2
    + | ld.d CRET1, 0(TMP2)
    + | beq CRET1, TISNIL, >2
    + |1:
    + | st.d CRET1, 0(RA)
    + | ins_next
    + |
    + |2: // Check for __index if table value is nil.
    + | ld.d TAB:TMP2, TAB:RB->metatable
    + | beqz TAB:TMP2, <1 // No metatable: done.
    + | ld.bu TMP0, TAB:TMP2->nomm
    + | andi TMP0, TMP0, 1<<MM_index
    + | bnez TMP0, <1 // 'no __index' flag set: done.
    + | b ->vmeta_tgetv
    + |
    + |5:
    + | addi.d TMP0, r0, LJ_TSTR
    + | cleartp RC, TMP2
    + | bne TMP3, TMP0, ->vmeta_tgetv // String key?
    + | b ->BC_TGETS_Z
    + break;
    + case BC_TGETS:
    + | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
    + | decode_RB RB, INS
    + | decode_RDtoRC8 RC, RD
    + | add.d CARG2, BASE, RB
    + | sub.d CARG3, KBASE, RC
    + | ld.d TAB:RB, 0(CARG2)
    + | add.d RA, BASE, RA
    + | ld.d STR:RC, -8(CARG3) // KBASE-8-str_const*8
    + | checktab TAB:RB, ->vmeta_tgets1
    + |->BC_TGETS_Z:
    + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
    + | ld.w TMP0, TAB:RB->hmask
    + | ld.w TMP1, STR:RC->sid
    + | ld.d NODE:TMP2, TAB:RB->node
    + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
    + | slli.w TMP0, TMP1, 5
    + | slli.w TMP1, TMP1, 3
    + | sub.w TMP1, TMP0, TMP1
    + | addi.d TMP3, r0, LJ_TSTR
    + | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
    + | settp STR:RC, TMP3 // Tagged key to look for.
    + |1:
    + | ld.d CARG1, NODE:TMP2->key
    + | ld.d CARG2, NODE:TMP2->val
    + | ld.d NODE:TMP1, NODE:TMP2->next
    + | ld.d TAB:TMP3, TAB:RB->metatable
    + | bne CARG1, RC, >4
    + | beq CARG2, TISNIL, >5 // Key found, but nil value?
    + |3:
    + | st.d CARG2, 0(RA)
    + | ins_next
    + |
    + |4: // Follow hash chain.
    + | or NODE:TMP2, NODE:TMP1, r0
    + | bnez NODE:TMP1, <1
    + | // End of hash chain: key not found, nil result.
    + |
    + |5: // Check for __index if table value is nil.
    + | or CARG2, TISNIL, r0
    + | beqz TAB:TMP3, <3 // No metatable: done.
    + | ld.bu TMP0, TAB:TMP3->nomm
    + | andi TMP0, TMP0, 1<<MM_index
    + | bnez TMP0, <3 // 'no __index' flag set: done.
    + | b ->vmeta_tgets
    + break;
    + case BC_TGETB:
    + | // RA = dst*8, RB = table*8, RC = index*8
    + | decode_RB RB, INS
    + | add.d CARG2, BASE, RB
    + | decode_RDtoRC8 RC, RD
    + | ld.d TAB:RB, 0(CARG2)
    + | add.d RA, BASE, RA
    + | srli.w TMP0, RC, 3
    + | checktab TAB:RB, ->vmeta_tgetb
    + | ld.w TMP1, TAB:RB->asize
    + | ld.d TMP2, TAB:RB->array
    + | sltu TMP1, TMP0, TMP1
    + | add.d RC, TMP2, RC
    + | beqz TMP1, ->vmeta_tgetb
    + | ld.d CRET1, 0(RC)
    + | beq CRET1, TISNIL, >5
    + |1:
    + | st.d CRET1, 0(RA)
    + | ins_next
    + |
    + |5: // Check for __index if table value is nil.
    + | ld.d TAB:TMP2, TAB:RB->metatable
    + | beqz TAB:TMP2, <1 // No metatable: done.
    + | ld.bu TMP1, TAB:TMP2->nomm
    + | andi TMP1, TMP1, 1<<MM_index
    + | bnez TMP1, <1 // 'no __index' flag set: done.
    + | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
    + break;
    + case BC_TGETR:
    + | // RA = dst*8, RB = table*8, RC = key*8
    + | decode_RB RB, INS
    + | decode_RDtoRC8 RC, RD
    + | add.d RB, BASE, RB
    + | add.d RC, BASE, RC
    + | ld.d TAB:CARG1, 0(RB)
    + | ld.w CARG2, 0(RC)
    + | add.d RA, BASE, RA
    + | cleartp TAB:CARG1
    + | ld.w TMP0, TAB:CARG1->asize
    + | ld.d TMP1, TAB:CARG1->array
    + | sltu TMP0, CARG2, TMP0
    + | slli.w TMP2, CARG2, 3
    + | add.d TMP3, TMP1, TMP2
    + | beqz TMP0, ->vmeta_tgetr // In array part?
    + | ld.d TMP1, 0(TMP3)
    + |->BC_TGETR_Z:
    + | ins_next1
    + | st.d TMP1, 0(RA)
    + | ins_next2
    + break;
    +
    + case BC_TSETV:
    + | // RA = src*8, RB = table*8, RC = key*8
    + | decode_RB RB, INS
    + | decode_RDtoRC8 RC, RD
    + | add.d CARG2, BASE, RB
    + | add.d CARG3, BASE, RC
    + | ld.d TAB:RB, 0(CARG2)
    + | ld.d TMP2, 0(CARG3)
    + | add.d RA, BASE, RA
    + | checktab TAB:RB, ->vmeta_tsetv
    + | slli.w RC, TMP2, 0
    + | checkint TMP2, >5
    + | ld.w TMP0, TAB:RB->asize
    + | ld.d TMP1, TAB:RB->array
    + | sltu TMP0, RC, TMP0
    + | slli.w TMP2, RC, 3
    + | beqz TMP0, ->vmeta_tsetv // Integer key and in array part?
    + | add.d TMP1, TMP1, TMP2
    + | ld.bu TMP3, TAB:RB->marked
    + | ld.d TMP0, 0(TMP1)
    + | ld.d CRET1, 0(RA)
    + | beq TMP0, TISNIL, >3
    + |1:
    + | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
    + | st.d CRET1, 0(TMP1)
    + | bnez TMP2, >7
    + |2:
    + | ins_next
    + |
    + |3: // Check for __newindex if previous value is nil.
    + | ld.d TAB:TMP2, TAB:RB->metatable
    + | beqz TAB:TMP2, <1 // No metatable: done.
    + | ld.bu TMP2, TAB:TMP2->nomm
    + | andi TMP2, TMP2, 1<<MM_newindex
    + | bnez TMP2, <1 // 'no __newindex' flag set: done.
    + | b ->vmeta_tsetv
    + |5:
    + | gettp TMP0, TMP2

    [continued in next message]

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)