Bug#1067890: luajit: Add support for LoongArch64 (9/10)
From
Xiaolin Zhao@21:1/5 to
All on Thu Mar 28 14:10:01 2024
[continued from previous message]
+ | fld.d FARG3, CCSTATE->fpr[2]
+ | fld.d FARG4, CCSTATE->fpr[3]
+ | fld.d FARG5, CCSTATE->fpr[4]
+ | fld.d FARG6, CCSTATE->fpr[5]
+ | fld.d FARG7, CCSTATE->fpr[6]
+ | fld.d FARG8, CCSTATE->fpr[7]
+ |3:
+ | ld.d TMP3, CCSTATE->func
+ | ld.d CARG2, CCSTATE->gpr[1]
+ | ld.d CARG3, CCSTATE->gpr[2]
+ | ld.d CARG4, CCSTATE->gpr[3]
+ | ld.d CARG5, CCSTATE->gpr[4]
+ | ld.d CARG6, CCSTATE->gpr[5]
+ | ld.d CARG7, CCSTATE->gpr[6]
+ | ld.d CARG8, CCSTATE->gpr[7]
+ | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
+ | jirl r1, TMP3, 0
+ | ld.d CCSTATE:TMP1, -24(r23)
+ | ld.d TMP2, -16(r23)
+ | ld.d ra, -8(r23)
+ | st.d CRET1, CCSTATE:TMP1->gpr[0]
+ | st.d CRET2, CCSTATE:TMP1->gpr[1]
+ | fst.d FRET1, CCSTATE:TMP1->fpr[0]
+ | fst.d FRET2, CCSTATE:TMP1->fpr[1]
+ | or sp, r23, r0
+ | or r23, TMP2, r0
+ | jirl r0, ra, 0
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ | add.d RA, BASE, RA
+ | add.d RD, BASE, RD
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | ld.d CARG1, 0(RA)
+ | ld.d CARG2, 0(RD)
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ } else {
+ | ld.d CARG2, 0(RA)
+ | ld.d CARG1, 0(RD)
+ | gettp CARG3, CARG2
+ | gettp CARG4, CARG1
+ }
+ | ld.hu TMP2, OFS_RD(PC) // TMP2=jump
+ | addi.d PC, PC, 4
+ | bne CARG3, TISNUM, >2
+ | decode_BC4b TMP2
+ | bne CARG4, TISNUM, >5
+ | slli.w CARG1, CARG1, 0
+ | slli.w CARG2, CARG2, 0
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | slt TMP1, CARG1, CARG2
+ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
+ if (op == BC_ISLT || op == BC_ISGT) {
+ | maskeqz TMP2, TMP2, TMP1
+ } else {
+ | masknez TMP2, TMP2,TMP1
+ }
+ |1:
+ | add.d PC, PC, TMP2
+ | ins_next
+ |
+ |2: // RA is not an integer.
+ | sltui TMP1, CARG3, LJ_TISNUM
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | beqz TMP1, ->vmeta_comp
+ | sltui TMP1, CARG4, LJ_TISNUM
+ | decode_BC4b TMP2
+ | beqz TMP1, >4
+ | movgr2fr.d FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ |3: // RA and RD are both numbers.
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | fcmp.clt.d FCC0, FTMP0, FTMP2
+ } else {
+ | fcmp.cult.d FCC0, FTMP0, FTMP2
+ }
+ | add.w TMP2, TMP2, TMP3
+ | movcf2gr TMP3, FCC0
+ if (op == BC_ISLT || op == BC_ISGT) {
+ | maskeqz TMP2, TMP2, TMP3
+ } else {
+ | masknez TMP2, TMP2, TMP3
+ }
+ | b <1
+ |
+ |4: // RA is a number, RD is not a number.
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ | bne CARG4, TISNUM, ->vmeta_comp
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | movgr2fr.w FTMP2, CARG2
+ | movgr2fr.d FTMP0, CARG1
+ | ffint.d.w FTMP2, FTMP2
+ } else {
+ | movgr2fr.w FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ | ffint.d.w FTMP0, FTMP0
+ }
+ | b <3
+ |
+ |5: // RA is an integer, RD is not an integer
+ | sltui TMP1, CARG4, LJ_TISNUM
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | beqz TMP1, ->vmeta_comp
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ if (op == BC_ISLT || op == BC_ISGE) {
+ | movgr2fr.w FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ | ffint.d.w FTMP0, FTMP0
+ } else {
+ | movgr2fr.w FTMP2, CARG2
+ | movgr2fr.d FTMP0, CARG1
+ | ffint.d.w FTMP2, FTMP2
+ }
+ | b <3
+ break;
+
+ /* ISEQV/ISNEV: compare two stack slots for (in)equality and conditionally
+ ** take the jump encoded in the following instruction word.
+ ** vk is 1 for ISEQV and 0 for ISNEV; it selects which code variant is emitted.
+ */
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ | add.d RA, BASE, RA
+ | add.d RD, BASE, RD
+ | addi.d PC, PC, 4
+ | ld.d CARG1, 0(RA)
+ | ld.d CARG2, 0(RD)
+ | ld.hu TMP2, -4+OFS_RD(PC) // PC was already advanced, hence the -4.
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ | sltu TMP0, TISNUM, CARG3 // TMP0=1: tag of RA is (unsigned) above TISNUM
+ | sltu TMP1, TISNUM, CARG4 // TMP1=1: tag of RD is (unsigned) above TISNUM
+ | or TMP0, TMP0, TMP1
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ // Neither tag is above TISNUM: continue in the ISEQN/ISNEN code, which
+ // decodes TMP2 itself and expects TMP3 to hold the bias loaded above.
+ if (vk) {
+ | beqz TMP0, ->BC_ISEQN_Z
+ } else {
+ | beqz TMP0, ->BC_ISNEN_Z
+ }
+ |// Either or both types are not numbers.
+ |.if FFI
+ | // Check if RA or RD is a cdata.
+ | addi.w TMP0, r0, LJ_TCDATA
+ | beq CARG3, TMP0, ->vmeta_equal_cd
+ | beq CARG4, TMP0, ->vmeta_equal_cd
+ |.endif
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | decode_BC4b TMP2
+ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
+ | bne CARG1, CARG2, >2
+ | // Tag and value are equal.
+ // Only the ISEQV variant emits the "take the jump" code; the ISNEV variant
+ // branches to this global label when it needs to jump.
+ if (vk) {
+ |->BC_ISEQV_Z:
+ | add.d PC, PC, TMP2
+ }
+ |1:
+ | ins_next
+ |
+ |2: // Check if the tags are the same and it's a table or userdata.
+ | xor TMP3, CARG3, CARG4 // Same type?
+ | sltui TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1
+ | masknez TMP0, TMP0, TMP3 // TMP0=0: tags differ, or the table/userdata test above failed
+ | cleartp TAB:TMP1, CARG1
+ if (vk) {
+ | beqz TMP0, <1
+ } else {
+ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
+ }
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | ld.d TAB:TMP3, TAB:TMP1->metatable
+ // TMP0 is set to the "ne" flag (0 for ISEQV, 1 for ISNEV) before the final
+ // branch to ->vmeta_equal; presumably the handler reads it -- confirm there.
+ if (vk) {
+ | beqz TAB:TMP3, <1 // No metatable?
+ | ld.bu TMP3, TAB:TMP3->nomm
+ | andi TMP3, TMP3, 1<<MM_eq
+ | addi.w TMP0, r0, 0 // ne = 0
+ | bnez TMP3, <1 // Or 'no __eq' flag set?
+ } else {
+ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
+ | ld.bu TMP3, TAB:TMP3->nomm
+ | andi TMP3, TMP3, 1<<MM_eq
+ | addi.w TMP0, r0, 1 // ne = 1
+ | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
+ }
+ | b ->vmeta_equal // Handle __eq metamethod.
+ break;
+
+ /* ISEQS/ISNES: compare a stack slot against a string constant and
+ ** conditionally add the jump offset of the following instruction to PC.
+ ** vk is 1 for ISEQS and 0 for ISNES.
+ */
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
+ | add.d RA, BASE, RA
+ | addi.d PC, PC, 4
+ | ld.d CARG1, 0(RA)
+ | sub.d RD, KBASE, RD
+ | ld.hu TMP2, -4+OFS_RD(PC) // PC was already advanced, hence the -4.
+ | ld.d CARG2, -8(RD) // KBASE-8-str_const*8
+ |.if FFI
+ | gettp CARG3, CARG1
+ | addi.w TMP1, r0, LJ_TCDATA
+ |.endif
+ | addi.w TMP0, r0, LJ_TSTR
+ | decode_BC4b TMP2
+ | settp CARG2, TMP0 // Tag the constant so one 64-bit xor compares tag and pointer.
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ |.if FFI
+ | beq CARG3, TMP1, ->vmeta_equal_cd
+ |.endif
+ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
+ | add.w TMP2, TMP2, TMP3
+ // Branch-free select: zero the jump offset when the condition does not hold.
+ if (vk) {
+ | masknez TMP2, TMP2, TMP0 // ISEQS: offset kept only if TMP0==0
+ } else {
+ | maskeqz TMP2, TMP2, TMP0 // ISNES: offset kept only if TMP0!=0
+ }
+ | add.d PC, PC, TMP2
+ | ins_next
+ break;
+
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | // RA = src*8, RD = num_const*8, JMP with RD = target
+ | add.d RA, BASE, RA
+ | add.d RD, KBASE, RD
+ | ld.d CARG1, 0(RA)
+ | ld.d CARG2, 0(RD)
+ | ld.hu TMP2, OFS_RD(PC)
+ | addi.d PC, PC, 4
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | decode_BC4b TMP2
+ | bne CARG3, TISNUM, >4
+ | add.w TMP2, TMP2, TMP3
+ | bne CARG4, TISNUM, >6
+ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
+ |1:
+ if (vk) {
+ | masknez TMP2, TMP2, TMP0
+ | add.d PC, PC, TMP2
+ |2:
+ } else {
+ | maskeqz TMP2, TMP2, TMP0
+ |2:
+ | add.d PC, PC, TMP2
+ }
+ |3:
+ | ins_next
+ |
+ |4: // RA is not an integer.
+ | sltu TMP0, CARG3, TISNUM
+ | add.w TMP2, TMP2, TMP3
+ |.if FFI
+ | beqz TMP0, >7
+ |.else
+ | beqz TMP0, <2
+ |.endif
+ | movgr2fr.d FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ | bne CARG4, TISNUM, >5
+ |// RA is a number, RD is an integer.
+ | ffint.d.w FTMP2, FTMP2
+ |
+ |5: // RA and RD are both numbers.
+ | fcmp.cune.d FCC0, FTMP0, FTMP2
+ | movcf2gr TMP0, FCC0
+ | b <1
+ |
+ |6: // RA is an integer, RD is a number.
+ | sltu TMP0, CARG4, TISNUM
+ |.if FFI
+ | beqz TMP0, >8
+ |.else
+ | beqz TMP0, <2
+ |.endif
+ | movgr2fr.w FTMP0, CARG1
+ | movgr2fr.d FTMP2, CARG2
+ | ffint.d.w FTMP0, FTMP0
+ | b <5
+ |
+ |.if FFI
+ |7: // RA not int, not number
+ | addi.w TMP0, r0, LJ_TCDATA
+ | bne CARG3, TMP0, <2
+ | b ->vmeta_equal_cd
+ |
+ |8: // RD not int, not number
+ | addi.w TMP0, r0, LJ_TCDATA
+ | bne CARG4, TMP0, <2
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ /* ISEQP/ISNEP: compare the type tag of a stack slot against a primitive
+ ** type and conditionally add the jump offset of the following instruction
+ ** to PC. vk is 1 for ISEQP and 0 for ISNEP.
+ */
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
+ | add.d RA, BASE, RA
+ | srli.w TMP0, RD, 3
+ | ld.d TMP1, 0(RA)
+ | nor TMP0, TMP0, r0 // ~TMP0: ~0 ~1 ~2
+ | ld.hu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target
+ | gettp TMP1, TMP1 // Reduce the TValue to its tag; both tests below compare tags.
+ | addi.d PC, PC, 4
+ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
+ |.if FFI
+ | addi.w TMP3, r0, LJ_TCDATA
+ | beq TMP1, TMP3, ->vmeta_equal_cd
+ |.endif
+ | decode_BC4b TMP2
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
+ // Branch-free select: zero the jump offset when the condition does not hold.
+ if (vk) {
+ | masknez TMP2, TMP2, TMP0
+ } else {
+ | maskeqz TMP2, TMP2, TMP0
+ }
+ | add.d PC, PC, TMP2
+ | ins_next
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
+ | add.d RD, BASE, RD
+ | ld.hu TMP2, OFS_RD(PC)
+ | ld.d TMP0, 0(RD)
+ | addi.d PC, PC, 4
+ | gettp TMP0, TMP0
+ | add.d RA, BASE, RA
+ | sltui TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false
+ | decode_BC4b TMP2
+ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4
+ | ld.d CRET1, 0(RD)
+ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2
+ if (op == BC_IST || op == BC_ISTC) {
+ | beqz TMP0, >1
+ if (op == BC_ISTC) {
+ | st.d CRET1, 0(RA)
+ }
+ } else {
+ | bnez TMP0, >1
+ if (op == BC_ISFC) {
+ | st.d CRET1, 0(RA)
+ }
+ }
+ | add.d PC, PC, TMP2
+ |1:
+ | ins_next
+ break;
+
+ /* ISTYPE: check that the slot's type tag matches the expected type and
+ ** branch to ->vmeta_istype on mismatch.
+ */
+ case BC_ISTYPE:
+ | // RA = src*8, RD = -type*8
+ | add.d TMP0, BASE, RA
+ | srli.w TMP1, RD, 3
+ | ld.d TMP0, 0(TMP0)
+ | gettp TMP0, TMP0
+ | add.d TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0
+ | bnez TMP0, ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RD = -(TISNUM-1)*8
+ | add.d TMP0, BASE, RA
+ | ld.d TMP0, 0(TMP0)
+ | checknum TMP0, ->vmeta_istype
+ | ins_next
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | // RA = dst*8, RD = src*8
+ | add.d RD, BASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RD)
+ | ins_next1
+ | st.d TMP0, 0(RA)
+ | ins_next2
+ break;
+ case BC_NOT:
+ | // RA = dst*8, RD = src*8
+ | add.d RD, BASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RD)
+ | addi.d TMP1, r0, LJ_TTRUE
+ | ins_next1
+ | gettp TMP0, TMP0
+ | sltu TMP0, TMP1, TMP0
+ | addi.w TMP0, TMP0, 1
+ | slli.d TMP0, TMP0, 47
+ | nor TMP0, TMP0, r0
+ | st.d TMP0, 0(RA)
+ | ins_next2
+ break;
+ case BC_UNM:
+ | // RA = dst*8, RD = src*8
+ | add.d RB, BASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RB)
+ | addu16i.d TMP1, r0, 0x8000
+ | gettp CARG3, TMP0
+ | bne CARG3, TISNUM, >1
+ | sub.w TMP0, r0, TMP0
+ | beq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
+ | bstrpick.d TMP0, TMP0, 31, 0
+ | settp TMP0, TISNUM
+ | b >2
+ |1:
+ | sltui TMP3, CARG3, LJ_TISNUM
+ | slli.d TMP1, TMP1, 32
+ | beqz TMP3, ->vmeta_unm
+ | xor TMP0, TMP0, TMP1 // sign => ~sign
+ |2:
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_LEN:
+ | // RA = dst*8, RD = src*8
+ | add.d CARG2, BASE, RD
+ | ld.d TMP0, 0(CARG2)
+ | add.d RA, BASE, RA
+ | gettp TMP1, TMP0
+ | addi.d TMP2, TMP1, -LJ_TSTR
+ | cleartp STR:CARG1, TMP0
+ | bnez TMP2, >2
+ | ld.w CARG1, STR:CARG1->len
+ |1:
+ | settp CARG1, TISNUM
+ | st.d CARG1, 0(RA)
+ | ins_next
+ |2:
+ | addi.d TMP2, TMP1, -LJ_TTAB
+ | bnez TMP2, ->vmeta_len
+#if LJ_52
+ | ld.d TAB:TMP2, TAB:CARG1->metatable
+ | bnez TAB:TMP2, >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | bl extern lj_tab_len // (GCtab *t)
+ | // Returns uint32_t (but less than 2^31).
+ | b <1
+#if LJ_52
+ |9:
+ | ld.bu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_len
+ | bnez TMP0, <3 // 'no __len' flag set: done.
+ | b ->vmeta_len
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |// fpmod a, b, c: a = b - floor(b/c)*c on double-precision registers.
+ |// The quotient is passed to ->vm_floor in FARG1 and the result is read
+ |// back from FRET1. b and c are read again after the call.
+ |// NOTE(review): relies on ->vm_floor leaving b and c intact -- confirm.
+ |.macro fpmod, a, b, c
+ | fdiv.d FARG1, b, c
+ | bl ->vm_floor // floor(b/c)
+ | fmul.d a, FRET1, c
+ | fsub.d a, b, a // b - floor(b/c)*c
+ |.endmacro
+ |
+ |// ins_arithpre: common operand setup for the arithmetic instructions.
+ |// Sets the C variable vk from the opcode (0, 1 or other), decodes the two
+ |// source operands into RB/RC and turns them into addresses: relative to
+ |// BASE for stack slots, relative to KBASE for number constants.
+ |// RA is left untouched here.
+ |.macro ins_arithpre
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ ||if (vk == 1) {
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
+ | // NV form: the decode targets are swapped relative to the other forms.
+ | decode_RB RC, INS
+ | decode_RDtoRC8 RB, RD
+ ||} else {
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ ||}
+ ||switch (vk) {
+ ||case 0: // suffix is VN
+ | add.d RB, BASE, RB
+ | add.d RC, KBASE, RC
+ || break;
+ ||case 1: // suffix is NV
+ | add.d RC, BASE, RC
+ | add.d RB, KBASE, RB
+ || break;
+ ||default: // CAT or suffix is VV
+ | add.d RB, BASE, RB
+ | add.d RC, BASE, RC
+ || break;
+ ||}
+ |.endmacro
+ |
+ |// ins_arithfp fpins, itype1, itype2: floating-point path of an arithmetic op.
+ |// Expects RB/RC to hold operand addresses and itype1/itype2 their type tags.
+ |// Both tags are turned into flags (1 if unsigned-below TISNUM); unless both
+ |// flags are set the macro branches to ->vmeta_arith. Otherwise it applies
+ |// fpins to the two loaded doubles and stores FRET1 to the destination slot.
+ |// itype1 and itype2 are clobbered. RA is already an address at the branch.
+ |.macro ins_arithfp, fpins, itype1, itype2
+ | fld.d FTMP0, 0(RB)
+ | sltu itype1, itype1, TISNUM
+ | sltu itype2, itype2, TISNUM
+ | fld.d FTMP2, 0(RC)
+ | and itype1, itype1, itype2
+ | add.d RA, BASE, RA
+ | beqz itype1, ->vmeta_arith
+ | fpins FRET1, FTMP0, FTMP2
+ | ins_next1
+ | fst.d FRET1, 0(RA)
+ | ins_next2
+ |.endmacro
+ |
+ |// ins_arithead itype1, itype2, tval1, tval2: load the two operands from the
+ |// addresses in RB and RC into tval1/tval2 and extract their type tags into
+ |// itype1/itype2 for the type checks that follow.
+ |.macro ins_arithead, itype1, itype2, tval1, tval2
+ | ld.d tval1, 0(RB)
+ | ld.d tval2, 0(RC)
+ | // Check for two integers.
+ | gettp itype1, tval1
+ | gettp itype2, tval2
+ |.endmacro
+ |
+ |// ins_arithdn intins, fpins: add/sub/mul with an integer fast path.
+ |// If both tags equal TISNUM, the 32-bit operation named by intins is done
+ |// with an explicit overflow test; on overflow control goes to ->vmeta_arith
+ |// with RA already converted to an address. If either tag differs, the
+ |// code at label 1 runs the floating-point path via ins_arithfp.
+ |.macro ins_arithdn, intins, fpins
+ | ins_arithpre
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
+ | bne TMP0, TISNUM, >1
+ | bne TMP1, TISNUM, >1
+ | slli.w CARG3, CARG1, 0 // Sign-extended low 32 bits of operand 1.
+ | slli.w CARG4, CARG2, 0 // Sign-extended low 32 bits of operand 2.
+ |.if "intins" == "add.w"
+ | intins CRET1, CARG3, CARG4
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
+ | xor TMP2, CRET1, CARG4
+ | and TMP1, TMP1, TMP2
+ | add.d RA, BASE, RA
+ | blt TMP1, r0, ->vmeta_arith
+ |.elif "intins" == "sub.w"
+ | intins CRET1, CARG3, CARG4
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
+ | xor TMP2, CARG3, CARG4
+ | and TMP1, TMP1, TMP2
+ | add.d RA, BASE, RA
+ | blt TMP1, r0, ->vmeta_arith
+ |.elif "intins" == "mulw.d.w"
+ | // The name mulw.d.w only selects this branch; the product is formed
+ | // from mul.w (low word) and mulh.w (high word).
+ | mul.w CRET1, CARG3, CARG4
+ | mulh.w TMP2, CARG3, CARG4
+ | srai.w TMP1, CRET1, 31 // 63-32bit not all 0 or 1: overflow.
+ | add.d RA, BASE, RA
+ | bne TMP1, TMP2, ->vmeta_arith
+ |.endif
+ | bstrpick.d CRET1, CRET1, 31, 0 // Keep bits 31..0, clear the upper half before tagging.
+ | settp CRET1, TISNUM
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |1: // Check for two numbers.
+ | ins_arithfp, fpins, TMP0, TMP1
+ |.endmacro
+ |
+ |// ins_arithdiv fpins: division has no integer fast path here; operands are
+ |// decoded and loaded, then handed straight to the floating-point path.
+ |.macro ins_arithdiv, fpins
+ | ins_arithpre
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
+ | ins_arithfp, fpins, TMP0, TMP1
+ |.endmacro
+ |
+ |// ins_arithmod fpins: modulo with an integer fast path.
+ |// If both tags equal TISNUM, the sign-extended 32-bit operands are passed to
+ |// lj_vm_modi in CARG1/CARG2; a zero divisor goes to ->vmeta_arith instead.
+ |// The result in CRET1 is truncated to 32 bits, tagged and stored.
+ |// If either tag differs, label 1 runs the floating-point path.
+ |.macro ins_arithmod, fpins
+ | ins_arithpre
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
+ | bne TMP0, TISNUM, >1
+ | bne TMP1, TISNUM, >1
+ | slli.w CARG1, CARG1, 0
+ | slli.w CARG2, CARG2, 0
+ | add.d RA, BASE, RA
+ | beqz CARG2, ->vmeta_arith // Divisor is zero: leave it to the meta handler.
+ | bl extern lj_vm_modi
+ | bstrpick.d CRET1, CRET1, 31, 0
+ | settp CRET1, TISNUM
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |1: // Check for two numbers.
+ | ins_arithfp, fpins, TMP0, TMP1
+ |.endmacro
+
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arithdn add.w, fadd.d
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arithdn sub.w, fsub.d
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arithdn mulw.d.w, fmul.d
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithdiv fdiv.d
+ break;
+ case BC_MODVN: case BC_MODNV: case BC_MODVV:
+ | ins_arithmod fpmod
+ break;
+ case BC_POW:
+ | ins_arithpre
+ | ld.d CARG1, 0(RB)
+ | ld.d CARG2, 0(RC)
+ | gettp TMP0, CARG1
+ | gettp TMP1, CARG2
+ | sltui TMP0, TMP0, LJ_TISNUM
+ | sltui TMP1, TMP1, LJ_TISNUM
+ | and TMP0, TMP0, TMP1
+ | add.d RA, BASE, RA
+ | beqz TMP0, ->vmeta_arith
+ | fld.d FARG1, 0(RB)
+ | fld.d FARG2, 0(RC)
+ | bl extern pow
+ | ins_next1
+ | fst.d FRET1, 0(RA)
+ | ins_next2
+ break;
+
+ case BC_CAT:
+ | // RA = dst*8, RB = src_start*8, RC = src_end*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | sub.d CARG3, RC, RB
+ | st.d BASE, L->base
+ | add.d CARG2, BASE, RC
+ | or MULTRES, RB, r0
+ |->BC_CAT_Z:
+ | srli.w CARG3, CARG3, 3
+ | st.d PC, SAVE_PC(sp)
+ | or CARG1, L, r0
+ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | ld.d BASE, L->base
+ | bnez CRET1, ->vmeta_binop
+ | add.d RB, BASE, MULTRES
+ | ld.d TMP0, 0(RB)
+ | add.d RA, BASE, RA
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | // RA = dst*8, RD = str_const*8 (~)
+ | sub.d TMP1, KBASE, RD
+ | addi.w TMP2, r0, LJ_TSTR
+ | ld.d TMP0, -8(TMP1) // KBASE-8-str_const*8
+ | add.d RA, BASE, RA
+ | settp TMP0, TMP2
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | // RA = dst*8, RD = cdata_const*8 (~)
+ | sub.d TMP1, KBASE, RD
+ | ld.d TMP0, -8(TMP1) // KBASE-8-cdata_const*8
+ | addi.w TMP2, r0, LJ_TCDATA
+ | add.d RA, BASE, RA
+ | settp TMP0, TMP2
+ | st.d TMP0, 0(RA)
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT:
+ | // RA = dst*8, RD = int16_literal*8
+ | srai.w RD, INS, 16
+ | add.d RA, BASE, RA
+ | bstrpick.d RD, RD, 31, 0
+ | settp RD, TISNUM
+ | st.d RD, 0(RA)
+ | ins_next
+ break;
+ case BC_KNUM:
+ | // RA = dst*8, RD = num_const*8
+ | add.d RD, KBASE, RD
+ | add.d RA, BASE, RA
+ | ld.d TMP0, 0(RD)
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_KPRI:
+ | // RA = dst*8, RD = primitive_type*8 (~)
+ | add.d RA, BASE, RA
+ | slli.d TMP0, RD, 44 // 44+3
+ | nor TMP0, TMP0, r0
+ | st.d TMP0, 0(RA)
+ | ins_next
+ break;
+ case BC_KNIL:
+ | // RA = base*8, RD = end*8
+ | add.d RA, BASE, RA
+ | st.d TISNIL, 0(RA)
+ | addi.d RA, RA, 8
+ | add.d RD, BASE, RD
+ |1:
+ | st.d TISNIL, 0(RA)
+ | slt TMP0, RA, RD
+ | addi.d RA, RA, 8
+ | bnez TMP0, <1
+ | ins_next
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | // RA = dst*8, RD = uvnum*8
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | add.d RA, BASE, RA
+ | cleartp LFUNC:TMP0
+ | add.d RD, RD, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:RD->uvptr
+ | ld.d TMP1, UPVAL:TMP0->v
+ | ld.d TMP2, 0(TMP1)
+ | ins_next1
+ | st.d TMP2, 0(RA)
+ | ins_next2
+ break;
+ /* USETV: store a stack slot into an upvalue. If the upvalue is both closed
+ ** and marked black, and the stored value is a white collectable object,
+ ** lj_gc_barrieruv is called.
+ */
+ case BC_USETV:
+ | // RA = uvnum*8, RD = src*8
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | add.d RD, BASE, RD
+ | cleartp LFUNC:TMP0
+ | add.d RA, RA, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
+ | ld.d CRET1, 0(RD)
+ | ld.bu TMP3, UPVAL:TMP0->marked
+ | ld.d CARG2, UPVAL:TMP0->v
+ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
+ | ld.bu TMP0, UPVAL:TMP0->closed
+ | gettp TMP2, CRET1
+ | st.d CRET1, 0(CARG2)
+ | or TMP3, TMP3, TMP0 // Combine the black bit with the closed flag.
+ | addi.d TMP0, r0, LJ_GC_BLACK|1
+ | addi.d TMP2, TMP2, -(LJ_TNUMX+1)
+ | beq TMP3, TMP0, >2 // Upvalue is closed and black?
+ |1:
+ | ins_next
+ |
+ |2: // Check if new value is collectable.
+ | sltui TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
+ | cleartp GCOBJ:CRET1, CRET1
+ | beqz TMP0, <1 // tvisgcv(v)
+ | ld.bu TMP3, GCOBJ:CRET1->gch.marked
+ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
+ | beqz TMP3, <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | .ADD16I CARG1, DISPATCH, GG_DISP2G
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETS:
+ | // RA = uvnum*8, RD = str_const*8 (~)
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | sub.d TMP1, KBASE, RD
+ | cleartp LFUNC:TMP0
+ | add.d RA, RA, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr
+ | ld.d STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
+ | ld.bu TMP2, UPVAL:TMP0->marked
+ | ld.d CARG2, UPVAL:TMP0->v
+ | ld.bu TMP3, STR:TMP1->marked
+ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv)
+ | ld.bu TMP2, UPVAL:TMP0->closed
+ | addi.d TMP0, r0, LJ_TSTR
+ | settp TMP1, TMP0
+ | st.d TMP1, 0(CARG2)
+ | bnez TMP4, >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | beqz TMP2, <1
+ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str)
+ | beqz TMP0, <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | .ADD16I CARG1, DISPATCH, GG_DISP2G
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETN:
+ | // RA = uvnum*8, RD = num_const*8
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | add.d RD, KBASE, RD
+ | cleartp LFUNC:TMP0
+ | add.d TMP0, RA, LFUNC:TMP0
+ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
+ | ld.d TMP1, 0(RD)
+ | ld.d TMP0, UPVAL:TMP0->v
+ | st.d TMP1, 0(TMP0)
+ | ins_next
+ break;
+ case BC_USETP:
+ | // RA = uvnum*8, RD = primitive_type*8 (~)
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | slli.d TMP2, RD, 44
+ | cleartp LFUNC:TMP0
+ | add.d TMP0, RA, LFUNC:TMP0
+ | nor TMP2, TMP2, r0
+ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr
+ | ld.d TMP1, UPVAL:TMP0->v
+ | st.d TMP2, 0(TMP1)
+ | ins_next
+ break;
+
+ /* UCLO: take the jump in RD and, if the Lua state has open upvalues, close
+ ** those at or above the given stack level via lj_func_closeuv.
+ */
+ case BC_UCLO:
+ | // RA = level*8, RD = target
+ | ld.d TMP2, L->openupval
+ | branch_RD // Do this first since RD is not saved.
+ | st.d BASE, L->base // Publish BASE before the C call; it is reloaded afterwards.
+ | or CARG1, L, r0
+ | beqz TMP2, >1 // No open upvalues: nothing to close.
+ | add.d CARG2, BASE, RA
+ | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | ld.d BASE, L->base
+ |1:
+ | ins_next
+ break;
+
+ /* FNEW: create a new closure from a prototype constant, with the current
+ ** function as parent, and store it tagged as LJ_TFUNC in the destination.
+ */
+ case BC_FNEW:
+ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
+ | sub.d TMP1, KBASE, RD
+ | ld.d CARG3, FRAME_FUNC(BASE)
+ | ld.d CARG2, -8(TMP1) // KBASE-8-proto_const*8
+ | st.d BASE, L->base // Publish BASE and PC before calling into C.
+ | st.d PC, SAVE_PC(sp)
+ | cleartp CARG3
+ | or CARG1, L, r0
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | bl extern lj_func_newL_gc
+ | // Returns GCfuncL *.
+ | addi.d TMP0, r0, LJ_TFUNC
+ | ld.d BASE, L->base // Reload BASE after the call.
+ | settp CRET1, TMP0
+ | add.d RA, BASE, RA
+ | st.d CRET1, 0(RA)
+ | ins_next
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ /* TNEW/TDUP: create a new table (TNEW) or duplicate a template table
+ ** constant (TDUP) and store it tagged as LJ_TTAB in the destination slot.
+ ** If gc.total is not below gc.threshold, lj_gc_step_fixtop is called first
+ ** (label 5) and the allocation is then retried from label 1.
+ */
+ case BC_TNEW:
+ case BC_TDUP:
+ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
+ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total)
+ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold)
+ | st.d BASE, L->base
+ | sltu TMP2, TMP0, TMP1 // TMP2=1: total < threshold, no GC step needed.
+ | st.d PC, SAVE_PC(sp)
+ | beqz TMP2, >5
+ |1:
+ if (op == BC_TNEW) {
+ | srli.w CARG2, RD, 3
+ | andi CARG2, CARG2, 0x7ff // asize = low 11 bits of the operand.
+ | ori TMP0, r0, 0x801
+ | addi.w TMP2, CARG2, -0x7ff // TMP2=0 iff asize == 0x7ff.
+ | srli.w CARG3, RD, 14 // hbits = remaining upper bits of the operand.
+ | masknez TMP0, TMP0, TMP2 // TMP0 = 0x801 only when asize == 0x7ff.
+ | maskeqz CARG2, CARG2, TMP2 // CARG2 = 0 when asize == 0x7ff.
+ | or CARG2, CARG2, TMP0 // Net effect: asize 0x7ff is passed as 0x801.
+ | // (lua_State *L, int32_t asize, uint32_t hbits)
+ | or CARG1, L, r0
+ | bl extern lj_tab_new
+ | // Returns Table *.
+ } else {
+ | sub.d TMP1, KBASE, RD
+ | or CARG1, L, r0
+ | ld.d CARG2, -8(TMP1) // KBASE-8-tab_const*8
+ | bl extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Returns Table *.
+ }
+ | addi.d TMP0, r0, LJ_TTAB
+ | ld.d BASE, L->base
+ | ins_next1
+ | settp CRET1, TMP0
+ | add.d RA, BASE, RA
+ | st.d CRET1, 0(RA)
+ | ins_next2
+ |5:
+ | or MULTRES, RD, r0 // Keep RD in MULTRES across the call.
+ | or CARG1, L, r0
+ | bl extern lj_gc_step_fixtop // (lua_State *L)
+ | or RD, MULTRES, r0
+ | b <1
+ break;
+
+ case BC_GGET:
+ | // RA = dst*8, RD = str_const*8 (~)
+ case BC_GSET:
+ | // RA = src*8, RD = str_const*8 (~)
+ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE)
+ | sub.d TMP1, KBASE, RD
+ | ld.d STR:RC, -8(TMP1) // KBASE-8-str_const*8
+ | cleartp LFUNC:TMP0
+ | ld.d TAB:RB, LFUNC:TMP0->env
+ | add.d RA, BASE, RA
+ if (op == BC_GGET) {
+ | b ->BC_TGETS_Z
+ } else {
+ | b ->BC_TSETS_Z
+ }
+ break;
+
+ case BC_TGETV:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | add.d CARG3, BASE, RC
+ | ld.d TAB:RB, 0(CARG2)
+ | ld.d TMP2, 0(CARG3)
+ | add.d RA, BASE, RA
+ | checktab TAB:RB, ->vmeta_tgetv
+ | gettp TMP3, TMP2
+ | ld.w TMP0, TAB:RB->asize
+ | bne TMP3, TISNUM, >5 // Integer key?
+ | slli.w TMP2, TMP2, 0
+ | ld.d TMP1, TAB:RB->array
+ | sltu TMP3, TMP2, TMP0 //array part (keys = [0, asize-1])
+ | slli.w TMP2, TMP2, 3
+ | beqz TMP3, ->vmeta_tgetv // Integer key and in array part?
+ | add.d TMP2, TMP1, TMP2
+ | ld.d CRET1, 0(TMP2)
+ | beq CRET1, TISNIL, >2
+ |1:
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ | ld.bu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_index
+ | bnez TMP0, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetv
+ |
+ |5:
+ | addi.d TMP0, r0, LJ_TSTR
+ | cleartp RC, TMP2
+ | bne TMP3, TMP0, ->vmeta_tgetv // String key?
+ | b ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | sub.d CARG3, KBASE, RC
+ | ld.d TAB:RB, 0(CARG2)
+ | add.d RA, BASE, RA
+ | ld.d STR:RC, -8(CARG3) // KBASE-8-str_const*8
+ | checktab TAB:RB, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ld.w TMP0, TAB:RB->hmask
+ | ld.w TMP1, STR:RC->sid
+ | ld.d NODE:TMP2, TAB:RB->node
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | slli.w TMP0, TMP1, 5
+ | slli.w TMP1, TMP1, 3
+ | sub.w TMP1, TMP0, TMP1
+ | addi.d TMP3, r0, LJ_TSTR
+ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
+ |1:
+ | ld.d CARG1, NODE:TMP2->key
+ | ld.d CARG2, NODE:TMP2->val
+ | ld.d NODE:TMP1, NODE:TMP2->next
+ | ld.d TAB:TMP3, TAB:RB->metatable
+ | bne CARG1, RC, >4
+ | beq CARG2, TISNIL, >5 // Key found, but nil value?
+ |3:
+ | st.d CARG2, 0(RA)
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | or NODE:TMP2, NODE:TMP1, r0
+ | bnez NODE:TMP1, <1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | or CARG2, TISNIL, r0
+ | beqz TAB:TMP3, <3 // No metatable: done.
+ | ld.bu TMP0, TAB:TMP3->nomm
+ | andi TMP0, TMP0, 1<<MM_index
+ | bnez TMP0, <3 // 'no __index' flag set: done.
+ | b ->vmeta_tgets
+ break;
+ case BC_TGETB:
+ | // RA = dst*8, RB = table*8, RC = index*8
+ | decode_RB RB, INS
+ | add.d CARG2, BASE, RB
+ | decode_RDtoRC8 RC, RD
+ | ld.d TAB:RB, 0(CARG2)
+ | add.d RA, BASE, RA
+ | srli.w TMP0, RC, 3
+ | checktab TAB:RB, ->vmeta_tgetb
+ | ld.w TMP1, TAB:RB->asize
+ | ld.d TMP2, TAB:RB->array
+ | sltu TMP1, TMP0, TMP1
+ | add.d RC, TMP2, RC
+ | beqz TMP1, ->vmeta_tgetb
+ | ld.d CRET1, 0(RC)
+ | beq CRET1, TISNIL, >5
+ |1:
+ | st.d CRET1, 0(RA)
+ | ins_next
+ |
+ |5: // Check for __index if table value is nil.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ | ld.bu TMP1, TAB:TMP2->nomm
+ | andi TMP1, TMP1, 1<<MM_index
+ | bnez TMP1, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
+ break;
+ case BC_TGETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d RB, BASE, RB
+ | add.d RC, BASE, RC
+ | ld.d TAB:CARG1, 0(RB)
+ | ld.w CARG2, 0(RC)
+ | add.d RA, BASE, RA
+ | cleartp TAB:CARG1
+ | ld.w TMP0, TAB:CARG1->asize
+ | ld.d TMP1, TAB:CARG1->array
+ | sltu TMP0, CARG2, TMP0
+ | slli.w TMP2, CARG2, 3
+ | add.d TMP3, TMP1, TMP2
+ | beqz TMP0, ->vmeta_tgetr // In array part?
+ | ld.d TMP1, 0(TMP3)
+ |->BC_TGETR_Z:
+ | ins_next1
+ | st.d TMP1, 0(RA)
+ | ins_next2
+ break;
+
+ case BC_TSETV:
+ | // RA = src*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | add.d CARG2, BASE, RB
+ | add.d CARG3, BASE, RC
+ | ld.d TAB:RB, 0(CARG2)
+ | ld.d TMP2, 0(CARG3)
+ | add.d RA, BASE, RA
+ | checktab TAB:RB, ->vmeta_tsetv
+ | slli.w RC, TMP2, 0
+ | checkint TMP2, >5
+ | ld.w TMP0, TAB:RB->asize
+ | ld.d TMP1, TAB:RB->array
+ | sltu TMP0, RC, TMP0
+ | slli.w TMP2, RC, 3
+ | beqz TMP0, ->vmeta_tsetv // Integer key and in array part?
+ | add.d TMP1, TMP1, TMP2
+ | ld.bu TMP3, TAB:RB->marked
+ | ld.d TMP0, 0(TMP1)
+ | ld.d CRET1, 0(RA)
+ | beq TMP0, TISNIL, >3
+ |1:
+ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
+ | st.d CRET1, 0(TMP1)
+ | bnez TMP2, >7
+ |2:
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | ld.d TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ | ld.bu TMP2, TAB:TMP2->nomm
+ | andi TMP2, TMP2, 1<<MM_newindex
+ | bnez TMP2, <1 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsetv
+ |5:
+ | gettp TMP0, TMP2
[continued in next message]
--- SoupGate-Win32 v1.05
* Origin: fsxNet Usenet Gateway (21:1/5)