The package rpms/luajit.git has added or updated architecture-specific content in its spec file (ExclusiveArch/ExcludeArch or %ifarch/%ifnarch) in commit(s): https://src.fedoraproject.org/cgit/rpms/luajit.git/commit/?id=3bc1e4725fb68d....
Change: +ExclusiveArch: %{arm} %{ix86} x86_64 %{mips} aarch64
Thanks.
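
The arch macros in the new ExclusiveArch line are shorthand for whole architecture families defined by the local rpm macro set. As a quick, non-authoritative check, they can be expanded on a Fedora box with rpm --eval (the exact lists depend on the installed macro files):

    # Expand the arch-family macros referenced by ExclusiveArch.
    # Output varies with the local rpm macro definitions.
    rpm --eval 'arm:  %{arm}'
    rpm --eval 'ix86: %{ix86}'
    rpm --eval 'mips: %{mips}'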
Full change:
============
commit ee6c7ab93f1b70e0297868d285935cae3aea43b9
Author: Siddhesh Poyarekar <siddhesh@gotplt.org>
Date:   Mon Nov 29 18:35:24 2021 +0530
Update dates in changelog for F35
diff --git a/luajit.spec b/luajit.spec
index 49540fc..4d1d67a 100644
--- a/luajit.spec
+++ b/luajit.spec
@@ -94,10 +94,10 @@ make check || true
 %{_libdir}/pkgconfig/%{name}.pc
 
 %changelog
-* Tue Oct 26 2021 Siddhesh Poyarekar <siddhesh@gotplt.org> - 2.1.0-0.22beta3
+* Mon Nov 29 2021 Siddhesh Poyarekar <siddhesh@gotplt.org> - 2.1.0-0.22beta3
 - Bring back the earlier code to do ln -sf.
 
-* Tue Oct 12 2021 Andreas Schneider <asn@redhat.com> - 2.1.0-0.21beta3
+* Mon Nov 29 2021 Andreas Schneider <asn@redhat.com> - 2.1.0-0.21beta3
 - Rebase onto https://github.com/LuaJIT/LuaJIT/tree/v2.1
 - Dropped support for ppc64le
 - Dropped support for s390x
commit 74fc89321e0bd683e944f505dde072fd73d8a2ce
Author: Siddhesh Poyarekar <siddhesh@gotplt.org>
Date:   Tue Oct 26 21:38:28 2021 +0530
Bring back the earlier code to do ln -sf
diff --git a/luajit.spec b/luajit.spec
index 48300de..49540fc 100644
--- a/luajit.spec
+++ b/luajit.spec
@@ -4,7 +4,7 @@ Name: luajit
 Version: 2.1.0
 %global apiver %(v=%{version}; echo ${v%.${v#[0-9].[0-9].}})
 %global srcver %{version}%{?rctag:-%{rctag}}
-Release: 0.21%{?rctag:%{rctag}}%{?dist}
+Release: 0.22%{?rctag:%{rctag}}%{?dist}
 Summary: Just-In-Time Compiler for Lua
 License: MIT
 URL: http://luajit.org/
@@ -60,14 +60,17 @@ make amalg Q= E=@: PREFIX=%{_prefix} TARGET_STRIP=: \
 %make_install PREFIX=%{_prefix} \
 	MULTILIB=%{_lib}
 
-ln -sf luajit-2.1.0-beta3 %{buildroot}%{_bindir}/luajit
-
 rm -rf _tmp_html ; mkdir _tmp_html
 cp -a doc _tmp_html/html
 
 # Remove static .a
 find %{buildroot} -type f -name *.a -delete -print
 
+%if %{defined rctag}
+# Development versions are not doing such symlink
+ln -s %{name}-%{srcver} %{buildroot}%{_bindir}/%{name}
+%endif
+
 %ldconfig_scriptlets
 
 %check
@@ -91,6 +94,9 @@ make check || true
 %{_libdir}/pkgconfig/%{name}.pc
 
 %changelog
+* Tue Oct 26 2021 Siddhesh Poyarekar <siddhesh@gotplt.org> - 2.1.0-0.22beta3
+- Bring back the earlier code to do ln -sf.
+
 * Tue Oct 12 2021 Andreas Schneider <asn@redhat.com> - 2.1.0-0.21beta3
 - Rebase onto https://github.com/LuaJIT/LuaJIT/tree/v2.1
 - Dropped support for ppc64le
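
As an aside, the %global apiver context line above derives the API version from %{version} with nested shell parameter expansion; a minimal sketch of that expansion, assuming version 2.1.0, is:

    # What %global apiver evaluates for v=2.1.0:
    v=2.1.0
    echo "${v#[0-9].[0-9].}"          # strips the leading "2.1." -> 0
    echo "${v%.${v#[0-9].[0-9].}}"    # then strips the trailing ".0" -> 2.1 (the apiver)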
commit 3bc1e4725fb68d9b8b5a528673b143d437084948
Author: Andreas Schneider <asn@cryptomilk.org>
Date:   Tue Oct 12 22:14:42 2021 +0200
Apply patches from https://github.com/LuaJIT/LuaJIT/
diff --git a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch b/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch deleted file mode 100644 index 16aca3b..0000000 --- a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch +++ /dev/null @@ -1,31 +0,0 @@ -commit 31afda31814ec02f82ffb0519bee496c87eeaa89 -Merge: 8271c64 1c89933 -Author: Mike Pall <mike> -Date: Tue May 9 21:01:23 2017 +0200 - - Merge branch 'master' into v2.1 - -commit 1c89933f129dde76944336c6bfd05297b8d67730 -Author: Mike Pall <mike> -Date: Tue May 9 20:59:37 2017 +0200 - - Fix LJ_MAX_JSLOTS assertion in rec_check_slots(). - - Thanks to Yichun Zhang. - -diff --git a/src/lj_record.c b/src/lj_record.c -index 9d0469c..c2d0274 100644 ---- a/src/lj_record.c -+++ b/src/lj_record.c -@@ -87,9 +87,9 @@ static void rec_check_slots(jit_State *J) - BCReg s, nslots = J->baseslot + J->maxslot; - int32_t depth = 0; - cTValue *base = J->L->base - J->baseslot; -- lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); -+ lua_assert(J->baseslot >= 1+LJ_FR2); - lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); -- lua_assert(nslots < LJ_MAX_JSLOTS); -+ lua_assert(nslots <= LJ_MAX_JSLOTS); - for (s = 0; s < nslots; s++) { - TRef tr = J->slot[s]; - if (tr) { diff --git a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch b/0002-Add-missing-LJ_MAX_JSLOTS-check.patch deleted file mode 100644 index 70ccfd5..0000000 --- a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch +++ /dev/null @@ -1,40 +0,0 @@ -commit 6259c0b909a8c00fabe3c7e6bd81150ee08cbf9f -Merge: 31afda3 630ff31 -Author: Mike Pall <mike> -Date: Wed May 17 17:38:53 2017 +0200 - - Merge branch 'master' into v2.1 - -commit 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c -Author: Mike Pall <mike> -Date: Wed May 17 17:37:35 2017 +0200 - - Add missing LJ_MAX_JSLOTS check. - - Thanks to Yichun Zhang. - -From 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 17 May 2017 17:37:35 +0200 -Subject: [PATCH 02/72] Add missing LJ_MAX_JSLOTS check. - -Thanks to Yichun Zhang. ---- - src/lj_record.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lj_record.c b/src/lj_record.c -index cecacd2..bc4e8a6 100644 ---- a/src/lj_record.c -+++ b/src/lj_record.c -@@ -633,6 +633,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) - J->framedepth++; - J->base += func+1+LJ_FR2; - J->baseslot += func+1+LJ_FR2; -+ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) -+ lj_trace_err(J, LJ_TRERR_STACKOV); - } - - /* Record tail call. */ --- -2.20.1 diff --git a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch b/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch deleted file mode 100644 index 9d8300f..0000000 --- a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 7381b620358c2561e8690149f1d25828fdad6675 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 7 Jun 2017 19:16:22 +0200 -Subject: [PATCH 03/72] MIPS: Use precise search for exit jump patching. - -Contributed by Djordje Kovacevic and Stefan Pejic. 
---- - src/lj_asm_mips.h | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index 03270cc..d0a1ca5 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -1933,7 +1933,11 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) - MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - for (p++; p < pe; p++) { - if (*p == exitload) { /* Look for load of exit number. */ -- if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ -+ /* Look for exitstub branch. Yes, this covers all used branch variants. */ -+ if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && -+ ((p[-1] & 0xf0000000u) == MIPSI_BEQ || -+ (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || -+ (p[-1] & 0xffe00000u) == MIPSI_BC1F)) { - ptrdiff_t delta = target - p; - if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ - patchbranch: --- -2.20.1 - diff --git a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch b/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch deleted file mode 100644 index 4da6b4d..0000000 --- a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch +++ /dev/null @@ -1,77 +0,0 @@ -From c7c3c4da432ddb543d4b0a9abbb245f11b26afd0 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 7 Jun 2017 19:36:46 +0200 -Subject: [PATCH 04/72] MIPS: Fix handling of spare long-range jump slots. - -Contributed by Djordje Kovacevic and Stefan Pejic. ---- - src/lj_asm_mips.h | 9 +++++---- - src/lj_jit.h | 6 ++++++ - src/lj_mcode.c | 6 ------ - 3 files changed, 11 insertions(+), 10 deletions(-) - -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index d0a1ca5..7631190 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -65,10 +65,9 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) - static void asm_sparejump_setup(ASMState *as) - { - MCode *mxp = as->mcbot; -- /* Assumes sizeof(MCLink) == 8. */ -- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { -+ if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { - lua_assert(MIPSI_NOP == 0); -- memset(mxp+2, 0, MIPS_SPAREJUMP*8); -+ memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); - mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); - lj_mcode_sync(as->mcbot, mxp); -@@ -1947,7 +1946,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) - if (!cstart) cstart = p-1; - } else { /* Branch out of range. Use spare jump slot in mcarea. */ - int i; -- for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { -+ for (i = (int)(sizeof(MCLink)/sizeof(MCode)); -+ i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2); -+ i += 2) { - if (mcarea[i] == tjump) { - delta = mcarea+i - p; - goto patchbranch; -diff --git a/src/lj_jit.h b/src/lj_jit.h -index a2e8fd9..3f38d28 100644 ---- a/src/lj_jit.h -+++ b/src/lj_jit.h -@@ -155,6 +155,12 @@ typedef uint8_t MCode; - typedef uint32_t MCode; - #endif - -+/* Linked list of MCode areas. */ -+typedef struct MCLink { -+ MCode *next; /* Next area. */ -+ size_t size; /* Size of current area. */ -+} MCLink; -+ - /* Stack snapshot header. */ - typedef struct SnapShot { - uint16_t mapofs; /* Offset into snapshot map. */ -diff --git a/src/lj_mcode.c b/src/lj_mcode.c -index f0a1f69..5ea89f6 100644 ---- a/src/lj_mcode.c -+++ b/src/lj_mcode.c -@@ -272,12 +272,6 @@ static void *mcode_alloc(jit_State *J, size_t sz) - - /* -- MCode area management ----------------------------------------------- */ - --/* Linked list of MCode areas. */ --typedef struct MCLink { -- MCode *next; /* Next area. 
*/ -- size_t size; /* Size of current area. */ --} MCLink; -- - /* Allocate a new MCode area. */ - static void mcode_allocarea(jit_State *J) - { --- -2.20.1 - diff --git a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch b/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch deleted file mode 100644 index dda4ae2..0000000 --- a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch +++ /dev/null @@ -1,982 +0,0 @@ -From a057a07ab702e225e21848d4f918886c5b0ac06b Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 7 Jun 2017 23:56:54 +0200 -Subject: [PATCH 05/72] MIPS64: Add soft-float support to JIT compiler backend. - -Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -Sponsored by Cisco Systems, Inc. ---- - src/lj_arch.h | 4 +- - src/lj_asm.c | 8 +- - src/lj_asm_mips.h | 217 +++++++++++++++++++++++++++++++++++++-------- - src/lj_crecord.c | 4 +- - src/lj_emit_mips.h | 2 + - src/lj_ffrecord.c | 2 +- - src/lj_ircall.h | 43 ++++++--- - src/lj_iropt.h | 2 +- - src/lj_jit.h | 4 +- - src/lj_obj.h | 3 + - src/lj_opt_split.c | 2 +- - src/lj_snap.c | 21 +++-- - src/vm_mips64.dasc | 49 ++++++++++ - 13 files changed, 286 insertions(+), 75 deletions(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index c8d7138..b770564 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -337,9 +337,6 @@ - #define LJ_ARCH_BITS 32 - #define LJ_TARGET_MIPS32 1 - #else --#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU --#define LJ_ARCH_NOJIT 1 /* NYI */ --#endif - #define LJ_ARCH_BITS 64 - #define LJ_TARGET_MIPS64 1 - #define LJ_TARGET_GC64 1 -@@ -512,6 +509,7 @@ - #define LJ_ABI_SOFTFP 0 - #endif - #define LJ_SOFTFP (!LJ_ARCH_HASFPU) -+#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) - - #if LJ_ARCH_ENDIAN == LUAJIT_BE - #define LJ_LE 0 -diff --git a/src/lj_asm.c b/src/lj_asm.c -index c2cf5a9..bed2268 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -338,7 +338,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) - ra_modified(as, r); - ir->r = RID_INIT; /* Do not keep any hint. */ - RA_DBGX((as, "remat $i $r", ir, r)); --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (ir->o == IR_KNUM) { - emit_loadk64(as, r, ir); - } else -@@ -1305,7 +1305,7 @@ static void asm_call(ASMState *as, IRIns *ir) - asm_gencall(as, ci, args); - } - --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) - { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; -@@ -1652,10 +1652,10 @@ static void asm_ir(ASMState *as, IRIns *ir) - case IR_MUL: asm_mul(as, ir); break; - case IR_MOD: asm_mod(as, ir); break; - case IR_NEG: asm_neg(as, ir); break; --#if LJ_SOFTFP -+#if LJ_SOFTFP32 - case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: -- lua_assert(0); /* Unused for LJ_SOFTFP. */ -+ lua_assert(0); /* Unused for LJ_SOFTFP32. */ - break; - #else - case IR_DIV: asm_div(as, ir); break; -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index 05af3d0..1406a87 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -290,7 +290,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - { - ra_leftov(as, gpr, ref); - gpr++; --#if LJ_64 -+#if LJ_64 && !LJ_SOFTFP - fpr++; - #endif - } -@@ -301,7 +301,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - #else -- emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 
4 : 0)); -+ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0)); - ofs += 8; - #endif - } -@@ -312,7 +312,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - #endif - if (gpr <= REGARG_LASTGPR) { - gpr++; --#if LJ_64 -+#if LJ_64 && !LJ_SOFTFP - fpr++; - #endif - } else { -@@ -461,12 +461,36 @@ static void asm_tobit(ASMState *as, IRIns *ir) - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, right); - } -+#elif LJ_64 /* && LJ_SOFTFP */ -+static void asm_tointg(ASMState *as, IRIns *ir, Reg r) -+{ -+ /* The modified regs must match with the *.dasc implementation. */ -+ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| -+ RID2RSET(RID_R1)|RID2RSET(RID_R12); -+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); -+ ra_evictset(as, drop); -+ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ -+ ra_destreg(as, ir, RID_RET); -+ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); -+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); -+ if (r == RID_NONE) -+ ra_leftov(as, REGARG_FIRSTGPR, ir->op1); -+ else if (r != REGARG_FIRSTGPR) -+ emit_move(as, REGARG_FIRSTGPR, r); -+} -+ -+static void asm_tobit(ASMState *as, IRIns *ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ emit_dta(as, MIPSI_SLL, dest, dest, 0); -+ asm_callid(as, ir, IRCALL_lj_vm_tobit); -+} - #endif - - static void asm_conv(ASMState *as, IRIns *ir) - { - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - #endif - #if LJ_64 -@@ -477,12 +501,13 @@ static void asm_conv(ASMState *as, IRIns *ir) - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ - #endif --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP32 - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ - #else - lua_assert(irt_type(ir->t) != st); -+#if !LJ_SOFTFP - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ -@@ -608,6 +633,42 @@ static void asm_conv(ASMState *as, IRIns *ir) - } - } - } else -+#else -+ if (irt_isfp(ir->t)) { -+#if LJ_64 && LJ_HASFFI -+ if (stfp) { /* FP to FP conversion. */ -+ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : -+ IRCALL_softfp_d2f); -+ } else { /* Integer to FP conversion. */ -+ IRCallID cid = ((IRT_IS64 >> st) & 1) ? -+ (irt_isnum(ir->t) ? -+ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : -+ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : -+ (irt_isnum(ir->t) ? -+ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : -+ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); -+ asm_callid(as, ir, cid); -+ } -+#else -+ asm_callid(as, ir, IRCALL_softfp_i2d); -+#endif -+ } else if (stfp) { /* FP to integer conversion. */ -+ if (irt_isguard(ir->t)) { -+ /* Checked conversions are only supported from number to int. */ -+ lua_assert(irt_isint(ir->t) && st == IRT_NUM); -+ asm_tointg(as, ir, RID_NONE); -+ } else { -+ IRCallID cid = irt_is64(ir->t) ? -+ ((st == IRT_NUM) ? -+ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : -+ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : -+ ((st == IRT_NUM) ? -+ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : -+ (irt_isint(ir->t) ? 
IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); -+ asm_callid(as, ir, cid); -+ } -+ } else -+#endif - #endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); -@@ -665,7 +726,7 @@ static void asm_strto(ASMState *as, IRIns *ir) - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs = 0; --#if LJ_SOFTFP -+#if LJ_SOFTFP32 - ra_evictset(as, RSET_SCRATCH); - if (ra_used(ir)) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && -@@ -806,7 +867,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP32 - if (!isk) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); -@@ -826,7 +887,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - } - } - #else -- if (irt_isnum(kt)) { -+ if (!LJ_SOFTFP && irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - } else if (!irt_ispri(kt)) { -@@ -882,6 +943,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); - emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); -+ } else if (LJ_SOFTFP && irt_isnum(kt)) { -+ emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); -+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (irt_isaddr(kt)) { - Reg refk = tmp2; - if (isk) { -@@ -960,7 +1024,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - if (irt_isnum(kt)) { - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); -- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); -+ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); - #if !LJ_SOFTFP - emit_tg(as, MIPSI_DMFC1, tmp1, key); -@@ -1123,7 +1187,7 @@ static MIPSIns asm_fxloadins(IRIns *ir) - case IRT_U8: return MIPSI_LBU; - case IRT_I16: return MIPSI_LH; - case IRT_U16: return MIPSI_LHU; -- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; - } -@@ -1134,7 +1198,7 @@ static MIPSIns asm_fxstoreins(IRIns *ir) - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return MIPSI_SB; - case IRT_I16: case IRT_U16: return MIPSI_SH; -- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; - } -@@ -1199,7 +1263,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) - - static void asm_ahuvload(ASMState *as, IRIns *ir) - { -- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); -+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); - Reg dest = RID_NONE, type = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; -@@ -1212,7 +1276,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) - } - } - if (ra_used(ir)) { -- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || -+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? 
RSET_FPR : allow); - rset_clear(allow, dest); -@@ -1261,10 +1325,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) - int32_t ofs = 0; - if (ir->r == RID_SINK) - return; -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -- src = ra_alloc1(as, ir->op2, RSET_FPR); -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -- emit_hsi(as, MIPSI_SDC1, src, idx, ofs); -+ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); - } else { - #if LJ_32 - if (!irt_ispri(ir->t)) { -@@ -1312,7 +1376,7 @@ static void asm_sload(ASMState *as, IRIns *ir) - IRType1 t = ir->t; - #if LJ_32 - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); -- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); -+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); - if (hiop) - t.irt = IRT_NUM; - #else -@@ -1320,7 +1384,7 @@ static void asm_sload(ASMState *as, IRIns *ir) - #endif - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP32 - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); -@@ -1328,29 +1392,44 @@ static void asm_sload(ASMState *as, IRIns *ir) - } - #else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { -- dest = ra_scratch(as, RSET_FPR); -+ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else - #endif - if (ra_used(ir)) { -- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || -+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -- if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { -+ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { -- Reg tmp = ra_scratch(as, RSET_FPR); -+ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); -+#if LJ_SOFTFP -+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); -+ ra_destreg(as, ir, RID_RET); -+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); -+ if (tmp != REGARG_FIRSTGPR) -+ emit_move(as, REGARG_FIRSTGPR, tmp); -+#else - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); -+#endif - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, RSET_GPR); -+#if LJ_SOFTFP -+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); -+ ra_destreg(as, ir, RID_RET); -+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); -+ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); -+#else - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_tg(as, MIPSI_MTC1, tmp, dest); -+#endif - dest = tmp; - t.irt = IRT_INT; /* Check for original type. 
*/ - } -@@ -1399,7 +1478,7 @@ dotypecheck: - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); -- if (ra_hasreg(dest)) -+ if (!LJ_SOFTFP && ra_hasreg(dest)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - } else { - asm_guard(as, MIPSI_BNE, RID_TMP, -@@ -1409,7 +1488,7 @@ dotypecheck: - } - emit_tsi(as, MIPSI_LD, type, base, ofs); - } else if (ra_hasreg(dest)) { -- if (irt_isnum(t)) -+ if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, -@@ -1548,26 +1627,40 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fg(as, mi, dest, left); - } -+#endif - -+#if !LJ_SOFTFP32 - static void asm_fpmath(ASMState *as, IRIns *ir) - { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; -+#if !LJ_SOFTFP - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, MIPSI_SQRT_D); - else -+#endif - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - } - #endif - -+#if !LJ_SOFTFP -+#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) -+#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) -+#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) -+#elif LJ_64 /* && LJ_SOFTFP */ -+#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) -+#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) -+#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) -+#endif -+ - static void asm_add(ASMState *as, IRIns *ir) - { - IRType1 t = ir->t; --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (irt_isnum(t)) { -- asm_fparith(as, ir, MIPSI_ADD_D); -+ asm_fpadd(as, ir); - } else - #endif - { -@@ -1589,9 +1682,9 @@ static void asm_add(ASMState *as, IRIns *ir) - - static void asm_sub(ASMState *as, IRIns *ir) - { --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (irt_isnum(ir->t)) { -- asm_fparith(as, ir, MIPSI_SUB_D); -+ asm_fpsub(as, ir); - } else - #endif - { -@@ -1605,9 +1698,9 @@ static void asm_sub(ASMState *as, IRIns *ir) - - static void asm_mul(ASMState *as, IRIns *ir) - { --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - if (irt_isnum(ir->t)) { -- asm_fparith(as, ir, MIPSI_MUL_D); -+ asm_fpmul(as, ir); - } else - #endif - { -@@ -1634,7 +1727,7 @@ static void asm_mod(ASMState *as, IRIns *ir) - asm_callid(as, ir, IRCALL_lj_vm_modi); - } - --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - static void asm_pow(ASMState *as, IRIns *ir) - { - #if LJ_64 && LJ_HASFFI -@@ -1654,7 +1747,11 @@ static void asm_div(ASMState *as, IRIns *ir) - IRCALL_lj_carith_divu64); - else - #endif -+#if !LJ_SOFTFP - asm_fparith(as, ir, MIPSI_DIV_D); -+#else -+ asm_callid(as, ir, IRCALL_softfp_div); -+#endif - } - #endif - -@@ -1664,6 +1761,13 @@ static void asm_neg(ASMState *as, IRIns *ir) - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, MIPSI_NEG_D); - } else -+#elif LJ_64 /* && LJ_SOFTFP */ -+ if (irt_isnum(ir->t)) { -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -+ emit_dst(as, MIPSI_XOR, dest, left, -+ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); -+ } else - #endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); -@@ -1673,7 +1777,17 @@ static void asm_neg(ASMState *as, IRIns *ir) - } - } - -+#if !LJ_SOFTFP - #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) -+#elif LJ_64 /* && LJ_SOFTFP */ -+static void asm_abs(ASMState *as, IRIns 
*ir) -+{ -+ Reg dest = ra_dest(as, ir, RSET_GPR); -+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -+ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); -+} -+#endif -+ - #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) - #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -@@ -1918,15 +2032,21 @@ static void asm_bror(ASMState *as, IRIns *ir) - } - } - --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP - static void asm_sfpmin_max(ASMState *as, IRIns *ir) - { - CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; -+#if LJ_64 -+ IRRef args[2]; -+ args[0] = ir->op1; -+ args[1] = ir->op2; -+#else - IRRef args[4]; - args[0^LJ_BE] = ir->op1; - args[1^LJ_BE] = (ir+1)->op1; - args[2^LJ_BE] = ir->op2; - args[3^LJ_BE] = (ir+1)->op2; -+#endif - asm_setupresult(as, ir, &ci); - emit_call(as, (void *)ci.func, 0); - ci.func = NULL; -@@ -1936,7 +2056,10 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir) - - static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - { -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ asm_sfpmin_max(as, ir); -+#else - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; -@@ -1947,6 +2070,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); - } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); -+#endif - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); -@@ -1967,18 +2091,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - - /* -- Comparisons --------------------------------------------------------- */ - --#if LJ_32 && LJ_SOFTFP -+#if LJ_SOFTFP - /* SFP comparisons. */ - static void asm_sfpcomp(ASMState *as, IRIns *ir) - { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; -+#if LJ_64 -+ IRRef args[2]; -+ args[0] = ir->op1; -+ args[1] = ir->op2; -+#else - IRRef args[4]; - args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; - args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; -+#endif - -- for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { -+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) - rset_clear(drop, r); -@@ -2032,11 +2162,15 @@ static void asm_comp(ASMState *as, IRIns *ir) - { - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ - IROp op = ir->o; -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ asm_sfpcomp(as, ir); -+#else - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); -+#endif - } else { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (op == IR_ABC) op = IR_UGT; -@@ -2068,9 +2202,13 @@ static void asm_equal(ASMState *as, IRIns *ir) - Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? - RSET_FPR : RSET_GPR); - right = (left >> 8); left &= 255; -- if (!LJ_SOFTFP && irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ asm_sfpcomp(as, ir); -+#else - asm_guard(as, (ir->o & 1) ? 
MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); -+#endif - } else { - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); - } -@@ -2263,7 +2401,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { --#if LJ_SOFTFP -+#if LJ_SOFTFP32 - Reg tmp; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ -@@ -2272,6 +2410,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); -+#elif LJ_SOFTFP /* && LJ_64 */ -+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); -+ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); - #else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); -diff --git a/src/lj_crecord.c b/src/lj_crecord.c -index e32ae23..fd59e28 100644 ---- a/src/lj_crecord.c -+++ b/src/lj_crecord.c -@@ -212,7 +212,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, - ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); - ml[i].trofs = trofs; - i++; -- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; -+ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; - if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ - rwin = 0; - for ( ; j < i; j++) { -@@ -1130,7 +1130,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, - else - tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); - } -- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { -+ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { - lj_needsplit(J); - } - #if LJ_TARGET_X86 -diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h -index 8a9ee24..bb6593a 100644 ---- a/src/lj_emit_mips.h -+++ b/src/lj_emit_mips.h -@@ -12,6 +12,8 @@ static intptr_t get_k64val(IRIns *ir) - return (intptr_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (intptr_t)ir_kptr(ir); -+ } else if (LJ_SOFTFP && ir->o == IR_KNUM) { -+ return (intptr_t)ir_knum(ir)->u64; - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. 
*/ -diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c -index dfdee2d..849d7a2 100644 ---- a/src/lj_ffrecord.c -+++ b/src/lj_ffrecord.c -@@ -1012,7 +1012,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) - handle_num: - tra = lj_ir_tonum(J, tra); - tr = lj_ir_call(J, id, tr, trsf, tra); -- if (LJ_SOFTFP) lj_needsplit(J); -+ if (LJ_SOFTFP32) lj_needsplit(J); - break; - case STRFMT_STR: - if (!tref_isstr(tra)) { -diff --git a/src/lj_ircall.h b/src/lj_ircall.h -index 973c36e..7312006 100644 ---- a/src/lj_ircall.h -+++ b/src/lj_ircall.h -@@ -51,7 +51,7 @@ typedef struct CCallInfo { - #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) - #define CCI_XA (1u << CCI_XARGS_SHIFT) - --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) - #else - #define CCI_XNARGS(ci) CCI_NARGS((ci)) -@@ -78,13 +78,19 @@ typedef struct CCallInfo { - #define IRCALLCOND_SOFTFP_FFI(x) NULL - #endif - --#if LJ_SOFTFP && LJ_TARGET_MIPS32 -+#if LJ_SOFTFP && LJ_TARGET_MIPS - #define IRCALLCOND_SOFTFP_MIPS(x) x - #else - #define IRCALLCOND_SOFTFP_MIPS(x) NULL - #endif - --#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) -+#if LJ_SOFTFP && LJ_TARGET_MIPS64 -+#define IRCALLCOND_SOFTFP_MIPS64(x) x -+#else -+#define IRCALLCOND_SOFTFP_MIPS64(x) NULL -+#endif -+ -+#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) - - #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) - #define IRCALLCOND_FP64_FFI(x) x -@@ -112,6 +118,14 @@ typedef struct CCallInfo { - #define XA2_FP 0 - #endif - -+#if LJ_SOFTFP32 -+#define XA_FP32 CCI_XA -+#define XA2_FP32 (CCI_XA+CCI_XA) -+#else -+#define XA_FP32 0 -+#define XA2_FP32 0 -+#endif -+ - #if LJ_32 - #define XA_64 CCI_XA - #define XA2_64 (CCI_XA+CCI_XA) -@@ -181,20 +195,21 @@ typedef struct CCallInfo { - _(ANY, pow, 2, N, NUM, XA2_FP) \ - _(ANY, atan2, 2, N, NUM, XA2_FP) \ - _(ANY, ldexp, 2, N, NUM, XA_FP) \ -- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ -- _(SOFTFP, softfp_add, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_div, 4, N, NUM, 0) \ -- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ -+ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ -+ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ - _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ -- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ -- _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \ -- _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \ -+ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ -+ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ -+ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ - _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ - _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ -- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ -- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ -+ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ -+ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ - _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ - _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ - _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ -diff --git a/src/lj_iropt.h b/src/lj_iropt.h -index 73aef0e..a59ba3f 100644 ---- a/src/lj_iropt.h -+++ b/src/lj_iropt.h -@@ -150,7 +150,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); - /* 
Optimization passes. */ - LJ_FUNC void lj_opt_dce(jit_State *J); - LJ_FUNC int lj_opt_loop(jit_State *J); --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - LJ_FUNC void lj_opt_split(jit_State *J); - #else - #define lj_opt_split(J) UNUSED(J) -diff --git a/src/lj_jit.h b/src/lj_jit.h -index 2fa8efc..f37e792 100644 ---- a/src/lj_jit.h -+++ b/src/lj_jit.h -@@ -374,7 +374,7 @@ enum { - ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) - - /* Set/reset flag to activate the SPLIT pass for the current trace. */ --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - #define lj_needsplit(J) (J->needsplit = 1) - #define lj_resetsplit(J) (J->needsplit = 0) - #else -@@ -437,7 +437,7 @@ typedef struct jit_State { - MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ - - PostProc postproc; /* Required post-processing after execution. */ --#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) - uint8_t needsplit; /* Need SPLIT pass. */ - #endif - uint8_t retryrec; /* Retry recording. */ -diff --git a/src/lj_obj.h b/src/lj_obj.h -index 52372c3..c7e4742 100644 ---- a/src/lj_obj.h -+++ b/src/lj_obj.h -@@ -924,6 +924,9 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) - - #if LJ_SOFTFP - LJ_ASMF int32_t lj_vm_tobit(double x); -+#if LJ_TARGET_MIPS64 -+LJ_ASMF int32_t lj_vm_tointg(double x); -+#endif - #endif - - static LJ_AINLINE int32_t lj_num2bit(lua_Number n) -diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c -index fc93520..79ac3cc 100644 ---- a/src/lj_opt_split.c -+++ b/src/lj_opt_split.c -@@ -8,7 +8,7 @@ - - #include "lj_obj.h" - --#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) -+#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) - - #include "lj_err.h" - #include "lj_buf.h" -diff --git a/src/lj_snap.c b/src/lj_snap.c -index bb063c2..44fa379 100644 ---- a/src/lj_snap.c -+++ b/src/lj_snap.c -@@ -93,7 +93,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) - (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) - sn |= SNAP_NORESTORE; - } -- if (LJ_SOFTFP && irt_isnum(ir->t)) -+ if (LJ_SOFTFP32 && irt_isnum(ir->t)) - sn |= SNAP_SOFTFPNUM; - map[n++] = sn; - } -@@ -374,7 +374,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) - break; - } - } -- } else if (LJ_SOFTFP && ir->o == IR_HIOP) { -+ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { - ref++; - } else if (ir->o == IR_PVAL) { - ref = ir->op1 + REF_BIAS; -@@ -486,7 +486,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) - } else { - IRType t = irt_type(ir->t); - uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; -- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; -+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; - if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); - tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); - } -@@ -520,7 +520,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) - if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { - if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) - snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); -- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && -+ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && - irs+1 < irlast && (irs+1)->o == IR_HIOP) - snap_pref(J, T, map, nent, seen, (irs+1)->op2); - } -@@ -579,10 +579,10 @@ void lj_snap_replay(jit_State *J, GCtrace *T) - lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); - val = snap_pref(J, T, map, nent, seen, irc->op1); - val = 
emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); -- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && -+ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && - irs+1 < irlast && (irs+1)->o == IR_HIOP) { - IRType t = IRT_I64; -- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) -+ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) - t = IRT_NUM; - lj_needsplit(J); - if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { -@@ -635,7 +635,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, - int32_t *sps = &ex->spill[regsp_spill(rs)]; - if (irt_isinteger(t)) { - setintV(o, *sps); --#if !LJ_SOFTFP -+#if !LJ_SOFTFP32 - } else if (irt_isnum(t)) { - o->u64 = *(uint64_t *)sps; - #endif -@@ -660,6 +660,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, - #if !LJ_SOFTFP - } else if (irt_isnum(t)) { - setnumV(o, ex->fpr[r-RID_MIN_FPR]); -+#elif LJ_64 /* && LJ_SOFTFP */ -+ } else if (irt_isnum(t)) { -+ o->u64 = ex->gpr[r-RID_MIN_GPR]; - #endif - #if LJ_64 && !LJ_GC64 - } else if (irt_is64(t)) { -@@ -813,7 +816,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, - val = lj_tab_set(J->L, t, &tmp); - /* NOBARRIER: The table is new (marked white). */ - snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); -- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { -+ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { - snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); - val->u32.hi = tmp.u32.lo; - } -@@ -874,7 +877,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) - continue; - } - snap_restoreval(J, T, ex, snapno, rfilt, ref, o); -- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { -+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { - TValue tmp; - snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); - o->u32.hi = tmp.u32.lo; -diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc -index c06270a..75b38de 100644 ---- a/src/vm_mips64.dasc -+++ b/src/vm_mips64.dasc -@@ -1980,6 +1980,38 @@ static void build_subroutines(BuildCtx *ctx) - |1: - | jr ra - |. move CRET1, r0 -+ | -+ |// FP number to int conversion with a check for soft-float. -+ |// Modifies CARG1, CRET1, CRET2, TMP0, AT. -+ |->vm_tointg: -+ |.if JIT -+ | dsll CRET2, CARG1, 1 -+ | beqz CRET2, >2 -+ |. li TMP0, 1076 -+ | dsrl AT, CRET2, 53 -+ | dsubu TMP0, TMP0, AT -+ | sltiu AT, TMP0, 54 -+ | beqz AT, >1 -+ |. dextm CRET2, CRET2, 0, 20 -+ | dinsu CRET2, AT, 21, 21 -+ | slt AT, CARG1, r0 -+ | dsrlv CRET1, CRET2, TMP0 -+ | dsubu CARG1, r0, CRET1 -+ | movn CRET1, CARG1, AT -+ | li CARG1, 64 -+ | subu TMP0, CARG1, TMP0 -+ | dsllv CRET2, CRET2, TMP0 // Integer check. -+ | sextw AT, CRET1 -+ | xor AT, CRET1, AT // Range check. -+ | jr ra -+ |. movz CRET2, AT, CRET2 -+ |1: -+ | jr ra -+ |. li CRET2, 1 -+ |2: -+ | jr ra -+ |. move CRET1, r0 -+ |.endif - |.endif - | - |.macro .ffunc_bit, name -@@ -2665,6 +2697,23 @@ static void build_subroutines(BuildCtx *ctx) - |. li CRET1, 0 - |.endif - | -+ |.macro sfmin_max, name, intins -+ |->vm_sf .. name: -+ |.if JIT and not FPU -+ | move TMP2, ra -+ | bal ->vm_sfcmpolt -+ |. nop -+ | move ra, TMP2 -+ | move TMP0, CRET1 -+ | move CRET1, CARG1 -+ | jr ra -+ |. 
intins CRET1, CARG2, TMP0 -+ |.endif -+ |.endmacro -+ | -+ | sfmin_max min, movz -+ | sfmin_max max, movn -+ | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- --- -2.20.1 - diff --git a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch b/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch deleted file mode 100644 index 133018d..0000000 --- a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch +++ /dev/null @@ -1,26 +0,0 @@ -From b0ecc6dd65a0b40e1868f20719c4f7c4880dc32d Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 8 Jun 2017 00:15:15 +0200 -Subject: [PATCH 06/72] FreeBSD/x64: Avoid changing resource limits, if not - needed. - ---- - src/lj_alloc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_alloc.c b/src/lj_alloc.c -index 95d15d0..9fc761c 100644 ---- a/src/lj_alloc.c -+++ b/src/lj_alloc.c -@@ -343,7 +343,7 @@ static void *CALL_MMAP(size_t size) - } - #endif - --#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 -+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - - #include <sys/resource.h> - --- -2.20.1 - diff --git a/0007-Remove-unused-define.patch b/0007-Remove-unused-define.patch deleted file mode 100644 index c4729e1..0000000 --- a/0007-Remove-unused-define.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 6a71e71c1430e5a8f794a52cb2da66e2693db796 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 11 Jun 2017 10:02:08 +0200 -Subject: [PATCH 07/72] Remove unused define. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Suggested by 罗泽轩. ---- - src/lj_def.h | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/lj_def.h b/src/lj_def.h -index 2d8fff6..e67bb24 100644 ---- a/src/lj_def.h -+++ b/src/lj_def.h -@@ -80,7 +80,6 @@ typedef unsigned int uintptr_t; - #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ - #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ - #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ --#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ - - /* JIT compiler limits. */ - #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ --- -2.20.1 - diff --git a/0008-Modify-fix-for-warning-from-ar.patch b/0008-Modify-fix-for-warning-from-ar.patch deleted file mode 100644 index 4d9b0e4..0000000 --- a/0008-Modify-fix-for-warning-from-ar.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 82151a4514e6538086f3f5e01cb8d4b22287b14f Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 12 Jun 2017 09:24:00 +0200 -Subject: [PATCH 08/72] Modify fix for warning from 'ar'. 
- ---- - src/Makefile | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/Makefile b/src/Makefile -index f7f81a4..24e8c0e 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -208,7 +208,7 @@ TARGET_CC= $(STATIC_CC) - TARGET_STCC= $(STATIC_CC) - TARGET_DYNCC= $(DYNAMIC_CC) - TARGET_LD= $(CROSS)$(CC) --TARGET_AR= $(CROSS)ar rcus 2>/dev/null -+TARGET_AR= $(CROSS)ar rcus - TARGET_STRIP= $(CROSS)strip - - TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) -@@ -293,6 +293,7 @@ ifeq (Windows,$(TARGET_SYS)) - TARGET_XSHLDFLAGS= -shared - TARGET_DYNXLDOPTS= - else -+ TARGET_AR+= 2>/dev/null - ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) - TARGET_XCFLAGS+= -fno-stack-protector - endif --- -2.20.1 - diff --git a/0009-x64-LJ_GC64-Fix-emit_rma.patch b/0009-x64-LJ_GC64-Fix-emit_rma.patch deleted file mode 100644 index ff59f09..0000000 --- a/0009-x64-LJ_GC64-Fix-emit_rma.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 7e662e4f87134f1e84f7bea80933e033c5bf53a3 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 26 Jul 2017 09:52:53 +0200 -Subject: [PATCH 09/72] x64/LJ_GC64: Fix emit_rma(). - ---- - src/lj_emit_x86.h | 24 +++++++++++++++++++++--- - 1 file changed, 21 insertions(+), 3 deletions(-) - -diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h -index 5207f9d..5b139bd 100644 ---- a/src/lj_emit_x86.h -+++ b/src/lj_emit_x86.h -@@ -343,9 +343,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) - emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); - } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { - emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); -- } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { -- emit_rmro(as, xo, rr, rr, 0); -- emit_loadu64(as, rr, (uintptr_t)addr); -+ } else if (!checki32((intptr_t)addr)) { -+ Reg ra = (rr & 15); -+ if (xo != XO_MOV) { -+ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ -+ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; -+ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; -+ ra = RID_DISPATCH; -+ if (checku32(dispaddr)) { -+ emit_loadi(as, ra, (int32_t)dispaddr); -+ } else { /* Full-size 64 bit load. */ -+ MCode *p = as->mcp; -+ *(uint64_t *)(p-8) = dispaddr; -+ p[-9] = (MCode)(XI_MOVri+(ra&7)); -+ p[-10] = 0x48 + ((ra>>3)&1); -+ p -= 10; -+ as->mcp = p; -+ } -+ if (xo == XO_GROUP3b) emit_i8(as, i8); -+ } -+ emit_rmro(as, xo, rr, ra, 0); -+ emit_loadu64(as, ra, (uintptr_t)addr); - } else - #endif - { --- -2.20.1 - diff --git a/0010-PPC-Add-soft-float-support-to-interpreter.patch b/0010-PPC-Add-soft-float-support-to-interpreter.patch deleted file mode 100644 index 52d3638..0000000 --- a/0010-PPC-Add-soft-float-support-to-interpreter.patch +++ /dev/null @@ -1,2761 +0,0 @@ -From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 26 Jul 2017 09:52:19 +0200 -Subject: [PATCH 10/72] PPC: Add soft-float support to interpreter. - -Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -Sponsored by Cisco Systems, Inc. 
---- - src/host/buildvm_asm.c | 2 +- - src/lj_arch.h | 29 +- - src/lj_ccall.c | 38 +- - src/lj_ccall.h | 4 +- - src/lj_ccallback.c | 30 +- - src/lj_frame.h | 2 +- - src/lj_ircall.h | 2 +- - src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++------- - 8 files changed, 1101 insertions(+), 255 deletions(-) - -diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c -index ffd1490..43595b3 100644 ---- a/src/host/buildvm_asm.c -+++ b/src/host/buildvm_asm.c -@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx) - #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) - fprintf(ctx->fp, "\t.section .note.GNU-stack,""," ELFASM_PX "progbits\n"); - #endif --#if LJ_TARGET_PPC && !LJ_TARGET_PS3 -+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP - /* Hard-float ABI. */ - fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); - #endif -diff --git a/src/lj_arch.h b/src/lj_arch.h -index b770564..0145a7c 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -254,6 +254,29 @@ - #else - #define LJ_ARCH_BITS 32 - #define LJ_ARCH_NAME "ppc" -+ -+#if !defined(LJ_ARCH_HASFPU) -+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -+#define LJ_ARCH_HASFPU 0 -+#else -+#define LJ_ARCH_HASFPU 1 -+#endif -+#endif -+ -+#if !defined(LJ_ABI_SOFTFP) -+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -+#define LJ_ABI_SOFTFP 1 -+#else -+#define LJ_ABI_SOFTFP 0 -+#endif -+#endif -+#endif -+ -+#if LJ_ABI_SOFTFP -+#define LJ_ARCH_NOJIT 1 /* NYI */ -+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -+#else -+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE - #endif - - #define LJ_TARGET_PPC 1 -@@ -262,7 +285,6 @@ - #define LJ_TARGET_MASKSHIFT 0 - #define LJ_TARGET_MASKROT 1 - #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ --#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE - - #if LJ_TARGET_CONSOLE - #define LJ_ARCH_PPC32ON64 1 -@@ -415,16 +437,13 @@ - #error "No support for ILP32 model on ARM64" - #endif - #elif LJ_TARGET_PPC --#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) --#error "No support for PowerPC CPUs without double-precision FPU" --#endif - #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE - #error "No support for little-endian PPC32" - #endif - #if LJ_ARCH_PPC64 - #error "No support for PowerPC 64 bit mode (yet)" - #endif --#ifdef __NO_FPRS__ -+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) - #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" - #endif - #elif LJ_TARGET_MIPS32 -diff --git a/src/lj_ccall.c b/src/lj_ccall.c -index 5c252e5..799be48 100644 ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -387,6 +387,24 @@ - #define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -+#define CCALL_HANDLE_GPR \ -+ /* Try to pass argument in GPRs. */ \ -+ if (n > 1) { \ -+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ -+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ -+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -+ else if (ngpr + n > maxgpr) \ -+ ngpr = maxgpr; /* Prevent reordering. */ \ -+ } \ -+ if (ngpr + n <= maxgpr) { \ -+ dp = &cc->gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } \ -+ -+#if LJ_ABI_SOFTFP -+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR -+#else - #define CCALL_HANDLE_REGARG \ - if (isfp) { /* Try to pass argument in FPRs. */ \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ -@@ -395,24 +413,16 @@ - d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ - goto done; \ - } \ -- } else { /* Try to pass argument in GPRs. */ \ -- if (n > 1) { \ -- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). 
*/ \ -- if (ctype_isinteger(d->info)) \ -- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -- else if (ngpr + n > maxgpr) \ -- ngpr = maxgpr; /* Prevent reordering. */ \ -- } \ -- if (ngpr + n <= maxgpr) { \ -- dp = &cc->gpr[ngpr]; \ -- ngpr += n; \ -- goto done; \ -- } \ -+ } else { \ -+ CCALL_HANDLE_GPR \ - } -+#endif - -+#if !LJ_ABI_SOFTFP - #define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ -+#endif - - #elif LJ_TARGET_MIPS32 - /* -- MIPS o32 calling conventions ---------------------------------------- */ -@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, - } - if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ - --#if LJ_TARGET_X64 || LJ_TARGET_PPC -+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) - cc->nfpr = nfpr; /* Required for vararg functions. */ - #endif - cc->nsp = nsp; -diff --git a/src/lj_ccall.h b/src/lj_ccall.h -index 59f6648..6efa48c 100644 ---- a/src/lj_ccall.h -+++ b/src/lj_ccall.h -@@ -86,9 +86,9 @@ typedef union FPRArg { - #elif LJ_TARGET_PPC - - #define CCALL_NARG_GPR 8 --#define CCALL_NARG_FPR 8 -+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) - #define CCALL_NRET_GPR 4 /* For complex double. */ --#define CCALL_NRET_FPR 1 -+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) - #define CCALL_SPS_EXTRA 4 - #define CCALL_SPS_FREE 0 - -diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c -index 846827b..03494a7 100644 ---- a/src/lj_ccallback.c -+++ b/src/lj_ccallback.c -@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *cts) - - #elif LJ_TARGET_PPC - -+#define CALLBACK_HANDLE_GPR \ -+ if (n > 1) { \ -+ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ -+ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \ -+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -+ } \ -+ if (ngpr + n <= maxgpr) { \ -+ sp = &cts->cb.gpr[ngpr]; \ -+ ngpr += n; \ -+ goto done; \ -+ } -+ -+#if LJ_ABI_SOFTFP -+#define CALLBACK_HANDLE_REGARG \ -+ CALLBACK_HANDLE_GPR \ -+ UNUSED(isfp); -+#else - #define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ -@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *cts) - goto done; \ - } \ - } else { /* Try to pass argument in GPRs. */ \ -- if (n > 1) { \ -- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ -- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ -- } \ -- if (ngpr + n <= maxgpr) { \ -- sp = &cts->cb.gpr[ngpr]; \ -- ngpr += n; \ -- goto done; \ -- } \ -+ CALLBACK_HANDLE_GPR \ - } -+#endif - -+#if !LJ_ABI_SOFTFP - #define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ -+#endif - - #elif LJ_TARGET_MIPS32 - -diff --git a/src/lj_frame.h b/src/lj_frame.h -index 19c49a4..04cb5a3 100644 ---- a/src/lj_frame.h -+++ b/src/lj_frame.h -@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ - #define CFRAME_OFS_L 36 - #define CFRAME_OFS_PC 32 - #define CFRAME_OFS_MULTRES 28 --#define CFRAME_SIZE 272 -+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 
272 : 128) - #define CFRAME_SHIFT_MULTRES 3 - #endif - #elif LJ_TARGET_MIPS32 -diff --git a/src/lj_ircall.h b/src/lj_ircall.h -index 7312006..9b3883b 100644 ---- a/src/lj_ircall.h -+++ b/src/lj_ircall.h -@@ -287,7 +287,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; - #define fp64_f2l __aeabi_f2lz - #define fp64_f2ul __aeabi_f2ulz - #endif --#elif LJ_TARGET_MIPS -+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC - #define softfp_add __adddf3 - #define softfp_sub __subdf3 - #define softfp_mul __muldf3 -diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc -index b4260eb..0839668 100644 ---- a/src/vm_ppc.dasc -+++ b/src/vm_ppc.dasc -@@ -103,6 +103,18 @@ - |// Fixed register assignments for the interpreter. - |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) - | -+|.macro .FPU, a, b -+|.if FPU -+| a, b -+|.endif -+|.endmacro -+| -+|.macro .FPU, a, b, c -+|.if FPU -+| a, b, c -+|.endif -+|.endmacro -+| - |// The following must be C callee-save (but BASE is often refetched). - |.define BASE, r14 // Base of current Lua stack frame. - |.define KBASE, r15 // Constants of current Lua function. -@@ -116,8 +128,10 @@ - |.define TISNUM, r22 - |.define TISNIL, r23 - |.define ZERO, r24 -+|.if FPU - |.define TOBIT, f30 // 2^52 + 2^51. - |.define TONUM, f31 // 2^52 + 2^51 + 2^31. -+|.endif - | - |// The following temporaries are not saved across C calls, except for RA. - |.define RA, r20 // Callee-save. -@@ -133,6 +147,7 @@ - | - |// Saved temporaries. - |.define SAVE0, r21 -+|.define SAVE1, r25 - | - |// Calling conventions. - |.define CARG1, r3 -@@ -141,8 +156,10 @@ - |.define CARG4, r6 // Overlaps TMP3. - |.define CARG5, r7 // Overlaps INS. - | -+|.if FPU - |.define FARG1, f1 - |.define FARG2, f2 -+|.endif - | - |.define CRET1, r3 - |.define CRET2, r4 -@@ -213,10 +230,16 @@ - |.endif - |.else - | -+|.if FPU - |.define SAVE_LR, 276(sp) - |.define CFRAME_SPACE, 272 // Delta for sp. - |// Back chain for sp: 272(sp) <-- sp entering interpreter - |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. -+|.else -+|.define SAVE_LR, 132(sp) -+|.define CFRAME_SPACE, 128 // Delta for sp. -+|// Back chain for sp: 128(sp) <-- sp entering interpreter -+|.endif - |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. - |.define SAVE_CR, 52(sp) // 32 bit CR save. - |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. -@@ -226,16 +249,25 @@ - |.define SAVE_PC, 32(sp) - |.define SAVE_MULTRES, 28(sp) - |.define UNUSED1, 24(sp) -+|.if FPU - |.define TMPD_LO, 20(sp) - |.define TMPD_HI, 16(sp) - |.define TONUM_LO, 12(sp) - |.define TONUM_HI, 8(sp) -+|.else -+|.define SFSAVE_4, 20(sp) -+|.define SFSAVE_3, 16(sp) -+|.define SFSAVE_2, 12(sp) -+|.define SFSAVE_1, 8(sp) -+|.endif - |// Next frame lr: 4(sp) - |// Back chain for sp: 0(sp) <-- sp while in interpreter - | -+|.if FPU - |.define TMPD_BLO, 23(sp) - |.define TMPD, TMPD_HI - |.define TONUM_D, TONUM_HI -+|.endif - | - |.endif - | -@@ -245,7 +277,7 @@ - |.else - | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) - |.endif --| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) - |.endmacro - |.macro rest_, reg - |.if GPR64 -@@ -253,7 +285,7 @@ - |.else - | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) - |.endif --| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) - |.endmacro - | - |.macro saveregs -@@ -323,6 +355,7 @@ - |// Trap for not-yet-implemented parts. - |.macro NYI; tw 4, sp, sp; .endmacro - | -+|.if FPU - |// int/FP conversions. 
- |.macro tonum_i, freg, reg - | xoris reg, reg, 0x8000 -@@ -346,6 +379,7 @@ - |.macro toint, reg, freg - | toint reg, freg, freg - |.endmacro -+|.endif - | - |//----------------------------------------------------------------------- - | -@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx) - | beq >2 - |1: - | addic. TMP1, TMP1, -8 -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ |.endif - | addi RA, RA, 8 -+ |.if FPU - | stfd f0, 0(BASE) -+ |.else -+ | stw CARG1, 0(BASE) -+ | stw CARG2, 4(BASE) -+ |.endif - | addi BASE, BASE, 8 - | bney <1 - | -@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx) - | .toc ld TOCREG, SAVE_TOC - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp BASE, L->base -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | li ZERO, 0 -- | stw TMP3, TMPD -+ | .FPU stw TMP3, TMPD - | li TMP1, LJ_TFALSE -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | li TISNIL, LJ_TNIL - | li_vmstate INTERP -- | lfs TOBIT, TMPD -+ | .FPU lfs TOBIT, TMPD - | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | la RA, -8(BASE) // Results start at BASE-8. -- | stw TMP3, TMPD -+ | .FPU stw TMP3, TMPD - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. - | li RD, 16 // 2 results: false + error message. - | st_vmstate -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | b ->vm_returnc - | - |//----------------------------------------------------------------------- -@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *ctx) - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lwz PC, FRAME_PC(BASE) -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stb CARG3, L->status -- | stw TMP3, TMPD -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | lfs TOBIT, TMPD -+ | .FPU stw TMP3, TMPD -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU lfs TOBIT, TMPD - | sub RD, TMP1, BASE -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | addi RD, RD, 8 -- | stw TMP0, TONUM_HI -+ | .FPU stw TMP0, TONUM_HI - | li_vmstate INTERP - | li ZERO, 0 - | st_vmstate - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | beq ->BC_RET_Z - | b ->vm_return -@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *ctx) - | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | add PC, PC, BASE -- | stw TMP3, TMPD -+ | .FPU stw TMP3, TMPD - | li ZERO, 0 -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | lfs TOBIT, TMPD -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
-+ | .FPU lfs TOBIT, TMPD - | sub PC, PC, TMP2 // PC = frame delta + frame type -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | sub NARGS8:RC, TMP1, BASE -- | stw TMP0, TONUM_HI -+ | .FPU stw TMP0, TONUM_HI - | li_vmstate INTERP -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | st_vmstate - | -@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx) - | lwz INS, -4(PC) - | subi CARG2, RB, 16 - | decode_RB8 SAVE0, INS -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz TMP2, 0(RA) -+ | lwz TMP3, 4(RA) -+ |.endif - | add TMP1, BASE, SAVE0 - | stp BASE, L->base - | cmplw TMP1, CARG2 - | sub CARG3, CARG2, TMP1 - | decode_RA8 RA, INS -+ |.if FPU - | stfd f0, 0(CARG2) -+ |.else -+ | stw TMP2, 0(CARG2) -+ | stw TMP3, 4(CARG2) -+ |.endif - | bney ->BC_CAT_Z -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux TMP2, RA, BASE -+ | stw TMP3, 4(RA) -+ |.endif - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- -@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | beq >3 -+ |.if FPU - | lfd f0, 0(CRET1) -+ |.else -+ | lwz TMP0, 0(CRET1) -+ | lwz TMP1, 4(CRET1) -+ |.endif - | ins_next1 -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - | - |3: // Call __index metamethod. -@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *ctx) - | // Returns cTValue * or NULL. - | cmplwi CRET1, 0 - | beq >1 -+ |.if FPU - | lfd f14, 0(CRET1) -+ |.else -+ | lwz SAVE0, 0(CRET1) -+ | lwz SAVE1, 4(CRET1) -+ |.endif - | b ->BC_TGETR_Z - |1: - | stwx TISNIL, BASE, RA -@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx) - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 -+ |.if FPU - | lfdx f0, BASE, RA -+ |.else -+ | lwzux TMP2, RA, BASE -+ | lwz TMP3, 4(RA) -+ |.endif - | beq >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 -+ |.if FPU - | stfd f0, 0(CRET1) -+ |.else -+ | stw TMP2, 0(CRET1) -+ | stw TMP3, 4(CRET1) -+ |.endif - | ins_next2 - | - |3: // Call __newindex metamethod. -@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *ctx) - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 24 // 3 args for func(t, k, v) -+ |.if FPU - | stfd f0, 16(BASE) // Copy value to third argument. -+ |.else -+ | stw TMP2, 16(BASE) -+ | stw TMP3, 20(BASE) -+ |.endif - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: -@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *ctx) - | stw PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. 
-+ |.if FPU - | stfd f14, 0(CRET1) -+ |.else -+ | stw SAVE0, 0(CRET1) -+ | stw SAVE1, 4(CRET1) -+ |.endif - | b ->cont_nop - | - |//-- Comparison metamethods --------------------------------------------- -@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx) - | - |->cont_ra: // RA = resultptr - | lwz INS, -4(PC) -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ |.endif - | decode_RA8 TMP1, INS -+ |.if FPU - | stfdx f0, BASE, TMP1 -+ |.else -+ | stwux CARG1, TMP1, BASE -+ | stw CARG2, 4(TMP1) -+ |.endif - | b ->cont_nop - | - |->cont_condt: // RA = resultptr -@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx) - |.macro .ffunc_n, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 -- | lwz CARG3, 0(BASE) -+ | lwz CARG1, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ |.endif - | blt ->fff_fallback -- | checknum CARG3; bge ->fff_fallback -+ | checknum CARG1; bge ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 -- | lwz CARG3, 0(BASE) -+ | lwz CARG1, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -- | lwz CARG4, 8(BASE) -+ | lwz CARG3, 8(BASE) - | lfd FARG2, 8(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ | lwz CARG3, 8(BASE) -+ | lwz CARG4, 12(BASE) -+ |.endif - | blt ->fff_fallback -+ | checknum CARG1; bge ->fff_fallback - | checknum CARG3; bge ->fff_fallback -- | checknum CARG4; bge ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. -@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx) - | bge cr1, ->fff_fallback - | stw CARG3, 0(RA) - | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. -+ | addi TMP1, BASE, 8 -+ | add TMP2, RA, NARGS8:RC - | stw CARG1, 4(RA) - | beq ->fff_res // Done if exactly 1 argument. -- | li TMP1, 8 -- | subi RC, RC, 8 - |1: -- | cmplw TMP1, RC -- | lfdx f0, BASE, TMP1 -- | stfdx f0, RA, TMP1 -+ | cmplw TMP1, TMP2 -+ |.if FPU -+ | lfd f0, 0(TMP1) -+ | stfd f0, 0(TMP1) -+ |.else -+ | lwz CARG1, 0(TMP1) -+ | lwz CARG2, 4(TMP1) -+ | stw CARG1, -8(TMP1) -+ | stw CARG2, -4(TMP1) -+ |.endif - | addi TMP1, TMP1, 8 - | bney <1 - | b ->fff_res -@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx) - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 3 -+ |.if FPU - | la TMP2, CFUNC:RB->upvalue - | lfdx FARG1, TMP2, TMP1 -+ |.else -+ | add TMP1, CFUNC:RB, TMP1 -+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi -+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo -+ |.endif - | b ->fff_resn - | - |//-- Base library: getters and setters --------------------------------- -@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx) - | mr CARG1, L - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. -+ |.if FPU - | lfd FARG1, 0(CRET1) -+ |.else -+ | lwz CARG2, 4(CRET1) -+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. -+ |.endif - | b ->fff_resn - | - |//-- Base library: conversions ------------------------------------------ -@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx) - | // Only handles the number case inline (without a base argument). - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ |.endif - | bne ->fff_fallback // Exactly one argument. 
- | checknum CARG1; bgt ->fff_fallback - | b ->fff_resn -@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx) - | cmplwi CRET1, 0 - | li CARG3, LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. -- | lfd f0, 8(BASE) // Copy key and value to results. - | la RA, -8(BASE) -+ |.if FPU -+ | lfd f0, 8(BASE) // Copy key and value to results. - | lfd f1, 16(BASE) - | stfd f0, 0(RA) -- | li RD, (2+1)*8 - | stfd f1, 8(RA) -+ |.else -+ | lwz CARG1, 8(BASE) -+ | lwz CARG2, 12(BASE) -+ | lwz CARG3, 16(BASE) -+ | lwz CARG4, 20(BASE) -+ | stw CARG1, 0(RA) -+ | stw CARG2, 4(RA) -+ | stw CARG3, 8(RA) -+ | stw CARG4, 12(RA) -+ |.endif -+ | li RD, (2+1)*8 - | b ->fff_res - | - |.ffunc_1 pairs -@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx) - | bne ->fff_fallback - #if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback - #else -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | la RA, -8(BASE) - #endif - | stw TISNIL, 8(BASE) - | li RD, (3+1)*8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw TMP0, 0(RA) -+ | stw TMP1, 4(RA) -+ |.endif - | b ->fff_res - | - |.ffunc ipairs_aux -@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx) - | stfd FARG2, 0(RA) - |.endif - | ble >2 // Not in array part? -+ |.if FPU - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 -+ |.else -+ | lwzux TMP2, TMP1, TMP3 -+ | lwz TMP3, 4(TMP1) -+ |.endif - |1: - | checknil TMP2 - | li RD, (0+1)*8 - | beq ->fff_res // End of iteration, return 0 results. - | li RD, (2+1)*8 -+ |.if FPU - | stfd f0, 8(RA) -+ |.else -+ | stw TMP2, 8(RA) -+ | stw TMP3, 12(RA) -+ |.endif - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. 
- | lwz TMP0, TAB:CARG1->hmask -@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx) - | li RD, (0+1)*8 - | beq ->fff_res - | lwz TMP2, 0(CRET1) -+ |.if FPU - | lfd f0, 0(CRET1) -+ |.else -+ | lwz TMP3, 4(CRET1) -+ |.endif - | b <1 - | - |.ffunc_1 ipairs -@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx) - | bne ->fff_fallback - #if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback - #else -+ |.if FPU - | lfd f0, CFUNC:RB->upvalue[0] -+ |.else -+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi -+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo -+ |.endif - | la RA, -8(BASE) - #endif - |.if DUALNUM -@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx) - |.endif - | stw ZERO, 12(BASE) - | li RD, (3+1)*8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw TMP0, 0(RA) -+ | stw TMP1, 4(RA) -+ |.endif - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- -@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx) - | - |.ffunc xpcall - | cmplwi NARGS8:RC, 16 -- | lwz CARG4, 8(BASE) -+ | lwz CARG3, 8(BASE) -+ |.if FPU - | lfd FARG2, 8(BASE) - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ | lwz CARG4, 12(BASE) -+ |.endif - | blt ->fff_fallback - | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | mr TMP2, BASE -- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. -+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. - | la BASE, 16(BASE) - | // Remember active hook before pcall. - | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 -+ |.if FPU - | stfd FARG2, 0(TMP2) // Swap function and traceback. -- | subi NARGS8:RC, NARGS8:RC, 16 - | stfd FARG1, 8(TMP2) -+ |.else -+ | stw CARG3, 0(TMP2) -+ | stw CARG4, 4(TMP2) -+ | stw CARG1, 8(TMP2) -+ | stw CARG2, 12(TMP2) -+ |.endif -+ | subi NARGS8:RC, NARGS8:RC, 16 - | addi PC, TMP1, 16+FRAME_PCALL - | b ->vm_call_dispatch - | -@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx) - | stp BASE, L->top - |2: // Move args to coroutine. - | cmpw TMP1, NARGS8:RC -+ |.if FPU - | lfdx f0, BASE, TMP1 -+ |.else -+ | add CARG3, BASE, TMP1 -+ | lwz TMP2, 0(CARG3) -+ | lwz TMP3, 4(CARG3) -+ |.endif - | beq >3 -+ |.if FPU - | stfdx f0, CARG2, TMP1 -+ |.else -+ | add CARG3, CARG2, TMP1 -+ | stw TMP2, 0(CARG3) -+ | stw TMP3, 4(CARG3) -+ |.endif - | addi TMP1, TMP1, 8 - | b <2 - |3: -@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx) - | stp TMP2, L:SAVE0->top // Clear coroutine stack. - |5: // Move results from coroutine. - | cmplw TMP1, TMP3 -+ |.if FPU - | lfdx f0, TMP2, TMP1 - | stfdx f0, BASE, TMP1 -+ |.else -+ | add CARG3, TMP2, TMP1 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ | add CARG3, BASE, TMP1 -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | addi TMP1, TMP1, 8 - | bne <5 - |6: -@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx) - | andix. TMP0, PC, FRAME_TYPE - | la TMP3, -8(TMP3) - | li TMP1, LJ_TFALSE -+ |.if FPU - | lfd f0, 0(TMP3) -+ |.else -+ | lwz CARG1, 0(TMP3) -+ | lwz CARG2, 4(TMP3) -+ |.endif - | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. - | la RA, -8(BASE) -+ |.if FPU - | stfd f0, 0(BASE) // Copy error message. 
-+ |.else -+ | stw CARG1, 0(BASE) // Copy error message. -+ | stw CARG2, 4(BASE) -+ |.endif - | b <7 - |.else - | mr CARG1, L -@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx) - | lus CARG1, 0x8000 // -(2^31). - | beqy ->fff_resi - |5: -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ |.endif - | blex func - | b ->fff_resn - |.endmacro -@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx) - | - |.ffunc math_log - | cmplwi NARGS8:RC, 8 -- | lwz CARG3, 0(BASE) -- | lfd FARG1, 0(BASE) -+ | lwz CARG1, 0(BASE) - | bne ->fff_fallback // Need exactly 1 argument. -- | checknum CARG3; bge ->fff_fallback -+ | checknum CARG1; bge ->fff_fallback -+ |.if FPU -+ | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG2, 4(BASE) -+ |.endif - | blex log - | b ->fff_resn - | -@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *ctx) - |.if DUALNUM - |.ffunc math_ldexp - | cmplwi NARGS8:RC, 16 -- | lwz CARG3, 0(BASE) -+ | lwz TMP0, 0(BASE) -+ |.if FPU - | lfd FARG1, 0(BASE) -- | lwz CARG4, 8(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ |.endif -+ | lwz TMP1, 8(BASE) - |.if GPR64 - | lwz CARG2, 12(BASE) -- |.else -+ |.elif FPU - | lwz CARG1, 12(BASE) -+ |.else -+ | lwz CARG3, 12(BASE) - |.endif - | blt ->fff_fallback -- | checknum CARG3; bge ->fff_fallback -- | checknum CARG4; bne ->fff_fallback -+ | checknum TMP0; bge ->fff_fallback -+ | checknum TMP1; bne ->fff_fallback - |.else - |.ffunc_nn math_ldexp - |.if GPR64 -@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx) - |.ffunc_n math_frexp - |.if GPR64 - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) -- |.else -+ |.elif FPU - | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) -+ |.else -+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex frexp -@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx) - |.if not DUALNUM - | tonum_i FARG2, TMP1 - |.endif -+ |.if FPU - | stfd FARG1, 0(RA) -+ |.else -+ | stw CRET1, 0(RA) -+ | stw CRET2, 4(RA) -+ |.endif - | li RD, (2+1)*8 - |.if DUALNUM - | stw TISNUM, 8(RA) -@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx) - |.ffunc_n math_modf - |.if GPR64 - | la CARG2, -8(BASE) -- |.else -+ |.elif FPU - | la CARG1, -8(BASE) -+ |.else -+ | la CARG3, -8(BASE) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex modf - | la RA, -8(BASE) -+ |.if FPU - | stfd FARG1, 0(BASE) -+ |.else -+ | stw CRET1, 0(BASE) -+ | stw CRET2, 4(BASE) -+ |.endif - | li RD, (2+1)*8 - | b ->fff_res - | -@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx) - |.if DUALNUM - | .ffunc_1 name - | checknum CARG3 -- | addi TMP1, BASE, 8 -- | add TMP2, BASE, NARGS8:RC -+ | addi SAVE0, BASE, 8 -+ | add SAVE1, BASE, NARGS8:RC - | bne >4 - |1: // Handle integers. -- | lwz CARG4, 0(TMP1) -- | cmplw cr1, TMP1, TMP2 -- | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 0(SAVE0) -+ | cmplw cr1, SAVE0, SAVE1 -+ | lwz CARG2, 4(SAVE0) - | bge cr1, ->fff_resi - | checknum CARG4 - | xoris TMP0, CARG1, 0x8000 -@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx) - |.if GPR64 - | rldicl CARG1, CARG1, 0, 32 - |.endif -- | addi TMP1, TMP1, 8 -+ | addi SAVE0, SAVE0, 8 - | b <1 - |3: - | bge ->fff_fallback - | // Convert intermediate result to number and continue below. 
-+ |.if FPU - | tonum_i FARG1, CARG1 -- | lfd FARG2, 0(TMP1) -+ | lfd FARG2, 0(SAVE0) -+ |.else -+ | mr CARG2, CARG1 -+ | bl ->vm_sfi2d_1 -+ | lwz CARG3, 0(SAVE0) -+ | lwz CARG4, 4(SAVE0) -+ |.endif - | b >6 - |4: -+ |.if FPU - | lfd FARG1, 0(BASE) -+ |.else -+ | lwz CARG1, 0(BASE) -+ | lwz CARG2, 4(BASE) -+ |.endif - | bge ->fff_fallback - |5: // Handle numbers. -- | lwz CARG4, 0(TMP1) -- | cmplw cr1, TMP1, TMP2 -- | lfd FARG2, 0(TMP1) -+ | lwz CARG3, 0(SAVE0) -+ | cmplw cr1, SAVE0, SAVE1 -+ |.if FPU -+ | lfd FARG2, 0(SAVE0) -+ |.else -+ | lwz CARG4, 4(SAVE0) -+ |.endif - | bge cr1, ->fff_resn -- | checknum CARG4; bge >7 -+ | checknum CARG3; bge >7 - |6: -+ | addi SAVE0, SAVE0, 8 -+ |.if FPU - | fsub f0, FARG1, FARG2 -- | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif -+ |.else -+ | stw CARG1, SFSAVE_1 -+ | stw CARG2, SFSAVE_2 -+ | stw CARG3, SFSAVE_3 -+ | stw CARG4, SFSAVE_4 -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ |.if ismax -+ | blt >8 -+ |.else -+ | bge >8 -+ |.endif -+ | lwz CARG1, SFSAVE_1 -+ | lwz CARG2, SFSAVE_2 -+ | b <5 -+ |8: -+ | lwz CARG1, SFSAVE_3 -+ | lwz CARG2, SFSAVE_4 -+ |.endif - | b <5 - |7: // Convert integer to number and continue above. -- | lwz CARG2, 4(TMP1) -+ | lwz CARG3, 4(SAVE0) - | bne ->fff_fallback -- | tonum_i FARG2, CARG2 -+ |.if FPU -+ | tonum_i FARG2, CARG3 -+ |.else -+ | bl ->vm_sfi2d_2 -+ |.endif - | b <6 - |.else - | .ffunc_n name -@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx) - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name -- | addi TMP1, BASE, 8 -- | add TMP2, BASE, NARGS8:RC -+ | addi SAVE0, BASE, 8 -+ | add SAVE1, BASE, NARGS8:RC - |1: -- | lwz CARG4, 0(TMP1) -- | cmplw cr1, TMP1, TMP2 -+ | lwz CARG4, 0(SAVE0) -+ | cmplw cr1, SAVE0, SAVE1 - |.if DUALNUM -- | lwz CARG2, 4(TMP1) -+ | lwz CARG2, 4(SAVE0) - |.else -- | lfd FARG1, 0(TMP1) -+ | lfd FARG1, 0(SAVE0) - |.endif - | bgey cr1, ->fff_resi - | checknum CARG4 - |.if DUALNUM -+ |.if FPU - | bnel ->fff_bitop_fb - |.else -+ | beq >3 -+ | stw CARG1, SFSAVE_1 -+ | bl ->fff_bitop_fb -+ | mr CARG2, CARG1 -+ | lwz CARG1, SFSAVE_1 -+ |3: -+ |.endif -+ |.else - | fadd FARG1, FARG1, TOBIT - | bge ->fff_fallback - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - |.endif - | ins CARG1, CARG1, CARG2 -- | addi TMP1, TMP1, 8 -+ | addi SAVE0, SAVE0, 8 - | b <1 - |.endmacro - | -@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx) - |.macro .ffunc_bit_sh, name, ins, shmod - |.if DUALNUM - | .ffunc_2 bit_..name -+ |.if FPU - | checknum CARG3; bnel ->fff_tobit_fb -+ |.else -+ | checknum CARG3; beq >1 -+ | bl ->fff_tobit_fb -+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2. -+ |1: -+ |.endif - | // Note: no inline conversion from number for 2nd argument! - | checknum CARG4; bne ->fff_fallback - |.else -@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx) - |->fff_resn: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) -+ |.if FPU - | stfd FARG1, -8(BASE) -+ |.else -+ | stw CARG1, -8(BASE) -+ | stw CARG2, -4(BASE) -+ |.endif - | b ->fff_res1 - | - |// Fallback FP number to bit conversion. - |->fff_tobit_fb: - |.if DUALNUM -+ |.if FPU - | lfd FARG1, 0(BASE) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | blr -+ |.else -+ | bgt ->fff_fallback -+ | mr CARG2, CARG1 -+ | mr CARG1, CARG3 -+ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. 
-+ |->vm_tobit: -+ | slwi TMP2, CARG1, 1 -+ | addis TMP2, TMP2, 0x0020 -+ | cmpwi TMP2, 0 -+ | bge >2 -+ | li TMP1, 0x3e0 -+ | srawi TMP2, TMP2, 21 -+ | not TMP1, TMP1 -+ | sub. TMP2, TMP1, TMP2 -+ | cmpwi cr7, CARG1, 0 -+ | blt >1 -+ | slwi TMP1, CARG1, 11 -+ | srwi TMP0, CARG2, 21 -+ | oris TMP1, TMP1, 0x8000 -+ | or TMP1, TMP1, TMP0 -+ | srw CARG1, TMP1, TMP2 -+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. -+ | neg CARG1, CARG1 -+ | blr -+ |1: -+ | addi TMP2, TMP2, 21 -+ | srw TMP1, CARG2, TMP2 -+ | slwi CARG2, CARG1, 12 -+ | subfic TMP2, TMP2, 20 -+ | slw TMP0, CARG2, TMP2 -+ | or CARG1, TMP1, TMP0 -+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. -+ | neg CARG1, CARG1 -+ | blr -+ |2: -+ | li CARG1, 0 -+ | blr -+ |.endif - |.endif - |->fff_bitop_fb: - |.if DUALNUM -- | lfd FARG1, 0(TMP1) -+ |.if FPU -+ | lfd FARG1, 0(SAVE0) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - | blr -+ |.else -+ | bgt ->fff_fallback -+ | mr CARG1, CARG4 -+ | b ->vm_tobit -+ |.endif - |.endif - | - |//----------------------------------------------------------------------- -@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *ctx) - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ |.endif - | addic. TMP1, TMP1, -8 - | addi RA, RA, 8 -+ |.if FPU - | stfdx f0, BASE, RC -+ |.else -+ | add CARG3, BASE, RC -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | addi RC, RC, 8 - | bne <1 - |2: -@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *ctx) - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b, c, d -+ |.if FPU - | stfd f..a, 16+a*8(sp) - | stfd f..b, 16+b*8(sp) - | stfd f..c, 16+c*8(sp) - | stfd f..d, 16+d*8(sp) -+ |.endif - |.endmacro - | - |->vm_exit_handler: -@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *ctx) - | lwz KBASE, PC2PROTO(k)(TMP1) - | // Setup type comparison constants. - | li TISNUM, LJ_TISNUM -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -- | stw TMP3, TMPD -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU stw TMP3, TMPD - | li ZERO, 0 -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | lfs TOBIT, TMPD -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -+ | .FPU lfs TOBIT, TMPD -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | li TISNIL, LJ_TNIL -- | stw TMP0, TONUM_HI -- | lfs TONUM, TMPD -+ | .FPU stw TMP0, TONUM_HI -+ | .FPU lfs TONUM, TMPD - | // Modified copy of ins_next which handles function header dispatch, too. - | lwz INS, 0(PC) - | addi PC, PC, 4 -@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx) - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | -- |// NYI: Use internal implementations of floor, ceil, trunc. -+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. -+ | -+ |.macro sfi2d, AHI, ALO -+ |.if not FPU -+ | mr. AHI, ALO -+ | bclr 12, 2 // Handle zero first. -+ | srawi TMP0, ALO, 31 -+ | xor TMP1, ALO, TMP0 -+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. -+ | cntlzw AHI, TMP1 -+ | andix. TMP0, TMP0, 0x800 // Mask sign bit. -+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. 
-+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. -+ | slwi ALO, TMP1, 21 -+ | or AHI, AHI, TMP0 // Sign | Exponent. -+ | srwi TMP1, TMP1, 11 -+ | slwi AHI, AHI, 20 // Align left. -+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent. -+ | blr -+ |.endif -+ |.endmacro -+ | -+ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. -+ |->vm_sfi2d_1: -+ | sfi2d CARG1, CARG2 -+ | -+ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. -+ |->vm_sfi2d_2: -+ | sfi2d CARG3, CARG4 - | - |->vm_modi: - | divwo. TMP0, CARG1, CARG2 -@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx) - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] -- | stfd f1, CTSTATE->cb.fpr[0] -+ | .FPU stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] -- | stfd f2, CTSTATE->cb.fpr[1] -+ | .FPU stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] -- | stfd f3, CTSTATE->cb.fpr[2] -+ | .FPU stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] -- | stfd f4, CTSTATE->cb.fpr[3] -+ | .FPU stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] -- | stfd f5, CTSTATE->cb.fpr[4] -+ | .FPU stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] -- | stfd f6, CTSTATE->cb.fpr[5] -+ | .FPU stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] -- | stfd f7, CTSTATE->cb.fpr[6] -+ | .FPU stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] -- | stfd f8, CTSTATE->cb.fpr[7] -+ | .FPU stfd f8, CTSTATE->cb.fpr[7] - | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack - | mr CARG1, CTSTATE -@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx) - | lp BASE, L:CRET1->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp RC, L:CRET1->top -- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). -+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li ZERO, 0 - | mr L, CRET1 -- | stw TMP3, TMPD -- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) -+ | .FPU stw TMP3, TMPD -+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | lwz LFUNC:RB, FRAME_FUNC(BASE) -- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). -- | stw TMP0, TONUM_HI -+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
-+ | .FPU stw TMP0, TONUM_HI - | li TISNIL, LJ_TNIL - | li_vmstate INTERP -- | lfs TOBIT, TMPD -- | stw TMP3, TMPD -+ | .FPU lfs TOBIT, TMPD -+ | .FPU stw TMP3, TMPD - | sub RC, RC, BASE - | st_vmstate -- | lfs TONUM, TMPD -+ | .FPU lfs TONUM, TMPD - | ins_callt - |.endif - | -@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx) - | mr CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] -- | lfd FARG1, CTSTATE->cb.fpr[0] -+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] - | b ->vm_leave_unw - |.endif -@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx) - | bge <1 - |2: - | bney cr1, >3 -- | lfd f1, CCSTATE->fpr[0] -- | lfd f2, CCSTATE->fpr[1] -- | lfd f3, CCSTATE->fpr[2] -- | lfd f4, CCSTATE->fpr[3] -- | lfd f5, CCSTATE->fpr[4] -- | lfd f6, CCSTATE->fpr[5] -- | lfd f7, CCSTATE->fpr[6] -- | lfd f8, CCSTATE->fpr[7] -+ | .FPU lfd f1, CCSTATE->fpr[0] -+ | .FPU lfd f2, CCSTATE->fpr[1] -+ | .FPU lfd f3, CCSTATE->fpr[2] -+ | .FPU lfd f4, CCSTATE->fpr[3] -+ | .FPU lfd f5, CCSTATE->fpr[4] -+ | .FPU lfd f6, CCSTATE->fpr[5] -+ | .FPU lfd f7, CCSTATE->fpr[6] -+ | .FPU lfd f8, CCSTATE->fpr[7] - |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] -@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx) - | lwz TMP2, -4(r14) - | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] -- | stfd FARG1, CCSTATE:TMP1->fpr[0] -+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] - | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] -@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM -- | lwzux TMP0, RA, BASE -+ | lwzux CARG1, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) -- | lwzux TMP1, RD, BASE -+ | lwzux CARG3, RD, BASE - | lwz TMP2, -4(PC) -- | checknum cr0, TMP0 -- | lwz CARG3, 4(RD) -+ | checknum cr0, CARG1 -+ | lwz CARG4, 4(RD) - | decode_RD4 TMP2, TMP2 -- | checknum cr1, TMP1 -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | checknum cr1, CARG3 -+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) - | bne cr0, >7 - | bne cr1, >8 -- | cmpw CARG2, CARG3 -+ | cmpw CARG2, CARG4 - if (op == BC_ISLT) { - | bge >2 - } else if (op == BC_ISGE) { -@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | ble >2 - } - |1: -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |2: - | ins_next - | - |7: // RA is not an integer. - | bgt cr0, ->vmeta_comp - | // RA is a number. -- | lfd f0, 0(RA) -+ | .FPU lfd f0, 0(RA) - | bgt cr1, ->vmeta_comp - | blt cr1, >4 - | // RA is a number, RD is an integer. -- | tonum_i f1, CARG3 -+ |.if FPU -+ | tonum_i f1, CARG4 -+ |.else -+ | bl ->vm_sfi2d_2 -+ |.endif - | b >5 - | - |8: // RA is an integer, RD is not an integer. - | bgt cr1, ->vmeta_comp - | // RA is an integer, RD is a number. 
-+ |.if FPU - | tonum_i f0, CARG2 -+ |.else -+ | bl ->vm_sfi2d_1 -+ |.endif - |4: -- | lfd f1, 0(RD) -+ | .FPU lfd f1, 0(RD) - |5: -+ |.if FPU - | fcmpu cr0, f0, f1 -+ |.else -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ |.endif - if (op == BC_ISLT) { - | bge <2 - } else if (op == BC_ISGE) { -@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM -- | lwzux TMP0, RA, BASE -+ | lwzux CARG1, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) -- | lwzux TMP1, RD, BASE -- | checknum cr0, TMP0 -- | lwz TMP2, -4(PC) -- | checknum cr1, TMP1 -- | decode_RD4 TMP2, TMP2 -- | lwz CARG3, 4(RD) -+ | lwzux CARG3, RD, BASE -+ | checknum cr0, CARG1 -+ | lwz SAVE0, -4(PC) -+ | checknum cr1, CARG3 -+ | decode_RD4 SAVE0, SAVE0 -+ | lwz CARG4, 4(RD) - | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - if (vk) { - | ble cr7, ->BC_ISEQN_Z - } else { - | ble cr7, ->BC_ISNEN_Z - } - |.else -- | lwzux TMP0, RA, BASE -- | lwz TMP2, 0(PC) -+ | lwzux CARG1, RA, BASE -+ | lwz SAVE0, 0(PC) - | lfd f0, 0(RA) - | addi PC, PC, 4 -- | lwzux TMP1, RD, BASE -- | checknum cr0, TMP0 -- | decode_RD4 TMP2, TMP2 -+ | lwzux CARG3, RD, BASE -+ | checknum cr0, CARG1 -+ | decode_RD4 SAVE0, SAVE0 - | lfd f1, 0(RD) -- | checknum cr1, TMP1 -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | checknum cr1, CARG3 -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - | bge cr0, >5 - | bge cr1, >5 - | fcmpu cr0, f0, f1 - if (vk) { - | bne >1 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - } else { - | beq >1 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - } - |1: - | ins_next -@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |5: // Either or both types are not numbers. - |.if not DUALNUM - | lwz CARG2, 4(RA) -- | lwz CARG3, 4(RD) -+ | lwz CARG4, 4(RD) - |.endif - |.if FFI -- | cmpwi cr7, TMP0, LJ_TCDATA -- | cmpwi cr5, TMP1, LJ_TCDATA -+ | cmpwi cr7, CARG1, LJ_TCDATA -+ | cmpwi cr5, CARG3, LJ_TCDATA - |.endif -- | not TMP3, TMP0 -- | cmplw TMP0, TMP1 -- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? -+ | not TMP2, CARG1 -+ | cmplw CARG1, CARG3 -+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? - |.if FFI - | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq - |.endif -- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? -+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? - |.if FFI - | beq cr7, ->vmeta_equal_cd - |.endif -- | cmplw cr5, CARG2, CARG3 -+ | cmplw cr5, CARG2, CARG4 - | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. - | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. - | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. -- | mr SAVE0, PC -+ | mr SAVE1, PC - | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. - if (vk) { - | bne cr0, >6 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |6: - } else { - | beq cr0, >6 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |6: - } - |.if DUALNUM -@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! -+ | mr CARG3, CARG4 - | lwz TAB:TMP2, TAB:CARG2->metatable - | li CARG4, 1-vk // ne = 0 or 1. - | cmplwi TAB:TMP2, 0 -@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lbz TMP2, TAB:TMP2->nomm - | andix. 
TMP2, TMP2, 1<<MM_eq - | bne <1 // Or 'no __eq' flag set? -- | mr PC, SAVE0 // Restore old PC. -+ | mr PC, SAVE1 // Restore old PC. - | b ->vmeta_equal // Handle __eq metamethod. - break; - -@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - |.if DUALNUM -- | lwzux TMP0, RA, BASE -+ | lwzux CARG1, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) -- | lwzux TMP1, RD, KBASE -- | checknum cr0, TMP0 -- | lwz TMP2, -4(PC) -- | checknum cr1, TMP1 -- | decode_RD4 TMP2, TMP2 -- | lwz CARG3, 4(RD) -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | lwzux CARG3, RD, KBASE -+ | checknum cr0, CARG1 -+ | lwz SAVE0, -4(PC) -+ | checknum cr1, CARG3 -+ | decode_RD4 SAVE0, SAVE0 -+ | lwz CARG4, 4(RD) -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - if (vk) { - |->BC_ISEQN_Z: - } else { -@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - } - | bne cr0, >7 - | bne cr1, >8 -- | cmpw CARG2, CARG3 -+ | cmpw CARG2, CARG4 - |4: - |.else - if (vk) { -@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - } else { - |->BC_ISNEN_Z: // Dummy label. - } -- | lwzx TMP0, BASE, RA -+ | lwzx CARG1, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA -- | lwz TMP2, -4(PC) -+ | lwz SAVE0, -4(PC) - | lfdx f1, KBASE, RD -- | decode_RD4 TMP2, TMP2 -- | checknum TMP0 -- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) -+ | decode_RD4 SAVE0, SAVE0 -+ | checknum CARG1 -+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) - | bge >3 - | fcmpu cr0, f0, f1 - |.endif - if (vk) { - | bne >1 -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |1: - |.if not FFI - |3: -@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |.if not FFI - |3: - |.endif -- | add PC, PC, TMP2 -+ | add PC, PC, SAVE0 - |2: - } - | ins_next - |.if FFI - |3: -- | cmpwi TMP0, LJ_TCDATA -+ | cmpwi CARG1, LJ_TCDATA - | beq ->vmeta_equal_cd - | b <1 - |.endif -@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |7: // RA is not an integer. - | bge cr0, <3 - | // RA is a number. -- | lfd f0, 0(RA) -+ | .FPU lfd f0, 0(RA) - | blt cr1, >1 - | // RA is a number, RD is an integer. -- | tonum_i f1, CARG3 -+ |.if FPU -+ | tonum_i f1, CARG4 -+ |.else -+ | bl ->vm_sfi2d_2 -+ |.endif - | b >2 - | - |8: // RA is an integer, RD is a number. 
-+ |.if FPU - | tonum_i f0, CARG2 -+ |.else -+ | bl ->vm_sfi2d_1 -+ |.endif - |1: -- | lfd f1, 0(RD) -+ | .FPU lfd f1, 0(RD) - |2: -+ |.if FPU - | fcmpu cr0, f0, f1 -+ |.else -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ |.endif - | b <4 - |.endif - break; -@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | add PC, PC, TMP2 - } else { - | li TMP1, LJ_TFALSE -+ |.if FPU - | lfdx f0, BASE, RD -+ |.else -+ | lwzux CARG1, RD, BASE -+ | lwz CARG2, 4(RD) -+ |.endif - | cmplw TMP0, TMP1 - if (op == BC_ISTC) { - | bge >1 -@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - } - | addis PC, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux CARG1, RA, BASE -+ | stw CARG2, 4(RA) -+ |.endif - | add PC, PC, TMP2 - |1: - } -@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - case BC_MOV: - | // RA = dst*8, RD = src*8 - | ins_next1 -+ |.if FPU - | lfdx f0, BASE, RD - | stfdx f0, BASE, RA -+ |.else -+ | lwzux TMP0, RD, BASE -+ | lwz TMP1, 4(RD) -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - break; - case BC_NOT: -@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: -- | lwzx TMP1, BASE, RB -+ | lwzx CARG1, BASE, RB - | .if DUALNUM -- | lwzx TMP2, KBASE, RC -+ | lwzx CARG3, KBASE, RC - | .endif -+ | .if FPU - | lfdx f14, BASE, RB - | lfdx f15, KBASE, RC -+ | .else -+ | add TMP1, BASE, RB -+ | add TMP2, KBASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ | .endif - | .if DUALNUM -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vn - | .else -- | checknum TMP1; bge ->vmeta_arith_vn -+ | checknum CARG1; bge ->vmeta_arith_vn - | .endif - || break; - ||case 1: -- | lwzx TMP1, BASE, RB -+ | lwzx CARG1, BASE, RB - | .if DUALNUM -- | lwzx TMP2, KBASE, RC -+ | lwzx CARG3, KBASE, RC - | .endif -+ | .if FPU - | lfdx f15, BASE, RB - | lfdx f14, KBASE, RC -+ | .else -+ | add TMP1, BASE, RB -+ | add TMP2, KBASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ | .endif - | .if DUALNUM -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_nv - | .else -- | checknum TMP1; bge ->vmeta_arith_nv -+ | checknum CARG1; bge ->vmeta_arith_nv - | .endif - || break; - ||default: -- | lwzx TMP1, BASE, RB -- | lwzx TMP2, BASE, RC -+ | lwzx CARG1, BASE, RB -+ | lwzx CARG3, BASE, RC -+ | .if FPU - | lfdx f14, BASE, RB - | lfdx f15, BASE, RC -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ | .else -+ | add TMP1, BASE, RB -+ | add TMP2, BASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ | .endif -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - || break; -@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | fsub a, b, a // b - floor(b/c)*c - |.endmacro - | -+ |.macro sfpmod -+ |->BC_MODVN_Z: -+ | stw CARG1, SFSAVE_1 -+ | stw CARG2, SFSAVE_2 -+ | mr SAVE0, CARG3 -+ | mr SAVE1, CARG4 -+ | blex __divdf3 -+ | blex floor -+ | mr CARG3, SAVE0 -+ | mr CARG4, SAVE1 -+ | blex __muldf3 -+ | mr CARG3, CRET1 -+ | mr CARG4, CRET2 -+ | lwz CARG1, SFSAVE_1 -+ | lwz CARG2, SFSAVE_2 -+ | blex __subdf3 -+ |.endmacro -+ | - |.macro ins_arithfp, fpins 
- | ins_arithpre - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. -- |.else -+ |.elif FPU - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 -+ |.else -+ | blex __divdf3 // Only soft-float div uses this macro. -+ | ins_next1 -+ | stwux CRET1, RA, BASE -+ | stw CRET2, 4(RA) -+ | ins_next2 - |.endif - |.endmacro - | -- |.macro ins_arithdn, intins, fpins -+ |.macro ins_arithdn, intins, fpins, fpcall - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: -- | lwzux TMP1, RB, BASE -- | lwzux TMP2, RC, KBASE -- | lwz CARG1, 4(RB) -- | checknum cr0, TMP1 -- | lwz CARG2, 4(RC) -+ | lwzux CARG1, RB, BASE -+ | lwzux CARG3, RC, KBASE -+ | lwz CARG2, 4(RB) -+ | checknum cr0, CARG1 -+ | lwz CARG4, 4(RC) -+ | checknum cr1, CARG3 - || break; - ||case 1: -- | lwzux TMP1, RB, BASE -- | lwzux TMP2, RC, KBASE -- | lwz CARG2, 4(RB) -- | checknum cr0, TMP1 -- | lwz CARG1, 4(RC) -+ | lwzux CARG3, RB, BASE -+ | lwzux CARG1, RC, KBASE -+ | lwz CARG4, 4(RB) -+ | checknum cr0, CARG3 -+ | lwz CARG2, 4(RC) -+ | checknum cr1, CARG1 - || break; - ||default: -- | lwzux TMP1, RB, BASE -- | lwzux TMP2, RC, BASE -- | lwz CARG1, 4(RB) -- | checknum cr0, TMP1 -- | lwz CARG2, 4(RC) -+ | lwzux CARG1, RB, BASE -+ | lwzux CARG3, RC, BASE -+ | lwz CARG2, 4(RB) -+ | checknum cr0, CARG1 -+ | lwz CARG4, 4(RC) -+ | checknum cr1, CARG3 - || break; - ||} -- | checknum cr1, TMP2 - | bne >5 - | bne cr1, >5 -- | intins CARG1, CARG1, CARG2 -+ |.if "intins" == "intmod" -+ | mr CARG1, CARG2 -+ | mr CARG2, CARG4 -+ |.endif -+ | intins CARG1, CARG2, CARG4 - | bso >4 - |1: - | ins_next1 -@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | checkov TMP0, <1 // Ignore unrelated overflow. - | ins_arithfallback b - |5: // FP variant. -+ |.if FPU - ||if (vk == 1) { - | lfd f15, 0(RB) -- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f14, 0(RC) - ||} else { - | lfd f14, 0(RB) -- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f15, 0(RC) - ||} -+ |.endif -+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | ins_arithfallback bge - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else -+ |.if FPU - | fpins f0, f14, f15 -- | ins_next1 - | stfdx f0, BASE, RA -+ |.else -+ |.if "fpcall" == "sfpmod" -+ | sfpmod -+ |.else -+ | blex fpcall -+ |.endif -+ | stwux CRET1, RA, BASE -+ | stw CRET2, 4(RA) -+ |.endif -+ | ins_next1 - | b <2 - |.endif - |.endmacro - | -- |.macro ins_arith, intins, fpins -+ |.macro ins_arith, intins, fpins, fpcall - |.if DUALNUM -- | ins_arithdn intins, fpins -+ | ins_arithdn intins, fpins, fpcall - |.else - | ins_arithfp fpins - |.endif -@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | addo. TMP0, TMP0, TMP3 - | add y, a, b - |.endmacro -- | ins_arith addo32., fadd -+ | ins_arith addo32., fadd, __adddf3 - |.else -- | ins_arith addo., fadd -+ | ins_arith addo., fadd, __adddf3 - |.endif - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: -@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | subo. 
TMP0, TMP0, TMP3 - | sub y, a, b - |.endmacro -- | ins_arith subo32., fsub -+ | ins_arith subo32., fsub, __subdf3 - |.else -- | ins_arith subo., fsub -+ | ins_arith subo., fsub, __subdf3 - |.endif - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: -- | ins_arith mullwo., fmul -+ | ins_arith mullwo., fmul, __muldf3 - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: -- | ins_arith intmod, fpmod -+ | ins_arith intmod, fpmod, sfpmod - break; - case BC_MODNV: case BC_MODVV: -- | ins_arith intmod, fpmod_ -+ | ins_arith intmod, fpmod_, sfpmod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. -- | lwzx TMP1, BASE, RB -+ | lwzx CARG1, BASE, RB -+ | lwzx CARG3, BASE, RC -+ |.if FPU - | lfdx FARG1, BASE, RB -- | lwzx TMP2, BASE, RC - | lfdx FARG2, BASE, RC -- | checknum cr0, TMP1 -- | checknum cr1, TMP2 -+ |.else -+ | add TMP1, BASE, RB -+ | add TMP2, BASE, RC -+ | lwz CARG2, 4(TMP1) -+ | lwz CARG4, 4(TMP2) -+ |.endif -+ | checknum cr0, CARG1 -+ | checknum cr1, CARG3 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - | blex pow - | ins_next1 -+ |.if FPU - | stfdx FARG1, BASE, RA -+ |.else -+ | stwux CARG1, RA, BASE -+ | stw CARG2, 4(RA) -+ |.endif - | ins_next2 - break; - -@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lp BASE, L->base - | bne ->vmeta_binop - | ins_next1 -+ |.if FPU - | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. - | stfdx f0, BASE, RA -+ |.else -+ | lwzux TMP0, SAVE0, BASE -+ | lwz TMP1, 4(SAVE0) -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - break; - -@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | ins_next1 -+ |.if FPU - | lfdx f0, KBASE, RD - | stfdx f0, BASE, RA -+ |.else -+ | lwzux TMP0, RD, KBASE -+ | lwz TMP1, 4(RD) -+ | stwux TMP0, RA, BASE -+ | stw TMP1, 4(RA) -+ |.endif - | ins_next2 - break; - case BC_KPRI: -@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwzx UPVAL:RB, LFUNC:RB, RD - | ins_next1 - | lwz TMP1, UPVAL:RB->v -+ |.if FPU - | lfd f0, 0(TMP1) - | stfdx f0, BASE, RA -+ |.else -+ | lwz TMP2, 0(TMP1) -+ | lwz TMP3, 4(TMP1) -+ | stwux TMP2, RA, BASE -+ | stw TMP3, 4(RA) -+ |.endif - | ins_next2 - break; - case BC_USETV: -@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) -+ |.if FPU - | lfdux f0, RD, BASE -+ |.else -+ | lwzux CARG1, RD, BASE -+ | lwz CARG3, 4(RD) -+ |.endif - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. 
TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP0, UPVAL:RB->closed - | lwz TMP2, 0(RD) -+ |.if FPU - | stfd f0, 0(CARG2) -+ |.else -+ | stw CARG1, 0(CARG2) -+ | stw CARG3, 4(CARG2) -+ |.endif - | cmplwi cr1, TMP0, 0 - | lwz TMP1, 4(RD) - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq -@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) -+ |.if FPU - | lfdx f0, KBASE, RD -+ |.else -+ | lwzux TMP2, RD, KBASE -+ | lwz TMP3, 4(RD) -+ |.endif - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v -+ |.if FPU - | stfd f0, 0(TMP1) -+ |.else -+ | stw TMP2, 0(TMP1) -+ | stw TMP3, 4(TMP1) -+ |.endif - | ins_next2 - break; - case BC_USETP: -@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |.endif - | ble ->vmeta_tgetv // Integer key and in array part? - | lwzx TMP0, TMP1, TMP2 -+ |.if FPU - | lfdx f14, TMP1, TMP2 -+ |.else -+ | lwzux SAVE0, TMP1, TMP2 -+ | lwz SAVE1, 4(TMP1) -+ |.endif - | checknil TMP0; beq >2 - |1: - | ins_next1 -+ |.if FPU - | stfdx f14, BASE, RA -+ |.else -+ | stwux SAVE0, RA, BASE -+ | stw SAVE1, 4(RA) -+ |.endif - | ins_next2 - | - |2: // Check for __index if table value is nil. -@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | cmplw TMP0, TMP1; bge ->vmeta_tgetb -+ |.if FPU - | lwzx TMP1, TMP2, RC - | lfdx f0, TMP2, RC -+ |.else -+ | lwzux TMP1, TMP2, RC -+ | lwz TMP3, 4(TMP2) -+ |.endif - | checknil TMP1; beq >5 - |1: - | ins_next1 -+ |.if FPU - | stfdx f0, BASE, RA -+ |.else -+ | stwux TMP1, RA, BASE -+ | stw TMP3, 4(RA) -+ |.endif - | ins_next2 - | - |5: // Check for __index if table value is nil. -@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | cmplw TMP0, CARG2 - | slwi TMP2, CARG2, 3 - | ble ->vmeta_tgetr // In array part? -+ |.if FPU - | lfdx f14, TMP1, TMP2 -+ |.else -+ | lwzux SAVE0, TMP2, TMP1 -+ | lwz SAVE1, 4(TMP2) -+ |.endif - |->BC_TGETR_Z: - | ins_next1 -+ |.if FPU - | stfdx f14, BASE, RA -+ |.else -+ | stwux SAVE0, RA, BASE -+ | stw SAVE1, 4(RA) -+ |.endif - | ins_next2 - break; - -@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | ble ->vmeta_tsetv // Integer key and in array part? - | lwzx TMP2, TMP1, TMP0 - | lbz TMP3, TAB:RB->marked -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | add SAVE1, BASE, RA -+ | lwz SAVE0, 0(SAVE1) -+ | lwz SAVE1, 4(SAVE1) -+ |.endif - | checknil TMP2; beq >3 - |1: - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) -+ |.if FPU - | stfdx f14, TMP1, TMP0 -+ |.else -+ | stwux SAVE0, TMP1, TMP0 -+ | stw SAVE1, 4(TMP1) -+ |.endif - | bne >7 - |2: - | ins_next -@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz NODE:TMP2, TAB:RB->node - | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | add CARG2, BASE, RA -+ | lwz SAVE0, 0(CARG2) -+ | lwz SAVE1, 4(CARG2) -+ |.endif - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 -@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | checknil CARG2; beq >4 // Key found, but nil value? - |2: - | andix. 
TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ |.if FPU - | stfd f14, NODE:TMP2->val -+ |.else -+ | stw SAVE0, NODE:TMP2->val.u32.hi -+ | stw SAVE1, NODE:TMP2->val.u32.lo -+ |.endif - | bne >7 - |3: - | ins_next -@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | lp BASE, L->base -+ |.if FPU - | stfd f14, 0(CRET1) -+ |.else -+ | stw SAVE0, 0(CRET1) -+ | stw SAVE1, 4(CRET1) -+ |.endif - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. -@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | lwz TMP2, TAB:RB->array - | lbz TMP3, TAB:RB->marked - | cmplw TMP0, TMP1 -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | add CARG2, BASE, RA -+ | lwz SAVE0, 0(CARG2) -+ | lwz SAVE1, 4(CARG2) -+ |.endif - | bge ->vmeta_tsetb - | lwzx TMP1, TMP2, RC - | checknil TMP1; beq >5 - |1: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) -+ |.if FPU - | stfdx f14, TMP2, RC -+ |.else -+ | stwux SAVE0, RC, TMP2 -+ | stw SAVE1, 4(RC) -+ |.endif - | bne >7 - |2: - | ins_next -@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |2: - | cmplw TMP0, CARG3 - | slwi TMP2, CARG3, 3 -+ |.if FPU - | lfdx f14, BASE, RA -+ |.else -+ | lwzux SAVE0, RA, BASE -+ | lwz SAVE1, 4(RA) -+ |.endif - | ble ->vmeta_tsetr // In array part? - | ins_next1 -+ |.if FPU - | stfdx f14, TMP1, TMP2 -+ |.else -+ | stwux SAVE0, TMP1, TMP2 -+ | stw SAVE1, 4(TMP1) -+ |.endif - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. -@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | add TMP1, TMP1, TMP0 - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. -+ |.if FPU - | lfd f0, 0(RA) -+ |.else -+ | lwz SAVE0, 0(RA) -+ | lwz SAVE1, 4(RA) -+ |.endif - | addi RA, RA, 8 - | cmpw cr1, RA, TMP2 -+ |.if FPU - | stfd f0, 0(TMP1) -+ |.else -+ | stw SAVE0, 0(TMP1) -+ | stw SAVE1, 4(TMP1) -+ |.endif - | addi TMP1, TMP1, 8 - | blt cr1, <3 - | bne >7 -@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | beq cr1, >3 - |2: - | addi TMP3, TMP2, 8 -+ |.if FPU - | lfdx f0, RA, TMP2 -+ |.else -+ | add CARG3, RA, TMP2 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | cmplw cr1, TMP3, NARGS8:RC -+ |.if FPU - | stfdx f0, BASE, TMP2 -+ |.else -+ | stwux CARG1, TMP2, BASE -+ | stw CARG2, 4(TMP2) -+ |.endif - | mr TMP2, TMP3 - | bne cr1, <2 - |3: -@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | add BASE, BASE, RA - | lwz TMP1, -24(BASE) - | lwz LFUNC:RB, -20(BASE) -+ |.if FPU - | lfd f1, -8(BASE) - | lfd f0, -16(BASE) -+ |.else -+ | lwz CARG1, -8(BASE) -+ | lwz CARG2, -4(BASE) -+ | lwz CARG3, -16(BASE) -+ | lwz CARG4, -12(BASE) -+ |.endif - | stw TMP1, 0(BASE) // Copy callable. - | stw LFUNC:RB, 4(BASE) - | checkfunc TMP1 -- | stfd f1, 16(BASE) // Copy control var. - | li NARGS8:RC, 16 // Iterators get 2 arguments. -+ |.if FPU -+ | stfd f1, 16(BASE) // Copy control var. - | stfdu f0, 8(BASE) // Copy state. -+ |.else -+ | stw CARG1, 16(BASE) // Copy control var. -+ | stw CARG2, 20(BASE) -+ | stwu CARG3, 8(BASE) // Copy state. 
-+ | stw CARG4, 4(BASE) -+ |.endif - | bne ->vmeta_call - | ins_call - break; -@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | slwi TMP3, RC, 3 - | bge >5 // Index points after array part? - | lwzx TMP2, TMP1, TMP3 -+ |.if FPU - | lfdx f0, TMP1, TMP3 -+ |.else -+ | lwzux CARG1, TMP3, TMP1 -+ | lwz CARG2, 4(TMP3) -+ |.endif - | checknil TMP2 - | lwz INS, -4(PC) - | beq >4 -@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |.endif - | addi RC, RC, 1 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) -+ |.if FPU - | stfd f0, 8(RA) -+ |.else -+ | stw CARG1, 8(RA) -+ | stw CARG2, 12(RA) -+ |.endif - | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. - | add PC, TMP1, TMP3 -@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | slwi RB, RC, 3 - | sub TMP3, TMP3, RB - | lwzx RB, TMP2, TMP3 -+ |.if FPU - | lfdx f0, TMP2, TMP3 -+ |.else -+ | add CARG3, TMP2, TMP3 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | add NODE:TMP3, TMP2, TMP3 - | checknil RB - | lwz INS, -4(PC) - | beq >7 -+ |.if FPU - | lfd f1, NODE:TMP3->key -+ |.else -+ | lwz CARG3, NODE:TMP3->key.u32.hi -+ | lwz CARG4, NODE:TMP3->key.u32.lo -+ |.endif - | addis TMP2, PC, -(BCBIAS_J*4 >> 16) -+ |.if FPU - | stfd f0, 8(RA) -+ |.else -+ | stw CARG1, 8(RA) -+ | stw CARG2, 12(RA) -+ |.endif - | add RC, RC, TMP0 - | decode_RD4 TMP1, INS -+ |.if FPU - | stfd f1, 0(RA) -+ |.else -+ | stw CARG3, 0(RA) -+ | stw CARG4, 4(RA) -+ |.endif - | addi RC, RC, 1 - | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. -@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | subi TMP2, TMP2, 16 - | ble >2 // No vararg slots? - |1: // Copy vararg slots to destination slots. -+ |.if FPU - | lfd f0, 0(RC) -+ |.else -+ | lwz CARG1, 0(RC) -+ | lwz CARG2, 4(RC) -+ |.endif - | addi RC, RC, 8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw CARG1, 0(RA) -+ | stw CARG2, 4(RA) -+ |.endif - | cmplw RA, TMP2 - | cmplw cr1, RC, TMP3 - | bge >3 // All destination slots filled? -@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | addi MULTRES, TMP1, 8 - | bgt >7 - |6: -+ |.if FPU - | lfd f0, 0(RC) -+ |.else -+ | lwz CARG1, 0(RC) -+ | lwz CARG2, 4(RC) -+ |.endif - | addi RC, RC, 8 -+ |.if FPU - | stfd f0, 0(RA) -+ |.else -+ | stw CARG1, 0(RA) -+ | stw CARG2, 4(RA) -+ |.endif - | cmplw RC, TMP3 - | addi RA, RA, 8 - | blt <6 // More vararg slots? 
-@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | li TMP1, 0 - |2: - | addi TMP3, TMP1, 8 -+ |.if FPU - | lfdx f0, RA, TMP1 -+ |.else -+ | add CARG3, RA, TMP1 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | cmpw TMP3, RC -+ |.if FPU - | stfdx f0, TMP2, TMP1 -+ |.else -+ | add CARG3, TMP2, TMP1 -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | beq >3 - | addi TMP1, TMP3, 8 -+ |.if FPU - | lfdx f1, RA, TMP3 -+ |.else -+ | add CARG3, RA, TMP3 -+ | lwz CARG1, 0(CARG3) -+ | lwz CARG2, 4(CARG3) -+ |.endif - | cmpw TMP1, RC -+ |.if FPU - | stfdx f1, TMP2, TMP3 -+ |.else -+ | add CARG3, TMP2, TMP3 -+ | stw CARG1, 0(CARG3) -+ | stw CARG2, 4(CARG3) -+ |.endif - | bne <2 - |3: - |5: -@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | subi TMP2, BASE, 8 - | decode_RB8 RB, INS - if (op == BC_RET1) { -+ |.if FPU - | lfd f0, 0(RA) - | stfd f0, 0(TMP2) -+ |.else -+ | lwz CARG1, 0(RA) -+ | lwz CARG2, 4(RA) -+ | stw CARG1, 0(TMP2) -+ | stw CARG2, 4(TMP2) -+ |.endif - } - |5: - | cmplw RB, RD -@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |4: - | stw CARG1, FORL_IDX*8+4(RA) - } else { -- | lwz TMP3, FORL_STEP*8(RA) -+ | lwz SAVE0, FORL_STEP*8(RA) - | lwz CARG3, FORL_STEP*8+4(RA) - | lwz TMP2, FORL_STOP*8(RA) - | lwz CARG2, FORL_STOP*8+4(RA) -- | cmplw cr7, TMP3, TISNUM -+ | cmplw cr7, SAVE0, TISNUM - | cmplw cr1, TMP2, TISNUM - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq -@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - if (vk) { - |.if DUALNUM - |9: // FP loop. -+ |.if FPU - | lfd f1, FORL_IDX*8(RA) - |.else -+ | lwz CARG1, FORL_IDX*8(RA) -+ | lwz CARG2, FORL_IDX*8+4(RA) -+ |.endif -+ |.else - | lfdux f1, RA, BASE - |.endif -+ |.if FPU - | lfd f3, FORL_STEP*8(RA) - | lfd f2, FORL_STOP*8(RA) -- | lwz TMP3, FORL_STEP*8(RA) - | fadd f1, f1, f3 - | stfd f1, FORL_IDX*8(RA) -+ |.else -+ | lwz CARG3, FORL_STEP*8(RA) -+ | lwz CARG4, FORL_STEP*8+4(RA) -+ | mr SAVE1, RD -+ | blex __adddf3 -+ | mr RD, SAVE1 -+ | stw CRET1, FORL_IDX*8(RA) -+ | stw CRET2, FORL_IDX*8+4(RA) -+ | lwz CARG3, FORL_STOP*8(RA) -+ | lwz CARG4, FORL_STOP*8+4(RA) -+ |.endif -+ | lwz SAVE0, FORL_STEP*8(RA) - } else { - |.if DUALNUM - |9: // FP loop. 
- |.else - | lwzux TMP1, RA, BASE -- | lwz TMP3, FORL_STEP*8(RA) -+ | lwz SAVE0, FORL_STEP*8(RA) - | lwz TMP2, FORL_STOP*8(RA) - | cmplw cr0, TMP1, TISNUM -- | cmplw cr7, TMP3, TISNUM -+ | cmplw cr7, SAVE0, TISNUM - | cmplw cr1, TMP2, TISNUM - |.endif -+ |.if FPU - | lfd f1, FORL_IDX*8(RA) -+ |.else -+ | lwz CARG1, FORL_IDX*8(RA) -+ | lwz CARG2, FORL_IDX*8+4(RA) -+ |.endif - | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt -+ |.if FPU - | lfd f2, FORL_STOP*8(RA) -+ |.else -+ | lwz CARG3, FORL_STOP*8(RA) -+ | lwz CARG4, FORL_STOP*8+4(RA) -+ |.endif - | bge ->vmeta_for - } -- | cmpwi cr6, TMP3, 0 -+ | cmpwi cr6, SAVE0, 0 - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } -+ |.if FPU - | stfd f1, FORL_EXT*8(RA) -+ |.else -+ | stw CARG1, FORL_EXT*8(RA) -+ | stw CARG2, FORL_EXT*8+4(RA) -+ |.endif - if (op != BC_JFORL) { - | add RD, PC, RD - } -+ |.if FPU - | fcmpu cr0, f1, f2 -+ |.else -+ | mr SAVE1, RD -+ | blex __ledf2 -+ | cmpwi CRET1, 0 -+ | mr RD, SAVE1 -+ |.endif - if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } --- -2.20.1 - diff --git a/0011-Use-https-for-freelists.org-links.patch b/0011-Use-https-for-freelists.org-links.patch deleted file mode 100644 index c0c2a19..0000000 --- a/0011-Use-https-for-freelists.org-links.patch +++ /dev/null @@ -1,25 +0,0 @@ -From f3d75075ed91137699c6071abe49e2252e794a9c Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Fri, 18 Aug 2017 12:52:14 +0200 -Subject: [PATCH 11/72] Use https for freelists.org links. - ---- - doc/ext_ffi_semantics.html | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html -index 899640c..ae3c037 100644 ---- a/doc/ext_ffi_semantics.html -+++ b/doc/ext_ffi_semantics.html -@@ -844,7 +844,7 @@ place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead. - <p> - The main use for parameterized types are libraries implementing abstract - data types --(<a href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">»</span> example</a>), -+(<a href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8">example</a>), - similar to what can be achieved with C++ template metaprogramming. - Another use case are derived types of anonymous structs, which avoids - pollution of the global struct namespace. --- -2.20.1 - diff --git a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch b/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch deleted file mode 100644 index 80ca5b0..0000000 --- a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 6b0824852677cc12570c20a3211fbfe0e4f0ce14 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 28 Aug 2017 10:43:37 +0200 -Subject: [PATCH 12/72] x64/LJ_GC64: Fix fallback case of asm_fuseloadk64(). - -Contributed by Peter Cawley. 
---- - src/lj_asm_x86.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h -index 3e189b1..55c02d2 100644 ---- a/src/lj_asm_x86.h -+++ b/src/lj_asm_x86.h -@@ -387,6 +387,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) - ir->i = (int32_t)(as->mctop - as->mcbot); - as->mcbot += 8; - as->mclim = as->mcbot + MCLIM_REDZONE; -+ lj_mcode_commitbot(as->J, as->mcbot); - } - as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); - as->mrm.base = RID_RIP; --- -2.20.1 - diff --git a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch b/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch deleted file mode 100644 index faaa94a..0000000 --- a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch +++ /dev/null @@ -1,751 +0,0 @@ -From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 3 Sep 2017 23:20:53 +0200 -Subject: [PATCH 13/72] PPC: Add soft-float support to JIT compiler backend. - -Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -Sponsored by Cisco Systems, Inc. ---- - src/lj_arch.h | 1 - - src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++------- - 2 files changed, 278 insertions(+), 44 deletions(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index 0145a7c..5962f3a 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -273,7 +273,6 @@ - #endif - - #if LJ_ABI_SOFTFP --#define LJ_ARCH_NOJIT 1 /* NYI */ - #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - #else - #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE -diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h -index 6daa861..1955429 100644 ---- a/src/lj_asm_ppc.h -+++ b/src/lj_asm_ppc.h -@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - emit_tab(as, pi, rt, left, right); - } - -+#if !LJ_SOFTFP - /* Fuse to multiply-add/sub instruction. */ - static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) - { -@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) - } - return 0; - } -+#endif - - /* -- Calls --------------------------------------------------------------- */ - -@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - { - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 8; -- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; -+ Reg gpr = REGARG_FIRSTGPR; -+#if !LJ_SOFTFP -+ Reg fpr = REGARG_FIRSTFPR; -+#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); -+#if !LJ_SOFTFP - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ -@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } -- } else { -+ } else -+#endif -+ { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ - ra_leftov(as, gpr, ref); -@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) - } - checkmclim(as); - } -+#if !LJ_SOFTFP - if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ - emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); -+#endif - } - - /* Setup result reg/sp for call. Evict scratch regs. 
*/ -@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) - { - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); -+#if !LJ_SOFTFP - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; -+#endif - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) -@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); -- if (irt_isfp(ir->t)) { -+ if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - /* Use spill slot or temp slots. */ - int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; -@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir) - - /* -- Type conversions ---------------------------------------------------- */ - -+#if !LJ_SOFTFP - static void asm_tointg(ASMState *as, IRIns *ir, Reg left) - { - RegSet allow = RSET_FPR; -@@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir) - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fab(as, PPCI_FADD, tmp, left, right); - } -+#endif - - static void asm_conv(ASMState *as, IRIns *ir) - { - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -+#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -+#endif - IRRef lref = ir->op1; -- lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ -+#if LJ_SOFTFP -+ /* FP conversions are handled by SPLIT. */ -+ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); -+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -+#else -+ lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ -@@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns *ir) - emit_fb(as, PPCI_FCTIWZ, tmp, left); - } - } -- } else { -+ } else -+#endif -+ { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -@@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIns *ir) - { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; -- int32_t ofs; -+ int32_t ofs = SPOFS_TMP; -+#if LJ_SOFTFP -+ ra_evictset(as, RSET_SCRATCH); -+ if (ra_used(ir)) { -+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && -+ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { -+ int i; -+ for (i = 0; i < 2; i++) { -+ Reg r = (ir+i)->r; -+ if (ra_hasreg(r)) { -+ ra_free(as, r); -+ ra_modified(as, r); -+ emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); -+ } -+ } -+ ofs = sps_scale(ir->s & ~1); -+ } else { -+ Reg rhi = ra_dest(as, ir+1, RSET_GPR); -+ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); -+ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); -+ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); -+ } -+ } -+#else - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); -+ if (ir->s) ofs = sps_scale(ir->s); -+#endif - asm_guardcc(as, CC_EQ); - emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ -- ofs = ir->s ? 
sps_scale(ir->s) : SPOFS_TMP; - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); - } - -@@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } -- type = ra_allock(as, irt_toitype(ir->t), allow); -+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) -+ type = ra_alloc1(as, ref+1, allow); -+ else -+ type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); - } - } -@@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - Reg tisnum = RID_NONE, tmpnum = RID_NONE; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); -+ int isk = irref_isk(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); -+#if LJ_SOFTFP -+ if (!isk) { -+ key = ra_alloc1(as, refkey, allow); -+ rset_clear(allow, key); -+ if (irkey[1].o == IR_HIOP) { -+ if (ra_hasreg((irkey+1)->r)) { -+ tmpnum = (irkey+1)->r; -+ ra_noweak(as, tmpnum); -+ } else { -+ tmpnum = ra_allocref(as, refkey+1, allow); -+ } -+ rset_clear(allow, tmpnum); -+ } -+ } -+#else - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); -@@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } -+#endif - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - -@@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - asm_guardcc(as, CC_EQ); - else - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); -- if (irt_isnum(kt)) { -+ if (!LJ_SOFTFP && irt_isnum(kt)) { - emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); - emit_condbranch(as, PPCI_BC, CC_GE, l_next); - emit_ab(as, PPCI_CMPLW, tmp1, tisnum); -@@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_ab(as, PPCI_CMPW, tmp2, key); - emit_condbranch(as, PPCI_BC, CC_NE, l_next); - } -- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); -+ if (LJ_SOFTFP && ra_hasreg(tmpnum)) -+ emit_ab(as, PPCI_CMPW, tmp1, tmpnum); -+ else -+ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); - if (!irt_ispri(kt)) - emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - } -@@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - (((char *)as->mcp-(char *)l_loop) & 0xffffu); - - /* Load main position relative to tab->node into dest. */ -- khash = irref_isk(refkey) ? ir_khash(irkey) : 1; -+ khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; -- if (irref_isk(refkey)) -+ if (isk) - tmphash = ra_allock(as, khash, allow); - emit_tab(as, PPCI_ADD, dest, dest, tmp1); - emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); - emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); -- if (irref_isk(refkey)) { -+ if (isk) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); -@@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); - emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); - emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); -- if (irt_isnum(kt)) { -+ if (LJ_SOFTFP ? 
(irkey[1].o == IR_HIOP) : irt_isnum(kt)) { -+#if LJ_SOFTFP -+ emit_asb(as, PPCI_XOR, tmp2, key, tmp1); -+ emit_rotlwi(as, dest, tmp1, HASH_ROT1); -+ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); -+#else - int32_t ofs = ra_spill(as, irkey); - emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); - emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); - emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); -+#endif - } else { - emit_asb(as, PPCI_XOR, tmp2, key, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); -@@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir) - case IRT_U8: return PPCI_LBZ; - case IRT_I16: return PPCI_LHA; - case IRT_U16: return PPCI_LHZ; -- case IRT_NUM: return PPCI_LFD; -- case IRT_FLOAT: return PPCI_LFS; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD; -+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; - default: return PPCI_LWZ; - } - } -@@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir) - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return PPCI_STB; - case IRT_I16: case IRT_U16: return PPCI_STH; -- case IRT_NUM: return PPCI_STFD; -- case IRT_FLOAT: return PPCI_STFS; -+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD; -+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; - default: return PPCI_STW; - } - } -@@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) - - static void asm_xload(ASMState *as, IRIns *ir) - { -- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -+ Reg dest = ra_dest(as, ir, -+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - if (irt_isi8(ir->t)) - emit_as(as, PPCI_EXTSB, dest, dest); -@@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) - Reg src = ra_alloc1(as, irb->op1, RSET_GPR); - asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); - } else { -- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -+ Reg src = ra_alloc1(as, ir->op2, -+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -@@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) - Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = AHUREF_LSX; -+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { -+ t.irt = IRT_NUM; -+ if (ra_used(ir+1)) { -+ type = ra_dest(as, ir+1, allow); -+ rset_clear(allow, type); -+ } -+ ofs = 0; -+ } - if (ra_used(ir)) { -- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); -- if (!irt_isnum(t)) ofs = 0; -- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); -+ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || -+ irt_isint(ir->t) || irt_isaddr(ir->t)); -+ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; -+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -@@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, type, tisnum); - if (ra_hasreg(dest)) { -- if (ofs == AHUREF_LSX) { -+ if (!LJ_SOFTFP && ofs == AHUREF_LSX) { - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, - (idx&255)), (idx>>8))); - emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); - } else { -- emit_fai(as, PPCI_LFD, dest, idx, ofs); -+ emit_fai(as, LJ_SOFTFP ? 
PPCI_LWZ : PPCI_LFD, dest, idx, -+ ofs+4*LJ_SOFTFP); - } - } - } else { -@@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) - int32_t ofs = AHUREF_LSX; - if (ir->r == RID_SINK) - return; -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - } else { - if (!irt_ispri(ir->t)) { -@@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) - rset_clear(allow, src); - ofs = 0; - } -- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); -+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) -+ type = ra_alloc1(as, (ir+1)->op2, allow); -+ else -+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - if (ofs == AHUREF_LSX) { - emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); -@@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir) - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; -+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); -+ if (hiop) -+ t.irt = IRT_NUM; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ -- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -+ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); -+#if LJ_SOFTFP -+ lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ -+ if (hiop && ra_used(ir+1)) { -+ type = ra_dest(as, ir+1, allow); -+ rset_clear(allow, type); -+ } -+#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ -- } else if (ra_used(ir)) { -+ } else -+#endif -+ if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); -- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); -+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -- if ((ir->op2 & IRSLOAD_CONVERT)) { -+ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - dest = ra_scratch(as, RSET_FPR); -@@ -994,10 +1097,13 @@ dotypecheck: - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - asm_guardcc(as, CC_GE); -- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); -+#if !LJ_SOFTFP - type = RID_TMP; -+#endif -+ emit_ab(as, PPCI_CMPLW, type, tisnum); - } -- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); -+ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? 
PPCI_LWZ : PPCI_LFD, dest, -+ base, ofs-(LJ_SOFTFP?0:4)); - } else { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - asm_guardcc(as, CC_NE); -@@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir) - - /* -- Arithmetic and logic operations ------------------------------------- */ - -+#if !LJ_SOFTFP - static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) - { - Reg dest = ra_dest(as, ir, RSET_FPR); -@@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRIns *ir) - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - } -+#endif - - static void asm_add(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) - asm_fparith(as, ir, PPCI_FADD); -- } else { -+ } else -+#endif -+ { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - PPCIns pi; -@@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir) - - static void asm_sub(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) - asm_fparith(as, ir, PPCI_FSUB); -- } else { -+ } else -+#endif -+ { - PPCIns pi = PPCI_SUBF; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right; -@@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir) - - static void asm_mul(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, PPCI_FMUL); -- } else { -+ } else -+#endif -+ { - PPCIns pi = PPCI_MULLW; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); -@@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns *ir) - - static void asm_neg(ASMState *as, IRIns *ir) - { -+#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, PPCI_FNEG); -- } else { -+ } else -+#endif -+ { - Reg dest, left; - PPCIns pi = PPCI_NEG; - if (as->flagmcp == as->mcp) { -@@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) - PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) - #define asm_bror(as, ir) lua_assert(0) - -+#if LJ_SOFTFP -+static void asm_sfpmin_max(ASMState *as, IRIns *ir) -+{ -+ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; -+ IRRef args[4]; -+ MCLabel l_right, l_end; -+ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); -+ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); -+ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); -+ PPCCC cond = (IROp)ir->o == IR_MIN ? 
CC_EQ : CC_NE; -+ righthi = (lefthi >> 8); lefthi &= 255; -+ rightlo = (leftlo >> 8); leftlo &= 255; -+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; -+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; -+ l_end = emit_label(as); -+ if (desthi != righthi) emit_mr(as, desthi, righthi); -+ if (destlo != rightlo) emit_mr(as, destlo, rightlo); -+ l_right = emit_label(as); -+ if (l_end != l_right) emit_jmp(as, l_end); -+ if (desthi != lefthi) emit_mr(as, desthi, lefthi); -+ if (destlo != leftlo) emit_mr(as, destlo, leftlo); -+ if (l_right == as->mcp+1) { -+ cond ^= 4; l_right = l_end; ++as->mcp; -+ } -+ emit_condbranch(as, PPCI_BC, cond, l_right); -+ ra_evictset(as, RSET_SCRATCH); -+ emit_cmpi(as, RID_RET, 1); -+ asm_gencall(as, &ci, args); -+} -+#endif -+ - static void asm_min_max(ASMState *as, IRIns *ir, int ismax) - { -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg tmp = dest; - Reg right, left = ra_alloc2(as, ir, RSET_FPR); -@@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) - static void asm_comp(ASMState *as, IRIns *ir) - { - PPCCC cc = asm_compmap[ir->o]; -- if (irt_isnum(ir->t)) { -+ if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guardcc(as, (cc >> 4)); -@@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns *ir) - - #define asm_equal(as, ir) asm_comp(as, ir) - -+#if LJ_SOFTFP -+/* SFP comparisons. */ -+static void asm_sfpcomp(ASMState *as, IRIns *ir) -+{ -+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; -+ RegSet drop = RSET_SCRATCH; -+ Reg r; -+ IRRef args[4]; -+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; -+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; -+ -+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { -+ if (!rset_test(as->freeset, r) && -+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) -+ rset_clear(drop, r); -+ } -+ ra_evictset(as, drop); -+ asm_setupresult(as, ir, ci); -+ switch ((IROp)ir->o) { -+ case IR_ULT: -+ asm_guardcc(as, CC_EQ); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 0); -+ case IR_ULE: -+ asm_guardcc(as, CC_EQ); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 1); -+ break; -+ case IR_GE: case IR_GT: -+ asm_guardcc(as, CC_EQ); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 2); -+ default: -+ asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); -+ emit_ai(as, PPCI_CMPWI, RID_RET, 0); -+ break; -+ } -+ asm_gencall(as, ci, args); -+} -+#endif -+ - #if LJ_HASFFI - /* 64 bit integer comparisons. */ - static void asm_comp64(ASMState *as, IRIns *ir) -@@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRIns *ir) - /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ - static void asm_hiop(ASMState *as, IRIns *ir) - { --#if LJ_HASFFI -+#if LJ_HASFFI || LJ_SOFTFP - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ -+#if LJ_HASFFI && !LJ_SOFTFP - if (usehi || uselo) - asm_conv64(as, ir); - return; -+#endif - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. 
*/ -+#if LJ_SOFTFP -+ if (!irt_isint(ir->t)) { -+ asm_sfpcomp(as, ir-1); -+ return; -+ } -+#endif -+#if LJ_HASFFI - asm_comp64(as, ir); -+#endif -+ return; -+#if LJ_SOFTFP -+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { -+ as->curins--; /* Always skip the loword min/max. */ -+ if (uselo || usehi) -+ asm_sfpmin_max(as, ir-1); - return; -+#endif - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { -@@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns *ir) - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -+#if LJ_HASFFI - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; -+#endif -+#if LJ_SOFTFP -+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: -+ case IR_STRTO: -+ if (!uselo) -+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ -+ break; -+#endif - case IR_CALLN: -+ case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -+#if LJ_SOFTFP -+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -+#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; -@@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -+#if LJ_SOFTFP -+ Reg tmp; -+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); -+ lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ -+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); -+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); -+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); -+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); -+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); -+#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); -+#endif - } else { - Reg type; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); -@@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); -+#if LJ_SOFTFP -+ } else if ((sn & SNAP_SOFTFPNUM)) { -+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); -+#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } -@@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) - int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) -- if (args[i] && irt_isfp(IR(args[i])->t)) { -+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; -- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -+ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
REGSP_HINT(RID_FPRET) : -+ REGSP_HINT(RID_RET); - } - - static void asm_setup_target(ASMState *as) --- -2.20.1 - diff --git a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch b/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch deleted file mode 100644 index 7e9dd8a..0000000 --- a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 05fbdf565c700365d22e38f11478101a0d92a23e Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 10 Sep 2017 14:05:30 +0200 -Subject: [PATCH 14/72] x64/LJ_GC64: Fix type-check-only variant of SLOAD. - -Thanks to Peter Cawley. ---- - src/lj_asm_x86.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h -index 55c02d2..af54dc7 100644 ---- a/src/lj_asm_x86.h -+++ b/src/lj_asm_x86.h -@@ -1759,7 +1759,7 @@ static void asm_sload(ASMState *as, IRIns *ir) - emit_i8(as, irt_toitype(t)); - emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); - emit_shifti(as, XOg_SAR|REX_64, tmp, 47); -- emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); -+ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); - #else - } else { - emit_i8(as, irt_toitype(t)); --- -2.20.1 - diff --git a/0015-MIPS64-Hide-internal-function.patch b/0015-MIPS64-Hide-internal-function.patch deleted file mode 100644 index 0e2f4fd..0000000 --- a/0015-MIPS64-Hide-internal-function.patch +++ /dev/null @@ -1,26 +0,0 @@ -From bf12f1dafb157008b963f829b57b2472b6993cc8 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 18 Sep 2017 09:50:22 +0200 -Subject: [PATCH 15/72] MIPS64: Hide internal function. - ---- - src/lj_ccall.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/lj_ccall.c b/src/lj_ccall.c -index 799be48..25e938c 100644 ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -848,7 +848,8 @@ noth: /* Not a homogeneous float/double aggregate. */ - return 0; /* Struct is in GPRs. */ - } - --void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) -+static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, -+ int ft) - { - if (LJ_ABI_SOFTFP ? ft : - ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { --- -2.20.1 - diff --git a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch b/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch deleted file mode 100644 index 66f5bf0..0000000 --- a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch +++ /dev/null @@ -1,34 +0,0 @@ -commit 6a2d8b0b4d49eb5aac600c219e5903420806e56e -Merge: bf12f1d 0c0e7b1 -Author: Mike Pall <mike> -Date: Wed Sep 20 19:42:34 2017 +0200 - - Merge branch 'master' into v2.1 - -From 0c0e7b168ea147866835954267c151ef789f64fb Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 20 Sep 2017 19:39:50 +0200 -Subject: [PATCH 16/72] DynASM/x86: Fix potential REL_A overflow. - -Thanks to Joshua Haberman. 
---- - dynasm/dasm_x86.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h -index 90dc5d1..f9260b0 100644 ---- a/dynasm/dasm_x86.h -+++ b/dynasm/dasm_x86.h -@@ -395,7 +395,8 @@ int dasm_encode(Dst_DECL, void *buffer) - } - case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; - b++; n = (int)(ptrdiff_t)D->globals[-n]; -- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ -+ case DASM_REL_A: rel_a: -+ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ - case DASM_REL_PC: rel_pc: { - int shrink = *b++; - int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } --- -2.20.1 - diff --git a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch b/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch deleted file mode 100644 index aff6f20..0000000 --- a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch +++ /dev/null @@ -1,29 +0,0 @@ -From b4ed3219a1a98dd9fe7d1e3eeea3b82f5a780948 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 2 Oct 2017 09:22:46 +0200 -Subject: [PATCH 17/72] LJ_GC64: Fix ir_khash for non-string GCobj. - -Contributed by Peter Cawley. ---- - src/lj_asm.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/lj_asm.c b/src/lj_asm.c -index bed2268..d961927 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -1017,7 +1017,11 @@ static uint32_t ir_khash(IRIns *ir) - } else { - lua_assert(irt_isgcv(ir->t)); - lo = u32ptr(ir_kgc(ir)); -+#if LJ_GC64 -+ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); -+#else - hi = lo + HASH_BIAS; -+#endif - } - return hashrot(lo, hi); - } --- -2.20.1 - diff --git a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch b/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch deleted file mode 100644 index d604876..0000000 --- a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 850f8c59d3d04a9847f21f32a6c36d8269b5b6b1 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 2 Oct 2017 23:10:56 +0200 -Subject: [PATCH 18/72] LJ_GC64: Make ASMREF_L references 64 bit. - -Reported by Yichun Zhang. ---- - src/lj_asm.c | 1 + - src/lj_ir.h | 4 +++- - src/lj_opt_sink.c | 1 + - 3 files changed, 5 insertions(+), 1 deletion(-) - -diff --git a/src/lj_asm.c b/src/lj_asm.c -index d961927..753fe6b 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -2015,6 +2015,7 @@ static void asm_setup_regsp(ASMState *as) - ir->prev = REGSP_INIT; - if (irt_is64(ir->t) && ir->o != IR_KNULL) { - #if LJ_GC64 -+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ - ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ - #else - /* Make life easier for backends by putting address of constant in i. */ -diff --git a/src/lj_ir.h b/src/lj_ir.h -index 34c2785..8057a75 100644 ---- a/src/lj_ir.h -+++ b/src/lj_ir.h -@@ -377,10 +377,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1; - #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) - - #if LJ_GC64 -+/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. 
*/ - #define IRT_IS64 \ - ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\ - (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\ -- (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)) -+ (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\ -+ (1u<<IRT_NIL)) - #elif LJ_64 - #define IRT_IS64 \ - ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) -diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c -index 929ccb6..a16d112 100644 ---- a/src/lj_opt_sink.c -+++ b/src/lj_opt_sink.c -@@ -219,6 +219,7 @@ static void sink_sweep_ins(jit_State *J) - for (ir = IR(J->cur.nk); ir < irbase; ir++) { - irt_clearmark(ir->t); - ir->prev = REGSP_INIT; -+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ - if (irt_is64(ir->t) && ir->o != IR_KNULL) - ir++; - } --- -2.20.1 - diff --git a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch b/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch deleted file mode 100644 index c999ce8..0000000 --- a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 9f0caad0e43f97a4613850b3874b851cb1bc301d Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 8 Nov 2017 12:53:05 +0100 -Subject: [PATCH 19/72] Fix FOLD rule for strength reduction of widening. - -Reported by Matthew Burk. ---- - src/lj_opt_fold.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c -index 3d0e35a..5dc7ae3 100644 ---- a/src/lj_opt_fold.c -+++ b/src/lj_opt_fold.c -@@ -1052,7 +1052,7 @@ LJFOLDF(simplify_conv_sext) - if (ref == J->scev.idx) { - IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; - lua_assert(irt_isint(J->scev.t)); -- if (lo && IR(lo)->i + ofs >= 0) { -+ if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { - ok_reduce: - #if LJ_TARGET_X64 - /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ --- -2.20.1 - diff --git a/0020-ARM64-Fix-assembly-of-HREFK.patch b/0020-ARM64-Fix-assembly-of-HREFK.patch deleted file mode 100644 index 3200304..0000000 --- a/0020-ARM64-Fix-assembly-of-HREFK.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 06cd9fce7df440323647174f1ca4a01281ec8acd Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 8 Nov 2017 12:53:48 +0100 -Subject: [PATCH 20/72] ARM64: Fix assembly of HREFK. - -Reported by Jason Teplitz. ---- - src/lj_asm_arm64.h | 11 +++++------ - 1 file changed, 5 insertions(+), 6 deletions(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index 8fd92e7..cbb186d 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -869,14 +869,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir) - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - int bigofs = !emit_checkofs(A64I_LDRx, ofs); -- RegSet allow = RSET_GPR; - Reg dest = (ra_used(ir) || bigofs) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; -- Reg node = ra_alloc1(as, ir->op1, allow); -- Reg key = ra_scratch(as, rset_clear(allow, node)); -- Reg idx = node; -+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); -+ Reg key, idx = node; -+ RegSet allow = rset_exclude(RSET_GPR, node); - uint64_t k; - lua_assert(ofs % sizeof(Node) == 0); -- rset_clear(allow, key); - if (bigofs) { - idx = dest; - rset_clear(allow, dest); -@@ -892,7 +890,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) - } else { - k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); - } -- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); -+ key = ra_scratch(as, allow); -+ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); - emit_lso(as, A64I_LDRx, key, idx, kofs); - if (bigofs) - emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); --- -2.20.1 - diff --git a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch b/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch deleted file mode 100644 index 80fad2f..0000000 --- a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 99cdfbf6a1e8856f64908072ef10443a7eab14f2 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 8 Nov 2017 12:54:03 +0100 -Subject: [PATCH 21/72] MIPS64: Fix register allocation in assembly of HREF. - -Contributed by James Cowgill. ---- - src/lj_asm_mips.h | 42 +++++++++++++++++++++++++----------------- - 1 file changed, 25 insertions(+), 17 deletions(-) - -diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h -index 1406a87..3a4679b 100644 ---- a/src/lj_asm_mips.h -+++ b/src/lj_asm_mips.h -@@ -859,6 +859,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; -+#if LJ_64 -+ Reg cmp64 = RID_NONE; -+#endif - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(refkey); -@@ -901,6 +904,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - #endif - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); -+#if LJ_64 -+ if (LJ_SOFTFP || !irt_isnum(kt)) { -+ /* Allocate cmp64 register used for 64-bit comparisons */ -+ if (LJ_SOFTFP && irt_isnum(kt)) { -+ cmp64 = key; -+ } else if (!isk && irt_isaddr(kt)) { -+ cmp64 = tmp2; -+ } else { -+ int64_t k; -+ if (isk && irt_isaddr(kt)) { -+ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; -+ } else { -+ lua_assert(irt_ispri(kt) && !irt_isnil(kt)); -+ k = ~((int64_t)~irt_toitype(ir->t) << 47); -+ } -+ cmp64 = ra_allock(as, k, allow); -+ rset_clear(allow, cmp64); -+ } -+ } -+#endif - - /* Key not found in chain: jump to exit (if merged) or load niltv. 
*/ - l_end = emit_label(as); -@@ -943,24 +966,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); - emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); -- } else if (LJ_SOFTFP && irt_isnum(kt)) { -- emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); -- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); -- } else if (irt_isaddr(kt)) { -- Reg refk = tmp2; -- if (isk) { -- int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; -- refk = ra_allock(as, k, allow); -- rset_clear(allow, refk); -- } -- emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); -- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } else { -- Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); -- rset_clear(allow, pri); -- lua_assert(irt_ispri(kt) && !irt_isnil(kt)); -- emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); -- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); -+ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); -+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); - if (!isk && irt_isaddr(kt)) { --- -2.20.1 - diff --git a/0022-ARM64-Fix-xpcall-error-case.patch b/0022-ARM64-Fix-xpcall-error-case.patch deleted file mode 100644 index ec05a7c..0000000 --- a/0022-ARM64-Fix-xpcall-error-case.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 33082a6f4778aa152f6a4a684a7fe79436f1ecb6 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 16 Nov 2017 12:53:34 +0100 -Subject: [PATCH 22/72] ARM64: Fix xpcall() error case. - -Thanks to Stefan Pejic. ---- - src/vm_arm64.dasc | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc -index 3eaf376..241c58a 100644 ---- a/src/vm_arm64.dasc -+++ b/src/vm_arm64.dasc -@@ -1185,12 +1185,12 @@ static void build_subroutines(BuildCtx *ctx) - | subs NARGS8:RC, NARGS8:RC, #16 - | blo ->fff_fallback - | mov RB, BASE -- | add BASE, BASE, #24 - | asr ITYPE, CARG2, #47 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | cmn ITYPE, #-LJ_TFUNC - | add PC, TMP0, #24+FRAME_PCALL - | bne ->fff_fallback // Traceback must be a function. -+ | add BASE, BASE, #24 - | stp CARG2, CARG1, [RB] // Swap function and traceback. - | cbz NARGS8:RC, ->vm_call_dispatch - | b <1 --- -2.20.1 - diff --git a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch b/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch deleted file mode 100644 index 740a5a7..0000000 --- a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 7dbf0b05f1228c1c719866db5e5f3d58f87f74c8 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 16 Nov 2017 12:58:12 +0100 -Subject: [PATCH 23/72] Fix saved bytecode encapsulated in ELF objects. - -Thanks to Dimitry Andric. 
---- - src/jit/bcsave.lua | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua -index aa677df..c94064e 100644 ---- a/src/jit/bcsave.lua -+++ b/src/jit/bcsave.lua -@@ -275,7 +275,7 @@ typedef struct { - o.sect[2].size = fofs(ofs) - o.sect[3].type = f32(3) -- .strtab - o.sect[3].ofs = fofs(sofs + ofs) -- o.sect[3].size = fofs(#symname+1) -+ o.sect[3].size = fofs(#symname+2) - ffi.copy(o.space+ofs+1, symname) - ofs = ofs + #symname + 2 - o.sect[4].type = f32(1) -- .rodata --- -2.20.1 - diff --git a/0024-ARM64-Fix-xpcall-error-case-really.patch b/0024-ARM64-Fix-xpcall-error-case-really.patch deleted file mode 100644 index ab518e1..0000000 --- a/0024-ARM64-Fix-xpcall-error-case-really.patch +++ /dev/null @@ -1,37 +0,0 @@ -From d417ded17945b4211608d497d50b509e0274f5e0 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sat, 18 Nov 2017 12:23:57 +0100 -Subject: [PATCH 24/72] ARM64: Fix xpcall() error case (really). -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Thanks to François Perrad and Stefan Pejic. ---- - src/vm_arm64.dasc | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc -index 241c58a..c55794a 100644 ---- a/src/vm_arm64.dasc -+++ b/src/vm_arm64.dasc -@@ -1182,7 +1182,7 @@ static void build_subroutines(BuildCtx *ctx) - |.ffunc xpcall - | ldp CARG1, CARG2, [BASE] - | ldrb TMP0w, GL->hookmask -- | subs NARGS8:RC, NARGS8:RC, #16 -+ | subs NARGS8:TMP1, NARGS8:RC, #16 - | blo ->fff_fallback - | mov RB, BASE - | asr ITYPE, CARG2, #47 -@@ -1190,6 +1190,7 @@ static void build_subroutines(BuildCtx *ctx) - | cmn ITYPE, #-LJ_TFUNC - | add PC, TMP0, #24+FRAME_PCALL - | bne ->fff_fallback // Traceback must be a function. -+ | mov NARGS8:RC, NARGS8:TMP1 - | add BASE, BASE, #24 - | stp CARG2, CARG1, [RB] // Swap function and traceback. - | cbz NARGS8:RC, ->vm_call_dispatch --- -2.20.1 - diff --git a/0025-MIPS64-Fix-xpcall-error-case.patch b/0025-MIPS64-Fix-xpcall-error-case.patch deleted file mode 100644 index 5b17e81..0000000 --- a/0025-MIPS64-Fix-xpcall-error-case.patch +++ /dev/null @@ -1,39 +0,0 @@ -From ea7071d3c30b6432bfe6f8a9d263e0285cec25e3 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sat, 18 Nov 2017 12:25:35 +0100 -Subject: [PATCH 25/72] MIPS64: Fix xpcall() error case. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Thanks to François Perrad and Stefan Pejic. ---- - src/vm_mips64.dasc | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc -index 75b38de..a78cd25 100644 ---- a/src/vm_mips64.dasc -+++ b/src/vm_mips64.dasc -@@ -1399,15 +1399,16 @@ static void build_subroutines(BuildCtx *ctx) - |. nop - | - |.ffunc xpcall -- | daddiu NARGS8:RC, NARGS8:RC, -16 -+ | daddiu NARGS8:TMP0, NARGS8:RC, -16 - | ld CARG1, 0(BASE) - | ld CARG2, 8(BASE) -- | bltz NARGS8:RC, ->fff_fallback -+ | bltz NARGS8:TMP0, ->fff_fallback - |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | gettp AT, CARG2 - | daddiu AT, AT, -LJ_TFUNC - | bnez AT, ->fff_fallback // Traceback must be a function. - |. move TMP2, BASE -+ | move NARGS8:RC, NARGS8:TMP0 - | daddiu BASE, BASE, 24 - | // Remember active hook before pcall. 
- | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT --- -2.20.1 - diff --git a/0026-Fix-IR_BUFPUT-assembly.patch b/0026-Fix-IR_BUFPUT-assembly.patch deleted file mode 100644 index c942467..0000000 --- a/0026-Fix-IR_BUFPUT-assembly.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 58d0dde0a2df49abc991decbabff15230010829a Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 14 Jan 2018 13:57:00 +0100 -Subject: [PATCH 26/72] Fix IR_BUFPUT assembly. - -Thanks to Peter Cawley. ---- - src/lj_asm.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/lj_asm.c b/src/lj_asm.c -index 753fe6b..5f83779 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -1119,7 +1119,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; - IRRef args[3]; - IRIns *irs; -- int kchar = -1; -+ int kchar = -129; - args[0] = ir->op1; /* SBuf * */ - args[1] = ir->op2; /* GCstr * */ - irs = IR(ir->op2); -@@ -1127,7 +1127,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) - if (irs->o == IR_KGC) { - GCstr *s = ir_kstr(irs); - if (s->len == 1) { /* Optimize put of single-char string constant. */ -- kchar = strdata(s)[0]; -+ kchar = (int8_t)strdata(s)[0]; /* Signed! */ - args[1] = ASMREF_TMP1; /* int, truncated to char */ - ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; - } -@@ -1154,7 +1154,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) - asm_gencall(as, ci, args); - if (args[1] == ASMREF_TMP1) { - Reg tmp = ra_releasetmp(as, ASMREF_TMP1); -- if (kchar == -1) -+ if (kchar == -129) - asm_tvptr(as, tmp, irs->op1); - else - ra_allockreg(as, kchar, tmp); --- -2.20.1 - diff --git a/0027-Fix-string.format-c-0.patch b/0027-Fix-string.format-c-0.patch deleted file mode 100644 index caece09..0000000 --- a/0027-Fix-string.format-c-0.patch +++ /dev/null @@ -1,15 +0,0 @@ -commit 4660dbfa8a4f9eea5218b739075d04faadfeeef6 -Merge: 58d0dde 430d9f8 -Author: Mike Pall <mike> -Date: Sun Jan 14 14:26:10 2018 +0100 - - Merge branch 'master' into v2.1 - -From 430d9f8f7ebb779948dbd43944b876b1a3f58551 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 14 Jan 2018 14:11:59 +0100 -Subject: [PATCH 27/72] Fix string.format("%c", 0). - ---- - src/lib_string.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/0028-Fix-ARMv8-32-bit-subset-detection.patch b/0028-Fix-ARMv8-32-bit-subset-detection.patch deleted file mode 100644 index 00687af..0000000 --- a/0028-Fix-ARMv8-32-bit-subset-detection.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 9eaad8574f5b2271b981cd31966b1e832cd8de12 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 18 Jan 2018 12:24:36 +0100 -Subject: [PATCH 28/72] Fix ARMv8 (32 bit subset) detection. - -Thanks to Markus Oberhumber. ---- - src/lj_arch.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index 5962f3a..fcebd84 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -201,7 +201,7 @@ - #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ - #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - --#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ -+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__ - #define LJ_ARCH_VERSION 80 - #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ - #define LJ_ARCH_VERSION 70 --- -2.20.1 - diff --git a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch b/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch deleted file mode 100644 index 70ae35a..0000000 --- a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch +++ /dev/null @@ -1,28 +0,0 @@ -From c88602f080dcafea6ba222a2f7cc1ea0e41ef3cc Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 18 Jan 2018 12:29:39 +0100 -Subject: [PATCH 29/72] Fix LuaJIT API docs for LUAJIT_MODE_*. - -Thanks to sunfishgao. ---- - doc/ext_c_api.html | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html -index 041a722..4bb8251 100644 ---- a/doc/ext_c_api.html -+++ b/doc/ext_c_api.html -@@ -89,8 +89,8 @@ other Lua/C API functions). - </p> - <p> - The third argument specifies the mode, which is 'or'ed with a flag. --The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on, --<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or -+The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off, -+<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or - <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code. - </p> - <p> --- -2.20.1 - diff --git a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch b/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch deleted file mode 100644 index 8ee3a17..0000000 --- a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 8071aa4ad65cf09e3b7adda4a7787d8897e5314c Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 29 Jan 2018 12:12:29 +0100 -Subject: [PATCH 30/72] MIPS64: Fix soft-float +-0.0 vs. +-0.0 comparison. - -Thanks to Stefan Pejic. ---- - src/vm_mips64.dasc | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc -index a78cd25..0a3f8e5 100644 ---- a/src/vm_mips64.dasc -+++ b/src/vm_mips64.dasc -@@ -2661,7 +2661,7 @@ static void build_subroutines(BuildCtx *ctx) - |. slt CRET1, CARG2, CARG1 - |8: - | jr ra -- |. nop -+ |. li CRET1, 0 - |9: - | jr ra - |. move CRET1, CRET2 --- -2.20.1 - diff --git a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch b/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch deleted file mode 100644 index b95ca0c..0000000 --- a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch +++ /dev/null @@ -1,69 +0,0 @@ -commit 74c544d68c07bcd416225598cdf15f88e62fd457 -Merge: 8071aa4 b03a56f -Author: Mike Pall <mike> -Date: Mon Jan 29 12:53:42 2018 +0100 - - Merge branch 'master' into v2.1 - -From b03a56f28ec360bbcf43091afd0607890a4a33c7 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 29 Jan 2018 12:47:08 +0100 -Subject: [PATCH 31/72] FFI: Don't assert on #1LL (5.2 compatibility mode - only). - -Reported by Denis Golovan. 
---- - src/lib_ffi.c | 2 +- - src/lj_carith.c | 9 +++++++++ - src/lj_carith.h | 1 + - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/src/lib_ffi.c b/src/lib_ffi.c -index f2f2ede..83483d9 100644 ---- a/src/lib_ffi.c -+++ b/src/lib_ffi.c -@@ -193,7 +193,7 @@ LJLIB_CF(ffi_meta___eq) LJLIB_REC(cdata_arith MM_eq) - - LJLIB_CF(ffi_meta___len) LJLIB_REC(cdata_arith MM_len) - { -- return ffi_arith(L); -+ return lj_carith_len(L); - } - - LJLIB_CF(ffi_meta___lt) LJLIB_REC(cdata_arith MM_lt) -diff --git a/src/lj_carith.c b/src/lj_carith.c -index 6224dee..c34596c 100644 ---- a/src/lj_carith.c -+++ b/src/lj_carith.c -@@ -272,6 +272,15 @@ int lj_carith_op(lua_State *L, MMS mm) - return lj_carith_meta(L, cts, &ca, mm); - } - -+/* No built-in functionality for length of cdata. */ -+int lj_carith_len(lua_State *L) -+{ -+ CTState *cts = ctype_cts(L); -+ CDArith ca; -+ carith_checkarg(L, cts, &ca); -+ return lj_carith_meta(L, cts, &ca, MM_len); -+} -+ - /* -- 64 bit bit operations helpers --------------------------------------- */ - - #if LJ_64 -diff --git a/src/lj_carith.h b/src/lj_carith.h -index 3c15591..82fc824 100644 ---- a/src/lj_carith.h -+++ b/src/lj_carith.h -@@ -11,6 +11,7 @@ - #if LJ_HASFFI - - LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); -+LJ_FUNC int lj_carith_len(lua_State *L); - - #if LJ_32 - LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); - --- -2.20.1 - diff --git a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch b/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch deleted file mode 100644 index 192f271..0000000 --- a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch +++ /dev/null @@ -1,291 +0,0 @@ -commit 0bf46e1edf94c43795b5e491efe682ab70974ce7 -Merge: 74c544d d4ee803 -Author: Mike Pall <mike> -Date: Mon Jan 29 13:19:30 2018 +0100 - - Merge branch 'master' into v2.1 - -From d4ee80342770d1281e2ce877f8ae8ab1d99e6528 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 29 Jan 2018 13:06:13 +0100 -Subject: [PATCH 32/72] Fix GCC 7 -Wimplicit-fallthrough warnings. - ---- - dynasm/dasm_arm.h | 2 ++ - dynasm/dasm_mips.h | 1 + - dynasm/dasm_ppc.h | 1 + - dynasm/dasm_x86.h | 14 ++++++++++++-- - src/lj_asm.c | 3 ++- - src/lj_cparse.c | 10 ++++++++++ - src/lj_err.c | 1 + - src/lj_opt_sink.c | 2 +- - src/lj_parse.c | 3 ++- - src/luajit.c | 1 + - 10 files changed, 33 insertions(+), 5 deletions(-) - -diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h -index a43f7c6..1d404cc 100644 ---- a/dynasm/dasm_arm.h -+++ b/dynasm/dasm_arm.h -@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...) - case DASM_IMMV8: - CK((n & 3) == 0, RANGE_I); - n >>= 2; -+ /* fallthrough */ - case DASM_IMML8: - case DASM_IMML12: - CK(n >= 0 ? 
((n>>((ins>>5)&31)) == 0) : -@@ -371,6 +372,7 @@ int dasm_encode(Dst_DECL, void *buffer) - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); -+ /* fallthrough */ - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; -diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h -index 7eac669..46af034 100644 ---- a/dynasm/dasm_mips.h -+++ b/dynasm/dasm_mips.h -@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer) - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); -+ /* fallthrough */ - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n); -diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h -index 6110361..81b9a76 100644 ---- a/dynasm/dasm_ppc.h -+++ b/dynasm/dasm_ppc.h -@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer) - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); -+ /* fallthrough */ - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); -diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h -index f9260b0..8ae911d 100644 ---- a/dynasm/dasm_x86.h -+++ b/dynasm/dasm_x86.h -@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...) - switch (action) { - case DASM_DISP: - if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } -- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; -+ /* fallthrough */ -+ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ - case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ - case DASM_IMM_D: ofs += 4; break; - case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; - case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; -- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; -+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ - case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; - case DASM_SPACE: p++; ofs += n; break; - case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ -@@ -323,11 +324,14 @@ int dasm_link(Dst_DECL, size_t *szp) - pos += 2; - break; - } -+ /* fallthrough */ - case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; -+ /* fallthrough */ - case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: - case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: - case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; - case DASM_LABEL_LG: p++; -+ /* fallthrough */ - case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ - case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. 
*/ - case DASM_EXTERN: p += 2; break; -@@ -385,12 +389,15 @@ int dasm_encode(Dst_DECL, void *buffer) - if (mrm != 5) { mm[-1] -= 0x80; break; } } - if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; - } -+ /* fallthrough */ - case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; - case DASM_IMM_DB: if (((n+128)&-256) == 0) { - db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; - } else mark = NULL; -+ /* fallthrough */ - case DASM_IMM_D: wd: dasmd(n); break; - case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; -+ /* fallthrough */ - case DASM_IMM_W: dasmw(n); break; - case DASM_VREG: { - int t = *p++; -@@ -397,6 +404,7 @@ - } - case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; - b++; n = (int)(ptrdiff_t)D->globals[-n]; -+ /* fallthrough */ - case DASM_REL_A: rel_a: - n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ - case DASM_REL_PC: rel_pc: { -@@ -407,6 +415,7 @@ int dasm_encode(Dst_DECL, void *buffer) - } - case DASM_IMM_LG: - p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } -+ /* fallthrough */ - case DASM_IMM_PC: { - int *pb = DASM_POS2PTR(D, n); - n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); -@@ -427,6 +436,7 @@ int dasm_encode(Dst_DECL, void *buffer) - case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; - case DASM_MARK: mark = cp; break; - case DASM_ESC: action = *p++; -+ /* fallthrough */ - default: *cp++ = action; break; - case DASM_SECTION: case DASM_STOP: goto stop; - } -diff --git a/src/lj_asm.c b/src/lj_asm.c -index 02714d4..dd7186f 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -2136,6 +2136,7 @@ static void asm_setup_regsp(ASMState *as) - case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: - if (REGARG_NUMGPR < 3 && as->evenspill < 3) - as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ -+ /* fallthrough */ - #if LJ_TARGET_X86 && LJ_HASFFI - if (0) { - case IR_CNEW: -@@ -2176,7 +2177,7 @@ static void asm_setup_regsp(ASMState *as) - continue; - #endif - } -- /* fallthrough for integer POW */ -+ /* fallthrough */ /* for integer POW */ - case IR_DIV: case IR_MOD: - if (!irt_isnum(ir->t)) { - ir->prev = REGSP_HINT(RID_RET); -diff --git a/src/lj_cparse.c b/src/lj_cparse.c -index 2ba50a7..f111537 100644 ---- a/src/lj_cparse.c -+++ b/src/lj_cparse.c -@@ -590,28 +590,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - k->id = k2.id > k3.id ? 
k2.id : k3.id; - continue; - } -+ /* fallthrough */ - case 1: - if (cp_opt(cp, CTOK_OROR)) { - cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 2: - if (cp_opt(cp, CTOK_ANDAND)) { - cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 3: - if (cp_opt(cp, '|')) { - cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 4: - if (cp_opt(cp, '^')) { - cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 5: - if (cp_opt(cp, '&')) { - cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 6: - if (cp_opt(cp, CTOK_EQ)) { - cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32; -@@ -620,6 +626,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 7: - if (cp_opt(cp, '<')) { - cp_expr_sub(cp, &k2, 8); -@@ -654,6 +661,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - k->id = CTID_INT32; - continue; - } -+ /* fallthrough */ - case 8: - if (cp_opt(cp, CTOK_SHL)) { - cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32; -@@ -666,6 +674,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - k->u32 = k->u32 >> k2.u32; - continue; - } -+ /* fallthrough */ - case 9: - if (cp_opt(cp, '+')) { - cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32; -@@ -675,6 +684,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) - } else if (cp_opt(cp, '-')) { - cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result; - } -+ /* fallthrough */ - case 10: - if (cp_opt(cp, '*')) { - cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result; -diff --git a/src/lj_err.c b/src/lj_err.c -index 54f42c3..13a1ded 100644 ---- a/src/lj_err.c -+++ b/src/lj_err.c -@@ -153,6 +153,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) - case FRAME_CONT: /* Continuation frame. */ - if (frame_iscont_fficb(frame)) - goto unwind_c; -+ /* fallthrough */ - case FRAME_VARG: /* Vararg frame. */ - frame = frame_prevd(frame); - break; -diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c -index 6a00d04..4efe395 100644 ---- a/src/lj_opt_sink.c -+++ b/src/lj_opt_sink.c -@@ -100,8 +100,8 @@ static void sink_mark_ins(jit_State *J) - (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && - !sink_checkphi(J, ir, (ir+1)->op2)))) - irt_setmark(ir->t); /* Mark ineligible allocation. */ -- /* fallthrough */ - #endif -+ /* fallthrough */ - case IR_USTORE: - irt_setmark(IR(ir->op2)->t); /* Mark stored value. 
*/ - break; -diff --git a/src/lj_parse.c b/src/lj_parse.c -index 9e5976f..6785495 100644 ---- a/src/lj_parse.c -+++ b/src/lj_parse.c -@@ -2696,7 +2696,8 @@ static int parse_stmt(LexState *ls) - lj_lex_next(ls); - parse_goto(ls); - break; -- } /* else: fallthrough */ -+ } -+ /* fallthrough */ - default: - parse_call_assign(ls); - break; -diff --git a/src/luajit.c b/src/luajit.c -index 9e15b26..0e18dc5 100644 ---- a/src/luajit.c -+++ b/src/luajit.c -@@ -419,6 +419,7 @@ static int collectargs(char **argv, int *flags) - break; - case 'e': - *flags |= FLAGS_EXEC; -+ /* fallthrough */ - case 'j': /* LuaJIT extension */ - case 'l': - *flags |= FLAGS_OPTION; --- -2.20.1 - diff --git a/0033-Clear-stack-after-print_jit_status-in-CLI.patch b/0033-Clear-stack-after-print_jit_status-in-CLI.patch deleted file mode 100644 index 53a4acf..0000000 --- a/0033-Clear-stack-after-print_jit_status-in-CLI.patch +++ /dev/null @@ -1,32 +0,0 @@ -commit fddef924097f28c46a0a5b45483a6086b33cab81 -Merge: 0bf46e1 03cd5aa -Author: Mike Pall <mike> -Date: Mon Jan 29 13:28:53 2018 +0100 - - Merge branch 'master' into v2.1 - -From 03cd5aa749c1bc3bb4b7d4289236b6096cb3dc85 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Mon, 29 Jan 2018 13:25:51 +0100 -Subject: [PATCH 33/72] Clear stack after print_jit_status() in CLI. - -Suggested by Hydroque. ---- - src/luajit.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/luajit.c b/src/luajit.c -index 0e18dc5..9ede59c 100644 ---- a/src/luajit.c -+++ b/src/luajit.c -@@ -151,6 +151,7 @@ static void print_jit_status(lua_State *L) - fputs(s, stdout); - } - putc('\n', stdout); -+ lua_settop(L, 0); /* clear stack */ - } - - static void createargtable(lua_State *L, char **argv, int argc, int argf) --- -2.20.1 - diff --git a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch b/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch deleted file mode 100644 index 1b90fb3..0000000 --- a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 046129dbdda5261c1b17469a2895a113d14c070a Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Tue, 27 Feb 2018 23:02:23 +0100 -Subject: [PATCH 34/72] Fix rechaining of pseudo-resurrected string keys. - -This is a serious bug. But extremely hard to reproduce, so it went -undetected for 8 years. One needs two resurrections with different -main nodes, which are both in a hash chain which gets relinked on -key insertion where the colliding node is in a non-main position. Phew. - -Thanks to lbeiming. ---- - src/lj_tab.c | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/src/lj_tab.c b/src/lj_tab.c -index 50f447e..f2f3c0b 100644 ---- a/src/lj_tab.c -+++ b/src/lj_tab.c -@@ -457,6 +457,29 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) - freenode->next = nn->next; - nn->next = n->next; - setmref(n->next, nn); -+ /* -+ ** Rechaining a resurrected string key creates a new dilemma: -+ ** Another string key may have originally been resurrected via -+ ** _any_ of the previous nodes as a chain anchor. Including -+ ** a node that had to be moved, which makes them unreachable. -+ ** It's not feasible to check for all previous nodes, so rechain -+ ** any string key that's currently in a non-main positions. 
-+ */ -+ while ((nn = nextnode(freenode))) { -+ if (tvisstr(&nn->key) && !tvisnil(&nn->val)) { -+ Node *mn = hashstr(t, strV(&nn->key)); -+ if (mn != freenode) { -+ freenode->next = nn->next; -+ nn->next = mn->next; -+ setmref(mn->next, nn); -+ } else { -+ freenode = nn; -+ } -+ } else { -+ freenode = nn; -+ } -+ } -+ break; - } else { - freenode = nn; - } --- -2.20.1 - diff --git a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch b/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch deleted file mode 100644 index 832809e..0000000 --- a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch +++ /dev/null @@ -1,50 +0,0 @@ -From fe651bf6e2b4d02b624be3c289378c08bab2fa9b Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Tue, 27 Feb 2018 23:22:40 +0100 -Subject: [PATCH 35/72] DynASM/x86: Add BMI1 and BMI2 instructions. - -Thanks to Peter Cawley. ---- - dynasm/dasm_x86.lua | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua -index 4c031e2..c1d267a 100644 ---- a/dynasm/dasm_x86.lua -+++ b/dynasm/dasm_x86.lua -@@ -955,6 +955,7 @@ end - -- "u" Use VEX encoding, vvvv unused. - -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is - -- removed from the list used by future characters). -+-- "w" Use VEX encoding, vvvv from 3rd operand. - -- "L" Force VEX.L - -- - -- All of the following characters force a flush of the opcode: -@@ -1677,6 +1678,24 @@ local map_op = { - -- Intel ADX - adcx_2 = "rmqd:660F38F6rM", - adox_2 = "rmqd:F30F38F6rM", -+ -+ -- BMI1 -+ andn_3 = "rrmqd:0F38VF2rM", -+ bextr_3 = "rmrqd:0F38wF7rM", -+ blsi_2 = "rmqd:0F38vF33m", -+ blsmsk_2 = "rmqd:0F38vF32m", -+ blsr_2 = "rmqd:0F38vF31m", -+ tzcnt_2 = "rmqdw:F30FBCrM", -+ -+ -- BMI2 -+ bzhi_3 = "rmrqd:0F38wF5rM", -+ mulx_3 = "rrmqd:F20F38VF6rM", -+ pdep_3 = "rrmqd:F20F38VF5rM", -+ pext_3 = "rrmqd:F30F38VF5rM", -+ rorx_3 = "rmSqd:F20F3AuF0rMS", -+ sarx_3 = "rmrqd:F30F38wF7rM", -+ shrx_3 = "rmrqd:F20F38wF7rM", -+ shlx_3 = "rmrqd:660F38wF7rM", - } - - ------------------------------------------------------------------------------ --- -2.20.1 - diff --git a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch b/0036-Give-expected-results-for-negative-non-base-10-numbe.patch deleted file mode 100644 index 3279dfe..0000000 --- a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch +++ /dev/null @@ -1,55 +0,0 @@ -From f3cf0d6e15240098147437fed7bd436ff55fdf8c Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 22 Apr 2018 13:14:28 +0200 -Subject: [PATCH 36/72] Give expected results for negative non-base-10 numbers - in tonumber(). - -This was undefined in Lua 5.1, but it's defined in 5.2. ---- - src/lib_base.c | 27 ++++++++++++++++++--------- - 1 file changed, 18 insertions(+), 9 deletions(-) - -diff --git a/src/lib_base.c b/src/lib_base.c -index 3a75787..d61e876 100644 ---- a/src/lib_base.c -+++ b/src/lib_base.c -@@ -287,18 +287,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) 
- } else { - const char *p = strdata(lj_lib_checkstr(L, 1)); - char *ep; -+ unsigned int neg = 0; - unsigned long ul; - if (base < 2 || base > 36) - lj_err_arg(L, 2, LJ_ERR_BASERNG); -- ul = strtoul(p, &ep, base); -- if (p != ep) { -- while (lj_char_isspace((unsigned char)(*ep))) ep++; -- if (*ep == '\0') { -- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) -- setintV(L->base-1-LJ_FR2, (int32_t)ul); -- else -- setnumV(L->base-1-LJ_FR2, (lua_Number)ul); -- return FFH_RES(1); -+ while (lj_char_isspace((unsigned char)(*p))) p++; -+ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } -+ if (lj_char_isalnum((unsigned char)(*p))) { -+ ul = strtoul(p, &ep, base); -+ if (p != ep) { -+ while (lj_char_isspace((unsigned char)(*ep))) ep++; -+ if (*ep == '\0') { -+ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { -+ if (neg) ul = -ul; -+ setintV(L->base-1-LJ_FR2, (int32_t)ul); -+ } else { -+ lua_Number n = (lua_Number)ul; -+ if (neg) n = -n; -+ setnumV(L->base-1-LJ_FR2, n); -+ } -+ return FFH_RES(1); -+ } - } - } - } --- -2.20.1 - diff --git a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch b/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch deleted file mode 100644 index c0406a5..0000000 --- a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 02b521981a1ab919ff2cd4d9bcaee80baf77dce2 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 22 Apr 2018 13:27:25 +0200 -Subject: [PATCH 37/72] FFI: Add tonumber() specialization for failed - conversions. - -Contributed by Javier Guerra Giraldez. ---- - src/lj_crecord.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lj_crecord.c b/src/lj_crecord.c -index 84fc49e..bc88d63 100644 ---- a/src/lj_crecord.c -+++ b/src/lj_crecord.c -@@ -1661,6 +1661,8 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) - d = ctype_get(cts, CTID_DOUBLE); - J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]); - } else { -+ /* Specialize to the ctype that couldn't be converted. */ -+ argv2cdata(J, J->base[0], &rd->argv[0]); - J->base[0] = TREF_NIL; - } - } --- -2.20.1 - diff --git a/0038-Bump-copyright-date-to-2018.patch b/0038-Bump-copyright-date-to-2018.patch deleted file mode 100644 index 1f9e5eb..0000000 --- a/0038-Bump-copyright-date-to-2018.patch +++ /dev/null @@ -1,387 +0,0 @@ -From cf7a0540a3a9f80fc729211eb21d1e9b72acc89c Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 25 Apr 2018 12:07:08 +0200 -Subject: [PATCH 38/72] Bump copyright date to 2018. - ---- - doc/bluequad-print.css | 2 +- - doc/bluequad.css | 2 +- - doc/changes.html | 5 ++--- - doc/contact.html | 7 +++---- - doc/ext_c_api.html | 5 ++--- - doc/ext_ffi.html | 5 ++--- - doc/ext_ffi_api.html | 5 ++--- - doc/ext_ffi_semantics.html | 5 ++--- - doc/ext_ffi_tutorial.html | 5 ++--- - doc/ext_jit.html | 5 ++--- - doc/extensions.html | 5 ++--- - doc/faq.html | 5 ++--- - doc/install.html | 5 ++--- - doc/luajit.html | 7 +++---- - doc/running.html | 5 ++--- - doc/status.html | 5 ++--- - 16 files changed, 32 insertions(+), 46 deletions(-) - -diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css -index 62e1c16..d5a3ea3 100644 ---- a/doc/bluequad-print.css -+++ b/doc/bluequad-print.css -@@ -1,4 +1,4 @@ --/* Copyright (C) 2004-2017 Mike Pall. -+/* Copyright (C) 2004-2018 Mike Pall. - * - * You are welcome to use the general ideas of this design for your own sites. - * But please do not steal the stylesheet, the layout or the color scheme. 
-diff --git a/doc/bluequad.css b/doc/bluequad.css -index be2c4bf..cfc889a 100644 ---- a/doc/bluequad.css -+++ b/doc/bluequad.css -@@ -1,4 +1,4 @@ --/* Copyright (C) 2004-2017 Mike Pall. -+/* Copyright (C) 2004-2018 Mike Pall. - * - * You are welcome to use the general ideas of this design for your own sites. - * But please do not steal the stylesheet, the layout or the color scheme. -diff --git a/doc/changes.html b/doc/changes.html -index 4a4d4fb..c1848e8 100644 ---- a/doc/changes.html -+++ b/doc/changes.html -@@ -3,8 +3,7 @@ - <head> - <title>LuaJIT Change History</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -1010,7 +1009,7 @@ This is the initial non-public release of LuaJIT. - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/contact.html b/doc/contact.html -index 5e07bde..54ddf74 100644 ---- a/doc/contact.html -+++ b/doc/contact.html -@@ -3,8 +3,7 @@ - <head> - <title>Contact</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -91,7 +90,7 @@ xD("fyZKB8xv"FJytmz8.KAB0u52D") - <h2>Copyright</h2> - <p> - All documentation is --Copyright © 2005-2017 Mike Pall. -+Copyright © 2005-2018 Mike Pall. - </p> - - -@@ -99,7 +98,7 @@ Copyright © 2005-2017 Mike Pall. - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html -index 4bb8251..3825956 100644 ---- a/doc/ext_c_api.html -+++ b/doc/ext_c_api.html -@@ -3,8 +3,7 @@ - <head> - <title>Lua/C API Extensions</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -177,7 +176,7 @@ Also note that this mechanism is not without overhead. 
- </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html -index d48d77f..74ca294 100644 ---- a/doc/ext_ffi.html -+++ b/doc/ext_ffi.html -@@ -3,8 +3,7 @@ - <head> - <title>FFI Library</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -320,7 +319,7 @@ without undue conversion penalties. - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html -index 566897c..10f2d02 100644 ---- a/doc/ext_ffi_api.html -+++ b/doc/ext_ffi_api.html -@@ -3,8 +3,7 @@ - <head> - <title>ffi.* API Functions</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -556,7 +555,7 @@ named <tt>i</tt>. - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html -index ae3c037..218049d 100644 ---- a/doc/ext_ffi_semantics.html -+++ b/doc/ext_ffi_semantics.html -@@ -3,8 +3,7 @@ - <head> - <title>FFI Semantics</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -1235,7 +1234,7 @@ compiled.</li> - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html -index 29cf549..cd455cf 100644 ---- a/doc/ext_ffi_tutorial.html -+++ b/doc/ext_ffi_tutorial.html -@@ -3,8 +3,7 @@ - <head> - <title>FFI Tutorial</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -591,7 +590,7 @@ it to a local variable in the function scope is unnecessary. 
- </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/ext_jit.html b/doc/ext_jit.html -index 5017e3c..ce6dcd6 100644 ---- a/doc/ext_jit.html -+++ b/doc/ext_jit.html -@@ -3,8 +3,7 @@ - <head> - <title>jit.* Library</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -189,7 +188,7 @@ if you want to know more. - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/extensions.html b/doc/extensions.html -index 3d9e82b..fa412e0 100644 ---- a/doc/extensions.html -+++ b/doc/extensions.html -@@ -3,8 +3,7 @@ - <head> - <title>Extensions</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -398,7 +397,7 @@ lead to the termination of the process.</li> - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/faq.html b/doc/faq.html -index afeff94..9338be4 100644 ---- a/doc/faq.html -+++ b/doc/faq.html -@@ -3,8 +3,7 @@ - <head> - <title>Frequently Asked Questions (FAQ)</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -174,7 +173,7 @@ the development of certain features, if they are important to you. - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/install.html b/doc/install.html -index 4bcc506..befffa7 100644 ---- a/doc/install.html -+++ b/doc/install.html -@@ -3,8 +3,7 @@ - <head> - <title>Installation</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -636,7 +635,7 @@ to me (the upstream) and not you (the package maintainer), anyway. 
- </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/luajit.html b/doc/luajit.html -index 0003008..d8f531d 100644 ---- a/doc/luajit.html -+++ b/doc/luajit.html -@@ -3,8 +3,7 @@ - <head> - <title>LuaJIT</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -150,7 +149,7 @@ Lua is a powerful, dynamic and light-weight programming language. - It may be embedded or used as a general-purpose, stand-alone language. - </p> - <p> --LuaJIT is Copyright © 2005-2017 Mike Pall, released under the -+LuaJIT is Copyright © 2005-2018 Mike Pall, released under the - <a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT open source license</a>. - </p> - <p> -@@ -224,7 +223,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT. - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/running.html b/doc/running.html -index 331c22d..08d7f71 100644 ---- a/doc/running.html -+++ b/doc/running.html -@@ -3,8 +3,7 @@ - <head> - <title>Running LuaJIT</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -296,7 +295,7 @@ Here are the parameters and their default settings: - </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> -diff --git a/doc/status.html b/doc/status.html -index aa8df93..ea61db1 100644 ---- a/doc/status.html -+++ b/doc/status.html -@@ -3,8 +3,7 @@ - <head> - <title>Status</title> - <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> --<meta name="Author" content="Mike Pall"> --<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> -+<meta name="Copyright" content="Copyright (C) 2005-2018"> - <meta name="Language" content="en"> - <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> - <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> -@@ -100,7 +99,7 @@ garbage collector. 
- </div> - <div id="foot"> - <hr class="hide"> --Copyright © 2005-2017 Mike Pall -+Copyright © 2005-2018 - <span class="noprint"> - · - <a href="contact.html">Contact</a> --- -2.20.1 - diff --git a/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch b/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch deleted file mode 100644 index ee383da..0000000 --- a/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch +++ /dev/null @@ -1,52 +0,0 @@ -commit 362f034c1b91d52ea2cf971314ed4e0c24348bff -Merge: 260b9b4 f5d424a -Author: Mike Pall <mike> -Date: Sun May 20 12:28:10 2018 +0200 - - Merge branch 'master' into v2.1 - -From f5d424afe8b9395f0df05aba905e0e1f6a2262b8 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 20 May 2018 12:25:36 +0200 -Subject: [PATCH 39/72] FFI: Make FP to U64 conversions match JIT backend - behavior. - ---- - src/lj_obj.h | 18 +++++++++++++----- - 1 file changed, 13 insertions(+), 5 deletions(-) - -diff --git a/src/lj_obj.h b/src/lj_obj.h -index e70b003..2ee526c 100644 ---- a/src/lj_obj.h -+++ b/src/lj_obj.h -@@ -816,14 +816,22 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) - - #define lj_num2int(n) ((int32_t)(n)) - -+/* -+** This must match the JIT backend behavior. In particular for archs -+** that don't have a common hardware instruction for this conversion. -+** Note that signed FP to unsigned int conversions have an undefined -+** result and should never be relied upon in portable FFI code. -+** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). -+*/ - static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) - { --#ifdef _MSC_VER -- if (n >= 9223372036854775808.0) /* They think it's a feature. */ -- return (uint64_t)(int64_t)(n - 18446744073709551616.0); -- else -+#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS -+ int64_t i = (int64_t)n; -+ if (i < 0) i = (int64_t)(n - 18446744073709551616.0); -+ return (uint64_t)i; -+#else -+ return (uint64_t)n; - #endif -- return (uint64_t)n; - } - - static LJ_AINLINE int32_t numberVint(cTValue *o) --- -2.20.1 - diff --git a/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch b/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch deleted file mode 100644 index 01dd836..0000000 --- a/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch +++ /dev/null @@ -1,33 +0,0 @@ -From fb5e522fbc0750c838ef6a926b11c5d870826183 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 20 May 2018 12:40:33 +0200 -Subject: [PATCH 40/72] x86/x64: Check for jcc when using xor r,r in - emit_loadi(). - -Thanks to Peter Cawley. ---- - src/lj_emit_x86.h | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h -index bcceb93..9c371a9 100644 ---- a/src/lj_emit_x86.h -+++ b/src/lj_emit_x86.h -@@ -268,10 +268,12 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) - /* mov r, i / xor r, r */ - static void emit_loadi(ASMState *as, Reg r, int32_t i) - { -- /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */ -+ /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. 
*/ - if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP || - (as->curins+1 < as->T->nins && -- IR(as->curins+1)->o == IR_HIOP)))) { -+ IR(as->curins+1)->o == IR_HIOP))) && -+ !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) || -+ (*as->mcp & 0xf0) == XI_JCCs)) { - emit_rr(as, XO_ARITH(XOg_XOR), r, r); - } else { - MCode *p = as->mcp; --- -2.20.1 - diff --git a/0041-PPC-NetBSD-Fix-endianess-check.patch b/0041-PPC-NetBSD-Fix-endianess-check.patch deleted file mode 100644 index 6800a89..0000000 --- a/0041-PPC-NetBSD-Fix-endianess-check.patch +++ /dev/null @@ -1,33 +0,0 @@ -commit d36afcfea57c29fb51060c24679f3b2c07806545 -Merge: b708297 b025b01 -Author: Mike Pall <mike> -Date: Tue Jun 5 11:39:10 2018 +0200 - - Merge branch 'master' into v2.1 - -From b025b01c5b9d23f6218c7d72b7aafa3f1ab1e08a Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Tue, 5 Jun 2018 11:36:18 +0200 -Subject: [PATCH 41/72] PPC/NetBSD: Fix endianess check. - -Thanks to he32 and daurnimator. ---- - src/lj_arch.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index e04c4ee..5f7e445 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -339,7 +339,7 @@ - #error "No support for ILP32 model on ARM64" - #endif - #elif LJ_TARGET_PPC --#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE -+#if !LJ_ARCH_PPC64 && (defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))) - #error "No support for little-endian PPC32" - #endif - #if LJ_ARCH_PPC64 --- -2.20.1 - diff --git a/0042-DynASM-x86-Add-FMA3-instructions.patch b/0042-DynASM-x86-Add-FMA3-instructions.patch deleted file mode 100644 index 0fe390a..0000000 --- a/0042-DynASM-x86-Add-FMA3-instructions.patch +++ /dev/null @@ -1,91 +0,0 @@ -From cc299958bb412f229844e53473a035c280544ec3 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Tue, 5 Jun 2018 12:23:13 +0200 -Subject: [PATCH 42/72] DynASM/x86: Add FMA3 instructions. - -Thanks to Alexander Nasonov. 
---- - dynasm/dasm_x86.lua | 67 +++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 67 insertions(+) - -diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua -index c1d267a..73502f6 100644 ---- a/dynasm/dasm_x86.lua -+++ b/dynasm/dasm_x86.lua -@@ -1696,6 +1696,73 @@ local map_op = { - sarx_3 = "rmrqd:F30F38wF7rM", - shrx_3 = "rmrqd:F20F38wF7rM", - shlx_3 = "rmrqd:660F38wF7rM", -+ -+ -- FMA3 -+ vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", -+ vfmaddsub132ps_3 = "rrmoy:660F38V96rM", -+ vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", -+ vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", -+ vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", -+ vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", -+ -+ vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", -+ vfmsubadd132ps_3 = "rrmoy:660F38V97rM", -+ vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", -+ vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", -+ vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", -+ vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", -+ -+ vfmadd132pd_3 = "rrmoy:660F38VX98rM", -+ vfmadd132ps_3 = "rrmoy:660F38V98rM", -+ vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", -+ vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", -+ vfmadd213pd_3 = "rrmoy:660F38VXA8rM", -+ vfmadd213ps_3 = "rrmoy:660F38VA8rM", -+ vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", -+ vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", -+ vfmadd231pd_3 = "rrmoy:660F38VXB8rM", -+ vfmadd231ps_3 = "rrmoy:660F38VB8rM", -+ vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", -+ vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", -+ -+ vfmsub132pd_3 = "rrmoy:660F38VX9ArM", -+ vfmsub132ps_3 = "rrmoy:660F38V9ArM", -+ vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", -+ vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", -+ vfmsub213pd_3 = "rrmoy:660F38VXAArM", -+ vfmsub213ps_3 = "rrmoy:660F38VAArM", -+ vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", -+ vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", -+ vfmsub231pd_3 = "rrmoy:660F38VXBArM", -+ vfmsub231ps_3 = "rrmoy:660F38VBArM", -+ vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", -+ vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", -+ -+ vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", -+ vfnmadd132ps_3 = "rrmoy:660F38V9CrM", -+ vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", -+ vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", -+ vfnmadd213pd_3 = "rrmoy:660F38VXACrM", -+ vfnmadd213ps_3 = "rrmoy:660F38VACrM", -+ vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", -+ vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", -+ vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", -+ vfnmadd231ps_3 = "rrmoy:660F38VBCrM", -+ vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", -+ vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", -+ -+ vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", -+ vfnmsub132ps_3 = "rrmoy:660F38V9ErM", -+ vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", -+ vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", -+ vfnmsub213pd_3 = "rrmoy:660F38VXAErM", -+ vfnmsub213ps_3 = "rrmoy:660F38VAErM", -+ vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", -+ vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", -+ vfnmsub231pd_3 = "rrmoy:660F38VXBErM", -+ vfnmsub231ps_3 = "rrmoy:660F38VBErM", -+ vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", -+ vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", - } - - ------------------------------------------------------------------------------ --- -2.20.1 - diff --git a/0043-x86-Disassemble-FMA3-instructions.patch b/0043-x86-Disassemble-FMA3-instructions.patch deleted file mode 100644 index 1d64c0a..0000000 --- a/0043-x86-Disassemble-FMA3-instructions.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 55f70823242aa4e6acc248bde5cf8194ba1b27e3 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Tue, 5 Jun 
2018 12:23:29 +0200 -Subject: [PATCH 43/72] x86: Disassemble FMA3 instructions. - -Thanks to Alexander Nasonov. ---- - src/jit/dis_x86.lua | 24 +++++++++++++++++++++++- - 1 file changed, 23 insertions(+), 1 deletion(-) - -diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua -index 4371233..3a68c93 100644 ---- a/src/jit/dis_x86.lua -+++ b/src/jit/dis_x86.lua -@@ -239,6 +239,24 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", - --8x - [0x8c] = "||pmaskmovXrvVSm", - [0x8e] = "||pmaskmovVSmXvr", -+--9x -+[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm", -+[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm", -+[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm", -+[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm", -+[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm", -+--Ax -+[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm", -+[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm", -+[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm", -+[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm", -+[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm", -+--Bx -+[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm", -+[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm", -+[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm", -+[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm", -+[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm", - --Dx - [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", - [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", -@@ -483,7 +501,7 @@ local function putpat(ctx, name, pat) - local operands, regs, sz, mode, sp, rm, sc, rx, sdisp - local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl - -- -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz -+ -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz - for p in gmatch(pat, ".") do - local x = nil - if p == "V" or p == "U" then -@@ -506,6 +524,9 @@ local function putpat(ctx, name, pat) - sz = ctx.o16 and "X" or "M"; ctx.o16 = false - if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end - regs = map_regs[sz] -+ elseif p == "H" then -+ name = name..(ctx.rexw and "d" or "s") -+ ctx.rexw = false - elseif p == "S" then - name = name..lower(sz) - elseif p == "s" then -@@ -735,6 +756,7 @@ map_act = { - V = putpat, U = putpat, T = putpat, - M = putpat, X = putpat, P = putpat, - F = putpat, G = putpat, Y = putpat, -+ H = putpat, - - -- Collect prefixes. - [":"] = function(ctx, name, pat) --- -2.20.1 - diff --git a/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch b/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch deleted file mode 100644 index 315b528..0000000 --- a/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch +++ /dev/null @@ -1,49 +0,0 @@ -From a5a89ab586a3b5bb4f266949bbf3dc2b140e2374 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Tue, 5 Jun 2018 12:23:56 +0200 -Subject: [PATCH 44/72] From Lua 5.3: assert() accepts any type of error - object. 
- ---- - doc/extensions.html | 1 + - src/lib_base.c | 10 +++++----- - 2 files changed, 6 insertions(+), 5 deletions(-) - -diff --git a/doc/extensions.html b/doc/extensions.html -index 55c4b70..7379041 100644 ---- a/doc/extensions.html -+++ b/doc/extensions.html -@@ -373,6 +373,7 @@ LuaJIT supports some extensions from Lua 5.3: - <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> - <li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> - <li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li> -+<li><tt>assert()</tt> accepts any type of error object.</li> - <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li> - <li><tt>coroutine.isyieldable()</tt>.</li> - <li>Lua/C API extensions: -diff --git a/src/lib_base.c b/src/lib_base.c -index d61e876..1cd8305 100644 ---- a/src/lib_base.c -+++ b/src/lib_base.c -@@ -42,13 +42,13 @@ - - LJLIB_ASM(assert) LJLIB_REC(.) - { -- GCstr *s; - lj_lib_checkany(L, 1); -- s = lj_lib_optstr(L, 2); -- if (s) -- lj_err_callermsg(L, strdata(s)); -- else -+ if (L->top == L->base+1) - lj_err_caller(L, LJ_ERR_ASSERT); -+ else if (tvisstr(L->base+1) || tvisnumber(L->base+1)) -+ lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2))); -+ else -+ lj_err_run(L); - return FFH_UNREACHABLE; - } - --- -2.20.1 - diff --git a/0045-Windows-Add-UWP-support-part-1.patch b/0045-Windows-Add-UWP-support-part-1.patch deleted file mode 100644 index fcb91fb..0000000 --- a/0045-Windows-Add-UWP-support-part-1.patch +++ /dev/null @@ -1,359 +0,0 @@ -From c3c54ce1aef782823936808a75460e6b53aada2c Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Tue, 5 Jun 2018 17:03:08 +0200 -Subject: [PATCH 45/72] Windows: Add UWP support, part 1. - -Contributed by Ben Pye. ---- - doc/ext_ffi_api.html | 2 ++ - src/lib_ffi.c | 3 +++ - src/lib_io.c | 4 ++-- - src/lib_package.c | 24 +++++++++++++++++++++++- - src/lj_alloc.c | 6 +++--- - src/lj_arch.h | 19 +++++++++++++++++++ - src/lj_ccallback.c | 4 ++-- - src/lj_clib.c | 20 ++++++++++++++++---- - src/lj_mcode.c | 8 ++++---- - src/lj_profile.c | 8 ++++---- - 10 files changed, 78 insertions(+), 20 deletions(-) - -diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html -index 25cc974..54ff0ce 100644 ---- a/doc/ext_ffi_api.html -+++ b/doc/ext_ffi_api.html -@@ -468,6 +468,8 @@ otherwise. The following parameters are currently defined: - <tr class="odd"> - <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> - <tr class="even"> -+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr> -+<tr class="odd"> - <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr> - </table> - -diff --git a/src/lib_ffi.c b/src/lib_ffi.c -index 199cfc9..8032411 100644 ---- a/src/lib_ffi.c -+++ b/src/lib_ffi.c -@@ -746,6 +746,9 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.) 
- #endif - #if LJ_ABI_WIN - case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ -+#endif -+#if LJ_TARGET_UWP -+ case H_(a40f0bcb,a40f0bcb): b = 1; break; /* uwp */ - #endif - case H_(3af93066,1f001464): b = 1; break; /* le/be */ - #if LJ_GC64 -diff --git a/src/lib_io.c b/src/lib_io.c -index 9763ed4..73fd932 100644 ---- a/src/lib_io.c -+++ b/src/lib_io.c -@@ -99,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof) - int stat = -1; - #if LJ_TARGET_POSIX - stat = pclose(iof->fp); --#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE -+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP - stat = _pclose(iof->fp); - #else - lua_assert(0); -@@ -406,7 +406,7 @@ LJLIB_CF(io_open) - - LJLIB_CF(io_popen) - { --#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE) -+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP) - const char *fname = strdata(lj_lib_checkstr(L, 1)); - GCstr *s = lj_lib_optstr(L, 2); - const char *mode = s ? strdata(s) : "r"; -diff --git a/src/lib_package.c b/src/lib_package.c -index 6fac43e..bedd6d7 100644 ---- a/src/lib_package.c -+++ b/src/lib_package.c -@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym) - BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); - #endif - -+#if LJ_TARGET_UWP -+void *LJ_WIN_LOADLIBA(const char *path) -+{ -+ DWORD err = GetLastError(); -+ wchar_t wpath[256]; -+ HANDLE lib = NULL; -+ if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) { -+ lib = LoadPackagedLibrary(wpath, 0); -+ } -+ SetLastError(err); -+ return lib; -+} -+#endif -+ - #undef setprogdir - - static void setprogdir(lua_State *L) -@@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib) - - static void *ll_load(lua_State *L, const char *path, int gl) - { -- HINSTANCE lib = LoadLibraryExA(path, NULL, 0); -+ HINSTANCE lib = LJ_WIN_LOADLIBA(path); - if (lib == NULL) pusherror(L); - UNUSED(gl); - return lib; -@@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) - return f; - } - -+#if LJ_TARGET_UWP -+EXTERN_C IMAGE_DOS_HEADER __ImageBase; -+#endif -+ - static const char *ll_bcsym(void *lib, const char *sym) - { - if (lib) { - return (const char *)GetProcAddress((HINSTANCE)lib, sym); - } else { -+#if LJ_TARGET_UWP -+ return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym); -+#else - HINSTANCE h = GetModuleHandleA(NULL); - const char *p = (const char *)GetProcAddress(h, sym); - if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)ll_bcsym, &h)) - p = (const char *)GetProcAddress(h, sym); - return p; -+#endif - } - } - -diff --git a/src/lj_alloc.c b/src/lj_alloc.c -index 9fc761c..f3b6a54 100644 ---- a/src/lj_alloc.c -+++ b/src/lj_alloc.c -@@ -167,7 +167,7 @@ static void *DIRECT_MMAP(size_t size) - static void *CALL_MMAP(size_t size) - { - DWORD olderr = GetLastError(); -- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); -+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - SetLastError(olderr); - return ptr ? ptr : MFAIL; - } -@@ -176,8 +176,8 @@ static void *CALL_MMAP(size_t size) - static void *DIRECT_MMAP(size_t size) - { - DWORD olderr = GetLastError(); -- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, -- PAGE_READWRITE); -+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, -+ PAGE_READWRITE); - SetLastError(olderr); - return ptr ? 
ptr : MFAIL; - } -diff --git a/src/lj_arch.h b/src/lj_arch.h -index e796912..31a1159 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -135,6 +135,13 @@ - #define LJ_TARGET_GC64 1 - #endif - -+#ifdef _UWP -+#define LJ_TARGET_UWP 1 -+#if LUAJIT_TARGET == LUAJIT_ARCH_X64 -+#define LJ_TARGET_GC64 1 -+#endif -+#endif -+ - #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ - #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ - #define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ -@@ -570,6 +577,18 @@ - #define LJ_NO_UNWIND 1 - #endif - -+#if LJ_TARGET_WINDOWS -+#if LJ_TARGET_UWP -+#define LJ_WIN_VALLOC VirtualAllocFromApp -+#define LJ_WIN_VPROTECT VirtualProtectFromApp -+extern void *LJ_WIN_LOADLIBA(const char *path); -+#else -+#define LJ_WIN_VALLOC VirtualAlloc -+#define LJ_WIN_VPROTECT VirtualProtect -+#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0) -+#endif -+#endif -+ - /* Compatibility with Lua 5.1 vs. 5.2. */ - #ifdef LUAJIT_ENABLE_LUA52COMPAT - #define LJ_52 1 -diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c -index 03494a7..412dbf8 100644 ---- a/src/lj_ccallback.c -+++ b/src/lj_ccallback.c -@@ -267,7 +267,7 @@ static void callback_mcode_new(CTState *cts) - if (CALLBACK_MAX_SLOT == 0) - lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); - #if LJ_TARGET_WINDOWS -- p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); -+ p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - if (!p) - lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); - #elif LJ_TARGET_POSIX -@@ -285,7 +285,7 @@ static void callback_mcode_new(CTState *cts) - #if LJ_TARGET_WINDOWS - { - DWORD oprot; -- VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); -+ LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); - } - #elif LJ_TARGET_POSIX - mprotect(p, sz, (PROT_READ|PROT_EXEC)); -diff --git a/src/lj_clib.c b/src/lj_clib.c -index 6142659..f016b06 100644 ---- a/src/lj_clib.c -+++ b/src/lj_clib.c -@@ -158,11 +158,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); - /* Default libraries. */ - enum { - CLIB_HANDLE_EXE, -+#if !LJ_TARGET_UWP - CLIB_HANDLE_DLL, - CLIB_HANDLE_CRT, - CLIB_HANDLE_KERNEL32, - CLIB_HANDLE_USER32, - CLIB_HANDLE_GDI32, -+#endif - CLIB_HANDLE_MAX - }; - -@@ -208,7 +210,7 @@ static const char *clib_extname(lua_State *L, const char *name) - static void *clib_loadlib(lua_State *L, const char *name, int global) - { - DWORD oldwerr = GetLastError(); -- void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0); -+ void *h = LJ_WIN_LOADLIBA(clib_extname(L, name)); - if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); - SetLastError(oldwerr); - UNUSED(global); -@@ -218,6 +220,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) - static void clib_unloadlib(CLibrary *cl) - { - if (cl->handle == CLIB_DEFHANDLE) { -+#if !LJ_TARGET_UWP - MSize i; - for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { - void *h = clib_def_handle[i]; -@@ -226,11 +229,16 @@ static void clib_unloadlib(CLibrary *cl) - FreeLibrary((HINSTANCE)h); - } - } -+#endif - } else if (cl->handle) { - FreeLibrary((HINSTANCE)cl->handle); - } - } - -+#if LJ_TARGET_UWP -+EXTERN_C IMAGE_DOS_HEADER __ImageBase; -+#endif -+ - static void *clib_getsym(CLibrary *cl, const char *name) - { - void *p = NULL; -@@ -239,6 +247,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) - for (i = 0; i < CLIB_HANDLE_MAX; i++) { - HINSTANCE h = (HINSTANCE)clib_def_handle[i]; - if (!(void *)h) { /* Resolve default library handles (once). 
*/ -+#if LJ_TARGET_UWP -+ h = (HINSTANCE)&__ImageBase; -+#else - switch (i) { - case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; - case CLIB_HANDLE_DLL: -@@ -249,11 +260,12 @@ static void *clib_getsym(CLibrary *cl, const char *name) - GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)&_fmode, &h); - break; -- case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break; -- case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break; -- case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break; -+ case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break; -+ case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break; -+ case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break; - } - if (!h) continue; -+#endif - clib_def_handle[i] = (void *)h; - } - p = (void *)GetProcAddress(h, name); -diff --git a/src/lj_mcode.c b/src/lj_mcode.c -index e46e3ef..64b0ca9 100644 ---- a/src/lj_mcode.c -+++ b/src/lj_mcode.c -@@ -66,8 +66,8 @@ void lj_mcode_sync(void *start, void *end) - - static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) - { -- void *p = VirtualAlloc((void *)hint, sz, -- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); -+ void *p = LJ_WIN_VALLOC((void *)hint, sz, -+ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); - if (!p && !hint) - lj_trace_err(J, LJ_TRERR_MCODEAL); - return p; -@@ -82,7 +82,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) - static int mcode_setprot(void *p, size_t sz, DWORD prot) - { - DWORD oprot; -- return !VirtualProtect(p, sz, prot, &oprot); -+ return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); - } - - #elif LJ_TARGET_POSIX -@@ -255,7 +255,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) - /* All memory addresses are reachable by relative jumps. */ - static void *mcode_alloc(jit_State *J, size_t sz) - { --#ifdef __OpenBSD__ -+#if defined(__OpenBSD__) || LJ_TARGET_UWP - /* Allow better executable memory allocation for OpenBSD W^X mode. */ - void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); - if (p && mcode_setprot(p, sz, MCPROT_GEN)) { -diff --git a/src/lj_profile.c b/src/lj_profile.c -index 116998e..3223697 100644 ---- a/src/lj_profile.c -+++ b/src/lj_profile.c -@@ -247,7 +247,7 @@ static DWORD WINAPI profile_thread(void *psx) - { - ProfileState *ps = (ProfileState *)psx; - int interval = ps->interval; --#if LJ_TARGET_WINDOWS -+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP - ps->wmm_tbp(interval); - #endif - while (1) { -@@ -255,7 +255,7 @@ static DWORD WINAPI profile_thread(void *psx) - if (ps->abort) break; - profile_trigger(ps); - } --#if LJ_TARGET_WINDOWS -+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP - ps->wmm_tep(interval); - #endif - return 0; -@@ -264,9 +264,9 @@ static DWORD WINAPI profile_thread(void *psx) - /* Start profiling timer thread. */ - static void profile_timer_start(ProfileState *ps) - { --#if LJ_TARGET_WINDOWS -+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP - if (!ps->wmm) { /* Load WinMM library on-demand. 
*/ -- ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0); -+ ps->wmm = LJ_WIN_LOADLIBA("winmm.dll"); - if (ps->wmm) { - ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); - ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); --- -2.20.1 - diff --git a/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch b/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch deleted file mode 100644 index 69eea9a..0000000 --- a/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch +++ /dev/null @@ -1,26 +0,0 @@ -From c785131ca5a6d24adc519e5e0bf1b69b671d912f Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 24 Jun 2018 13:18:03 +0200 -Subject: [PATCH 46/72] ARM64: Fix write barrier in BC_USETS. - -Contributed by Javier Guerra Giraldez. ---- - src/vm_arm64.dasc | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc -index c55794a..fb226e3 100644 ---- a/src/vm_arm64.dasc -+++ b/src/vm_arm64.dasc -@@ -2780,7 +2780,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |2: // Check if string is white and ensure upvalue is closed. - | ldrb TMP0w, UPVAL:CARG1->closed - | tst TMP1w, #LJ_GC_WHITES // iswhite(str) -- | ccmp TMP0w, #0, #0, ne -+ | ccmp TMP0w, #0, #4, ne - | beq <1 - | // Crossed a write barrier. Move the barrier forward. - | mov CARG1, GL --- -2.20.1 - diff --git a/0047-ARM64-Fix-exit-stub-patching.patch b/0047-ARM64-Fix-exit-stub-patching.patch deleted file mode 100644 index 740d52e..0000000 --- a/0047-ARM64-Fix-exit-stub-patching.patch +++ /dev/null @@ -1,238 +0,0 @@ -From 9da06535092d6d9dec442641a26c64bce5574322 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 24 Jun 2018 14:08:59 +0200 -Subject: [PATCH 47/72] ARM64: Fix exit stub patching. - -Contributed by Javier Guerra Giraldez. ---- - src/lj_asm_arm64.h | 64 +++++++++++++++++++++++++------------------ - src/lj_emit_arm64.h | 18 ++++++------ - src/lj_target_arm64.h | 7 +++-- - 3 files changed, 51 insertions(+), 38 deletions(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index cbb186d..baafa21 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) - asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... 
*/ - for (i = nexits-1; (int32_t)i >= 0; i--) -- *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu)); -- *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno)); -+ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); -+ *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); - mxp--; -- *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu)); -- *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP)); -+ *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); -+ *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); - as->mctop = mxp; - } - -@@ -77,7 +77,7 @@ static void asm_guardcc(ASMState *as, A64CC cc) - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; -- *p = A64I_B | ((target-p) & 0x03ffffffu); -+ *p = A64I_B | A64F_S26(target-p); - emit_cond_branch(as, cc^1, p-1); - return; - } -@@ -91,7 +91,7 @@ static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; -- *p = A64I_B | ((target-p) & 0x03ffffffu); -+ *p = A64I_B | A64F_S26(target-p); - emit_tnb(as, ai^0x01000000u, r, bit, p-1); - return; - } -@@ -105,7 +105,7 @@ static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; -- *p = A64I_B | ((target-p) & 0x03ffffffu); -+ *p = A64I_B | A64F_S26(target-p); - emit_cnb(as, ai^0x01000000u, r, p-1); - return; - } -@@ -1850,7 +1850,7 @@ static void asm_loop_fixup(ASMState *as) - p[-2] |= ((uint32_t)delta & mask) << 5; - } else { - ptrdiff_t delta = target - (p - 1); -- p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); -+ p[-1] = A64I_B | A64F_S26(delta); - } - } - -@@ -1919,7 +1919,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; -- p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); -+ p[-1] = A64I_B | A64F_S26((target-p)+1); - } - - /* Prepare tail of code. */ -@@ -1982,40 +1982,50 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) - { - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); -- MCode *cstart = NULL, *cend = p; -+ MCode *cstart = NULL; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode *px = exitstub_trace_addr(T, exitno); -+ /* Note: this assumes a trace exit is only ever patched once. */ - for (; p < pe; p++) { - /* Look for exitstub branch, replace with branch to target. */ -+ ptrdiff_t delta = target - p; - MCode ins = A64I_LE(*p); - if ((ins & 0xff000000u) == 0x54000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { -- /* Patch bcc exitstub. */ -- *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u)); -- cend = p+1; -- if (!cstart) cstart = p; -+ /* Patch bcc, if within range. */ -+ if (A64F_S_OK(delta, 19)) { -+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); -+ if (!cstart) cstart = p; -+ } - } else if ((ins & 0xfc000000u) == 0x14000000u && - ((ins ^ (px-p)) & 0x03ffffffu) == 0) { -- /* Patch b exitstub. */ -- *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu)); -- cend = p+1; -+ /* Patch b. */ -+ lua_assert(A64F_S_OK(delta, 26)); -+ *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta)); - if (!cstart) cstart = p; - } else if ((ins & 0x7e000000u) == 0x34000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { -- /* Patch cbz/cbnz exitstub. 
*/ -- *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u)); -- cend = p+1; -- if (!cstart) cstart = p; -+ /* Patch cbz/cbnz, if within range. */ -+ if (A64F_S_OK(delta, 19)) { -+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); -+ if (!cstart) cstart = p; -+ } - } else if ((ins & 0x7e000000u) == 0x36000000u && - ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { -- /* Patch tbz/tbnz exitstub. */ -- *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u)); -- cend = p+1; -- if (!cstart) cstart = p; -+ /* Patch tbz/tbnz, if within range. */ -+ if (A64F_S_OK(delta, 14)) { -+ *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta)); -+ if (!cstart) cstart = p; -+ } - } - } -- lua_assert(cstart != NULL); -- lj_mcode_sync(cstart, cend); -+ { /* Always patch long-range branch in exit stub itself. */ -+ ptrdiff_t delta = target - px; -+ lua_assert(A64F_S_OK(delta, 26)); -+ *px = A64I_B | A64F_S26(delta); -+ if (!cstart) cstart = px; -+ } -+ lj_mcode_sync(cstart, px+1); - lj_mcode_patch(J, mcarea, 1); - } - -diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h -index 6da4c7d..1001b1d 100644 ---- a/src/lj_emit_arm64.h -+++ b/src/lj_emit_arm64.h -@@ -241,7 +241,7 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) - #define mcpofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) - #define checkmcpofs(as, k) \ -- ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) -+ (A64F_S_OK(mcpofs(as, k)>>2, 19)) - - static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -@@ -312,7 +312,7 @@ static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) - { - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; -- lua_assert(((delta + 0x40000) >> 19) == 0); -+ lua_assert(A64F_S_OK(delta, 19)); - *p = A64I_BCC | A64F_S19(delta) | cond; - } - -@@ -320,24 +320,24 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target) - { - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; -- lua_assert(((delta + 0x02000000) >> 26) == 0); -- *p = ai | ((uint32_t)delta & 0x03ffffffu); -+ lua_assert(A64F_S_OK(delta, 26)); -+ *p = ai | A64F_S26(delta); - } - - static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) - { - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; -- lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); -+ lua_assert(bit < 63 && A64F_S_OK(delta, 14)); - if (bit > 31) ai |= A64I_X; -- *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; -+ *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; - } - - static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) - { - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; -- lua_assert(((delta + 0x40000) >> 19) == 0); -+ lua_assert(A64F_S_OK(delta, 19)); - *p = ai | A64F_S19(delta) | r; - } - -@@ -347,8 +347,8 @@ static void emit_call(ASMState *as, void *target) - { - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; -- if ((((delta>>2) + 0x02000000) >> 26) == 0) { -- *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); -+ if (A64F_S_OK(delta>>2, 26)) { -+ *p = A64I_BL | A64F_S26(delta>>2); - } else { /* Target out of range: need indirect call. But don't use R0-R7. 
*/ - Reg r = ra_allock(as, i64ptr(target), - RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); -diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h -index 520023a..a207a2b 100644 ---- a/src/lj_target_arm64.h -+++ b/src/lj_target_arm64.h -@@ -132,9 +132,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) - #define A64F_IMMR(x) ((x) << 16) - #define A64F_U16(x) ((x) << 5) - #define A64F_U12(x) ((x) << 10) --#define A64F_S26(x) (x) -+#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu)) - #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) --#define A64F_S14(x) ((x) << 5) -+#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5) - #define A64F_S9(x) ((x) << 12) - #define A64F_BIT(x) ((x) << 19) - #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) -@@ -145,6 +145,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) - #define A64F_LSL16(x) (((x) / 16) << 21) - #define A64F_BSH(sh) ((sh) << 10) - -+/* Check for valid field range. */ -+#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) -+ - typedef enum A64Ins { - A64I_S = 0x20000000, - A64I_X = 0x80000000, --- -2.20.1 - diff --git a/0048-DynASM-Fix-warning.patch b/0048-DynASM-Fix-warning.patch deleted file mode 100644 index 4af1c32..0000000 --- a/0048-DynASM-Fix-warning.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 9b41062156779160b88fe5e1eb1ece1ee1fe6a74 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 24 Jun 2018 14:10:21 +0200 -Subject: [PATCH 48/72] DynASM: Fix warning. - ---- - dynasm/dasm_arm64.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h -index 47e1e07..ff21236 100644 ---- a/dynasm/dasm_arm64.h -+++ b/dynasm/dasm_arm64.h -@@ -427,6 +427,7 @@ int dasm_encode(Dst_DECL, void *buffer) - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); -+ /* fallthrough */ - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; --- -2.20.1 - diff --git a/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch b/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch deleted file mode 100644 index 40bcbce..0000000 --- a/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 26f1023819efb843e10014232cd88bb1d52ea4f5 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Wed, 22 Aug 2018 13:35:41 +0200 -Subject: [PATCH 49/72] DynASM/x86: Fix vroundps/vroundpd encoding. - -Thanks to Alexander Nasonov. 
---- - dynasm/dasm_x86.lua | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua -index 73502f6..7f536af 100644 ---- a/dynasm/dasm_x86.lua -+++ b/dynasm/dasm_x86.lua -@@ -1537,8 +1537,8 @@ local map_op = { - vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", - vrsqrtps_2 = "rmoy:0Fu52rM", - vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", -- vroundpd_3 = "rmioy:660F3AV09rMU", -- vroundps_3 = "rmioy:660F3AV08rMU", -+ vroundpd_3 = "rmioy:660F3Au09rMU", -+ vroundps_3 = "rmioy:660F3Au08rMU", - vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", - vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", - vshufpd_4 = "rrmioy:660FVC6rMU", --- -2.20.1 - diff --git a/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch b/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch deleted file mode 100644 index 9b29c4e..0000000 --- a/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 646148e747759f0af3b47f9bd287cedd7e174631 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 13 Sep 2018 17:58:50 +0200 -Subject: [PATCH 50/72] Fix memory probing allocator to check for valid end - address, too. - ---- - src/lj_alloc.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/lj_alloc.c b/src/lj_alloc.c -index f3b6a54..33a2eb8 100644 ---- a/src/lj_alloc.c -+++ b/src/lj_alloc.c -@@ -255,7 +255,8 @@ static void *mmap_probe(size_t size) - for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { - void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); - uintptr_t addr = (uintptr_t)p; -- if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) { -+ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER && -+ ((addr + size) >> LJ_ALLOC_MBITS) == 0) { - /* We got a suitable address. Bump the hint address. */ - hint_addr = addr + size; - errno = olderr; --- -2.20.1 - diff --git a/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch b/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch deleted file mode 100644 index 3226e33..0000000 --- a/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 9c1b637898f38dd4606da08ba1a82a174c3e64b6 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 14 Oct 2018 15:12:59 +0200 -Subject: [PATCH 51/72] MIPS/MIPS64: Fix TSETR barrier (again). - ---- - src/vm_mips.dasc | 2 +- - src/vm_mips64.dasc | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc -index 1afd611..f324812 100644 ---- a/src/vm_mips.dasc -+++ b/src/vm_mips.dasc -@@ -4317,7 +4317,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. -- | barrierback TAB:CARG2, TMP3, TMP0, <2 -+ | barrierback TAB:CARG2, TMP3, CRET1, <2 - break; - - case BC_TSETM: -diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc -index 0a3f8e5..1682c81 100644 ---- a/src/vm_mips64.dasc -+++ b/src/vm_mips64.dasc -@@ -4263,7 +4263,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
-- | barrierback TAB:CARG2, TMP3, TMP0, <2 -+ | barrierback TAB:CARG2, TMP3, CRET1, <2 - break; - - case BC_TSETM: --- -2.20.1 - diff --git a/0052-Actually-implement-maxirconst-trace-limit.patch b/0052-Actually-implement-maxirconst-trace-limit.patch deleted file mode 100644 index 5281168..0000000 --- a/0052-Actually-implement-maxirconst-trace-limit.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 0a9ff94c4a1fcec2c310dcb092da694f23186e23 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Sun, 14 Oct 2018 15:21:37 +0200 -Subject: [PATCH 52/72] Actually implement maxirconst trace limit. - -Suggested by spacewander. ---- - src/lj_record.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/lj_record.c b/src/lj_record.c -index 1a2b1c5..7f37d6c 100644 ---- a/src/lj_record.c -+++ b/src/lj_record.c -@@ -2470,8 +2470,9 @@ void lj_record_ins(jit_State *J) - #undef rbv - #undef rcv - -- /* Limit the number of recorded IR instructions. */ -- if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) -+ /* Limit the number of recorded IR instructions and constants. */ -+ if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] || -+ J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst]) - lj_trace_err(J, LJ_TRERR_TRACEOV); - } - --- -2.20.1 - diff --git a/0053-Better-detection-of-MinGW-build.patch b/0053-Better-detection-of-MinGW-build.patch deleted file mode 100644 index 9805f09..0000000 --- a/0053-Better-detection-of-MinGW-build.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 3404183e2387f48e3464bd79116d3e8021ca781e Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 10 Jan 2019 12:02:15 +0100 -Subject: [PATCH 53/72] Better detection of MinGW build. - ---- - src/Makefile | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/Makefile b/src/Makefile -index 24e8c0e..962aa94 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -165,6 +165,10 @@ else - HOST_SYS= Windows - HOST_MSYS= mingw - endif -+ ifneq (,$(findstring MSYS,$(HOST_SYS))) -+ HOST_SYS= Windows -+ HOST_MSYS= mingw -+ endif - ifneq (,$(findstring CYGWIN,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= cygwin --- -2.20.1 - diff --git a/0054-Fix-overflow-of-snapshot-map-offset.patch b/0054-Fix-overflow-of-snapshot-map-offset.patch deleted file mode 100644 index 723cb74..0000000 --- a/0054-Fix-overflow-of-snapshot-map-offset.patch +++ /dev/null @@ -1,131 +0,0 @@ -commit 749e99ce2a88bf337bd2f6279940d6761ce5f616 -Merge: e2cc89b 380e440 -Author: Mike Pall <mike> -Date: Thu Jan 10 12:24:17 2019 +0100 - - Merge branch 'master' into v2.1 - -From 380e4409a70725df85034f02c968b6ebd7a5e513 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 10 Jan 2019 12:19:30 +0100 -Subject: [PATCH 54/72] Fix overflow of snapshot map offset. - -Thanks to Yichun Zhang. ---- - src/lj_jit.h | 10 +++++----- - src/lj_opt_loop.c | 8 ++++---- - src/lj_snap.c | 6 +++--- - 3 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/src/lj_jit.h b/src/lj_jit.h -index 3f38d28..0bc6258 100644 ---- a/src/lj_jit.h -+++ b/src/lj_jit.h -@@ -163,7 +163,7 @@ typedef struct MCLink { - - /* Stack snapshot header. */ - typedef struct SnapShot { -- uint16_t mapofs; /* Offset into snapshot map. */ -+ uint32_t mapofs; /* Offset into snapshot map. */ - IRRef1 ref; /* First IR ref for this snapshot. */ - uint8_t nslots; /* Number of valid slots. */ - uint8_t topslot; /* Maximum frame extent. */ -@@ -217,17 +217,15 @@ typedef enum { - /* Trace object. 
*/ - typedef struct GCtrace { - GCHeader; -- uint8_t topslot; /* Top stack slot already checked to be allocated. */ -- uint8_t linktype; /* Type of link. */ -+ uint16_t nsnap; /* Number of snapshots. */ - IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ - #if LJ_GC64 - uint32_t unused_gc64; - #endif - GCRef gclist; - IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ - IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ -- uint16_t nsnap; /* Number of snapshots. */ -- uint16_t nsnapmap; /* Number of snapshot map elements. */ -+ uint32_t nsnapmap; /* Number of snapshot map elements. */ - SnapShot *snap; /* Snapshot array. */ - SnapEntry *snapmap; /* Snapshot map. */ - GCRef startpt; /* Starting prototype. */ -@@ -241,6 +239,8 @@ typedef struct GCtrace { - TraceNo1 nextroot; /* Next root trace for same prototype. */ - TraceNo1 nextside; /* Next side trace of same root trace. */ - uint8_t sinktags; /* Trace has SINK tags. */ -+ uint8_t topslot; /* Top stack slot already checked to be allocated. */ -+ uint8_t linktype; /* Type of link. */ - uint8_t unused1; - #ifdef LUAJIT_USE_GDBJIT - void *gdbjit_entry; /* GDB JIT entry. */ -diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c -index 36317b3..cc88111 100644 ---- a/src/lj_opt_loop.c -+++ b/src/lj_opt_loop.c -@@ -223,7 +223,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, - } - J->guardemit.irt = 0; - /* Setup new snapshot. */ -- snap->mapofs = (uint16_t)nmapofs; -+ snap->mapofs = (uint32_t)nmapofs; - snap->ref = (IRRef1)J->cur.nins; - snap->nslots = nslots; - snap->topslot = osnap->topslot; -@@ -251,7 +251,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, - nmap += nn; - while (omap < nextmap) /* Copy PC + frame links. */ - *nmap++ = *omap++; -- J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap); -+ J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); - } - - typedef struct LoopState { -@@ -362,7 +362,7 @@ static void loop_unroll(jit_State *J) - } - } - if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ -- J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; -+ J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; - lua_assert(J->cur.nsnapmap <= J->sizesnapmap); - *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ - -@@ -376,7 +376,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap) - SnapShot *snap = &J->cur.snap[nsnap-1]; - SnapEntry *map = J->cur.snapmap; - map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ -- J->cur.nsnapmap = (uint16_t)nsnapmap; -+ J->cur.nsnapmap = (uint32_t)nsnapmap; - J->cur.nsnap = nsnap; - J->guardemit.irt = 0; - lj_ir_rollback(J, ins); -diff --git a/src/lj_snap.c b/src/lj_snap.c -index e891f7a..73f2500 100644 ---- a/src/lj_snap.c -+++ b/src/lj_snap.c -@@ -129,11 +129,11 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) - nent = snapshot_slots(J, p, nslots); - snap->nent = (uint8_t)nent; - nent += snapshot_framelinks(J, p + nent, &snap->topslot); -- snap->mapofs = (uint16_t)nsnapmap; -+ snap->mapofs = (uint32_t)nsnapmap; - snap->ref = (IRRef1)J->cur.nins; - snap->nslots = (uint8_t)nslots; - snap->count = 0; -- J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); -+ J->cur.nsnapmap = (uint32_t)(nsnapmap + nent); - } - - /* Add or merge a snapshot. 
*/ -@@ -294,7 +294,7 @@ void lj_snap_shrink(jit_State *J) - snap->nent = (uint8_t)m; - nlim = J->cur.nsnapmap - snap->mapofs - 1; - while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */ -- J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */ -+ J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */ - } - - /* -- Snapshot access ----------------------------------------------------- */ --- -2.20.1 - diff --git a/0055-DynASM-PPC-Fix-shadowed-variable.patch b/0055-DynASM-PPC-Fix-shadowed-variable.patch deleted file mode 100644 index 4f80b3a..0000000 --- a/0055-DynASM-PPC-Fix-shadowed-variable.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 20e4c529458fa42ef6651a0042e3955723ee20c2 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 10 Jan 2019 12:28:24 +0100 -Subject: [PATCH 55/72] DynASM/PPC: Fix shadowed variable. - -Cleanup only, bug cannot trigger. -Thanks to Domingo Alvarez Duarte. ---- - dynasm/dasm_ppc.lua | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua -index 4e1656e..77031fb 100644 ---- a/dynasm/dasm_ppc.lua -+++ b/dynasm/dasm_ppc.lua -@@ -1056,9 +1056,9 @@ map_op[".template__"] = function(params, template, nparams) - elseif p == "M" then - op = op + parse_shiftmask(params[n], false); n = n + 1 - elseif p == "J" or p == "K" then -- local mode, n, s = parse_label(params[n], false) -- if p == "K" then n = n + 2048 end -- waction("REL_"..mode, n, s, 1) -+ local mode, m, s = parse_label(params[n], false) -+ if p == "K" then m = m + 2048 end -+ waction("REL_"..mode, m, s, 1) - n = n + 1 - elseif p == "0" then - if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end --- -2.20.1 - diff --git a/0056-DynASM-MIPS-Fix-shadowed-variable.patch b/0056-DynASM-MIPS-Fix-shadowed-variable.patch deleted file mode 100644 index e3fc081..0000000 --- a/0056-DynASM-MIPS-Fix-shadowed-variable.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 5c911998a3c85d024a8006feafc68d0b4c962fd8 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 10 Jan 2019 12:32:08 +0100 -Subject: [PATCH 56/72] DynASM/MIPS: Fix shadowed variable. - -Cleanup only, bug cannot trigger. -Thanks to Domingo Alvarez Duarte. ---- - dynasm/dasm_mips.lua | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua -index 8e250ce..af53042 100644 ---- a/dynasm/dasm_mips.lua -+++ b/dynasm/dasm_mips.lua -@@ -757,9 +757,9 @@ map_op[".template__"] = function(params, template, nparams) - elseif p == "X" then - op = op + parse_index(params[n]); n = n + 1 - elseif p == "B" or p == "J" then -- local mode, n, s = parse_label(params[n], false) -- if p == "B" then n = n + 2048 end -- waction("REL_"..mode, n, s, 1) -+ local mode, m, s = parse_label(params[n], false) -+ if p == "B" then m = m + 2048 end -+ waction("REL_"..mode, m, s, 1) - n = n + 1 - elseif p == "A" then - op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 --- -2.20.1 - diff --git a/0057-Fix-MinGW-build.patch b/0057-Fix-MinGW-build.patch deleted file mode 100644 index d23aa4c..0000000 --- a/0057-Fix-MinGW-build.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 61464b0a5b685489bee7b6680c0e9663f2143a84 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 10 Jan 2019 12:37:09 +0100 -Subject: [PATCH 57/72] Fix MinGW build. - -Thanks to Victor Bombi. 
---- - src/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/Makefile b/src/Makefile -index 962aa94..2c780de 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -194,7 +194,7 @@ CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) - LDOPTIONS= $(CCDEBUG) $(LDFLAGS) - - HOST_CC= $(CC) --HOST_RM= rm -f -+HOST_RM?= rm -f - # If left blank, minilua is built and used. You can supply an installed - # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua - HOST_LUA= --- -2.20.1 - diff --git a/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch b/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch deleted file mode 100644 index 40324b7..0000000 --- a/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch +++ /dev/null @@ -1,32 +0,0 @@ -From fc63c938b522e147ea728b75f385728bf4a8fc35 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 10 Jan 2019 12:47:28 +0100 -Subject: [PATCH 58/72] Fix os.date() for wider libc strftime() compatibility. - -Thanks to Jesper Lundgren. ---- - src/lib_os.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/lib_os.c b/src/lib_os.c -index 9e78d49..ffbc3fd 100644 ---- a/src/lib_os.c -+++ b/src/lib_os.c -@@ -205,12 +205,12 @@ LJLIB_CF(os_date) - setboolfield(L, "isdst", stm->tm_isdst); - } else if (*s) { - SBuf *sb = &G(L)->tmpbuf; -- MSize sz = 0; -+ MSize sz = 0, retry = 4; - const char *q; - for (q = s; *q; q++) - sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */ - setsbufL(sb, L); -- for (;;) { -+ while (retry--) { /* Limit growth for invalid format or empty result. */ - char *buf = lj_buf_need(sb, sz); - size_t len = strftime(buf, sbufsz(sb), s, stm); - if (len) { --- -2.20.1 - diff --git a/0059-Improve-luaL_addlstring.patch b/0059-Improve-luaL_addlstring.patch deleted file mode 100644 index 0bf7cf5..0000000 --- a/0059-Improve-luaL_addlstring.patch +++ /dev/null @@ -1,35 +0,0 @@ -From f0e865dd4861520258299d0f2a56491bd9d602e1 Mon Sep 17 00:00:00 2001 -From: Mike Pall <mike> -Date: Thu, 10 Jan 2019 13:09:17 +0100 -Subject: [PATCH 59/72] Improve luaL_addlstring(). - -Thanks to Domingo Alvarez Duarte. ---- - src/lib_aux.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/src/lib_aux.c b/src/lib_aux.c -index c40565c..2682a38 100644 ---- a/src/lib_aux.c -+++ b/src/lib_aux.c -@@ -218,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) - - LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) - { -- while (l--) -- luaL_addchar(B, *s++); -+ if (l <= bufffree(B)) { -+ memcpy(B->p, s, l); -+ B->p += l; -+ } else { -+ emptybuffer(B); -+ lua_pushlstring(B->L, s, l); -+ B->lvl++; -+ adjuststack(B); -+ } - } - - LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) --- -2.20.1 - diff --git a/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch b/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch deleted file mode 100644 index 8fe6e8e..0000000 --- a/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 43a3893b0d7d82bfbfd13bf458a5906f755989c9 Mon Sep 17 00:00:00 2001 -From: Patrick Galizia pgalizia.qdt@qualcommdatacenter.com -Date: Fri, 24 Aug 2018 11:02:15 -0400 -Subject: [PATCH 60/72] Fix arm64 register allocation issue for XLOAD. - -For the arm64 implementation of asm_xload(), it is possible for -the dest register selected to be the same as one of the source -registers generated in the asm_fusexref() call. 
To prevent this, -exclude the dest register from the list of allowed registers for -that call. - -Thanks to Javier for guidance as well as his script to replicate -the issue. ---- - src/lj_asm_arm64.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index baafa21..045f260 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -1008,7 +1008,8 @@ static void asm_xload(ASMState *as, IRIns *ir) - { - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); -- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); -+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, -+ rset_exclude(RSET_GPR, dest)); - } - - static void asm_xstore(ASMState *as, IRIns *ir) --- -2.20.1 - diff --git a/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch b/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch deleted file mode 100644 index 1f58f88..0000000 --- a/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch +++ /dev/null @@ -1,59 +0,0 @@ -From ec0d8427ade9346d356623072fcb91c2d11e3bda Mon Sep 17 00:00:00 2001 -From: Patrick Galizia pgalizia.qdt@qualcommdatacenter.com -Date: Wed, 28 Nov 2018 14:14:35 -0500 -Subject: [PATCH 61/72] Fix arm64 register allocation issue for XLOAD. - -For arm64, it's possible for both IRRefs to fail asm_isk32(), but -one of them pass irref_isk(). Add a secondary check for the latter -call if both asm_isk32() calls fail. ---- - src/lj_asm_arm64.h | 18 +++++++++++++----- - 1 file changed, 13 insertions(+), 5 deletions(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index 045f260..ce49cde 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -295,9 +295,18 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, - } else if (asm_isk32(as, ir->op1, &ofs)) { - ref = ir->op2; - } else { -- Reg rn = ra_alloc1(as, ir->op1, allow); -- IRIns *irr = IR(ir->op2); -+ IRRef ref1 = ir->op1; -+ IRRef ref2 = ir->op2; -+ Reg rn; -+ IRIns *irr; - uint32_t m; -+ -+ if (irref_isk(ir->op1)) { -+ ref1 = ir->op2; -+ ref2 = ir->op1; -+ } -+ rn = ra_alloc1(as, ref1, allow); -+ irr = IR(ref2); - if (irr+1 == ir && !ra_used(irr) && - irr->o == IR_ADD && irref_isk(irr->op2)) { - ofs = sizeof(GCstr) + IR(irr->op2)->i; -@@ -307,7 +316,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, - goto skipopm; - } - } -- m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); -+ m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn)); - ofs = sizeof(GCstr); - skipopm: - emit_lso(as, ai, rd, rd, ofs); -@@ -1008,8 +1017,7 @@ static void asm_xload(ASMState *as, IRIns *ir) - { - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); -- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, -- rset_exclude(RSET_GPR, dest)); -+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); - } - - static void asm_xstore(ASMState *as, IRIns *ir) --- -2.20.1 - diff --git a/0062-Remove-redundant-emit_check_ofs.patch b/0062-Remove-redundant-emit_check_ofs.patch deleted file mode 100644 index 9b34eab..0000000 --- a/0062-Remove-redundant-emit_check_ofs.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 1fae7b08e319ba4028d303b09de72b026109a269 Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@gotplt.org -Date: Fri, 22 Feb 2019 19:05:38 +0000 -Subject: [PATCH 62/72] Remove redundant emit_check_ofs - -Even if the offset is a constant, it is not 32-bit since it failed -that check earlier before it came here. The code is thus useless and -hence removed. This also fixes inconsistencies with op1/op2 renaming -that were introduced in PR #438. They were never triggered because -the code path is effectively dead for arm64. ---- - src/lj_asm_arm64.h | 15 +-------------- - 1 file changed, 1 insertion(+), 14 deletions(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index ce49cde..c214e10 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -298,27 +298,14 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, - IRRef ref1 = ir->op1; - IRRef ref2 = ir->op2; - Reg rn; -- IRIns *irr; -- uint32_t m; - - if (irref_isk(ir->op1)) { - ref1 = ir->op2; - ref2 = ir->op1; - } - rn = ra_alloc1(as, ref1, allow); -- irr = IR(ref2); -- if (irr+1 == ir && !ra_used(irr) && -- irr->o == IR_ADD && irref_isk(irr->op2)) { -- ofs = sizeof(GCstr) + IR(irr->op2)->i; -- if (emit_checkofs(ai, ofs)) { -- Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); -- m = A64F_M(rm) | A64F_EX(A64EX_SXTW); -- goto skipopm; -- } -- } -- m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn)); -+ uint32_t m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn)); - ofs = sizeof(GCstr); -- skipopm: - emit_lso(as, ai, rd, rd, ofs); - emit_dn(as, A64I_ADDx^m, rd, rn); - return; --- -2.20.1 - diff --git a/0063-aarch64-Use-the-xzr-register-whenever-possible.patch b/0063-aarch64-Use-the-xzr-register-whenever-possible.patch deleted file mode 100644 index c2b0505..0000000 --- a/0063-aarch64-Use-the-xzr-register-whenever-possible.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 8fc4ce1c981967fccd5366ace6add6d14cfcde89 Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@gotplt.org -Date: Mon, 25 Feb 2019 14:40:39 +0000 -Subject: [PATCH 63/72] aarch64: Use the xzr register whenever possible - -Using the xzr register for store inputs and the second operand of -arithmetic operations frees up a register for use elsewhere. 
---- - src/lj_asm_arm64.h | 31 ++++++++++++++++++++++++++++--- - 1 file changed, 28 insertions(+), 3 deletions(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index c214e10..a826687 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -1007,10 +1007,30 @@ static void asm_xload(ASMState *as, IRIns *ir) - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); - } - -+static int maybe_zero_val(ASMState *as, IRRef ref) -+{ -+ IRIns *ir = IR(ref); -+ -+ switch(ir->o) { -+ case IR_KNULL: -+ return 1; -+ case IR_KINT: -+ return 0 == ir->i; -+ case IR_KINT64: -+ return 0 == ir_kint64(ir)->u64; -+ } -+ -+ return 0; -+} -+ - static void asm_xstore(ASMState *as, IRIns *ir) - { - if (ir->r != RID_SINK) { -- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -+ Reg src; -+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2)) -+ src = RID_ZERO; -+ else -+ src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); - } -@@ -1198,7 +1218,12 @@ static void asm_cnew(ASMState *as, IRIns *ir) - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - int32_t ofs = sizeof(GCcdata); -- Reg r = ra_alloc1(as, ir->op2, allow); -+ Reg r; -+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2)) -+ r = RID_ZERO; -+ else -+ r = ra_alloc1(as, ir->op2, allow); -+ - lua_assert(sz == 4 || sz == 8); - emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ -@@ -1214,7 +1239,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - { -- Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); -+ Reg r = id == 0 ? RID_ZERO : (id < 65536) ? RID_X1 : ra_allock(as, id, allow); - emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); - emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); - emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); --- -2.20.1 - diff --git a/0065-Add-support-for-FNMADD-and-FNMSUB.patch b/0065-Add-support-for-FNMADD-and-FNMSUB.patch deleted file mode 100644 index c1762f4..0000000 --- a/0065-Add-support-for-FNMADD-and-FNMSUB.patch +++ /dev/null @@ -1,62 +0,0 @@ -From e99ac1bc2df5c1d138bbc98d35d1a1892144cf2b Mon Sep 17 00:00:00 2001 -From: Sameera Deshpande sameera.deshpande@linaro.org -Date: Fri, 15 Feb 2019 07:46:16 +0530 -Subject: [PATCH 65/72] Add support for FNMADD and FNMSUB. - ---- - src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++- - 1 file changed, 31 insertions(+), 1 deletion(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index a826687..470e65d 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -344,6 +344,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) - return 0; - } - -+/* Fuse FP neg-multiply-add/sub. 
*/ -+static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) -+{ -+ IRRef ref = ir->op1; -+ IRIns *irn = IR(ref); -+ if (irn->o != IR_ADD && irn->o != IR_SUB) -+ return 0; -+ -+ if (!mayfuse(as, ref)) -+ return 0; -+ -+ IRRef lref = irn->op1, rref = irn->op2; -+ IRIns *irm; -+ if (lref != rref && -+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && -+ ra_noreg(irm->r)) || -+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && -+ (rref = lref, ra_noreg(irm->r))))) { -+ Reg dest = ra_dest(as, ir, RSET_FPR); -+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); -+ Reg left = ra_alloc2(as, irm, -+ rset_exclude(rset_exclude(RSET_FPR, dest), add)); -+ Reg right = (left >> 8); left &= 255; -+ emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31)); -+ return 1; -+ } -+ return 0; -+} -+ - /* Fuse BAND + BSHL/BSHR into UBFM. */ - static int asm_fuseandshift(ASMState *as, IRIns *ir) - { -@@ -1481,7 +1510,8 @@ static void asm_mod(ASMState *as, IRIns *ir) - static void asm_neg(ASMState *as, IRIns *ir) - { - if (irt_isnum(ir->t)) { -- asm_fpunary(as, ir, A64I_FNEGd); -+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd)) -+ asm_fpunary(as, ir, A64I_FNEGd); - return; - } - asm_intneg(as, ir); --- -2.20.1 - diff --git a/0066-Fix-os.date-for-timezone-change-awareness.patch b/0066-Fix-os.date-for-timezone-change-awareness.patch deleted file mode 100644 index afab3fe..0000000 --- a/0066-Fix-os.date-for-timezone-change-awareness.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 372bb8b22546663ba57e69fad75c97cfd004ac63 Mon Sep 17 00:00:00 2001 -From: Vivien HENRIET bubuabu@bubuabu.org -Date: Wed, 30 Jan 2019 23:44:51 +0100 -Subject: [PATCH 66/72] Fix os.date() for timezone change awareness - -On POSIX target, system timezone change are not taken into account. -To reproduce, -1. call os.date() -2. change your timezone -3. call os.date() within the same luajit instance - -On POSIX target, os.date use localtime_r to retrieve time. -On other target, the function localtime is used. But there is a behaviour -diference between these two function. localtime acts as if it called tzset -which localtime_r don't. - -To fix the issue tzset is called before localtime_r. ---- - src/lib_os.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/lib_os.c b/src/lib_os.c -index ffbc3fd..09dc737 100644 ---- a/src/lib_os.c -+++ b/src/lib_os.c -@@ -185,6 +185,7 @@ LJLIB_CF(os_date) - #endif - } else { - #if LJ_TARGET_POSIX -+ tzset(); - stm = localtime_r(&t, &rtm); - #else - stm = localtime(&t); --- -2.20.1 - diff --git a/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch b/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch deleted file mode 100644 index 7f27204..0000000 --- a/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 49f19e7b31fc033ac1e9208580b5be31e2b66b19 Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Thu, 14 Mar 2019 23:08:24 +0530 -Subject: [PATCH 67/72] Revert "FFI: Make FP to U64 conversions match JIT - backend behavior." - -This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8. - -The patch breaks test 279, i.e. - - assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL") - -The patch was put in to make the JIT and interpreter behaviour -consistent[1] for float to unsigned int conversions but it ended up -making things worse. There needs to be a better fix for this. 
- -[1] https://github.com/LuaJIT/LuaJIT/pull/415 ---- - src/lj_obj.h | 18 +++++------------- - 1 file changed, 5 insertions(+), 13 deletions(-) - -diff --git a/src/lj_obj.h b/src/lj_obj.h -index 72b7ace..c7e4742 100644 ---- a/src/lj_obj.h -+++ b/src/lj_obj.h -@@ -942,22 +942,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) - - #define lj_num2int(n) ((int32_t)(n)) - --/* --** This must match the JIT backend behavior. In particular for archs --** that don't have a common hardware instruction for this conversion. --** Note that signed FP to unsigned int conversions have an undefined --** result and should never be relied upon in portable FFI code. --** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). --*/ - static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) - { --#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS -- int64_t i = (int64_t)n; -- if (i < 0) i = (int64_t)(n - 18446744073709551616.0); -- return (uint64_t)i; --#else -- return (uint64_t)n; -+#ifdef _MSC_VER -+ if (n >= 9223372036854775808.0) /* They think it's a feature. */ -+ return (uint64_t)(int64_t)(n - 18446744073709551616.0); -+ else - #endif -+ return (uint64_t)n; - } - - static LJ_AINLINE int32_t numberVint(cTValue *o) --- -2.20.1 - diff --git a/0068-bench-Fix-build-warnings.patch b/0068-bench-Fix-build-warnings.patch deleted file mode 100644 index 5ee8bc7..0000000 --- a/0068-bench-Fix-build-warnings.patch +++ /dev/null @@ -1,47 +0,0 @@ -commit 0513e634f0013083d29af9f5762b225297d3ad6c (HEAD -> v2.1, origin/v2.1) -Author: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Fri Apr 12 20:42:55 2019 +0530 - - Remove built binary from git - - Oops. - -From 9b4f498707569f3ecf81a0561a0d3d91570cec3d Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Fri, 15 Mar 2019 15:51:02 +0530 -Subject: [PATCH 68/72] bench: Fix build warnings - ---- - bench/Makefile | 2 +- - bench/luajit-bench | Bin 571144 -> 571224 bytes - bench/luajit-bench.c | 1 + - 3 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/bench/Makefile b/bench/Makefile -index d0c1e8d..87d213a 100644 ---- a/bench/Makefile -+++ b/bench/Makefile -@@ -44,7 +44,7 @@ endif - LUAJIT_A = ../src/$(FILE_A) - - $(BENCH_BIN): $(LUAJIT_A) $(BENCH_BIN).c Makefile -- $(CC) $@.c $(DURATION) -g -O3 -c -o $@.o -I ../src -+ $(CC) $@.c -std=gnu11 $(DURATION) -g -O3 -c -o $@.o -I ../src - $(CC) $@.o -lpthread $< -lm -ldl -o $@ - - # Build the luajit static library if it doesn't exist. -diff --git a/bench/luajit-bench.c b/bench/luajit-bench.c -index e7b068d..6603132 100644 ---- a/bench/luajit-bench.c -+++ b/bench/luajit-bench.c -@@ -39,6 +39,7 @@ - #include <argp.h> - #include <sys/param.h> - #include <string.h> -+#include <time.h> - - #include "lua.h" - #include "lualib.h" --- -2.20.1 - diff --git a/0069-Guard-against-undefined-behaviour-when-casting-from-.patch b/0069-Guard-against-undefined-behaviour-when-casting-from-.patch deleted file mode 100644 index e498f62..0000000 --- a/0069-Guard-against-undefined-behaviour-when-casting-from-.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 454bea87cff4ff3cd2fd9ae34a3718dd200ce0fb Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Sun, 17 Mar 2019 11:34:04 +0530 -Subject: [PATCH 69/72] Guard against undefined behaviour when casting from - float to unsigned - -Only range (-1.0, UINT64_MAX) can be safely converted to unsigned -directly, and (-INT64_MAX,INT_64_MAX) through a cast to int64_t first. -The remaining range is undefined. 
- -TODO: Do the same for JIT as well as for float to other ranges. ---- - src/lj_obj.h | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/src/lj_obj.h b/src/lj_obj.h -index c7e4742..4ff5944 100644 ---- a/src/lj_obj.h -+++ b/src/lj_obj.h -@@ -944,12 +944,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) - - static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) - { -+ /* Undefined behaviour. This is deliberately not a full check because we -+ don't want to slow down compliant code. */ -+ lua_assert(n >= -9223372036854775809.0); - #ifdef _MSC_VER - if (n >= 9223372036854775808.0) /* They think it's a feature. */ - return (uint64_t)(int64_t)(n - 18446744073709551616.0); - else - #endif -- return (uint64_t)n; -+ if (n > -1.0) -+ return (uint64_t)n; -+ else -+ return (uint64_t)(int64_t)n; - } - - static LJ_AINLINE int32_t numberVint(cTValue *o) --- -2.20.1 - diff --git a/0070-Fix-build-erro-with-fnmsub-fusing.patch b/0070-Fix-build-erro-with-fnmsub-fusing.patch deleted file mode 100644 index a506e4f..0000000 --- a/0070-Fix-build-erro-with-fnmsub-fusing.patch +++ /dev/null @@ -1,25 +0,0 @@ -From ddca2290b8fa73fc32e88f83105219a1f2be75ff Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Mon, 25 Mar 2019 17:56:53 +0530 -Subject: [PATCH 70/72] Fix build erro with fnmsub fusing - ---- - src/lj_asm_arm64.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index 470e65d..42a4fae 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -1510,7 +1510,7 @@ static void asm_mod(ASMState *as, IRIns *ir) - static void asm_neg(ASMState *as, IRIns *ir) - { - if (irt_isnum(ir->t)) { -- if (!asm_fusenmadd(as, ir, A64I_FNMADDd)) -+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd)) - asm_fpunary(as, ir, A64I_FNEGd); - return; - } --- -2.20.1 - diff --git a/0071-aarch64-better-float-to-unsigned-int-conversion.patch b/0071-aarch64-better-float-to-unsigned-int-conversion.patch deleted file mode 100644 index 305f07b..0000000 --- a/0071-aarch64-better-float-to-unsigned-int-conversion.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 70e65633d892765bcbaad3493e5b690abd5402f2 Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Thu, 28 Mar 2019 09:19:34 +0530 -Subject: [PATCH 71/72] aarch64: better float to unsigned int conversion - -A straight float to unsigned conversion has a limited range of (-1.0, -UTYPE_MAX) which should be fine in general but for the sake of -consistency across the interpreter and the JIT compiler, it is -necessary to work a wee bit harder to expand this range to (TYPE_MIN, -UTYPE_MAX), which can be done with a simple range check. This adds a -couple of branches but only one of the branches should have a -noticeable performance impact on most processors with branch -predictors, and that too only if the input number varies wildly in -range. - -This currently works only for 64-bit conversions, 32-bit is still WIP. ---- - src/lj_asm_arm64.h | 30 ++++++++++++++++++++++-------- - src/lj_target_arm64.h | 1 + - 2 files changed, 23 insertions(+), 8 deletions(-) - -diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h -index 42a4fae..c72144a 100644 ---- a/src/lj_asm_arm64.h -+++ b/src/lj_asm_arm64.h -@@ -594,14 +594,28 @@ static void asm_conv(ASMState *as, IRIns *ir) - } else { - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg dest = ra_dest(as, ir, RSET_GPR); -- A64Ins ai = irt_is64(ir->t) ? -- (st == IRT_NUM ? -- (irt_isi64(ir->t) ? 
A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : -- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : -- (st == IRT_NUM ? -- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : -- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); -- emit_dn(as, ai, dest, (left & 31)); -+ -+ A64Ins ai_signed = st == IRT_NUM ? -+ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) : -+ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32); -+ -+ if (irt_isi64(ir->t) || irt_isint(ir->t)) -+ emit_dn(as, ai_signed, dest, (left & 31)); -+ else { -+ A64Ins ai_unsigned = st == IRT_NUM ? -+ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) : -+ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32); -+ -+ MCLabel l_done = emit_label(as); -+ emit_dn(as, ai_unsigned, dest, (left & 31)); -+ MCLabel l_signed = emit_label(as); -+ emit_jmp(as, l_done); -+ emit_dn(as, ai_signed, dest, (left & 31)); -+ /* The valid range for float to unsigned int conversion is (-1.0, -+ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */ -+ emit_cond_branch(as, CC_PL, l_signed); -+ emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0); -+ } - } - } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg dest = ra_dest(as, ir, RSET_GPR); -diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h -index a207a2b..2f8357f 100644 ---- a/src/lj_target_arm64.h -+++ b/src/lj_target_arm64.h -@@ -279,6 +279,7 @@ typedef enum A64Ins { - A64I_STPs = 0x2d000000, - A64I_STPd = 0x6d000000, - A64I_FCMPd = 0x1e602000, -+ A64I_FCMPZs = 0x1e202008, - A64I_FCMPZd = 0x1e602008, - A64I_FCSELd = 0x1e600c00, - A64I_FRINTMd = 0x1e654000, --- -2.20.1 - diff --git a/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch b/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch deleted file mode 100644 index 20cb957..0000000 --- a/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch +++ /dev/null @@ -1,39 +0,0 @@ -From f2779155495aee6583abaff4700a7acda80864ef Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Thu, 28 Mar 2019 10:50:23 +0530 -Subject: [PATCH 72/72] Better behaviour for float to uint32_t conversions - -This is the uint32_t part of the float to unsigned int conversions for -the interpreter. The cast ends up working correctly for x86 but not -for aarch64 since fcvtzu sets the result to zero on negative inputs. -Work slightly harder to make sure that negative number inputs behave -like x86. - -This fixes the interpreter but not the JIT compiler, which errors out -during the narrowing pass. ---- - src/lj_cconv.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/src/lj_cconv.c b/src/lj_cconv.c -index 13b8230..bf8f8e8 100644 ---- a/src/lj_cconv.c -+++ b/src/lj_cconv.c -@@ -196,7 +196,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, - else if (dsize == 2) *(int16_t *)dp = (int16_t)i; - else *(int8_t *)dp = (int8_t)i; - } else if (dsize == 4) { -- *(uint32_t *)dp = (uint32_t)n; -+ /* Undefined behaviour. This is deliberately not a full check because we -+ * don't want to slow down compliant code. 
*/ -+ lua_assert(n >= -2147483649.0); -+ if (n > -1.0) -+ *(uint32_t *)dp = (uint32_t)n; -+ else -+ *(uint32_t *)dp = (uint32_t)(int32_t)n; - } else if (dsize == 8) { - if (!(dinfo & CTF_UNSIGNED)) - *(int64_t *)dp = (int64_t)n; --- -2.20.1 - diff --git a/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch b/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch deleted file mode 100644 index 44aeea4..0000000 --- a/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 24429cc95657332e3953a21581d3220884da3d75 Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Wed, 8 May 2019 22:14:00 +0530 -Subject: [PATCH] arm: Fix up condition codes for conditional arithmetic insn - -When an arithmetic instruction such as add or sub are combined with a -subsequent compare with zero, its following conditional branch code -needs fixing up. This is necessary because one could generate an add -with a subtract of the negative but such a substitution, while correct -on its own, will change the effect on condition flags since while -addition of two positive numbers may signal an overflow, addition of a -positive and a negative number may not. So if earlier the condition -code was GE, it needs to be fixed up to PL to remain correct. - -We did that for bit operations but not for arithmetic, so do that now. ---- - src/lj_asm_arm.h | 38 ++++++++++++++++++++------------------ - 1 file changed, 20 insertions(+), 18 deletions(-) - -diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h -index 37bfa40f..e585b4c2 100644 ---- a/src/lj_asm_arm.h -+++ b/src/lj_asm_arm.h -@@ -1412,13 +1412,28 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) - emit_dn(as, ai^m, dest, left); - } - --static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) -+static ARMIns maybe_drop_zero_cmp(ASMState *as, ARMIns ai) - { -- if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ -+ if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ -+ uint32_t cc = (as->mcp[1] >> 28); - as->flagmcp = NULL; -- as->mcp++; -- ai |= ARMI_S; -+ if (cc <= CC_NE) { -+ as->mcp++; -+ ai |= ARMI_S; -+ } else if (cc == CC_GE) { -+ *++as->mcp ^= ((CC_GE^CC_PL) << 28); -+ ai |= ARMI_S; -+ } else if (cc == CC_LT) { -+ *++as->mcp ^= ((CC_LT^CC_MI) << 28); -+ ai |= ARMI_S; -+ } /* else: other conds don't work with bit ops. */ - } -+ return ai; -+} -+ -+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) -+{ -+ ai = maybe_drop_zero_cmp(as, ai); - asm_intop(as, ir, ai); - } - -@@ -1514,20 +1529,7 @@ static void asm_neg(ASMState *as, IRIns *ir) - - static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) - { -- if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ -- uint32_t cc = (as->mcp[1] >> 28); -- as->flagmcp = NULL; -- if (cc <= CC_NE) { -- as->mcp++; -- ai |= ARMI_S; -- } else if (cc == CC_GE) { -- *++as->mcp ^= ((CC_GE^CC_PL) << 28); -- ai |= ARMI_S; -- } else if (cc == CC_LT) { -- *++as->mcp ^= ((CC_LT^CC_MI) << 28); -- ai |= ARMI_S; -- } /* else: other conds don't work with bit ops. 
*/ -- } -+ ai = maybe_drop_zero_cmp(as, ai); - if (ir->op2 == 0) { - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); --- -2.21.0 - diff --git a/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch b/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch deleted file mode 100644 index 939ac87..0000000 --- a/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch +++ /dev/null @@ -1,160 +0,0 @@ -From a6a2720ddc22f9f62f119325881d05722c4f392e Mon Sep 17 00:00:00 2001 -From: Thibault Charbonnier thibaultcha@me.com -Date: Tue, 19 Mar 2019 13:52:51 -0700 -Subject: [PATCH 1/3] bugfix: fixed a segfault when unsinking 64-bit pointers. - -The unsinking code was not using the correct layout for GC64 IR -constants (value in adjacent slot) for this case. - -This patch is a derivative of -https://github.com/raptorjit/raptorjit/pull/246 ported for LuaJIT -itself. - -Fixed after an intense debugging session with @lukego. - -Co-authored-by: Luke Gorrie lukego@gmail.com ---- - src/lj_ir.h | 12 ++++++------ - src/lj_snap.c | 2 +- - 2 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/src/lj_ir.h b/src/lj_ir.h -index 8057a750..a46b561f 100644 ---- a/src/lj_ir.h -+++ b/src/lj_ir.h -@@ -562,6 +562,11 @@ typedef union IRIns { - TValue tv; /* TValue constant (overlaps entire slot). */ - } IRIns; - -+#define ir_isk64(ir) ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ -+ (LJ_GC64 && \ -+ ((ir)->o == IR_KGC || \ -+ (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR))) -+ - #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) - #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) - #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) -@@ -569,12 +574,7 @@ typedef union IRIns { - #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) - #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) - #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) --#define ir_k64(ir) \ -- check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ -- (LJ_GC64 && \ -- ((ir)->o == IR_KGC || \ -- (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ -- &(ir)[1].tv) -+#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv) - #define ir_kptr(ir) \ - check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ - mref((ir)[LJ_GC64].ptr, void)) -diff --git a/src/lj_snap.c b/src/lj_snap.c -index ceaf2ca5..75888d80 100644 ---- a/src/lj_snap.c -+++ b/src/lj_snap.c -@@ -688,7 +688,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, - int32_t *src; - uint64_t tmp; - if (irref_isk(ref)) { -- if (ir->o == IR_KNUM || ir->o == IR_KINT64) { -+ if (ir_isk64(ir)) { - src = (int32_t *)&ir[1]; - } else if (sz == 8) { - tmp = (uint64_t)(uint32_t)ir->i; --- -2.21.0 - - -From f36cddf49b664d713bfa7c332673bdc66861d2ad Mon Sep 17 00:00:00 2001 -From: Thibault Charbonnier thibaultcha@me.com -Date: Tue, 19 Mar 2019 13:49:18 -0700 -Subject: [PATCH 2/3] tests: ffi: added a test case unsinking a 64-bit pointer - from a constant. - -This test case reproduces the issue observed at: -https://github.com/openresty/lua-resty-core/issues/232 and was -contributed by @lukego and myself. 
- -Co-authored-by: Luke Gorrie lukego@gmail.com ---- - test/ffi/unsink_64_kptr.lua | 26 ++++++++++++++++++++++++++ - 1 file changed, 26 insertions(+) - create mode 100644 test/ffi/unsink_64_kptr.lua - -diff --git a/test/ffi/unsink_64_kptr.lua b/test/ffi/unsink_64_kptr.lua -new file mode 100644 -index 00000000..7fab0e89 ---- /dev/null -+++ b/test/ffi/unsink_64_kptr.lua -@@ -0,0 +1,26 @@ -+local ffi = require("ffi") -+ -+local array = ffi.new("struct { int x; } [1]") -+ -+-- This test forces the VM to unsink a pointer that was constructed -+-- from a constant. The IR will include a 'cnewi' instruction to -+-- allocate an FFI pointer object, the pointer value will be an IR -+-- constant, the allocation will be sunk, and the allocation will -+-- at some point be "unsunk" due to a reference in the snapshot for -+-- a taken exit. -+ -+-- Note: JIT will recognize <array> as a "singleton" and allow its -+-- address to be inlined ("constified") instead of looking up the -+-- upvalue at runtime. -+ -+local function fn(i) -+ local struct = array[0] -- Load pointer that the JIT will constify. -+ if i == 1000 then end -- Force trace exit when i==1000. -+ struct.x = 0 -- Ensure that 'struct' is live after exit. -+end -+ -+-- Loop over the function to make it compile and take a trace exit -+-- during the final iteration. -+for i = 1, 1000 do -+ fn(i) -+end --- -2.21.0 - - -From 7b2f874b8061f206b22c04aee336b15030213637 Mon Sep 17 00:00:00 2001 -From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Tue, 14 May 2019 22:01:37 +0530 -Subject: [PATCH 3/3] Make unsink_64_kptr usable in the testsuite - ---- - test/lib/ffi/index | 1 + - test/{ => lib}/ffi/unsink_64_kptr.lua | 6 ++++-- - 2 files changed, 5 insertions(+), 2 deletions(-) - rename test/{ => lib}/ffi/unsink_64_kptr.lua (93%) - -diff --git a/test/lib/ffi/index b/test/lib/ffi/index -index 59e36dd8..7933c5a7 100644 ---- a/test/lib/ffi/index -+++ b/test/lib/ffi/index -@@ -10,3 +10,4 @@ jit_struct.lua - meta_tostring.lua - redir.lua - type_punning.lua -+unsink_64_kptr.lua -diff --git a/test/ffi/unsink_64_kptr.lua b/test/lib/ffi/unsink_64_kptr.lua -similarity index 93% -rename from test/ffi/unsink_64_kptr.lua -rename to test/lib/ffi/unsink_64_kptr.lua -index 7fab0e89..f285d9ff 100644 ---- a/test/ffi/unsink_64_kptr.lua -+++ b/test/lib/ffi/unsink_64_kptr.lua -@@ -21,6 +21,8 @@ end - - -- Loop over the function to make it compile and take a trace exit - -- during the final iteration. --for i = 1, 1000 do -- fn(i) -+do --- unsink 64-bit pointers -+ for i = 1, 1000 do -+ fn(i) -+ end - end --- -2.21.0 - diff --git a/0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch b/luajit-2.1-fedora.patch similarity index 99% rename from 0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch rename to luajit-2.1-fedora.patch index fb2b611..e84dfa1 100644 --- a/0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch +++ b/luajit-2.1-fedora.patch @@ -1,15 +1,7 @@ -commit 0513e634f0013083d29af9f5762b225297d3ad6c (HEAD -> v2.1, origin/v2.1) -Author: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Fri Apr 12 20:42:55 2019 +0530 - - Remove built binary from git - - Oops. 
- -From 48eb69061df1da9d843707ec1d6b854255a3c87d Mon Sep 17 00:00:00 2001 +From 86a1a5033a3eb07e694f8e7f7024550928191024 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar siddhesh@sourceware.org -Date: Tue, 12 Mar 2019 12:56:01 +0530 -Subject: [PATCH 64/72] Merge in LuaJIT-test-cleanup into the main repo +Date: Thu, 21 Oct 2021 11:04:58 +0200 +Subject: [PATCH 01/10] Merge in LuaJIT-test-cleanup into the main repo
The tests and benchmarks in the LuaJIT-test-cleanup repo are more or less complete and with scaffolding added, they can now be called @@ -24,7 +16,6 @@ taken) and LuaJIT itself to allow for a more succint copyright notice that credits authors in addition to Mike Pall in the COPYRIGHT file. --- CONTRIBUTORS | 17 + - COPYRIGHT | 3 +- Makefile | 19 +- bench/FASTA_10000 | 1671 + bench/FASTA_1000000 | 166671 ++++++++++++++++++++++ @@ -38,9 +29,9 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. bench/SUMCOL_1.txt | 1000 + bench/SUMCOL_100 | 100 + bench/SUMCOL_1000 | 1000 + - bench/TEST_md5sum.txt | 20 + + bench/TEST_md5sum.txt | 19 + bench/TEST_md5sum_arm64.txt | 15 + - bench/array3d.lua | 59 + + bench/array3d.lua | 58 + bench/binary-trees.lua | 47 + bench/chameneos.lua | 68 + bench/coroutine-ring.lua | 42 + @@ -49,12 +40,11 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. bench/fasta.lua | 95 + bench/k-nucleotide.lua | 62 + bench/life.lua | 111 + - bench/luajit-bench | Bin 0 -> 571144 bytes - bench/luajit-bench.c | 283 + + bench/luajit-bench.c | 284 + bench/luajit-bench.lua | 53 + bench/mandelbrot-bit.lua | 33 + bench/mandelbrot.lua | 23 + - bench/md5.lua | 183 + + bench/md5.lua | 182 + bench/meteor.lua | 220 + bench/nbody.lua | 119 + bench/nsieve-bit-fp.lua | 37 + @@ -66,7 +56,7 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. bench/recursive-ack.lua | 8 + bench/recursive-fib.lua | 7 + bench/revcomp.lua | 39 + - bench/scimark-2010-12-20.lua | 400 + + bench/scimark-2010-12-20.lua | 399 + bench/scimark-fft.lua | 1 + bench/scimark-lu.lua | 1 + bench/scimark-sor.lua | 1 + @@ -79,14 +69,14 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. test/bc/constov.lua | 16 + test/bc/index | 1 + test/common/expect_error.lua | 16 + - test/common/ffi_util.inc | 41 + + test/common/ffi_util.inc | 40 + test/common/test_runner_canary.lua | 1 + test/computations.lua | 113 + test/index | 6 + test/lang/andor.lua | 61 + test/lang/assignment.lua | 46 + test/lang/compare.lua | 323 + - test/lang/compare_nan.lua | 99 + + test/lang/compare_nan.lua | 98 + test/lang/concat.lua | 112 + test/lang/constant/index | 2 + test/lang/constant/number.lua | 12 + @@ -100,9 +90,9 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. test/lang/meta/arith.lua | 118 + test/lang/meta/arith_jit.lua | 68 + test/lang/meta/call.lua | 81 + - test/lang/meta/cat.lua | 61 + + test/lang/meta/cat.lua | 60 + test/lang/meta/comp.lua | 120 + - test/lang/meta/comp_jit.lua | 104 + + test/lang/meta/comp_jit.lua | 103 + test/lang/meta/debuginfo.lua | 81 + test/lang/meta/eq.lua | 30 + test/lang/meta/eq_jit.lua | 35 + @@ -111,14 +101,14 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. test/lang/meta/index.lua | 60 + test/lang/meta/len.lua | 42 + test/lang/meta/newindex.lua | 69 + - test/lang/meta/nomm.lua | 21 + + test/lang/meta/nomm.lua | 20 + test/lang/modulo.lua | 46 + test/lang/self.lua | 19 + test/lang/table.lua | 32 + test/lang/tail_recursion.lua | 20 + test/lang/upvalue/closure.lua | 84 + test/lang/upvalue/index | 1 + - test/lang/vararg_jit.lua | 95 + + test/lang/vararg_jit.lua | 94 + test/lib/base/assert.lua | 33 + test/lib/base/error.lua | 43 + test/lib/base/getfenv.lua | 13 + @@ -135,36 +125,36 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. 
test/lib/contents.lua | 158 + test/lib/coroutine/index | 1 + test/lib/coroutine/yield.lua | 109 + - test/lib/ffi/bit64.lua | 130 + + test/lib/ffi/bit64.lua | 129 + test/lib/ffi/cdata_var.lua | 47 + - test/lib/ffi/copy_fill.lua | 64 + - test/lib/ffi/err.lua | 35 + - test/lib/ffi/ffi_arith_ptr.lua | 106 + - test/lib/ffi/ffi_bitfield.lua | 108 + - test/lib/ffi/ffi_call.lua | 266 + - test/lib/ffi/ffi_callback.lua | 158 + - test/lib/ffi/ffi_const.lua | 113 + - test/lib/ffi/ffi_convert.lua | 787 + - test/lib/ffi/ffi_enum.lua | 57 + - test/lib/ffi/ffi_gcstep_recursive.lua | 66 + - test/lib/ffi/ffi_jit_arith.lua | 155 + - test/lib/ffi/ffi_jit_call.lua | 154 + + test/lib/ffi/copy_fill.lua | 63 + + test/lib/ffi/err.lua | 34 + + test/lib/ffi/ffi_arith_ptr.lua | 105 + + test/lib/ffi/ffi_bitfield.lua | 107 + + test/lib/ffi/ffi_call.lua | 265 + + test/lib/ffi/ffi_callback.lua | 157 + + test/lib/ffi/ffi_const.lua | 112 + + test/lib/ffi/ffi_convert.lua | 786 + + test/lib/ffi/ffi_enum.lua | 56 + + test/lib/ffi/ffi_gcstep_recursive.lua | 65 + + test/lib/ffi/ffi_jit_arith.lua | 154 + + test/lib/ffi/ffi_jit_call.lua | 153 + test/lib/ffi/ffi_jit_conv.lua | 277 + - test/lib/ffi/ffi_lex_number.lua | 51 + - test/lib/ffi/ffi_metatype.lua | 245 + - test/lib/ffi/ffi_new.lua | 106 + - test/lib/ffi/ffi_parse_array.lua | 78 + - test/lib/ffi/ffi_parse_basic.lua | 131 + - test/lib/ffi/ffi_parse_cdef.lua | 77 + - test/lib/ffi/ffi_parse_struct.lua | 259 + + test/lib/ffi/ffi_lex_number.lua | 50 + + test/lib/ffi/ffi_metatype.lua | 244 + + test/lib/ffi/ffi_new.lua | 105 + + test/lib/ffi/ffi_parse_array.lua | 77 + + test/lib/ffi/ffi_parse_basic.lua | 130 + + test/lib/ffi/ffi_parse_cdef.lua | 76 + + test/lib/ffi/ffi_parse_struct.lua | 258 + test/lib/ffi/ffi_tabov.lua | 12 + test/lib/ffi/index | 12 + test/lib/ffi/istype.lua | 88 + test/lib/ffi/jit_array.lua | 104 + test/lib/ffi/jit_complex.lua | 109 + test/lib/ffi/jit_misc.lua | 109 + - test/lib/ffi/jit_struct.lua | 201 + - test/lib/ffi/meta_tostring.lua | 55 + + test/lib/ffi/jit_struct.lua | 200 + + test/lib/ffi/meta_tostring.lua | 54 + test/lib/ffi/redir.lua | 19 + test/lib/ffi/type_punning.lua | 138 + test/lib/index | 8 + @@ -187,50 +177,50 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. 
test/lib/string/sub.lua | 189 + test/lib/table/concat.lua | 55 + test/lib/table/index | 6 + - test/lib/table/insert.lua | 17 + - test/lib/table/misc.lua | 58 + + test/lib/table/insert.lua | 16 + + test/lib/table/misc.lua | 55 + test/lib/table/new.lua | 11 + test/lib/table/pack.lua | 7 + test/lib/table/remove.lua | 42 + test/lib/table/sort.lua | 27 + - test/misc/alias_alloc.lua | 54 + - test/misc/api_call.lua | 98 + - test/misc/catch_wrap.lua | 45 + - test/misc/coro_traceback.lua | 8 + - test/misc/coro_yield.lua | 111 + + test/misc/alias_alloc.lua | 53 + + test/misc/api_call.lua | 97 + + test/misc/catch_wrap.lua | 44 + + test/misc/coro_traceback.lua | 7 + + test/misc/coro_yield.lua | 110 + test/misc/debug_gc.lua | 47 + - test/misc/dualnum.lua | 47 + - test/misc/for_dir.lua | 13 + - test/misc/fori_coerce.lua | 33 + - test/misc/gc_rechain.lua | 32 + - test/misc/gc_trace.lua | 37 + - test/misc/gcstep.lua | 33 + - test/misc/hook_active.lua | 95 + - test/misc/hook_line.lua | 41 + - test/misc/hook_norecord.lua | 12 + - test/misc/hook_record.lua | 8 + - test/misc/hook_top.lua | 55 + - test/misc/jit_flush.lua | 50 + - test/misc/lightud.lua | 88 + - test/misc/loop_unroll.lua | 35 + + test/misc/dualnum.lua | 46 + + test/misc/for_dir.lua | 12 + + test/misc/fori_coerce.lua | 32 + + test/misc/gc_rechain.lua | 31 + + test/misc/gc_trace.lua | 36 + + test/misc/gcstep.lua | 32 + + test/misc/hook_active.lua | 94 + + test/misc/hook_line.lua | 40 + + test/misc/hook_norecord.lua | 11 + + test/misc/hook_record.lua | 7 + + test/misc/hook_top.lua | 54 + + test/misc/jit_flush.lua | 49 + + test/misc/lightud.lua | 87 + + test/misc/loop_unroll.lua | 34 + test/misc/parse_comp.lua | 13 + test/misc/parse_esc.lua | 7 + - test/misc/parse_misc.lua | 31 + - test/misc/phi_conv.lua | 53 + - test/misc/recurse_deep.lua | 29 + - test/misc/recurse_tail.lua | 22 + - test/misc/stack_gc.lua | 15 + - test/misc/stack_purge.lua | 25 + - test/misc/stackov.lua | 40 + + test/misc/parse_misc.lua | 30 + + test/misc/phi_conv.lua | 52 + + test/misc/recurse_deep.lua | 28 + + test/misc/recurse_tail.lua | 21 + + test/misc/stack_gc.lua | 14 + + test/misc/stack_purge.lua | 24 + + test/misc/stackov.lua | 39 + test/misc/stackovc.lua | 4 + - test/misc/tcall_base.lua | 20 + + test/misc/tcall_base.lua | 19 + test/misc/tcall_loop.lua | 8 + - test/misc/tonumber_scan.lua | 180 + - test/misc/uclo.lua | 91 + - test/misc/unordered_jit.lua | 96 + + test/misc/tonumber_scan.lua | 179 + + test/misc/uclo.lua | 90 + + test/misc/unordered_jit.lua | 95 + test/misc/wbarrier.lua | 7 + - test/misc/wbarrier_jit.lua | 18 + - test/misc/wbarrier_obar.lua | 22 + + test/misc/wbarrier_jit.lua | 17 + + test/misc/wbarrier_obar.lua | 21 + test/opt/dse/array.lua | 197 + test/opt/dse/field.lua | 70 + test/opt/dse/index | 2 + @@ -251,11 +241,11 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. test/opt/sink/nosink.lua | 109 + test/src/cpptest.cpp | 129 + test/src/ctest.c | 339 + - test/sysdep/catch_cpp.lua | 71 + + test/sysdep/catch_cpp.lua | 70 + test/sysdep/ffi_include_gtk.lua | 9 + - test/sysdep/ffi_include_std.lua | 36 + - test/sysdep/ffi_lib_c.lua | 87 + - test/sysdep/ffi_lib_z.lua | 107 + + test/sysdep/ffi_include_std.lua | 35 + + test/sysdep/ffi_lib_c.lua | 86 + + test/sysdep/ffi_lib_z.lua | 106 + test/test.lua | 416 + test/trace/exit_frame.lua | 79 + test/trace/exit_growstack.lua | 28 + @@ -269,8 +259,8 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. 
test/trace/snap.lua | 47 + test/trace/stitch.lua | 19 + test/unportable/ffi_arith_int64.lua | 68 + - test/unportable/math_special.lua | 55 + - 247 files changed, 186644 insertions(+), 5 deletions(-) + test/unportable/math_special.lua | 54 + + 245 files changed, 186570 insertions(+), 4 deletions(-) create mode 100644 CONTRIBUTORS create mode 100644 bench/FASTA_10000 create mode 100644 bench/FASTA_1000000 @@ -295,7 +285,6 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file. create mode 100644 bench/fasta.lua create mode 100644 bench/k-nucleotide.lua create mode 100644 bench/life.lua - create mode 100755 bench/luajit-bench create mode 100644 bench/luajit-bench.c create mode 100644 bench/luajit-bench.lua create mode 100644 bench/mandelbrot-bit.lua @@ -519,7 +508,7 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 -index 0000000..a1c9209 +index 00000000..a1c9209b --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,17 @@ @@ -540,22 +529,8 @@ index 0000000..a1c9209 +Siddhesh Poyarekar +Vlad Krasnov +William Adams -diff --git a/COPYRIGHT b/COPYRIGHT -index 6ed4002..1e5c442 100644 ---- a/COPYRIGHT -+++ b/COPYRIGHT -@@ -1,7 +1,8 @@ - =============================================================================== - LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ - --Copyright (C) 2005-2017 Mike Pall. All rights reserved. -+Copyright (C) 2005-2019 Mike Pall. All rights reserved. -+Copyright (C) 2015-2019 LuaJIT Contributors, see CONTRIBUTORS file for a list. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile -index 0f93308..923bf72 100644 +index aa1b84bd..cb2b3418 100644 --- a/Makefile +++ b/Makefile @@ -106,14 +106,14 @@ endif @@ -604,7 +579,7 @@ index 0f93308..923bf72 100644 ############################################################################## diff --git a/bench/FASTA_10000 b/bench/FASTA_10000 new file mode 100644 -index 0000000..fb23263 +index 00000000..fb232633 --- /dev/null +++ b/bench/FASTA_10000 @@ -0,0 +1,1671 @@ @@ -2281,7 +2256,7 @@ index 0000000..fb23263 +gagatacctttgcaattttt diff --git a/bench/FASTA_1000000 b/bench/FASTA_1000000 new file mode 100644 -index 0000000..bafe0c5 +index 00000000..bafe0c5d --- /dev/null +++ b/bench/FASTA_1000000 @@ -0,0 +1,166671 @@ @@ -168958,7 +168933,7 @@ index 0000000..bafe0c5 +tacactgatacgaattattt diff --git a/bench/Makefile b/bench/Makefile new file mode 100644 -index 0000000..d0c1e8d +index 00000000..87d213a5 --- /dev/null +++ b/bench/Makefile @@ -0,0 +1,56 @@ @@ -169008,7 +168983,7 @@ index 0000000..d0c1e8d +LUAJIT_A = ../src/$(FILE_A) + +$(BENCH_BIN): $(LUAJIT_A) $(BENCH_BIN).c Makefile -+ $(CC) $@.c $(DURATION) -g -O3 -c -o $@.o -I ../src ++ $(CC) $@.c -std=gnu11 $(DURATION) -g -O3 -c -o $@.o -I ../src + $(CC) $@.o -lpthread $< -lm -ldl -o $@ + +# Build the luajit static library if it doesn't exist. 
@@ -169020,7 +168995,7 @@ index 0000000..d0c1e8d +endif diff --git a/bench/PARAM_arm.txt b/bench/PARAM_arm.txt new file mode 100644 -index 0000000..a07fd01 +index 00000000..a07fd010 --- /dev/null +++ b/bench/PARAM_arm.txt @@ -0,0 +1,29 @@ @@ -169055,7 +169030,7 @@ index 0000000..a07fd01 +sum-file 1000 SUMCOL_1000 diff --git a/bench/PARAM_arm64.txt b/bench/PARAM_arm64.txt new file mode 100644 -index 0000000..1c27638 +index 00000000..1c276385 --- /dev/null +++ b/bench/PARAM_arm64.txt @@ -0,0 +1,29 @@ @@ -169090,7 +169065,7 @@ index 0000000..1c27638 +sum-file 2e7 SUMCOL_1000 diff --git a/bench/PARAM_mips.txt b/bench/PARAM_mips.txt new file mode 100644 -index 0000000..e6bcadb +index 00000000..e6bcadba --- /dev/null +++ b/bench/PARAM_mips.txt @@ -0,0 +1,29 @@ @@ -169125,7 +169100,7 @@ index 0000000..e6bcadb +sum-file 100 SUMCOL_100 diff --git a/bench/PARAM_ppc.txt b/bench/PARAM_ppc.txt new file mode 100644 -index 0000000..c8319a1 +index 00000000..c8319a15 --- /dev/null +++ b/bench/PARAM_ppc.txt @@ -0,0 +1,29 @@ @@ -169160,7 +169135,7 @@ index 0000000..c8319a1 +sum-file 1000 SUMCOL_1000 diff --git a/bench/PARAM_x86.txt b/bench/PARAM_x86.txt new file mode 100644 -index 0000000..87088d7 +index 00000000..87088d7b --- /dev/null +++ b/bench/PARAM_x86.txt @@ -0,0 +1,29 @@ @@ -169195,7 +169170,7 @@ index 0000000..87088d7 +sum-file 5000 SUMCOL_5000 diff --git a/bench/README b/bench/README new file mode 100644 -index 0000000..16f55cb +index 00000000..16f55cbb --- /dev/null +++ b/bench/README @@ -0,0 +1,37 @@ @@ -169238,7 +169213,7 @@ index 0000000..16f55cb +results using the benchmark binary with that of the script. diff --git a/bench/SUMCOL_1.txt b/bench/SUMCOL_1.txt new file mode 100644 -index 0000000..956aba1 +index 00000000..956aba14 --- /dev/null +++ b/bench/SUMCOL_1.txt @@ -0,0 +1,1000 @@ @@ -170244,7 +170219,7 @@ index 0000000..956aba1 +264 diff --git a/bench/SUMCOL_100 b/bench/SUMCOL_100 new file mode 100644 -index 0000000..daf0c7b +index 00000000..daf0c7bb --- /dev/null +++ b/bench/SUMCOL_100 @@ -0,0 +1,100 @@ @@ -170350,7 +170325,7 @@ index 0000000..daf0c7b +264 diff --git a/bench/SUMCOL_1000 b/bench/SUMCOL_1000 new file mode 100644 -index 0000000..956aba1 +index 00000000..956aba14 --- /dev/null +++ b/bench/SUMCOL_1000 @@ -0,0 +1,1000 @@ @@ -171356,10 +171331,10 @@ index 0000000..956aba1 +264 diff --git a/bench/TEST_md5sum.txt b/bench/TEST_md5sum.txt new file mode 100644 -index 0000000..15aa8a1 +index 00000000..7d417a88 --- /dev/null +++ b/bench/TEST_md5sum.txt -@@ -0,0 +1,20 @@ +@@ -0,0 +1,19 @@ +binarytrees 10 7202f4e13df7abc5ad8c07f05fe9d644 +chameneos 1e5 a629ce12f63050c6656bce175258cf8f +cheapconcr 1000 d29799d1e263810a4db7bbf43ca66499 @@ -171379,10 +171354,9 @@ index 0000000..15aa8a1 +revcomp x 47de276e2f72519b57b82da39f4c7592 <FASTA_10000 +spectralnorm 200 25f44bd552ccd9faa0ee2ae5617947e2 +sumfile x 2ebd3caa45b31a2e74e436b645eab4b0 <SUMCOL_100 -+ diff --git a/bench/TEST_md5sum_arm64.txt b/bench/TEST_md5sum_arm64.txt new file mode 100644 -index 0000000..deab02e +index 00000000..deab02e5 --- /dev/null +++ b/bench/TEST_md5sum_arm64.txt @@ -0,0 +1,15 @@ @@ -171403,10 +171377,10 @@ index 0000000..deab02e +sum-file x 5d6b881128665a84e8863cac991b18a2 SUMCOL_100 diff --git a/bench/array3d.lua b/bench/array3d.lua new file mode 100644 -index 0000000..d638e4d +index 00000000..0c83c6c8 --- /dev/null +++ b/bench/array3d.lua -@@ -0,0 +1,59 @@ +@@ -0,0 +1,58 @@ + +local function array_set(self, x, y, z, p) + assert(x >= 0 and x < self.nx, "x outside PA") @@ -171465,10 +171439,9 @@ index 
0000000..d638e4d + arr:set(x, y, z, x*x) +end +assert(arr.image[dim^3-1] == (dim-1)^2) -+ diff --git a/bench/binary-trees.lua b/bench/binary-trees.lua new file mode 100644 -index 0000000..bf04046 +index 00000000..bf040466 --- /dev/null +++ b/bench/binary-trees.lua @@ -0,0 +1,47 @@ @@ -171521,7 +171494,7 @@ index 0000000..bf04046 + maxdepth, ItemCheck(longlivedtree))) diff --git a/bench/chameneos.lua b/bench/chameneos.lua new file mode 100644 -index 0000000..78b64c3 +index 00000000..78b64c3f --- /dev/null +++ b/bench/chameneos.lua @@ -0,0 +1,68 @@ @@ -171595,7 +171568,7 @@ index 0000000..78b64c3 +io.write(schedule(threads), "\n") diff --git a/bench/coroutine-ring.lua b/bench/coroutine-ring.lua new file mode 100644 -index 0000000..1e8c5ef +index 00000000..1e8c5ef6 --- /dev/null +++ b/bench/coroutine-ring.lua @@ -0,0 +1,42 @@ @@ -171643,7 +171616,7 @@ index 0000000..1e8c5ef +io.write(id, "\n") diff --git a/bench/euler14-bit.lua b/bench/euler14-bit.lua new file mode 100644 -index 0000000..537f2bf +index 00000000..537f2bf3 --- /dev/null +++ b/bench/euler14-bit.lua @@ -0,0 +1,22 @@ @@ -171671,7 +171644,7 @@ index 0000000..537f2bf +io.write("Found ", n, " (chain length: ", m, ")\n") diff --git a/bench/fannkuch.lua b/bench/fannkuch.lua new file mode 100644 -index 0000000..2a4cd42 +index 00000000..2a4cd426 --- /dev/null +++ b/bench/fannkuch.lua @@ -0,0 +1,50 @@ @@ -171727,7 +171700,7 @@ index 0000000..2a4cd42 +io.write("Pfannkuchen(", n, ") = ", fannkuch(n), "\n") diff --git a/bench/fasta.lua b/bench/fasta.lua new file mode 100644 -index 0000000..7ce6080 +index 00000000..7ce60804 --- /dev/null +++ b/bench/fasta.lua @@ -0,0 +1,95 @@ @@ -171828,7 +171801,7 @@ index 0000000..7ce6080 +make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, N*5) diff --git a/bench/k-nucleotide.lua b/bench/k-nucleotide.lua new file mode 100644 -index 0000000..b97e394 +index 00000000..b97e394c --- /dev/null +++ b/bench/k-nucleotide.lua @@ -0,0 +1,62 @@ @@ -171896,7 +171869,7 @@ index 0000000..b97e394 +count(seq, "GGTATTTTAATTTATAGT") diff --git a/bench/life.lua b/bench/life.lua new file mode 100644 -index 0000000..911d9fe +index 00000000..911d9fe1 --- /dev/null +++ b/bench/life.lua @@ -0,0 +1,111 @@ @@ -172013,10 +171986,10 @@ index 0000000..911d9fe +LIFE(40,20) diff --git a/bench/luajit-bench.c b/bench/luajit-bench.c new file mode 100644 -index 0000000..e7b068d +index 00000000..6603132b --- /dev/null +++ b/bench/luajit-bench.c -@@ -0,0 +1,283 @@ +@@ -0,0 +1,284 @@ +/* Benchmark driver. 
+ * + * Copyright (C) 2019 Vlad Krasnov @@ -172058,6 +172031,7 @@ index 0000000..e7b068d +#include <argp.h> +#include <sys/param.h> +#include <string.h> ++#include <time.h> + +#include "lua.h" +#include "lualib.h" @@ -172302,7 +172276,7 @@ index 0000000..e7b068d +} diff --git a/bench/luajit-bench.lua b/bench/luajit-bench.lua new file mode 100644 -index 0000000..7238725 +index 00000000..72387254 --- /dev/null +++ b/bench/luajit-bench.lua @@ -0,0 +1,53 @@ @@ -172361,7 +172335,7 @@ index 0000000..7238725 +end diff --git a/bench/mandelbrot-bit.lua b/bench/mandelbrot-bit.lua new file mode 100644 -index 0000000..91d9697 +index 00000000..91d96975 --- /dev/null +++ b/bench/mandelbrot-bit.lua @@ -0,0 +1,33 @@ @@ -172400,7 +172374,7 @@ index 0000000..91d9697 +end diff --git a/bench/mandelbrot.lua b/bench/mandelbrot.lua new file mode 100644 -index 0000000..0ef595a +index 00000000..0ef595a2 --- /dev/null +++ b/bench/mandelbrot.lua @@ -0,0 +1,23 @@ @@ -172429,10 +172403,10 @@ index 0000000..0ef595a +end diff --git a/bench/md5.lua b/bench/md5.lua new file mode 100644 -index 0000000..fdf6b4a +index 00000000..c4c087ee --- /dev/null +++ b/bench/md5.lua -@@ -0,0 +1,183 @@ +@@ -0,0 +1,182 @@ + +local bit = require("bit") +local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot @@ -172615,10 +172589,9 @@ index 0000000..fdf6b4a + res = md5(txt) +end +assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd') -+ diff --git a/bench/meteor.lua b/bench/meteor.lua new file mode 100644 -index 0000000..80588ab +index 00000000..80588ab5 --- /dev/null +++ b/bench/meteor.lua @@ -0,0 +1,220 @@ @@ -172844,7 +172817,7 @@ index 0000000..80588ab +printresult() diff --git a/bench/nbody.lua b/bench/nbody.lua new file mode 100644 -index 0000000..e0ff8f7 +index 00000000..e0ff8f77 --- /dev/null +++ b/bench/nbody.lua @@ -0,0 +1,119 @@ @@ -172969,7 +172942,7 @@ index 0000000..e0ff8f7 +io.write( string.format("%0.9f",energy(bodies, nbody)), "\n") diff --git a/bench/nsieve-bit-fp.lua b/bench/nsieve-bit-fp.lua new file mode 100644 -index 0000000..3971ec1 +index 00000000..3971ec1f --- /dev/null +++ b/bench/nsieve-bit-fp.lua @@ -0,0 +1,37 @@ @@ -173012,7 +172985,7 @@ index 0000000..3971ec1 +end diff --git a/bench/nsieve-bit.lua b/bench/nsieve-bit.lua new file mode 100644 -index 0000000..820a372 +index 00000000..820a3726 --- /dev/null +++ b/bench/nsieve-bit.lua @@ -0,0 +1,27 @@ @@ -173045,7 +173018,7 @@ index 0000000..820a372 +end diff --git a/bench/nsieve.lua b/bench/nsieve.lua new file mode 100644 -index 0000000..6de0524 +index 00000000..6de0524f --- /dev/null +++ b/bench/nsieve.lua @@ -0,0 +1,21 @@ @@ -173072,7 +173045,7 @@ index 0000000..6de0524 +end diff --git a/bench/partialsums.lua b/bench/partialsums.lua new file mode 100644 -index 0000000..09ac02f +index 00000000..09ac02f9 --- /dev/null +++ b/bench/partialsums.lua @@ -0,0 +1,29 @@ @@ -173107,7 +173080,7 @@ index 0000000..09ac02f +pr("%.9f\tGregory\n", a9) diff --git a/bench/pidigits-nogmp.lua b/bench/pidigits-nogmp.lua new file mode 100644 -index 0000000..63a1cb0 +index 00000000..63a1cb0e --- /dev/null +++ b/bench/pidigits-nogmp.lua @@ -0,0 +1,100 @@ @@ -173213,7 +173186,7 @@ index 0000000..63a1cb0 +end diff --git a/bench/ray.lua b/bench/ray.lua new file mode 100644 -index 0000000..86f159b +index 00000000..873cc995 --- /dev/null +++ b/bench/ray.lua @@ -0,0 +1,135 @@ @@ -173346,7 +173319,7 @@ index 0000000..86f159b + for d = y, y+.99, iss do + for e = x, x+.99, iss do + dir[1], dir[2], dir[3] = unitise(e, d, n) -+ g = g + ray_trace(light, camera, dir, scene) ++ g = g + 
ray_trace(light, camera, dir, scene) + end + end + io.write(string.char(math.floor(0.5 + g*gf))) @@ -173354,7 +173327,7 @@ index 0000000..86f159b +end diff --git a/bench/recursive-ack.lua b/bench/recursive-ack.lua new file mode 100644 -index 0000000..fad3058 +index 00000000..fad30589 --- /dev/null +++ b/bench/recursive-ack.lua @@ -0,0 +1,8 @@ @@ -173368,7 +173341,7 @@ index 0000000..fad3058 +io.write("Ack(3,", N ,"): ", Ack(3,N), "\n") diff --git a/bench/recursive-fib.lua b/bench/recursive-fib.lua new file mode 100644 -index 0000000..53b6f96 +index 00000000..53b6f96c --- /dev/null +++ b/bench/recursive-fib.lua @@ -0,0 +1,7 @@ @@ -173381,7 +173354,7 @@ index 0000000..53b6f96 +io.write(string.format("Fib(%d): %d\n", n, fib(n))) diff --git a/bench/revcomp.lua b/bench/revcomp.lua new file mode 100644 -index 0000000..90b3d5c +index 00000000..90b3d5c5 --- /dev/null +++ b/bench/revcomp.lua @@ -0,0 +1,39 @@ @@ -173426,10 +173399,10 @@ index 0000000..90b3d5c +writerev(t, n) diff --git a/bench/scimark-2010-12-20.lua b/bench/scimark-2010-12-20.lua new file mode 100644 -index 0000000..353acb7 +index 00000000..25f34eeb --- /dev/null +++ b/bench/scimark-2010-12-20.lua -@@ -0,0 +1,400 @@ +@@ -0,0 +1,399 @@ +------------------------------------------------------------------------------ +-- Lua SciMark (2010-12-20). +-- @@ -173829,38 +173802,37 @@ index 0000000..353acb7 +end +printf("\nSciMark %8.2f [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT) +io.flush() -+ diff --git a/bench/scimark-fft.lua b/bench/scimark-fft.lua new file mode 100644 -index 0000000..c05bb69 +index 00000000..c05bb69a --- /dev/null +++ b/bench/scimark-fft.lua @@ -0,0 +1 @@ +require("scimark_lib").FFT(1024)(tonumber(arg and arg[1]) or 50000) diff --git a/bench/scimark-lu.lua b/bench/scimark-lu.lua new file mode 100644 -index 0000000..7636d99 +index 00000000..7636d994 --- /dev/null +++ b/bench/scimark-lu.lua @@ -0,0 +1 @@ +require("scimark_lib").LU(100)(tonumber(arg and arg[1]) or 5000) diff --git a/bench/scimark-sor.lua b/bench/scimark-sor.lua new file mode 100644 -index 0000000..e537e98 +index 00000000..e537e986 --- /dev/null +++ b/bench/scimark-sor.lua @@ -0,0 +1 @@ +require("scimark_lib").SOR(100)(tonumber(arg and arg[1]) or 50000) diff --git a/bench/scimark-sparse.lua b/bench/scimark-sparse.lua new file mode 100644 -index 0000000..01a2258 +index 00000000..01a2258d --- /dev/null +++ b/bench/scimark-sparse.lua @@ -0,0 +1 @@ +require("scimark_lib").SPARSE(1000, 5000)(tonumber(arg and arg[1]) or 150000) diff --git a/bench/scimark_lib.lua b/bench/scimark_lib.lua new file mode 100644 -index 0000000..aeffd75 +index 00000000..aeffd75a --- /dev/null +++ b/bench/scimark_lib.lua @@ -0,0 +1,297 @@ @@ -174163,7 +174135,7 @@ index 0000000..aeffd75 +return benchmarks diff --git a/bench/series.lua b/bench/series.lua new file mode 100644 -index 0000000..f766cb3 +index 00000000..f766cb32 --- /dev/null +++ b/bench/series.lua @@ -0,0 +1,34 @@ @@ -174203,7 +174175,7 @@ index 0000000..f766cb3 + n, tm, (2*n-1)/tm)) diff --git a/bench/spectral-norm.lua b/bench/spectral-norm.lua new file mode 100644 -index 0000000..ecc8011 +index 00000000..ecc80112 --- /dev/null +++ b/bench/spectral-norm.lua @@ -0,0 +1,40 @@ @@ -174249,7 +174221,7 @@ index 0000000..ecc8011 +io.write(string.format("%0.9f\n", math.sqrt(vBv / vv))) diff --git a/bench/sum-file.lua b/bench/sum-file.lua new file mode 100644 -index 0000000..a16632b +index 00000000..a16632b1 --- /dev/null +++ b/bench/sum-file.lua @@ -0,0 +1,8 @@ @@ -174263,7 +174235,7 @@ index 0000000..a16632b +io.write(sum, 
"\n") diff --git a/test/README.md b/test/README.md new file mode 100644 -index 0000000..ff16ac8 +index 00000000..ff16ac8e --- /dev/null +++ b/test/README.md @@ -0,0 +1,110 @@ @@ -174379,7 +174351,7 @@ index 0000000..ff16ac8 +After that, consult the README file by Mike in the directory above this one. diff --git a/test/bc/constov.lua b/test/bc/constov.lua new file mode 100644 -index 0000000..5827840 +index 00000000..5827840b --- /dev/null +++ b/test/bc/constov.lua @@ -0,0 +1,16 @@ @@ -174401,14 +174373,14 @@ index 0000000..5827840 +end diff --git a/test/bc/index b/test/bc/index new file mode 100644 -index 0000000..dead10f +index 00000000..dead10f5 --- /dev/null +++ b/test/bc/index @@ -0,0 +1 @@ +constov.lua +slow diff --git a/test/common/expect_error.lua b/test/common/expect_error.lua new file mode 100644 -index 0000000..e155090 +index 00000000..e155090e --- /dev/null +++ b/test/common/expect_error.lua @@ -0,0 +1,16 @@ @@ -174430,10 +174402,10 @@ index 0000000..e155090 +end diff --git a/test/common/ffi_util.inc b/test/common/ffi_util.inc new file mode 100644 -index 0000000..1eee8dd +index 00000000..1fa28f3b --- /dev/null +++ b/test/common/ffi_util.inc -@@ -0,0 +1,41 @@ +@@ -0,0 +1,40 @@ +-- This should be turned into a proper module and not use globals. +-- Or combined into a generiv test utility module. With FFI +-- functionality turned off, if the FFI module is not built-in. @@ -174474,17 +174446,16 @@ index 0000000..1eee8dd + fp:close() + ffi.cdef(s) +end -+ diff --git a/test/common/test_runner_canary.lua b/test/common/test_runner_canary.lua new file mode 100644 -index 0000000..fc9cadc +index 00000000..fc9cadc6 --- /dev/null +++ b/test/common/test_runner_canary.lua @@ -0,0 +1 @@ +return "canary is alive" diff --git a/test/computations.lua b/test/computations.lua new file mode 100644 -index 0000000..4fce7fc +index 00000000..64b36af1 --- /dev/null +++ b/test/computations.lua @@ -0,0 +1,113 @@ @@ -174494,7 +174465,7 @@ index 0000000..4fce7fc + if n == 0 then return Ack(m-1, 1) end + return Ack(m-1, (Ack(m, n-1))) -- The parentheses are deliberate. + end -+ ++ + assert(Ack(3,5) == 253) +end + @@ -174504,7 +174475,7 @@ index 0000000..4fce7fc + if n == 0 then return Ack(m-1, 1) end + return (Ack(m-1, (Ack(m, n-1)))) -- The parentheses are deliberate. 
+ end -+ ++ + assert(Ack(3,5) == 253) +end + @@ -174516,7 +174487,7 @@ index 0000000..4fce7fc + end + return x + end -+ ++ + assert(fac(10) == 3628800) +end + @@ -174565,7 +174536,7 @@ index 0000000..4fce7fc + end + return count + end -+ ++ + assert(nsieve(100) == 25) + assert(nsieve(12345) == 1474) +end @@ -174575,7 +174546,7 @@ index 0000000..4fce7fc + if n == 1 then return 1 end + return n + sum(n-1) + end -+ ++ + for i=1, 100 do + assert(sum(i) == i*(i+1)/2) + end @@ -174587,7 +174558,7 @@ index 0000000..4fce7fc + if n == 1 then return 1 end + return abs(n + sum(n-1)) + end -+ ++ + for i=1, 100 do + assert(sum(i) == i*(i+1)/2) + end @@ -174603,7 +174574,7 @@ index 0000000..4fce7fc +end diff --git a/test/index b/test/index new file mode 100644 -index 0000000..bd4081e +index 00000000..bd4081e3 --- /dev/null +++ b/test/index @@ -0,0 +1,6 @@ @@ -174615,7 +174586,7 @@ index 0000000..bd4081e +opt +jit diff --git a/test/lang/andor.lua b/test/lang/andor.lua new file mode 100644 -index 0000000..55b2c75 +index 00000000..55b2c756 --- /dev/null +++ b/test/lang/andor.lua @@ -0,0 +1,61 @@ @@ -174682,7 +174653,7 @@ index 0000000..55b2c75 +end diff --git a/test/lang/assignment.lua b/test/lang/assignment.lua new file mode 100644 -index 0000000..e9745ef +index 00000000..e9745ef6 --- /dev/null +++ b/test/lang/assignment.lua @@ -0,0 +1,46 @@ @@ -174734,7 +174705,7 @@ index 0000000..e9745ef +end diff --git a/test/lang/compare.lua b/test/lang/compare.lua new file mode 100644 -index 0000000..09c5488 +index 00000000..09c5488d --- /dev/null +++ b/test/lang/compare.lua @@ -0,0 +1,323 @@ @@ -175063,10 +175034,10 @@ index 0000000..09c5488 +end diff --git a/test/lang/compare_nan.lua b/test/lang/compare_nan.lua new file mode 100644 -index 0000000..878f39a +index 00000000..dd152fab --- /dev/null +++ b/test/lang/compare_nan.lua -@@ -0,0 +1,99 @@ +@@ -0,0 +1,98 @@ + +local function check(a, b) + if a ~= b then @@ -175165,10 +175136,9 @@ index 0000000..878f39a + check(not (1==nan), true) + check(not (1~=nan), false) +end -+ diff --git a/test/lang/concat.lua b/test/lang/concat.lua new file mode 100644 -index 0000000..04d665b +index 00000000..04d665b2 --- /dev/null +++ b/test/lang/concat.lua @@ -0,0 +1,112 @@ @@ -175286,7 +175256,7 @@ index 0000000..04d665b +end diff --git a/test/lang/constant/index b/test/lang/constant/index new file mode 100644 -index 0000000..e738357 +index 00000000..e738357d --- /dev/null +++ b/test/lang/constant/index @@ -0,0 +1,2 @@ @@ -175294,7 +175264,7 @@ index 0000000..e738357 +table.lua diff --git a/test/lang/constant/number.lua b/test/lang/constant/number.lua new file mode 100644 -index 0000000..fb67356 +index 00000000..fb67356e --- /dev/null +++ b/test/lang/constant/number.lua @@ -0,0 +1,12 @@ @@ -175312,7 +175282,7 @@ index 0000000..fb67356 +end diff --git a/test/lang/constant/table.lua b/test/lang/constant/table.lua new file mode 100644 -index 0000000..899d0f6 +index 00000000..899d0f67 --- /dev/null +++ b/test/lang/constant/table.lua @@ -0,0 +1,15 @@ @@ -175333,7 +175303,7 @@ index 0000000..899d0f6 +end diff --git a/test/lang/coroutine.lua b/test/lang/coroutine.lua new file mode 100644 -index 0000000..405135c +index 00000000..405135c9 --- /dev/null +++ b/test/lang/coroutine.lua @@ -0,0 +1,8 @@ @@ -175347,7 +175317,7 @@ index 0000000..405135c +end diff --git a/test/lang/for.lua b/test/lang/for.lua new file mode 100644 -index 0000000..4982b32 +index 00000000..4982b32b --- /dev/null +++ b/test/lang/for.lua @@ -0,0 +1,45 @@ @@ -175398,7 +175368,7 @@ index 0000000..4982b32 +end diff --git 
a/test/lang/gc.lua b/test/lang/gc.lua new file mode 100644 -index 0000000..35e6a1f +index 00000000..35e6a1f3 --- /dev/null +++ b/test/lang/gc.lua @@ -0,0 +1,42 @@ @@ -175446,7 +175416,7 @@ index 0000000..35e6a1f +end diff --git a/test/lang/goto.lua b/test/lang/goto.lua new file mode 100644 -index 0000000..1563a23 +index 00000000..978476c8 --- /dev/null +++ b/test/lang/goto.lua @@ -0,0 +1,149 @@ @@ -175460,7 +175430,7 @@ index 0000000..1563a23 + assert(ok, err) + end +end -+ ++ +do --- Basic goto and label semantics. + -- Error: duplicate label. + expect("::a:: ::a::", "'a'") @@ -175601,7 +175571,7 @@ index 0000000..1563a23 +end diff --git a/test/lang/index b/test/lang/index new file mode 100644 -index 0000000..88e2edf +index 00000000..88e2edfa --- /dev/null +++ b/test/lang/index @@ -0,0 +1,18 @@ @@ -175625,7 +175595,7 @@ index 0000000..88e2edf +meta diff --git a/test/lang/length.lua b/test/lang/length.lua new file mode 100644 -index 0000000..67c68ae +index 00000000..67c68ae7 --- /dev/null +++ b/test/lang/length.lua @@ -0,0 +1,23 @@ @@ -175654,7 +175624,7 @@ index 0000000..67c68ae +end diff --git a/test/lang/meta/arith.lua b/test/lang/meta/arith.lua new file mode 100644 -index 0000000..17de4c8 +index 00000000..17de4c8c --- /dev/null +++ b/test/lang/meta/arith.lua @@ -0,0 +1,118 @@ @@ -175778,7 +175748,7 @@ index 0000000..17de4c8 +end diff --git a/test/lang/meta/arith_jit.lua b/test/lang/meta/arith_jit.lua new file mode 100644 -index 0000000..2cb35db +index 00000000..2cb35dbb --- /dev/null +++ b/test/lang/meta/arith_jit.lua @@ -0,0 +1,68 @@ @@ -175852,7 +175822,7 @@ index 0000000..2cb35db +end diff --git a/test/lang/meta/call.lua b/test/lang/meta/call.lua new file mode 100644 -index 0000000..c77c0dd +index 00000000..c77c0dd8 --- /dev/null +++ b/test/lang/meta/call.lua @@ -0,0 +1,81 @@ @@ -175939,10 +175909,10 @@ index 0000000..c77c0dd +end diff --git a/test/lang/meta/cat.lua b/test/lang/meta/cat.lua new file mode 100644 -index 0000000..48a89e4 +index 00000000..3a5db6fc --- /dev/null +++ b/test/lang/meta/cat.lua -@@ -0,0 +1,61 @@ +@@ -0,0 +1,60 @@ +local function create(cat, v1, v2) + local meta = { __concat = cat } + return setmetatable({v1}, meta), setmetatable({v2}, meta) @@ -176003,10 +175973,9 @@ index 0000000..48a89e4 + for i=1,100 do y = a..b.. 1 .. 
"z" end + assert(y == "ab1z") +end -+ diff --git a/test/lang/meta/comp.lua b/test/lang/meta/comp.lua new file mode 100644 -index 0000000..23f18b0 +index 00000000..23f18b08 --- /dev/null +++ b/test/lang/meta/comp.lua @@ -0,0 +1,120 @@ @@ -176132,10 +176101,10 @@ index 0000000..23f18b0 +end diff --git a/test/lang/meta/comp_jit.lua b/test/lang/meta/comp_jit.lua new file mode 100644 -index 0000000..d0a19d8 +index 00000000..0bf07b9f --- /dev/null +++ b/test/lang/meta/comp_jit.lua -@@ -0,0 +1,104 @@ +@@ -0,0 +1,103 @@ +do --- coverage + local lt, le = false, false + local t, u = {}, {} @@ -176239,10 +176208,9 @@ index 0000000..d0a19d8 + assert(not ok) + end +end -+ diff --git a/test/lang/meta/debuginfo.lua b/test/lang/meta/debuginfo.lua new file mode 100644 -index 0000000..a99941f +index 00000000..a99941fa --- /dev/null +++ b/test/lang/meta/debuginfo.lua @@ -0,0 +1,81 @@ @@ -176329,7 +176297,7 @@ index 0000000..a99941f +end diff --git a/test/lang/meta/eq.lua b/test/lang/meta/eq.lua new file mode 100644 -index 0000000..ebf6043 +index 00000000..ebf60435 --- /dev/null +++ b/test/lang/meta/eq.lua @@ -0,0 +1,30 @@ @@ -176365,7 +176333,7 @@ index 0000000..ebf6043 +end diff --git a/test/lang/meta/eq_jit.lua b/test/lang/meta/eq_jit.lua new file mode 100644 -index 0000000..47e1420 +index 00000000..47e14207 --- /dev/null +++ b/test/lang/meta/eq_jit.lua @@ -0,0 +1,35 @@ @@ -176406,7 +176374,7 @@ index 0000000..47e1420 +end diff --git a/test/lang/meta/framegap.lua b/test/lang/meta/framegap.lua new file mode 100644 -index 0000000..0080633 +index 00000000..0080633a --- /dev/null +++ b/test/lang/meta/framegap.lua @@ -0,0 +1,24 @@ @@ -176436,7 +176404,7 @@ index 0000000..0080633 +end diff --git a/test/lang/meta/index b/test/lang/meta/index new file mode 100644 -index 0000000..f114e78 +index 00000000..f114e78d --- /dev/null +++ b/test/lang/meta/index @@ -0,0 +1,14 @@ @@ -176456,7 +176424,7 @@ index 0000000..f114e78 +debuginfo.lua diff --git a/test/lang/meta/index.lua b/test/lang/meta/index.lua new file mode 100644 -index 0000000..4d6d0ff +index 00000000..4d6d0ffe --- /dev/null +++ b/test/lang/meta/index.lua @@ -0,0 +1,60 @@ @@ -176522,7 +176490,7 @@ index 0000000..4d6d0ff +end diff --git a/test/lang/meta/len.lua b/test/lang/meta/len.lua new file mode 100644 -index 0000000..2410daa +index 00000000..2410daa6 --- /dev/null +++ b/test/lang/meta/len.lua @@ -0,0 +1,42 @@ @@ -176570,7 +176538,7 @@ index 0000000..2410daa +end diff --git a/test/lang/meta/newindex.lua b/test/lang/meta/newindex.lua new file mode 100644 -index 0000000..6c46b8c +index 00000000..6c46b8cb --- /dev/null +++ b/test/lang/meta/newindex.lua @@ -0,0 +1,69 @@ @@ -176645,10 +176613,10 @@ index 0000000..6c46b8c +end diff --git a/test/lang/meta/nomm.lua b/test/lang/meta/nomm.lua new file mode 100644 -index 0000000..2b3db86 +index 00000000..e41f72f4 --- /dev/null +++ b/test/lang/meta/nomm.lua -@@ -0,0 +1,21 @@ +@@ -0,0 +1,20 @@ + +do --- untitled + local keys = {} @@ -176669,10 +176637,9 @@ index 0000000..2b3db86 + end + assert(x == 95) +end -+ diff --git a/test/lang/modulo.lua b/test/lang/modulo.lua new file mode 100644 -index 0000000..eddaea7 +index 00000000..eddaea77 --- /dev/null +++ b/test/lang/modulo.lua @@ -0,0 +1,46 @@ @@ -176724,7 +176691,7 @@ index 0000000..eddaea7 +end diff --git a/test/lang/self.lua b/test/lang/self.lua new file mode 100644 -index 0000000..d374666 +index 00000000..d3746664 --- /dev/null +++ b/test/lang/self.lua @@ -0,0 +1,19 @@ @@ -176749,7 +176716,7 @@ index 0000000..d374666 +end diff --git a/test/lang/table.lua 
b/test/lang/table.lua new file mode 100644 -index 0000000..3ff38cf +index 00000000..3ff38cfe --- /dev/null +++ b/test/lang/table.lua @@ -0,0 +1,32 @@ @@ -176787,7 +176754,7 @@ index 0000000..3ff38cf +end diff --git a/test/lang/tail_recursion.lua b/test/lang/tail_recursion.lua new file mode 100644 -index 0000000..78f071f +index 00000000..78f071fd --- /dev/null +++ b/test/lang/tail_recursion.lua @@ -0,0 +1,20 @@ @@ -176813,7 +176780,7 @@ index 0000000..78f071f +end diff --git a/test/lang/upvalue/closure.lua b/test/lang/upvalue/closure.lua new file mode 100644 -index 0000000..faa4de1 +index 00000000..faa4de1c --- /dev/null +++ b/test/lang/upvalue/closure.lua @@ -0,0 +1,84 @@ @@ -176903,17 +176870,17 @@ index 0000000..faa4de1 +end diff --git a/test/lang/upvalue/index b/test/lang/upvalue/index new file mode 100644 -index 0000000..3c170db +index 00000000..3c170db9 --- /dev/null +++ b/test/lang/upvalue/index @@ -0,0 +1 @@ +closure.lua diff --git a/test/lang/vararg_jit.lua b/test/lang/vararg_jit.lua new file mode 100644 -index 0000000..4e78f96 +index 00000000..50729f5e --- /dev/null +++ b/test/lang/vararg_jit.lua -@@ -0,0 +1,95 @@ +@@ -0,0 +1,94 @@ + +do --- 1 + local function f(a, b, c, ...) @@ -177008,10 +176975,9 @@ index 0000000..4e78f96 + f(nil) + f() +end -+ diff --git a/test/lib/base/assert.lua b/test/lib/base/assert.lua new file mode 100644 -index 0000000..9c30ba0 +index 00000000..9c30ba02 --- /dev/null +++ b/test/lib/base/assert.lua @@ -0,0 +1,33 @@ @@ -177050,7 +177016,7 @@ index 0000000..9c30ba0 +end diff --git a/test/lib/base/error.lua b/test/lib/base/error.lua new file mode 100644 -index 0000000..9193085 +index 00000000..91930854 --- /dev/null +++ b/test/lib/base/error.lua @@ -0,0 +1,43 @@ @@ -177099,7 +177065,7 @@ index 0000000..9193085 +end diff --git a/test/lib/base/getfenv.lua b/test/lib/base/getfenv.lua new file mode 100644 -index 0000000..9c00ed7 +index 00000000..9c00ed7c --- /dev/null +++ b/test/lib/base/getfenv.lua @@ -0,0 +1,13 @@ @@ -177118,7 +177084,7 @@ index 0000000..9c00ed7 +end diff --git a/test/lib/base/getsetmetatable.lua b/test/lib/base/getsetmetatable.lua new file mode 100644 -index 0000000..7d57343 +index 00000000..7d57343e --- /dev/null +++ b/test/lib/base/getsetmetatable.lua @@ -0,0 +1,33 @@ @@ -177157,7 +177123,7 @@ index 0000000..7d57343 +end diff --git a/test/lib/base/index b/test/lib/base/index new file mode 100644 -index 0000000..942c53c +index 00000000..942c53c0 --- /dev/null +++ b/test/lib/base/index @@ -0,0 +1,11 @@ @@ -177174,7 +177140,7 @@ index 0000000..942c53c +xpcall_jit.lua +compat5.2 diff --git a/test/lib/base/ipairs.lua b/test/lib/base/ipairs.lua new file mode 100644 -index 0000000..a9de087 +index 00000000..a9de087e --- /dev/null +++ b/test/lib/base/ipairs.lua @@ -0,0 +1,41 @@ @@ -177221,7 +177187,7 @@ index 0000000..a9de087 +end diff --git a/test/lib/base/next.lua b/test/lib/base/next.lua new file mode 100644 -index 0000000..0e40615 +index 00000000..0e40615a --- /dev/null +++ b/test/lib/base/next.lua @@ -0,0 +1,17 @@ @@ -177244,7 +177210,7 @@ index 0000000..0e40615 +end diff --git a/test/lib/base/pairs.lua b/test/lib/base/pairs.lua new file mode 100644 -index 0000000..4d89d42 +index 00000000..4d89d42d --- /dev/null +++ b/test/lib/base/pairs.lua @@ -0,0 +1,73 @@ @@ -177323,7 +177289,7 @@ index 0000000..4d89d42 +end diff --git a/test/lib/base/pcall_jit.lua b/test/lib/base/pcall_jit.lua new file mode 100644 -index 0000000..dc9cd5f +index 00000000..dc9cd5fa --- /dev/null +++ b/test/lib/base/pcall_jit.lua @@ -0,0 +1,74 @@ @@ -177403,13 +177369,13 @@ 
index 0000000..dc9cd5f +end diff --git a/test/lib/base/select.lua b/test/lib/base/select.lua new file mode 100644 -index 0000000..8278e5e +index 00000000..8b2b9467 --- /dev/null +++ b/test/lib/base/select.lua @@ -0,0 +1,105 @@ + +do --- select # -+-- Test whether select("#", 3, 4) returns the correct number of arguments. ++-- Test whether select("#", 3, 4) returns the correct number of arguments. + local x = 0 + for i=1,100 do + x = x + select("#", 3, 4) @@ -177427,7 +177393,7 @@ index 0000000..8278e5e + assert(x == 200) +end + -+do --- select 1 ++do --- select 1 + local x = 0 + for i=1,100 do + x = x + select(1, i) @@ -177514,7 +177480,7 @@ index 0000000..8278e5e +end diff --git a/test/lib/base/tonumber_tostring.lua b/test/lib/base/tonumber_tostring.lua new file mode 100644 -index 0000000..e7f576c +index 00000000..e7f576ce --- /dev/null +++ b/test/lib/base/tonumber_tostring.lua @@ -0,0 +1,81 @@ @@ -177601,7 +177567,7 @@ index 0000000..e7f576c +end diff --git a/test/lib/base/xpcall_jit.lua b/test/lib/base/xpcall_jit.lua new file mode 100644 -index 0000000..f4993cc +index 00000000..f4993cc6 --- /dev/null +++ b/test/lib/base/xpcall_jit.lua @@ -0,0 +1,83 @@ @@ -177690,7 +177656,7 @@ index 0000000..f4993cc +end diff --git a/test/lib/bit.lua b/test/lib/bit.lua new file mode 100644 -index 0000000..1adf550 +index 00000000..1adf5507 --- /dev/null +++ b/test/lib/bit.lua @@ -0,0 +1,98 @@ @@ -177794,7 +177760,7 @@ index 0000000..1adf550 +end diff --git a/test/lib/contents.lua b/test/lib/contents.lua new file mode 100644 -index 0000000..2baacd5 +index 00000000..2baacd5c --- /dev/null +++ b/test/lib/contents.lua @@ -0,0 +1,158 @@ @@ -177958,14 +177924,14 @@ index 0000000..2baacd5 +end diff --git a/test/lib/coroutine/index b/test/lib/coroutine/index new file mode 100644 -index 0000000..9c5c17e +index 00000000..9c5c17ec --- /dev/null +++ b/test/lib/coroutine/index @@ -0,0 +1 @@ +yield.lua diff --git a/test/lib/coroutine/yield.lua b/test/lib/coroutine/yield.lua new file mode 100644 -index 0000000..d995bf8 +index 00000000..d995bf87 --- /dev/null +++ b/test/lib/coroutine/yield.lua @@ -0,0 +1,109 @@ @@ -178080,10 +178046,10 @@ index 0000000..d995bf8 +end diff --git a/test/lib/ffi/bit64.lua b/test/lib/ffi/bit64.lua new file mode 100644 -index 0000000..d1b47be +index 00000000..ffec0def --- /dev/null +++ b/test/lib/ffi/bit64.lua -@@ -0,0 +1,130 @@ +@@ -0,0 +1,129 @@ +local ffi = require("ffi") +local bit = require("bit") + @@ -178213,10 +178179,9 @@ index 0000000..d1b47be + end + assert(b == -8881785180777266821LL) +end -+ diff --git a/test/lib/ffi/cdata_var.lua b/test/lib/ffi/cdata_var.lua new file mode 100644 -index 0000000..42d6028 +index 00000000..42d6028a --- /dev/null +++ b/test/lib/ffi/cdata_var.lua @@ -0,0 +1,47 @@ @@ -178269,10 +178234,10 @@ index 0000000..42d6028 +end diff --git a/test/lib/ffi/copy_fill.lua b/test/lib/ffi/copy_fill.lua new file mode 100644 -index 0000000..2956381 +index 00000000..d50d7cda --- /dev/null +++ b/test/lib/ffi/copy_fill.lua -@@ -0,0 +1,64 @@ +@@ -0,0 +1,63 @@ +local ffi = require("ffi") + +do --- misc @@ -178336,13 +178301,12 @@ index 0000000..2956381 + end + assert(x == "d" and y == "~") +end -+ diff --git a/test/lib/ffi/err.lua b/test/lib/ffi/err.lua new file mode 100644 -index 0000000..4472365 +index 00000000..8cdf9623 --- /dev/null +++ b/test/lib/ffi/err.lua -@@ -0,0 +1,35 @@ +@@ -0,0 +1,34 @@ +local ffi = require("ffi") + +do --- error in FFI metamethod: don't print metamethod frame. 
@@ -178377,13 +178341,12 @@ index 0000000..4472365 + local line = debug.getinfo(foo).linedefined+3 + assert(string.match(err, "traceback:[^:]*:"..line..":")) +end -+ diff --git a/test/lib/ffi/ffi_arith_ptr.lua b/test/lib/ffi/ffi_arith_ptr.lua new file mode 100644 -index 0000000..8cf890c +index 00000000..82535551 --- /dev/null +++ b/test/lib/ffi/ffi_arith_ptr.lua -@@ -0,0 +1,106 @@ +@@ -0,0 +1,105 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -178489,13 +178452,12 @@ index 0000000..8cf890c + local ok, err = pcall(function(p) return p[1] end, p) + assert(not ok and err:match("size.*unknown")) +end -+ diff --git a/test/lib/ffi/ffi_bitfield.lua b/test/lib/ffi/ffi_bitfield.lua new file mode 100644 -index 0000000..cd0b181 +index 00000000..20b89ad8 --- /dev/null +++ b/test/lib/ffi/ffi_bitfield.lua -@@ -0,0 +1,108 @@ +@@ -0,0 +1,107 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -178603,13 +178565,12 @@ index 0000000..cd0b181 + end + +end -+ diff --git a/test/lib/ffi/ffi_call.lua b/test/lib/ffi/ffi_call.lua new file mode 100644 -index 0000000..1eb5e90 +index 00000000..1a7f4b1b --- /dev/null +++ b/test/lib/ffi/ffi_call.lua -@@ -0,0 +1,266 @@ +@@ -0,0 +1,265 @@ + +local ffi = require("ffi") + @@ -178875,13 +178836,12 @@ index 0000000..1eb5e90 + assert(C.stdcall_ff(12.5, -3.25) == 12.5-3.25) + end +end -+ diff --git a/test/lib/ffi/ffi_callback.lua b/test/lib/ffi/ffi_callback.lua new file mode 100644 -index 0000000..1fd14bd +index 00000000..3e2759e2 --- /dev/null +++ b/test/lib/ffi/ffi_callback.lua -@@ -0,0 +1,158 @@ +@@ -0,0 +1,157 @@ + +local ffi = require("ffi") + @@ -179039,13 +178999,12 @@ index 0000000..1fd14bd + debug.sethook(function() debug.sethook(nil, "", 0); f() end, "", 1) + local x +end -+ diff --git a/test/lib/ffi/ffi_const.lua b/test/lib/ffi/ffi_const.lua new file mode 100644 -index 0000000..d42133a +index 00000000..b2b256d4 --- /dev/null +++ b/test/lib/ffi/ffi_const.lua -@@ -0,0 +1,113 @@ +@@ -0,0 +1,112 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -179158,13 +179117,12 @@ index 0000000..d42133a + x.ccp = ccxa + end +end -+ diff --git a/test/lib/ffi/ffi_convert.lua b/test/lib/ffi/ffi_convert.lua new file mode 100644 -index 0000000..bd3fb1f +index 00000000..1945760a --- /dev/null +++ b/test/lib/ffi/ffi_convert.lua -@@ -0,0 +1,787 @@ +@@ -0,0 +1,786 @@ +local ffi = require("ffi") + +local ctest = require("ctest") @@ -179951,13 +179909,12 @@ index 0000000..bd3fb1f + jit.off(f) + end +end -+ diff --git a/test/lib/ffi/ffi_enum.lua b/test/lib/ffi/ffi_enum.lua new file mode 100644 -index 0000000..e8e40ad +index 00000000..9b63b4ec --- /dev/null +++ b/test/lib/ffi/ffi_enum.lua -@@ -0,0 +1,57 @@ +@@ -0,0 +1,56 @@ + +local ffi = require("ffi") + @@ -180014,13 +179971,12 @@ index 0000000..e8e40ad + assert(f("II")) + assert(not f(0)) +end -+ diff --git a/test/lib/ffi/ffi_gcstep_recursive.lua b/test/lib/ffi/ffi_gcstep_recursive.lua new file mode 100644 -index 0000000..cb19df1 +index 00000000..22eb81af --- /dev/null +++ b/test/lib/ffi/ffi_gcstep_recursive.lua -@@ -0,0 +1,66 @@ +@@ -0,0 +1,65 @@ +-- From Robert G. 
Jakabosky, 2012-03-20 + +local N=tonumber(arg[1] or 10000) @@ -180086,13 +180042,12 @@ index 0000000..cb19df1 + end + cdata = nil +end -+ diff --git a/test/lib/ffi/ffi_jit_arith.lua b/test/lib/ffi/ffi_jit_arith.lua new file mode 100644 -index 0000000..0554fe6 +index 00000000..0f502784 --- /dev/null +++ b/test/lib/ffi/ffi_jit_arith.lua -@@ -0,0 +1,155 @@ +@@ -0,0 +1,154 @@ +local ffi = require("ffi") + +do @@ -180247,13 +180202,12 @@ index 0000000..0554fe6 + assert(x == 1650) + assert(y == 970) +end -+ diff --git a/test/lib/ffi/ffi_jit_call.lua b/test/lib/ffi/ffi_jit_call.lua new file mode 100644 -index 0000000..b79d60b +index 00000000..ab1e26e3 --- /dev/null +++ b/test/lib/ffi/ffi_jit_call.lua -@@ -0,0 +1,154 @@ +@@ -0,0 +1,153 @@ + +local ffi = require("ffi") + @@ -180407,10 +180361,9 @@ index 0000000..b79d60b + for i=1,100 do assert(lib.stdcall_ff(12.5, -3.25) == 12.5-3.25) end + end +end -+ diff --git a/test/lib/ffi/ffi_jit_conv.lua b/test/lib/ffi/ffi_jit_conv.lua new file mode 100644 -index 0000000..d4707db +index 00000000..d4707db7 --- /dev/null +++ b/test/lib/ffi/ffi_jit_conv.lua @@ -0,0 +1,277 @@ @@ -180693,10 +180646,10 @@ index 0000000..d4707db +end diff --git a/test/lib/ffi/ffi_lex_number.lua b/test/lib/ffi/ffi_lex_number.lua new file mode 100644 -index 0000000..e26650e +index 00000000..1737a8ba --- /dev/null +++ b/test/lib/ffi/ffi_lex_number.lua -@@ -0,0 +1,51 @@ +@@ -0,0 +1,50 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -180747,13 +180700,12 @@ index 0000000..e26650e + ".0ll", + "0ii", +}, function(s) assert(loadstring("return "..s)) end) -+ diff --git a/test/lib/ffi/ffi_metatype.lua b/test/lib/ffi/ffi_metatype.lua new file mode 100644 -index 0000000..2db717f +index 00000000..1d3a20bc --- /dev/null +++ b/test/lib/ffi/ffi_metatype.lua -@@ -0,0 +1,245 @@ +@@ -0,0 +1,244 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -180998,13 +180950,12 @@ index 0000000..2db717f + local o = xt() + assert(o.x == 99) +end -+ diff --git a/test/lib/ffi/ffi_new.lua b/test/lib/ffi/ffi_new.lua new file mode 100644 -index 0000000..9cdbd53 +index 00000000..5f0c88b4 --- /dev/null +++ b/test/lib/ffi/ffi_new.lua -@@ -0,0 +1,106 @@ +@@ -0,0 +1,105 @@ +local ffi = require("ffi") +local bit = require("bit") + @@ -181110,13 +181061,12 @@ index 0000000..9cdbd53 + local p = ffi.gc(ffi.new("int[1]"), function(x) assert(type(x) == "cdata") end) + -- test for lua_close() cleanup. 
+end -+ diff --git a/test/lib/ffi/ffi_parse_array.lua b/test/lib/ffi/ffi_parse_array.lua new file mode 100644 -index 0000000..3a9616d +index 00000000..08176223 --- /dev/null +++ b/test/lib/ffi/ffi_parse_array.lua -@@ -0,0 +1,78 @@ +@@ -0,0 +1,77 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -181194,13 +181144,12 @@ index 0000000..3a9616d + assert(ffi.sizeof(id, 0x40000000) == nil) + assert(ffi.sizeof(id, 0x3ffffffd) == 4+2*0x3ffffffd) +end -+ diff --git a/test/lib/ffi/ffi_parse_basic.lua b/test/lib/ffi/ffi_parse_basic.lua new file mode 100644 -index 0000000..c054bcf +index 00000000..774d6143 --- /dev/null +++ b/test/lib/ffi/ffi_parse_basic.lua -@@ -0,0 +1,131 @@ +@@ -0,0 +1,130 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -181331,13 +181280,12 @@ index 0000000..c054bcf + 1, 2, "char __attribute__((aligned(8))) const __attribute__((aligned(2)))", + 1, 16, "char __attribute__((aligned(8))) const __attribute__((aligned(16)))", +} -+ diff --git a/test/lib/ffi/ffi_parse_cdef.lua b/test/lib/ffi/ffi_parse_cdef.lua new file mode 100644 -index 0000000..4bb5d90 +index 00000000..43206f31 --- /dev/null +++ b/test/lib/ffi/ffi_parse_cdef.lua -@@ -0,0 +1,77 @@ +@@ -0,0 +1,76 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -181414,13 +181362,12 @@ index 0000000..4bb5d90 +int ext1; +extern int ext2; +]] -+ diff --git a/test/lib/ffi/ffi_parse_struct.lua b/test/lib/ffi/ffi_parse_struct.lua new file mode 100644 -index 0000000..16a3d05 +index 00000000..02b17dd6 --- /dev/null +++ b/test/lib/ffi/ffi_parse_struct.lua -@@ -0,0 +1,259 @@ +@@ -0,0 +1,258 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -181679,10 +181626,9 @@ index 0000000..16a3d05 + assert(ffi.sizeof("struct foo_packintalign8") == 6) + assert(ffi.sizeof("struct foo_packintalign1") == 5) +end -+ diff --git a/test/lib/ffi/ffi_tabov.lua b/test/lib/ffi/ffi_tabov.lua new file mode 100644 -index 0000000..ba62196 +index 00000000..ba621960 --- /dev/null +++ b/test/lib/ffi/ffi_tabov.lua @@ -0,0 +1,12 @@ @@ -181700,7 +181646,7 @@ index 0000000..ba62196 +assert(last > 20000) diff --git a/test/lib/ffi/index b/test/lib/ffi/index new file mode 100644 -index 0000000..59e36dd +index 00000000..59e36dd8 --- /dev/null +++ b/test/lib/ffi/index @@ -0,0 +1,12 @@ @@ -181718,7 +181664,7 @@ index 0000000..59e36dd +type_punning.lua diff --git a/test/lib/ffi/istype.lua b/test/lib/ffi/istype.lua new file mode 100644 -index 0000000..5aba775 +index 00000000..5aba7759 --- /dev/null +++ b/test/lib/ffi/istype.lua @@ -0,0 +1,88 @@ @@ -181812,7 +181758,7 @@ index 0000000..5aba775 +end diff --git a/test/lib/ffi/jit_array.lua b/test/lib/ffi/jit_array.lua new file mode 100644 -index 0000000..e8de4af +index 00000000..e8de4af1 --- /dev/null +++ b/test/lib/ffi/jit_array.lua @@ -0,0 +1,104 @@ @@ -181922,7 +181868,7 @@ index 0000000..e8de4af +end diff --git a/test/lib/ffi/jit_complex.lua b/test/lib/ffi/jit_complex.lua new file mode 100644 -index 0000000..3296f0c +index 00000000..3296f0cb --- /dev/null +++ b/test/lib/ffi/jit_complex.lua @@ -0,0 +1,109 @@ @@ -182037,7 +181983,7 @@ index 0000000..3296f0c +end diff --git a/test/lib/ffi/jit_misc.lua b/test/lib/ffi/jit_misc.lua new file mode 100644 -index 0000000..41e4737 +index 00000000..41e4737b --- /dev/null +++ b/test/lib/ffi/jit_misc.lua @@ -0,0 +1,109 @@ @@ -182152,10 +182098,10 @@ index 0000000..41e4737 +end diff --git a/test/lib/ffi/jit_struct.lua b/test/lib/ffi/jit_struct.lua new file mode 100644 -index 0000000..8aa64c1 +index 00000000..ab7ab07f 
--- /dev/null +++ b/test/lib/ffi/jit_struct.lua -@@ -0,0 +1,201 @@ +@@ -0,0 +1,200 @@ +local ffi = require("ffi") + +ffi.cdef[[ @@ -182356,13 +182302,12 @@ index 0000000..8aa64c1 + end + f() +end -+ diff --git a/test/lib/ffi/meta_tostring.lua b/test/lib/ffi/meta_tostring.lua new file mode 100644 -index 0000000..bb065e1 +index 00000000..968eaddf --- /dev/null +++ b/test/lib/ffi/meta_tostring.lua -@@ -0,0 +1,55 @@ +@@ -0,0 +1,54 @@ +local ffi = require("ffi") + +ffi.cdef[[ @@ -182417,10 +182362,9 @@ index 0000000..bb065e1 + x.f[1] = -753.125 + assert(tostring(x.cf) == "12.5-753.125i") +end -+ diff --git a/test/lib/ffi/redir.lua b/test/lib/ffi/redir.lua new file mode 100644 -index 0000000..c492055 +index 00000000..c492055a --- /dev/null +++ b/test/lib/ffi/redir.lua @@ -0,0 +1,19 @@ @@ -182445,7 +182389,7 @@ index 0000000..c492055 +end diff --git a/test/lib/ffi/type_punning.lua b/test/lib/ffi/type_punning.lua new file mode 100644 -index 0000000..ac70b4b +index 00000000..ac70b4b4 --- /dev/null +++ b/test/lib/ffi/type_punning.lua @@ -0,0 +1,138 @@ @@ -182589,7 +182533,7 @@ index 0000000..ac70b4b +end diff --git a/test/lib/index b/test/lib/index new file mode 100644 -index 0000000..cc9d7d7 +index 00000000..cc9d7d73 --- /dev/null +++ b/test/lib/index @@ -0,0 +1,8 @@ @@ -182604,7 +182548,7 @@ index 0000000..cc9d7d7 \ No newline at end of file diff --git a/test/lib/math/abs.lua b/test/lib/math/abs.lua new file mode 100644 -index 0000000..4223a78 +index 00000000..4223a780 --- /dev/null +++ b/test/lib/math/abs.lua @@ -0,0 +1,16 @@ @@ -182626,7 +182570,7 @@ index 0000000..4223a78 +end diff --git a/test/lib/math/constants.lua b/test/lib/math/constants.lua new file mode 100644 -index 0000000..ec35b4c +index 00000000..ec35b4ce --- /dev/null +++ b/test/lib/math/constants.lua @@ -0,0 +1,8 @@ @@ -182640,7 +182584,7 @@ index 0000000..ec35b4c +end diff --git a/test/lib/math/index b/test/lib/math/index new file mode 100644 -index 0000000..944e1ae +index 00000000..944e1aeb --- /dev/null +++ b/test/lib/math/index @@ -0,0 +1,3 @@ @@ -182649,7 +182593,7 @@ index 0000000..944e1ae +random.lua diff --git a/test/lib/math/random.lua b/test/lib/math/random.lua new file mode 100644 -index 0000000..dc2ca00 +index 00000000..dc2ca00b --- /dev/null +++ b/test/lib/math/random.lua @@ -0,0 +1,47 @@ @@ -182702,7 +182646,7 @@ index 0000000..dc2ca00 +end diff --git a/test/lib/string/byte.lua b/test/lib/string/byte.lua new file mode 100644 -index 0000000..697a2c2 +index 00000000..697a2c2e --- /dev/null +++ b/test/lib/string/byte.lua @@ -0,0 +1,92 @@ @@ -182800,7 +182744,7 @@ index 0000000..697a2c2 +end diff --git a/test/lib/string/char.lua b/test/lib/string/char.lua new file mode 100644 -index 0000000..544767d +index 00000000..544767de --- /dev/null +++ b/test/lib/string/char.lua @@ -0,0 +1,29 @@ @@ -182835,7 +182779,7 @@ index 0000000..544767d +end diff --git a/test/lib/string/dump.lua b/test/lib/string/dump.lua new file mode 100644 -index 0000000..216c6eb +index 00000000..216c6eb8 --- /dev/null +++ b/test/lib/string/dump.lua @@ -0,0 +1,31 @@ @@ -182872,14 +182816,14 @@ index 0000000..216c6eb +end diff --git a/test/lib/string/format/index b/test/lib/string/format/index new file mode 100644 -index 0000000..4408853 +index 00000000..44088536 --- /dev/null +++ b/test/lib/string/format/index @@ -0,0 +1 @@ +num.lua diff --git a/test/lib/string/format/num.lua b/test/lib/string/format/num.lua new file mode 100644 -index 0000000..e8cb33f +index 00000000..e8cb33f3 --- /dev/null +++ b/test/lib/string/format/num.lua @@ -0,0 +1,184 @@ @@ 
-183069,7 +183013,7 @@ index 0000000..e8cb33f +end diff --git a/test/lib/string/index b/test/lib/string/index new file mode 100644 -index 0000000..c0638e9 +index 00000000..c0638e9c --- /dev/null +++ b/test/lib/string/index @@ -0,0 +1,11 @@ @@ -183086,7 +183030,7 @@ index 0000000..c0638e9 +sub.lua diff --git a/test/lib/string/len.lua b/test/lib/string/len.lua new file mode 100644 -index 0000000..8ed7e8a +index 00000000..8ed7e8ae --- /dev/null +++ b/test/lib/string/len.lua @@ -0,0 +1,14 @@ @@ -183106,7 +183050,7 @@ index 0000000..8ed7e8a +end diff --git a/test/lib/string/lower_upper.lua b/test/lib/string/lower_upper.lua new file mode 100644 -index 0000000..7370c44 +index 00000000..7370c44c --- /dev/null +++ b/test/lib/string/lower_upper.lua @@ -0,0 +1,51 @@ @@ -183163,7 +183107,7 @@ index 0000000..7370c44 +end diff --git a/test/lib/string/metatable.lua b/test/lib/string/metatable.lua new file mode 100644 -index 0000000..d39ed43 +index 00000000..d39ed432 --- /dev/null +++ b/test/lib/string/metatable.lua @@ -0,0 +1,3 @@ @@ -183172,7 +183116,7 @@ index 0000000..d39ed43 +end diff --git a/test/lib/string/multiple_functions.lua b/test/lib/string/multiple_functions.lua new file mode 100644 -index 0000000..7b9d0f1 +index 00000000..7b9d0f13 --- /dev/null +++ b/test/lib/string/multiple_functions.lua @@ -0,0 +1,16 @@ @@ -183194,7 +183138,7 @@ index 0000000..7b9d0f1 +end diff --git a/test/lib/string/rep.lua b/test/lib/string/rep.lua new file mode 100644 -index 0000000..550c15b +index 00000000..550c15b8 --- /dev/null +++ b/test/lib/string/rep.lua @@ -0,0 +1,68 @@ @@ -183268,7 +183212,7 @@ index 0000000..550c15b +end diff --git a/test/lib/string/reverse.lua b/test/lib/string/reverse.lua new file mode 100644 -index 0000000..deaade7 +index 00000000..deaade7c --- /dev/null +++ b/test/lib/string/reverse.lua @@ -0,0 +1,13 @@ @@ -183287,7 +183231,7 @@ index 0000000..deaade7 +end diff --git a/test/lib/string/sub.lua b/test/lib/string/sub.lua new file mode 100644 -index 0000000..ecb8021 +index 00000000..ecb80216 --- /dev/null +++ b/test/lib/string/sub.lua @@ -0,0 +1,189 @@ @@ -183482,7 +183426,7 @@ index 0000000..ecb8021 +end diff --git a/test/lib/table/concat.lua b/test/lib/table/concat.lua new file mode 100644 -index 0000000..1f2a2f9 +index 00000000..1f2a2f92 --- /dev/null +++ b/test/lib/table/concat.lua @@ -0,0 +1,55 @@ @@ -183543,7 +183487,7 @@ index 0000000..1f2a2f9 +end diff --git a/test/lib/table/index b/test/lib/table/index new file mode 100644 -index 0000000..bd3af0b +index 00000000..bd3af0be --- /dev/null +++ b/test/lib/table/index @@ -0,0 +1,6 @@ @@ -183555,10 +183499,10 @@ index 0000000..bd3af0b +sort.lua diff --git a/test/lib/table/insert.lua b/test/lib/table/insert.lua new file mode 100644 -index 0000000..91d4dd8 +index 00000000..30db18c7 --- /dev/null +++ b/test/lib/table/insert.lua -@@ -0,0 +1,17 @@ +@@ -0,0 +1,16 @@ +local tinsert = table.insert +local assert = assert + @@ -183575,13 +183519,12 @@ index 0000000..91d4dd8 + for i=101,200 do tinsert(t, i, i) end + assert(#t == 300 and t[101] == 101 and t[200] == 200 and t[300] == 200) +end -+ diff --git a/test/lib/table/misc.lua b/test/lib/table/misc.lua new file mode 100644 -index 0000000..e0e2fc5 +index 00000000..c54188e1 --- /dev/null +++ b/test/lib/table/misc.lua -@@ -0,0 +1,58 @@ +@@ -0,0 +1,55 @@ +-- TODO: Organise + +-- ABC elim @@ -183637,12 +183580,9 @@ index 0000000..e0e2fc5 + assert(t[1] == 1 and t[2] == 2 and t[3] == 3 and t[4] == 9 and t[5] == 10 and + t[6] == nil) +end -+ -+ -+ diff --git a/test/lib/table/new.lua 
b/test/lib/table/new.lua new file mode 100644 -index 0000000..483c129 +index 00000000..483c1298 --- /dev/null +++ b/test/lib/table/new.lua @@ -0,0 +1,11 @@ @@ -183659,7 +183599,7 @@ index 0000000..483c129 +end diff --git a/test/lib/table/pack.lua b/test/lib/table/pack.lua new file mode 100644 -index 0000000..5bd6ecb +index 00000000..5bd6ecbe --- /dev/null +++ b/test/lib/table/pack.lua @@ -0,0 +1,7 @@ @@ -183672,7 +183612,7 @@ index 0000000..5bd6ecb +end diff --git a/test/lib/table/remove.lua b/test/lib/table/remove.lua new file mode 100644 -index 0000000..1b24a4f +index 00000000..1b24a4fb --- /dev/null +++ b/test/lib/table/remove.lua @@ -0,0 +1,42 @@ @@ -183720,7 +183660,7 @@ index 0000000..1b24a4f +end diff --git a/test/lib/table/sort.lua b/test/lib/table/sort.lua new file mode 100644 -index 0000000..6a86fcf +index 00000000..6a86fcf3 --- /dev/null +++ b/test/lib/table/sort.lua @@ -0,0 +1,27 @@ @@ -183753,10 +183693,10 @@ index 0000000..6a86fcf +end diff --git a/test/misc/alias_alloc.lua b/test/misc/alias_alloc.lua new file mode 100644 -index 0000000..02fe618 +index 00000000..6c89baad --- /dev/null +++ b/test/misc/alias_alloc.lua -@@ -0,0 +1,54 @@ +@@ -0,0 +1,53 @@ + +do + local t = {1} @@ -183810,13 +183750,12 @@ index 0000000..02fe618 + w[1] = t[1] + end +end -+ diff --git a/test/misc/api_call.lua b/test/misc/api_call.lua new file mode 100644 -index 0000000..7dbd5e4 +index 00000000..28ce7d2b --- /dev/null +++ b/test/misc/api_call.lua -@@ -0,0 +1,98 @@ +@@ -0,0 +1,97 @@ +local ctest = require("ctest") + +local function ret0() end @@ -183914,13 +183853,12 @@ index 0000000..7dbd5e4 +test_yield(ctest.resume, coroutine.yield) +test_yield(coroutine.resume, ctest.yield) +test_yield(ctest.resume, ctest.yield) -+ diff --git a/test/misc/catch_wrap.lua b/test/misc/catch_wrap.lua new file mode 100644 -index 0000000..7f656bc +index 00000000..8b934066 --- /dev/null +++ b/test/misc/catch_wrap.lua -@@ -0,0 +1,45 @@ +@@ -0,0 +1,44 @@ + +local cp = require("cpptest") +cp.wrapon() @@ -183965,13 +183903,12 @@ index 0000000..7f656bc + assert(a == false and b == "foo") + if unwind then assert(cp.isalloc() == false) end +end -+ diff --git a/test/misc/coro_traceback.lua b/test/misc/coro_traceback.lua new file mode 100644 -index 0000000..2676d2c +index 00000000..e075f5dd --- /dev/null +++ b/test/misc/coro_traceback.lua -@@ -0,0 +1,8 @@ +@@ -0,0 +1,7 @@ + +local co = coroutine.create(function() + local x = nil @@ -183979,13 +183916,12 @@ index 0000000..2676d2c +end) +assert(coroutine.resume(co) == false) +debug.traceback(co) -+ diff --git a/test/misc/coro_yield.lua b/test/misc/coro_yield.lua new file mode 100644 -index 0000000..ae3206e +index 00000000..602ba7d5 --- /dev/null +++ b/test/misc/coro_yield.lua -@@ -0,0 +1,111 @@ +@@ -0,0 +1,110 @@ +local create = coroutine.create +local wrap = coroutine.wrap +local resume = coroutine.resume @@ -184096,10 +184032,9 @@ index 0000000..ae3206e + end, + 42) +end -+ diff --git a/test/misc/debug_gc.lua b/test/misc/debug_gc.lua new file mode 100644 -index 0000000..30fb2b9 +index 00000000..30fb2b99 --- /dev/null +++ b/test/misc/debug_gc.lua @@ -0,0 +1,47 @@ @@ -184152,10 +184087,10 @@ index 0000000..30fb2b9 +caught = "end" diff --git a/test/misc/dualnum.lua b/test/misc/dualnum.lua new file mode 100644 -index 0000000..5f1288c +index 00000000..059bf21d --- /dev/null +++ b/test/misc/dualnum.lua -@@ -0,0 +1,47 @@ +@@ -0,0 +1,46 @@ + +-- Positive overflow +do @@ -184202,13 +184137,12 @@ index 0000000..5f1288c + assert(fmax(-1, -3) == -1) + assert(fmax(-3, -1) == -1) +end -+ diff 
--git a/test/misc/for_dir.lua b/test/misc/for_dir.lua new file mode 100644 -index 0000000..4dd38de +index 00000000..3146df13 --- /dev/null +++ b/test/misc/for_dir.lua -@@ -0,0 +1,13 @@ +@@ -0,0 +1,12 @@ + +local a,b,c = 10,1,-1 +for i=1,20 do @@ -184221,13 +184155,12 @@ index 0000000..4dd38de + for i=a,b,c do for j=1,10 do end x=x+1 end + assert(x == 10) +end -+ diff --git a/test/misc/fori_coerce.lua b/test/misc/fori_coerce.lua new file mode 100644 -index 0000000..7330943 +index 00000000..03dc37cc --- /dev/null +++ b/test/misc/fori_coerce.lua -@@ -0,0 +1,33 @@ +@@ -0,0 +1,32 @@ + +do + local n = 1 @@ -184260,13 +184193,12 @@ index 0000000..7330943 + end + assert(not pcall(f)) +end -+ diff --git a/test/misc/gc_rechain.lua b/test/misc/gc_rechain.lua new file mode 100644 -index 0000000..285f408 +index 00000000..c98fa5af --- /dev/null +++ b/test/misc/gc_rechain.lua -@@ -0,0 +1,32 @@ +@@ -0,0 +1,31 @@ + +do + local k @@ -184298,13 +184230,12 @@ index 0000000..285f408 + + assert(t[k] == 4) +end -+ diff --git a/test/misc/gc_trace.lua b/test/misc/gc_trace.lua new file mode 100644 -index 0000000..bc38ce0 +index 00000000..e394bd49 --- /dev/null +++ b/test/misc/gc_trace.lua -@@ -0,0 +1,37 @@ +@@ -0,0 +1,36 @@ + +if not jit or not jit.status or not jit.status() then return end + @@ -184341,13 +184272,12 @@ index 0000000..bc38ce0 + end + jit.attach(reccb) +end -+ diff --git a/test/misc/gcstep.lua b/test/misc/gcstep.lua new file mode 100644 -index 0000000..533356b +index 00000000..7ee5565b --- /dev/null +++ b/test/misc/gcstep.lua -@@ -0,0 +1,33 @@ +@@ -0,0 +1,32 @@ + +local function testgc(what, func) + collectgarbage() @@ -184380,13 +184310,12 @@ index 0000000..533356b + local s = "x"..i + end +end) -+ diff --git a/test/misc/hook_active.lua b/test/misc/hook_active.lua new file mode 100644 -index 0000000..37dfc37 +index 00000000..57532568 --- /dev/null +++ b/test/misc/hook_active.lua -@@ -0,0 +1,95 @@ +@@ -0,0 +1,94 @@ +local ctest = require("ctest") + +local called = 0 @@ -184481,13 +184410,12 @@ index 0000000..37dfc37 +called = 2 +do local x = 1 end +assert(called == 2) -+ diff --git a/test/misc/hook_line.lua b/test/misc/hook_line.lua new file mode 100644 -index 0000000..36f7108 +index 00000000..6106e492 --- /dev/null +++ b/test/misc/hook_line.lua -@@ -0,0 +1,41 @@ +@@ -0,0 +1,40 @@ +local lines = {} +local function hook() + lines[#lines+1] = debug.getinfo(2).currentline @@ -184528,13 +184456,12 @@ index 0000000..36f7108 +f() +debug.sethook(nil, "", 0) +for i=1,#lines do assert(lines[i] ~= 36) end -+ diff --git a/test/misc/hook_norecord.lua b/test/misc/hook_norecord.lua new file mode 100644 -index 0000000..8e7cba0 +index 00000000..004f3a3a --- /dev/null +++ b/test/misc/hook_norecord.lua -@@ -0,0 +1,12 @@ +@@ -0,0 +1,11 @@ + +if not jit or not jit.status or not jit.status() then return end + @@ -184546,13 +184473,12 @@ index 0000000..8e7cba0 +assert(called) +-- Check that no trace was generated. 
+assert(require("jit.util").traceinfo(1) == nil) -+ diff --git a/test/misc/hook_record.lua b/test/misc/hook_record.lua new file mode 100644 -index 0000000..6f1646d +index 00000000..f4283d12 --- /dev/null +++ b/test/misc/hook_record.lua -@@ -0,0 +1,8 @@ +@@ -0,0 +1,7 @@ + +if not jit or not jit.status or not jit.status() then return end + @@ -184560,13 +184486,12 @@ index 0000000..6f1646d +for i=1,10 do end +debug.sethook() +assert((require("jit.util").traceinfo(1))) -+ diff --git a/test/misc/hook_top.lua b/test/misc/hook_top.lua new file mode 100644 -index 0000000..f809fce +index 00000000..3cc7e651 --- /dev/null +++ b/test/misc/hook_top.lua -@@ -0,0 +1,55 @@ +@@ -0,0 +1,54 @@ + +local t = {} +for i=1,26 do t[i] = string.char(96+i) end @@ -184621,13 +184546,12 @@ index 0000000..f809fce + assert(a == "bar") +end +foo5() -+ diff --git a/test/misc/jit_flush.lua b/test/misc/jit_flush.lua new file mode 100644 -index 0000000..ead1e4e +index 00000000..fe1021ce --- /dev/null +++ b/test/misc/jit_flush.lua -@@ -0,0 +1,50 @@ +@@ -0,0 +1,49 @@ + +if not jit or not jit.status or not jit.status() then return end + @@ -184677,13 +184601,12 @@ index 0000000..ead1e4e +jit.flush(2) -- ignored +jit.flush(1) -- ok +jit.flush(1) -- crashes -+ diff --git a/test/misc/lightud.lua b/test/misc/lightud.lua new file mode 100644 -index 0000000..4974d50 +index 00000000..261b106f --- /dev/null +++ b/test/misc/lightud.lua -@@ -0,0 +1,88 @@ +@@ -0,0 +1,87 @@ +local ctest = require("ctest") + +local lightud = ctest.lightud @@ -184771,13 +184694,12 @@ index 0000000..4974d50 + end, t)) + assert(x == 16110) +end -+ diff --git a/test/misc/loop_unroll.lua b/test/misc/loop_unroll.lua new file mode 100644 -index 0000000..1700fac +index 00000000..3dcb3d22 --- /dev/null +++ b/test/misc/loop_unroll.lua -@@ -0,0 +1,35 @@ +@@ -0,0 +1,34 @@ + +-- type instability on loop unroll -> record unroll +do @@ -184812,10 +184734,9 @@ index 0000000..1700fac + j = j+1 + until true +end -+ diff --git a/test/misc/parse_comp.lua b/test/misc/parse_comp.lua new file mode 100644 -index 0000000..5e1948d +index 00000000..5e1948da --- /dev/null +++ b/test/misc/parse_comp.lua @@ -0,0 +1,13 @@ @@ -184834,7 +184755,7 @@ index 0000000..5e1948d +end diff --git a/test/misc/parse_esc.lua b/test/misc/parse_esc.lua new file mode 100644 -index 0000000..4bcce0e +index 00000000..4bcce0e8 --- /dev/null +++ b/test/misc/parse_esc.lua @@ -0,0 +1,7 @@ @@ -184847,10 +184768,10 @@ index 0000000..4bcce0e + def"]])() == "abc def") diff --git a/test/misc/parse_misc.lua b/test/misc/parse_misc.lua new file mode 100644 -index 0000000..8031ec1 +index 00000000..2c9949e3 --- /dev/null +++ b/test/misc/parse_misc.lua -@@ -0,0 +1,31 @@ +@@ -0,0 +1,30 @@ + +-- Ambiguous syntax: function call vs. new statement. 
+if os.getenv("LUA52") then @@ -184881,13 +184802,12 @@ index 0000000..8031ec1 +assert(#"aäa" == 4) +assert(#"äöü·€晶" == 14) +]]))() -+ diff --git a/test/misc/phi_conv.lua b/test/misc/phi_conv.lua new file mode 100644 -index 0000000..8d7bea5 +index 00000000..0b7261c8 --- /dev/null +++ b/test/misc/phi_conv.lua -@@ -0,0 +1,53 @@ +@@ -0,0 +1,52 @@ + +local bit = require("bit") + @@ -184940,13 +184860,12 @@ index 0000000..8d7bea5 +if jit and jit.status and jit.status() then jit.opt.start("hotloop=1") end + +test() -+ diff --git a/test/misc/recurse_deep.lua b/test/misc/recurse_deep.lua new file mode 100644 -index 0000000..9b9af29 +index 00000000..f18ff5cb --- /dev/null +++ b/test/misc/recurse_deep.lua -@@ -0,0 +1,29 @@ +@@ -0,0 +1,28 @@ + +do + local function sum(n) @@ -184975,13 +184894,12 @@ index 0000000..9b9af29 + end + assert(fib(15) == 987) +end -+ diff --git a/test/misc/recurse_tail.lua b/test/misc/recurse_tail.lua new file mode 100644 -index 0000000..ef76443 +index 00000000..d6296e2b --- /dev/null +++ b/test/misc/recurse_tail.lua -@@ -0,0 +1,22 @@ +@@ -0,0 +1,21 @@ + +do + local tr1 @@ -185003,13 +184921,12 @@ index 0000000..ef76443 + end + assert(tr2(200) == 0) +end -+ diff --git a/test/misc/stack_gc.lua b/test/misc/stack_gc.lua new file mode 100644 -index 0000000..656a06a +index 00000000..f212fec5 --- /dev/null +++ b/test/misc/stack_gc.lua -@@ -0,0 +1,15 @@ +@@ -0,0 +1,14 @@ + +do + local t = setmetatable({}, { __index=function(t, k) @@ -185024,13 +184941,12 @@ index 0000000..656a06a + end}) + local x = t[50] +end -+ diff --git a/test/misc/stack_purge.lua b/test/misc/stack_purge.lua new file mode 100644 -index 0000000..bfaee0f +index 00000000..de53dea4 --- /dev/null +++ b/test/misc/stack_purge.lua -@@ -0,0 +1,25 @@ +@@ -0,0 +1,24 @@ + +-- Must preserve the modified function slot in the RET snapshot. +local function a() @@ -185055,13 +184971,12 @@ index 0000000..bfaee0f + +jit.off(c) +c() -+ diff --git a/test/misc/stackov.lua b/test/misc/stackov.lua new file mode 100644 -index 0000000..ef105af +index 00000000..65c68d95 --- /dev/null +++ b/test/misc/stackov.lua -@@ -0,0 +1,40 @@ +@@ -0,0 +1,39 @@ + +local function f() + f() @@ -185101,10 +185016,9 @@ index 0000000..ef105af + +local err, s = xpcall(vcall, debug.traceback, 1) +assert(err == false) -+ diff --git a/test/misc/stackovc.lua b/test/misc/stackovc.lua new file mode 100644 -index 0000000..c00bcbd +index 00000000..c00bcbd8 --- /dev/null +++ b/test/misc/stackovc.lua @@ -0,0 +1,4 @@ @@ -185114,10 +185028,10 @@ index 0000000..c00bcbd +assert(not ok and string.find(err, "unpack")) diff --git a/test/misc/tcall_base.lua b/test/misc/tcall_base.lua new file mode 100644 -index 0000000..c6c4ae1 +index 00000000..52882519 --- /dev/null +++ b/test/misc/tcall_base.lua -@@ -0,0 +1,20 @@ +@@ -0,0 +1,19 @@ + +local r = 0 +local function g() @@ -185137,10 +185051,9 @@ index 0000000..c6c4ae1 +g() -- Compile this loop first. 
+for i=1,50 do f() end +assert(r == 51) -+ diff --git a/test/misc/tcall_loop.lua b/test/misc/tcall_loop.lua new file mode 100644 -index 0000000..d3c6f1a +index 00000000..d3c6f1a6 --- /dev/null +++ b/test/misc/tcall_loop.lua @@ -0,0 +1,8 @@ @@ -185154,10 +185067,10 @@ index 0000000..d3c6f1a +assert(x == 100) diff --git a/test/misc/tonumber_scan.lua b/test/misc/tonumber_scan.lua new file mode 100644 -index 0000000..78e1ca3 +index 00000000..a4f51cee --- /dev/null +++ b/test/misc/tonumber_scan.lua -@@ -0,0 +1,180 @@ +@@ -0,0 +1,179 @@ +local ffi = require("ffi") +local bit = require("bit") + @@ -185337,13 +185250,12 @@ index 0000000..78e1ca3 +-- print(" "..tohex64(u.x)..", ""..s.."",") + end +end -+ diff --git a/test/misc/uclo.lua b/test/misc/uclo.lua new file mode 100644 -index 0000000..bd9bd24 +index 00000000..6b36127e --- /dev/null +++ b/test/misc/uclo.lua -@@ -0,0 +1,91 @@ +@@ -0,0 +1,90 @@ + +local function test_for() + local z1, z2 @@ -185434,13 +185346,12 @@ index 0000000..bd9bd24 + x = 2 + assert(f() == 200) +end -+ diff --git a/test/misc/unordered_jit.lua b/test/misc/unordered_jit.lua new file mode 100644 -index 0000000..5ff1a1b +index 00000000..78ce72d3 --- /dev/null +++ b/test/misc/unordered_jit.lua -@@ -0,0 +1,96 @@ +@@ -0,0 +1,95 @@ + +local nan = 0/0 +local t = {} @@ -185536,10 +185447,9 @@ index 0000000..5ff1a1b +do local z; for i=1,100 do z = not (nan >= nan) end; assert(z == true) end +do local z; for i=1,100 do z = not (nan >= 1) end; assert(z == true) end +do local z; for i=1,100 do z = not (1 >= nan) end; assert(z == true) end -+ diff --git a/test/misc/wbarrier.lua b/test/misc/wbarrier.lua new file mode 100644 -index 0000000..5536625 +index 00000000..5536625a --- /dev/null +++ b/test/misc/wbarrier.lua @@ -0,0 +1,7 @@ @@ -185552,10 +185462,10 @@ index 0000000..5536625 +end diff --git a/test/misc/wbarrier_jit.lua b/test/misc/wbarrier_jit.lua new file mode 100644 -index 0000000..2c8dd7f +index 00000000..bf1fc1e7 --- /dev/null +++ b/test/misc/wbarrier_jit.lua -@@ -0,0 +1,18 @@ +@@ -0,0 +1,17 @@ + +do + local t = {[0]={}} @@ -185573,13 +185483,12 @@ index 0000000..2c8dd7f + end + f() +end -+ diff --git a/test/misc/wbarrier_obar.lua b/test/misc/wbarrier_obar.lua new file mode 100644 -index 0000000..258db21 +index 00000000..d6504995 --- /dev/null +++ b/test/misc/wbarrier_obar.lua -@@ -0,0 +1,22 @@ +@@ -0,0 +1,21 @@ +-- DSE of USTORE must eliminate OBAR, too. 
+ +if jit and jit.opt then pcall(jit.opt.start, "-sink") end @@ -185601,10 +185510,9 @@ index 0000000..258db21 +collectgarbage("setstepmul", 1) +collectgarbage("restart") +f() -+ diff --git a/test/opt/dse/array.lua b/test/opt/dse/array.lua new file mode 100644 -index 0000000..8c76624 +index 00000000..8c766248 --- /dev/null +++ b/test/opt/dse/array.lua @@ -0,0 +1,197 @@ @@ -185807,7 +185715,7 @@ index 0000000..8c76624 +end diff --git a/test/opt/dse/field.lua b/test/opt/dse/field.lua new file mode 100644 -index 0000000..d8a5411 +index 00000000..d8a5411c --- /dev/null +++ b/test/opt/dse/field.lua @@ -0,0 +1,70 @@ @@ -185883,7 +185791,7 @@ index 0000000..d8a5411 +end diff --git a/test/opt/dse/index b/test/opt/dse/index new file mode 100644 -index 0000000..7b8ad1f +index 00000000..7b8ad1f4 --- /dev/null +++ b/test/opt/dse/index @@ -0,0 +1,2 @@ @@ -185891,14 +185799,14 @@ index 0000000..7b8ad1f +field.lua diff --git a/test/opt/fold/index b/test/opt/fold/index new file mode 100644 -index 0000000..8b4648c +index 00000000..8b4648c7 --- /dev/null +++ b/test/opt/fold/index @@ -0,0 +1 @@ +kfold.lua diff --git a/test/opt/fold/kfold.lua b/test/opt/fold/kfold.lua new file mode 100644 -index 0000000..9cd3919 +index 00000000..9cd39190 --- /dev/null +++ b/test/opt/fold/kfold.lua @@ -0,0 +1,81 @@ @@ -185985,7 +185893,7 @@ index 0000000..9cd3919 +end diff --git a/test/opt/fuse.lua b/test/opt/fuse.lua new file mode 100644 -index 0000000..a68381e +index 00000000..a68381ef --- /dev/null +++ b/test/opt/fuse.lua @@ -0,0 +1,5 @@ @@ -185996,7 +185904,7 @@ index 0000000..a68381e +end diff --git a/test/opt/fwd/hrefk_rollback.lua b/test/opt/fwd/hrefk_rollback.lua new file mode 100644 -index 0000000..5a6ad87 +index 00000000..5a6ad876 --- /dev/null +++ b/test/opt/fwd/hrefk_rollback.lua @@ -0,0 +1,32 @@ @@ -186034,7 +185942,7 @@ index 0000000..5a6ad87 +end diff --git a/test/opt/fwd/index b/test/opt/fwd/index new file mode 100644 -index 0000000..5bb1537 +index 00000000..5bb1537f --- /dev/null +++ b/test/opt/fwd/index @@ -0,0 +1,3 @@ @@ -186043,7 +185951,7 @@ index 0000000..5bb1537 +upval.lua diff --git a/test/opt/fwd/tnew_tdup.lua b/test/opt/fwd/tnew_tdup.lua new file mode 100644 -index 0000000..9e18fa3 +index 00000000..9e18fa3b --- /dev/null +++ b/test/opt/fwd/tnew_tdup.lua @@ -0,0 +1,69 @@ @@ -186118,7 +186026,7 @@ index 0000000..9e18fa3 +end diff --git a/test/opt/fwd/upval.lua b/test/opt/fwd/upval.lua new file mode 100644 -index 0000000..a3e83df +index 00000000..a3e83dff --- /dev/null +++ b/test/opt/fwd/upval.lua @@ -0,0 +1,50 @@ @@ -186174,7 +186082,7 @@ index 0000000..a3e83df +end diff --git a/test/opt/index b/test/opt/index new file mode 100644 -index 0000000..94d50ae +index 00000000..94d50aec --- /dev/null +++ b/test/opt/index @@ -0,0 +1,6 @@ @@ -186186,14 +186094,14 @@ index 0000000..94d50ae +sink +sink diff --git a/test/opt/loop/index b/test/opt/loop/index new file mode 100644 -index 0000000..e582023 +index 00000000..e5820234 --- /dev/null +++ b/test/opt/loop/index @@ -0,0 +1 @@ +unroll.lua diff --git a/test/opt/loop/unroll.lua b/test/opt/loop/unroll.lua new file mode 100644 -index 0000000..6fbd565 +index 00000000..6fbd565a --- /dev/null +++ b/test/opt/loop/unroll.lua @@ -0,0 +1,32 @@ @@ -186231,7 +186139,7 @@ index 0000000..6fbd565 +end diff --git a/test/opt/sink/alloc.lua b/test/opt/sink/alloc.lua new file mode 100644 -index 0000000..bb2a0f7 +index 00000000..bb2a0f72 --- /dev/null +++ b/test/opt/sink/alloc.lua @@ -0,0 +1,126 @@ @@ -186363,7 +186271,7 @@ index 0000000..bb2a0f7 +end diff --git 
a/test/opt/sink/ffi.lua b/test/opt/sink/ffi.lua new file mode 100644 -index 0000000..0bba097 +index 00000000..0bba0978 --- /dev/null +++ b/test/opt/sink/ffi.lua @@ -0,0 +1,121 @@ @@ -186490,7 +186398,7 @@ index 0000000..0bba097 +end diff --git a/test/opt/sink/ffi_nosink.lua b/test/opt/sink/ffi_nosink.lua new file mode 100644 -index 0000000..8f7cced +index 00000000..8f7cced2 --- /dev/null +++ b/test/opt/sink/ffi_nosink.lua @@ -0,0 +1,45 @@ @@ -186541,7 +186449,7 @@ index 0000000..8f7cced +end diff --git a/test/opt/sink/index b/test/opt/sink/index new file mode 100644 -index 0000000..8bfa370 +index 00000000..8bfa370e --- /dev/null +++ b/test/opt/sink/index @@ -0,0 +1,4 @@ @@ -186551,7 +186459,7 @@ index 0000000..8bfa370 +ffi_nosink.lua +ffi diff --git a/test/opt/sink/nosink.lua b/test/opt/sink/nosink.lua new file mode 100644 -index 0000000..762aace +index 00000000..762aaced --- /dev/null +++ b/test/opt/sink/nosink.lua @@ -0,0 +1,109 @@ @@ -186666,7 +186574,7 @@ index 0000000..762aace +end diff --git a/test/src/cpptest.cpp b/test/src/cpptest.cpp new file mode 100644 -index 0000000..a5893ed +index 00000000..a5893ed6 --- /dev/null +++ b/test/src/cpptest.cpp @@ -0,0 +1,129 @@ @@ -186801,7 +186709,7 @@ index 0000000..a5893ed +} diff --git a/test/src/ctest.c b/test/src/ctest.c new file mode 100644 -index 0000000..d257567 +index 00000000..d257567b --- /dev/null +++ b/test/src/ctest.c @@ -0,0 +1,339 @@ @@ -187146,10 +187054,10 @@ index 0000000..d257567 +} diff --git a/test/sysdep/catch_cpp.lua b/test/sysdep/catch_cpp.lua new file mode 100644 -index 0000000..b225100 +index 00000000..f2cfca64 --- /dev/null +++ b/test/sysdep/catch_cpp.lua -@@ -0,0 +1,71 @@ +@@ -0,0 +1,70 @@ + +local cp = require("cpptest") + @@ -187220,10 +187128,9 @@ index 0000000..b225100 + local a,b,c,d,e,f = cp.usereg(100, 50, foo, false) + assert(a==164 and b==312 and c==428 and d==3696 and e==404 and f==404) +end -+ diff --git a/test/sysdep/ffi_include_gtk.lua b/test/sysdep/ffi_include_gtk.lua new file mode 100644 -index 0000000..a4bfcea +index 00000000..a4bfceac --- /dev/null +++ b/test/sysdep/ffi_include_gtk.lua @@ -0,0 +1,9 @@ @@ -187238,10 +187145,10 @@ index 0000000..a4bfcea +include"/usr/include/gtk-2.0/gtk/gtk.h" diff --git a/test/sysdep/ffi_include_std.lua b/test/sysdep/ffi_include_std.lua new file mode 100644 -index 0000000..b88c82b +index 00000000..5ef1affa --- /dev/null +++ b/test/sysdep/ffi_include_std.lua -@@ -0,0 +1,36 @@ +@@ -0,0 +1,35 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") @@ -187277,13 +187184,12 @@ index 0000000..b88c82b + os.remove("/tmp/__tmp.c") + ffi.cdef(s) +end -+ diff --git a/test/sysdep/ffi_lib_c.lua b/test/sysdep/ffi_lib_c.lua new file mode 100644 -index 0000000..a368d75 +index 00000000..f0ff0ad9 --- /dev/null +++ b/test/sysdep/ffi_lib_c.lua -@@ -0,0 +1,87 @@ +@@ -0,0 +1,86 @@ +local ffi = require("ffi") + +ffi.cdef[[ @@ -187370,13 +187276,12 @@ index 0000000..a368d75 + ffi.load("pthread") + end +end -+ diff --git a/test/sysdep/ffi_lib_z.lua b/test/sysdep/ffi_lib_z.lua new file mode 100644 -index 0000000..69a19ae +index 00000000..91b1272d --- /dev/null +++ b/test/sysdep/ffi_lib_z.lua -@@ -0,0 +1,107 @@ +@@ -0,0 +1,106 @@ +local ffi = require("ffi") + +local compress, uncompress @@ -187483,10 +187388,9 @@ index 0000000..69a19ae +assert(2*#c < #txt) +local txt2 = uncompress(c, #txt) +assert(txt2 == txt) -+ diff --git a/test/test.lua b/test/test.lua new file mode 100644 -index 0000000..b064eff +index 00000000..f5131ba2 --- /dev/null +++ b/test/test.lua @@ -0,0 +1,416 @@ @@ 
-187501,7 +187405,7 @@ index 0000000..b064eff + +local function default_tags() + local tags = {} -+ ++ + -- Lua version and features + tags.lua = tonumber(_VERSION:match"%d+%.%d+") + if table.pack then @@ -187535,7 +187439,7 @@ index 0000000..b064eff + tags[flag:lower()] = true + end + end -+ ++ + -- Environment + if dirsep == "\" then + tags.windows = true @@ -187557,7 +187461,7 @@ index 0000000..b064eff + tags["abi".. (bytecode:byte(9, 9) * 8)] = true + end + end -+ ++ + return tags +end + @@ -187594,7 +187498,7 @@ index 0000000..b064eff + want_meta = want_meta, + } + local result = opts -+ ++ + local i, tlen = 1, #t + local joinedval = "" + local function flagval() @@ -187609,7 +187513,7 @@ index 0000000..b064eff + end + return val + end -+ ++ + while i <= tlen do + local arg = t[i] + i = i + 1 @@ -187800,7 +187704,7 @@ index 0000000..b064eff + end + end + seal(_G) -+ ++ + if getmetatable(package.loaded) == sealed_mt then + setmetatable(package.loaded, nil) + end @@ -187908,7 +187812,7 @@ index 0000000..b064eff +end diff --git a/test/trace/exit_frame.lua b/test/trace/exit_frame.lua new file mode 100644 -index 0000000..9537c56 +index 00000000..9537c563 --- /dev/null +++ b/test/trace/exit_frame.lua @@ -0,0 +1,79 @@ @@ -187993,7 +187897,7 @@ index 0000000..9537c56 +end diff --git a/test/trace/exit_growstack.lua b/test/trace/exit_growstack.lua new file mode 100644 -index 0000000..658a31a +index 00000000..4aa6fae8 --- /dev/null +++ b/test/trace/exit_growstack.lua @@ -0,0 +1,28 @@ @@ -188002,7 +187906,7 @@ index 0000000..658a31a + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; -+ if i==90 then return end ++ if i==90 then return end + end + for j=1,5 do + collectgarbage() -- Shrink stack. 
@@ -188027,7 +187931,7 @@ index 0000000..658a31a +end diff --git a/test/trace/exit_jfuncf.lua b/test/trace/exit_jfuncf.lua new file mode 100644 -index 0000000..67ad7c3 +index 00000000..67ad7c36 --- /dev/null +++ b/test/trace/exit_jfuncf.lua @@ -0,0 +1,30 @@ @@ -188063,7 +187967,7 @@ index 0000000..67ad7c3 +end diff --git a/test/trace/gc64_slot_revival.lua b/test/trace/gc64_slot_revival.lua new file mode 100644 -index 0000000..40b9d87 +index 00000000..40b9d871 --- /dev/null +++ b/test/trace/gc64_slot_revival.lua @@ -0,0 +1,18 @@ @@ -188087,7 +187991,7 @@ index 0000000..40b9d87 +end diff --git a/test/trace/index b/test/trace/index new file mode 100644 -index 0000000..ea7a22e +index 00000000..ea7a22e0 --- /dev/null +++ b/test/trace/index @@ -0,0 +1,7 @@ @@ -188100,7 +188004,7 @@ index 0000000..ea7a22e +stitch.lua diff --git a/test/trace/phi/copyspill.lua b/test/trace/phi/copyspill.lua new file mode 100644 -index 0000000..17a8698 +index 00000000..17a8698f --- /dev/null +++ b/test/trace/phi/copyspill.lua @@ -0,0 +1,53 @@ @@ -188159,7 +188063,7 @@ index 0000000..17a8698 +end diff --git a/test/trace/phi/index b/test/trace/phi/index new file mode 100644 -index 0000000..74a0733 +index 00000000..74a07333 --- /dev/null +++ b/test/trace/phi/index @@ -0,0 +1,3 @@ @@ -188168,7 +188072,7 @@ index 0000000..74a0733 +rotate.lua diff --git a/test/trace/phi/ref.lua b/test/trace/phi/ref.lua new file mode 100644 -index 0000000..3662912 +index 00000000..3662912d --- /dev/null +++ b/test/trace/phi/ref.lua @@ -0,0 +1,131 @@ @@ -188305,7 +188209,7 @@ index 0000000..3662912 +end diff --git a/test/trace/phi/rotate.lua b/test/trace/phi/rotate.lua new file mode 100644 -index 0000000..cb751e0 +index 00000000..cb751e0b --- /dev/null +++ b/test/trace/phi/rotate.lua @@ -0,0 +1,149 @@ @@ -188460,7 +188364,7 @@ index 0000000..cb751e0 +end diff --git a/test/trace/snap.lua b/test/trace/snap.lua new file mode 100644 -index 0000000..ba26326 +index 00000000..ba26326e --- /dev/null +++ b/test/trace/snap.lua @@ -0,0 +1,47 @@ @@ -188513,7 +188417,7 @@ index 0000000..ba26326 +end diff --git a/test/trace/stitch.lua b/test/trace/stitch.lua new file mode 100644 -index 0000000..3f7f973 +index 00000000..3f7f9734 --- /dev/null +++ b/test/trace/stitch.lua @@ -0,0 +1,19 @@ @@ -188538,7 +188442,7 @@ index 0000000..3f7f973 +end diff --git a/test/unportable/ffi_arith_int64.lua b/test/unportable/ffi_arith_int64.lua new file mode 100644 -index 0000000..c05e02a +index 00000000..c05e02a9 --- /dev/null +++ b/test/unportable/ffi_arith_int64.lua @@ -0,0 +1,68 @@ @@ -188612,10 +188516,10 @@ index 0000000..c05e02a +{471871,702627,720692,1385612,1803393,1171039,1772007,763817,1583994,4486762,2380423,566647,1265370,2319256,770581,1990479,4566660,2319835,566647,1265370,2319256,770581,1990479,4566660,2319835,830322,4833809,4644705,1071753,2822313,7709069,4647021,}) diff --git a/test/unportable/math_special.lua b/test/unportable/math_special.lua new file mode 100644 -index 0000000..4916101 +index 00000000..ef4de129 --- /dev/null +++ b/test/unportable/math_special.lua -@@ -0,0 +1,55 @@ +@@ -0,0 +1,54 @@ + +local inp = { 0, -"0", 0.5, -0.5, 1, -1, 1/0, -1/0, 0/0 } + @@ -188670,7 +188574,443 @@ index 0000000..4916101 + +-- Pointless: deg, rad, min, max, pow +-- LATER: %, fmod, frexp, ldexp, modf, sinh, cosh, tanh +-- +2.31.1 + + +From 6508eeb5c841344e3f128267a04e8150dd36f926 Mon Sep 17 00:00:00 2001 +From: Sameera Deshpande sameera.deshpande@linaro.org +Date: Fri, 15 Feb 2019 07:46:16 +0530 +Subject: [PATCH 02/10] Add support for FNMADD and FNMSUB. 
+ +--- + src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h +index 67c53ee2..0e913fa5 100644 +--- a/src/lj_asm_arm64.h ++++ b/src/lj_asm_arm64.h +@@ -353,6 +353,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) + return 0; + } + ++/* Fuse FP neg-multiply-add/sub. */ ++static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) ++{ ++ IRRef ref = ir->op1; ++ IRIns *irn = IR(ref); ++ if (irn->o != IR_ADD && irn->o != IR_SUB) ++ return 0; ++ ++ if (!mayfuse(as, ref)) ++ return 0; ++ ++ IRRef lref = irn->op1, rref = irn->op2; ++ IRIns *irm; ++ if (lref != rref && ++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ++ ra_noreg(irm->r)) || ++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && ++ (rref = lref, ra_noreg(irm->r))))) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); ++ Reg left = ra_alloc2(as, irm, ++ rset_exclude(rset_exclude(RSET_FPR, dest), add)); ++ Reg right = (left >> 8); left &= 255; ++ emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31)); ++ return 1; ++ } ++ return 0; ++} + + /* Fuse BAND + BSHL/BSHR into UBFM. */ + static int asm_fuseandshift(ASMState *as, IRIns *ir) + { +@@ -1466,7 +1495,8 @@ static void asm_mul(ASMState *as, IRIns *ir) + static void asm_neg(ASMState *as, IRIns *ir) + { + if (irt_isnum(ir->t)) { +- asm_fpunary(as, ir, A64I_FNEGd); ++ if (!asm_fusenmadd(as, ir, A64I_FNMADDd)) ++ asm_fpunary(as, ir, A64I_FNEGd); + return; + } + asm_intneg(as, ir); +-- +2.31.1 + + +From aa0b2a0c837af307d26468fce05a7c24ab6045d3 Mon Sep 17 00:00:00 2001 +From: Vivien HENRIET bubuabu@bubuabu.org +Date: Wed, 30 Jan 2019 23:44:51 +0100 +Subject: [PATCH 03/10] Fix os.date() for timezone change awareness + +On POSIX target, system timezone change are not taken into account. +To reproduce, +1. call os.date() +2. change your timezone +3. call os.date() within the same luajit instance + +On POSIX target, os.date use localtime_r to retrieve time. +On other target, the function localtime is used. But there is a behaviour +diference between these two function. localtime acts as if it called tzset +which localtime_r don't. + +To fix the issue tzset is called before localtime_r. +--- + src/lib_os.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/lib_os.c b/src/lib_os.c +index f19b831c..609cb2ec 100644 +--- a/src/lib_os.c ++++ b/src/lib_os.c +@@ -185,6 +185,7 @@ LJLIB_CF(os_date) + #endif + } else { + #if LJ_TARGET_POSIX ++ tzset(); + stm = localtime_r(&t, &rtm); + #else + stm = localtime(&t); +-- +2.31.1 + + +From a62dc6306c4e5a4b672040067c169143da804a4f Mon Sep 17 00:00:00 2001 +From: Siddhesh Poyarekar siddhesh@sourceware.org +Date: Thu, 14 Mar 2019 23:08:24 +0530 +Subject: [PATCH 04/10] Revert "FFI: Make FP to U64 conversions match JIT + backend behavior." + +This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8. + +The patch breaks test 279, i.e. + + assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL") + +The patch was put in to make the JIT and interpreter behaviour +consistent[1] for float to unsigned int conversions but it ended up +making things worse. There needs to be a better fix for this. 
+ +[1] https://github.com/LuaJIT/LuaJIT/pull/415 +--- + src/lj_obj.h | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/src/lj_obj.h b/src/lj_obj.h +index 1a6445fc..97885683 100644 +--- a/src/lj_obj.h ++++ b/src/lj_obj.h +@@ -995,22 +995,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) + + #define lj_num2int(n) ((int32_t)(n)) + +-/* +-** This must match the JIT backend behavior. In particular for archs +-** that don't have a common hardware instruction for this conversion. +-** Note that signed FP to unsigned int conversions have an undefined +-** result and should never be relied upon in portable FFI code. +-** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). +-*/ + static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) + { +-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS +- int64_t i = (int64_t)n; +- if (i < 0) i = (int64_t)(n - 18446744073709551616.0); +- return (uint64_t)i; +-#else +- return (uint64_t)n; ++#ifdef _MSC_VER ++ if (n >= 9223372036854775808.0) /* They think it's a feature. */ ++ return (uint64_t)(int64_t)(n - 18446744073709551616.0); ++ else + #endif ++ return (uint64_t)n; + } + + static LJ_AINLINE int32_t numberVint(cTValue *o) +-- +2.31.1 + + +From fe2399a76bab67b32409fda1de82c34e8d5d7904 Mon Sep 17 00:00:00 2001 +From: Siddhesh Poyarekar siddhesh@sourceware.org +Date: Sun, 17 Mar 2019 11:34:04 +0530 +Subject: [PATCH 05/10] Guard against undefined behaviour when casting from + float to unsigned + +Only range (-1.0, UINT64_MAX) can be safely converted to unsigned +directly, and (-INT64_MAX,INT_64_MAX) through a cast to int64_t first. +The remaining range is undefined. + +TODO: Do the same for JIT as well as for float to other ranges. +--- + src/lj_obj.h | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/src/lj_obj.h b/src/lj_obj.h +index 97885683..9878059f 100644 +--- a/src/lj_obj.h ++++ b/src/lj_obj.h +@@ -997,12 +997,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) + + static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) + { ++ /* Undefined behaviour. This is deliberately not a full check because we ++ don't want to slow down compliant code. */ ++ lj_assertX(n >= -9223372036854775809.0, "Overflow"); + #ifdef _MSC_VER + if (n >= 9223372036854775808.0) /* They think it's a feature. 
*/ + return (uint64_t)(int64_t)(n - 18446744073709551616.0); + else + #endif +- return (uint64_t)n; ++ if (n > -1.0) ++ return (uint64_t)n; ++ else ++ return (uint64_t)(int64_t)n; + } + + static LJ_AINLINE int32_t numberVint(cTValue *o) +-- +2.31.1 + + +From c193115e16a138dac69f774a7f57a5b4cc7f1097 Mon Sep 17 00:00:00 2001 +From: Siddhesh Poyarekar siddhesh@sourceware.org +Date: Mon, 25 Mar 2019 17:56:53 +0530 +Subject: [PATCH 06/10] Fix build erro with fnmsub fusing + +--- + src/lj_asm_arm64.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h +index 0e913fa5..4c7bf401 100644 +--- a/src/lj_asm_arm64.h ++++ b/src/lj_asm_arm64.h +@@ -1495,7 +1495,7 @@ static void asm_mul(ASMState *as, IRIns *ir) + static void asm_neg(ASMState *as, IRIns *ir) + { + if (irt_isnum(ir->t)) { +- if (!asm_fusenmadd(as, ir, A64I_FNMADDd)) ++ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd)) + asm_fpunary(as, ir, A64I_FNEGd); + return; + } +-- +2.31.1 + + +From b5c2492406bf07ab80e134f351c1066e8f6224f2 Mon Sep 17 00:00:00 2001 +From: Siddhesh Poyarekar siddhesh@sourceware.org +Date: Thu, 28 Mar 2019 09:19:34 +0530 +Subject: [PATCH 07/10] aarch64: better float to unsigned int conversion + +A straight float to unsigned conversion has a limited range of (-1.0, +UTYPE_MAX) which should be fine in general but for the sake of +consistency across the interpreter and the JIT compiler, it is +necessary to work a wee bit harder to expand this range to (TYPE_MIN, +UTYPE_MAX), which can be done with a simple range check. This adds a +couple of branches but only one of the branches should have a +noticeable performance impact on most processors with branch +predictors, and that too only if the input number varies wildly in +range. + +This currently works only for 64-bit conversions, 32-bit is still WIP. +--- + src/lj_asm_arm64.h | 30 ++++++++++++++++++++++-------- + src/lj_target_arm64.h | 1 + + 2 files changed, 23 insertions(+), 8 deletions(-) + +diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h +index 4c7bf401..e7e744a8 100644 +--- a/src/lj_asm_arm64.h ++++ b/src/lj_asm_arm64.h +@@ -626,14 +626,28 @@ static void asm_conv(ASMState *as, IRIns *ir) + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_GPR); +- A64Ins ai = irt_is64(ir->t) ? +- (st == IRT_NUM ? +- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : +- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : +- (st == IRT_NUM ? +- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : +- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); +- emit_dn(as, ai, dest, (left & 31)); ++ ++ A64Ins ai_signed = st == IRT_NUM ? ++ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) : ++ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32); ++ ++ if (irt_isi64(ir->t) || irt_isint(ir->t)) ++ emit_dn(as, ai_signed, dest, (left & 31)); ++ else { ++ A64Ins ai_unsigned = st == IRT_NUM ? ++ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) : ++ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32); ++ ++ MCLabel l_done = emit_label(as); ++ emit_dn(as, ai_unsigned, dest, (left & 31)); ++ MCLabel l_signed = emit_label(as); ++ emit_jmp(as, l_done); ++ emit_dn(as, ai_signed, dest, (left & 31)); ++ /* The valid range for float to unsigned int conversion is (-1.0, ++ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */ ++ emit_cond_branch(as, CC_PL, l_signed); ++ emit_nm(as, st == IRT_NUM ? 
A64I_FCMPZd : A64I_FCMPZs, left & 31, 0); ++ } + } + } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg dest = ra_dest(as, ir, RSET_GPR); +diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h +index 6d39ffb8..370699d9 100644 +--- a/src/lj_target_arm64.h ++++ b/src/lj_target_arm64.h +@@ -283,6 +283,7 @@ typedef enum A64Ins { + A64I_STPs = 0x2d000000, + A64I_STPd = 0x6d000000, + A64I_FCMPd = 0x1e602000, ++ A64I_FCMPZs = 0x1e202008, + A64I_FCMPZd = 0x1e602008, + A64I_FCSELd = 0x1e600c00, + A64I_FRINTMd = 0x1e654000, +-- +2.31.1 + + +From bd79b1d4596ed6780470c8d02f77b8398d80cd3a Mon Sep 17 00:00:00 2001 +From: Siddhesh Poyarekar siddhesh@sourceware.org +Date: Thu, 28 Mar 2019 10:50:23 +0530 +Subject: [PATCH 08/10] Better behaviour for float to uint32_t conversions + +This is the uint32_t part of the float to unsigned int conversions for +the interpreter. The cast ends up working correctly for x86 but not +for aarch64 since fcvtzu sets the result to zero on negative inputs. +Work slightly harder to make sure that negative number inputs behave +like x86. + +This fixes the interpreter but not the JIT compiler, which errors out +during the narrowing pass. +--- + src/lj_cconv.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/src/lj_cconv.c b/src/lj_cconv.c +index 613f66e2..7e8a8b92 100644 +--- a/src/lj_cconv.c ++++ b/src/lj_cconv.c +@@ -203,7 +203,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, + else if (dsize == 2) *(int16_t *)dp = (int16_t)i; + else *(int8_t *)dp = (int8_t)i; + } else if (dsize == 4) { +- *(uint32_t *)dp = (uint32_t)n; ++ /* Undefined behaviour. This is deliberately not a full check because we ++ * don't want to slow down compliant code. */ ++ lj_assertX(n >= -2147483649.0, "Overflow"); ++ if (n > -1.0) ++ *(uint32_t *)dp = (uint32_t)n; ++ else ++ *(uint32_t *)dp = (uint32_t)(int32_t)n; + } else if (dsize == 8) { + if (!(dinfo & CTF_UNSIGNED)) + *(int64_t *)dp = (int64_t)n; +-- +2.31.1 + + +From a1636c6e1879b5eeb55a51ebba796501c93614dd Mon Sep 17 00:00:00 2001 +From: Siddhesh Poyarekar siddhesh@sourceware.org +Date: Mon, 17 Jun 2019 13:50:57 +0530 +Subject: [PATCH 09/10] test: Check for package.searchers only in compat5.2 + +LuaJIT version check for lua will return true for +lua<5.2 since it +does not fully implement 5.2. Move the (not package.searchers) check +to +compat5.2 instead of the version check since it is implemented by +compat5.2. 
+--- + test/lib/contents.lua | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/test/lib/contents.lua b/test/lib/contents.lua +index 2baacd5c..09866f6f 100644 +--- a/test/lib/contents.lua ++++ b/test/lib/contents.lua +@@ -121,10 +121,13 @@ end + + do --- pre-5.2 package +lua<5.2 + assert(package.loaders) +- assert(not package.searchers) + assert(package.seeall) + end + ++do --- 5.2 compat package +compat5.2 ++ assert(package.searchers) ++end ++ + do --- 5.2 package +lua>=5.2 + assert(not package.loaders) + assert(package.searchers) +-- +2.31.1 + + +From ec04137a0873c09eef216b32f3df3b66209f47d5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= ondrej@sury.org +Date: Thu, 19 Nov 2015 16:29:02 +0200 +Subject: [PATCH 10/10] Get rid of LUAJIT_VERSION_SYM that changes ABI on every + patch release + +--- + src/lj_dispatch.c | 5 ----- + src/luajit.c | 2 -- + src/luajit.h | 3 --- + 3 files changed, 10 deletions(-) + +diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c +index 7b73d3dd..3f69d0d1 100644 +--- a/src/lj_dispatch.c ++++ b/src/lj_dispatch.c +@@ -318,11 +318,6 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) + return 1; /* OK. */ + } + +-/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */ +-LUA_API void LUAJIT_VERSION_SYM(void) +-{ +-} +- + /* -- Hooks --------------------------------------------------------------- */ + + /* This function can be called asynchronously (e.g. during a signal). */ +diff --git a/src/luajit.c b/src/luajit.c +index 6aed5337..a4a11cb1 100644 +--- a/src/luajit.c ++++ b/src/luajit.c +@@ -518,8 +518,6 @@ static int pmain(lua_State *L) + globalL = L; + if (argv[0] && argv[0][0]) progname = argv[0]; + +- LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ +- + argn = collectargs(argv, &flags); + if (argn < 0) { /* Invalid args? */ + print_usage(); +diff --git a/src/luajit.h b/src/luajit.h +index 2ee1f908..04f6b456 100644 +--- a/src/luajit.h ++++ b/src/luajit.h +@@ -73,7 +73,4 @@ LUA_API void luaJIT_profile_stop(lua_State *L); + LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, + int depth, size_t *len); + +-/* Enforce (dynamic) linker error for version mismatches. Call from main. */ +-LUA_API void LUAJIT_VERSION_SYM(void); +- + #endif -- -2.20.1 +2.31.1
diff --git a/luajit-2.1-update.patch b/luajit-2.1-update.patch new file mode 100644 index 0000000..93fa207 --- /dev/null +++ b/luajit-2.1-update.patch @@ -0,0 +1,37572 @@ +diff --git a/COPYRIGHT b/COPYRIGHT +index 6ed40025..9c2bca55 100644 +--- a/COPYRIGHT ++++ b/COPYRIGHT +@@ -1,7 +1,7 @@ + =============================================================================== +-LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ ++LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/ + +-Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++Copyright (C) 2005-2021 Mike Pall. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal +@@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +-[ MIT license: http://www.opensource.org/licenses/mit-license.php ] ++[ MIT license: https://www.opensource.org/licenses/mit-license.php ] + + =============================================================================== + [ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ] +@@ -51,6 +51,6 @@ THE SOFTWARE. + + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at +-http://creativecommons.org/licenses/publicdomain ++https://creativecommons.org/licenses/publicdomain + + =============================================================================== +diff --git a/Makefile b/Makefile +index 0f933089..aa1b84bd 100644 +--- a/Makefile ++++ b/Makefile +@@ -10,7 +10,7 @@ + # For MSVC, please follow the instructions given in src/msvcbuild.bat. + # For MinGW and Cygwin, cd to src and run make with the Makefile there. + # +-# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ############################################################################## + + MAJVER= 2 +@@ -75,7 +75,7 @@ SYMLINK= ln -sf + INSTALL_X= install -m 0755 + INSTALL_F= install -m 0644 + UNINSTALL= $(RM) +-LDCONFIG= ldconfig -n ++LDCONFIG= ldconfig -n 2>/dev/null + SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ + -e "s|^multilib=.*|multilib=$(MULTILIB)|" + +@@ -121,7 +121,7 @@ install: $(INSTALL_DEP) + $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) + cd src && test -f $(FILE_SO) && \ + $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ +- $(LDCONFIG) $(INSTALL_LIB) && \ ++ ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : + cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) +diff --git a/README b/README +index 2b9ae9d2..c9f7d9ad 100644 +--- a/README ++++ b/README +@@ -3,9 +3,9 @@ README for LuaJIT 2.1.0-beta3 + + LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. + +-Project Homepage: http://luajit.org/ ++Project Homepage: https://luajit.org/ + +-LuaJIT is Copyright (C) 2005-2017 Mike Pall. ++LuaJIT is Copyright (C) 2005-2021 Mike Pall. + LuaJIT is free software, released under the MIT license. + See full Copyright Notice in the COPYRIGHT file or in luajit.h. 
+ +diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css +index 62e1c165..0b385cee 100644 +--- a/doc/bluequad-print.css ++++ b/doc/bluequad-print.css +@@ -1,4 +1,4 @@ +-/* Copyright (C) 2004-2017 Mike Pall. ++/* Copyright (C) 2004-2021 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. +diff --git a/doc/bluequad.css b/doc/bluequad.css +index be2c4bf2..86cd9ac0 100644 +--- a/doc/bluequad.css ++++ b/doc/bluequad.css +@@ -1,4 +1,4 @@ +-/* Copyright (C) 2004-2017 Mike Pall. ++/* Copyright (C) 2004-2021 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. +diff --git a/doc/changes.html b/doc/changes.html +deleted file mode 100644 +index a66a8d95..00000000 +--- a/doc/changes.html ++++ /dev/null +@@ -1,883 +0,0 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +-<html> +-<head> +-<title>LuaJIT Change History</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> +-<meta name="Language" content="en"> +-<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +-<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +-<style type="text/css"> +-div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; } +-</style> +-</head> +-<body> +-<div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +-</div> +-<div id="head"> +-<h1>LuaJIT Change History</h1> +-</div> +-<div id="nav"> +-<ul><li> +-<a href="luajit.html">LuaJIT</a> +-<ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +-</li><li> +-<a href="install.html">Installation</a> +-</li><li> +-<a href="running.html">Running</a> +-</li></ul> +-</li><li> +-<a href="extensions.html">Extensions</a> +-<ul><li> +-<a href="ext_ffi.html">FFI Library</a> +-<ul><li> +-<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +-</li><li> +-<a href="ext_ffi_api.html">ffi.* API</a> +-</li><li> +-<a href="ext_ffi_semantics.html">FFI Semantics</a> +-</li></ul> +-</li><li> +-<a href="ext_jit.html">jit.* Library</a> +-</li><li> +-<a href="ext_c_api.html">Lua/C API</a> +-</li><li> +-<a href="ext_profiler.html">Profiler</a> +-</li></ul> +-</li><li> +-<a href="status.html">Status</a> +-<ul><li> +-<a class="current" href="changes.html">Changes</a> +-</li></ul> +-</li><li> +-<a href="faq.html">FAQ</a> +-</li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> +-<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +-</li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +-</li></ul> +-</div> +-<div id="main"> +-<p> +-This is a list of changes between the released versions of LuaJIT.<br> +-The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT 2.0.5</strong>.<br> +-</p> +-<p> +-Please check the +-<a href="http://luajit.org/changes.html"><span class="ext">»</span> Online Change History</a> +-to see whether newer versions are available. 
+-</p> +- +-<div class="major" style="background: #d0d0ff;"> +-<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 — 2017-05-01</h2> +-<ul> +-<li>Rewrite memory block allocator.</li> +-<li>Add various extension from Lua 5.2/5.3.</li> +-<li>Remove old Lua 5.0 compatibility defines.</li> +-<li>Set arg table before evaluating <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> +-<li>Fix FOLD rules for <tt>math.abs()</tt> and FP negation.</li> +-<li>Fix soft-float <tt>math.abs()</tt> and negation.</li> +-<li>Fix formatting of some small denormals at low precision.</li> +-<li>LJ_GC64: Add JIT compiler support.</li> +-<li>x64/LJ_GC64: Add JIT compiler backend.</li> +-<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li> +-<li>Windows/x86: Add full exception interoperability.</li> +-<li>ARM64: Add big-endian support.</li> +-<li>ARM64: Add JIT compiler backend.</li> +-<li>MIPS: Fix <tt>TSETR</tt> barrier.</li> +-<li>MIPS: Support MIPS16 interlinking.</li> +-<li>MIPS soft-float: Fix code generation for <tt>HREF</tt>.</li> +-<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li> +-<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li> +-<li>FFI: Compile bitfield loads/stores.</li> +-<li>Various fixes common with the 2.0 branch.</li> +-</ul> +- +-<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 — 2016-03-03</h2> +-<ul> +-<li>Enable trace stitching.</li> +-<li>Use internal implementation for converting FP numbers to strings.</li> +-<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li> +-<li>Add MIPS soft-float support.</li> +-<li>Switch MIPS port to dual-number mode.</li> +-<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li> +-<li>FFI: Add <tt>ssize_t</tt> declaration.</li> +-<li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li> +-<li>Various minor fixes.</li> +-</ul> +- +-<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 — 2015-08-25</h2> +-<p> +-This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0. +-Please take a look at the commit history for more details. +-</p> +-<ul> +-<li>Changes to the VM core: +-<ul> +-<li>Add low-overhead profiler (<tt>-jp</tt>).</li> +-<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li> +-<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li> +-<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li> +-<li>Parse binary number literals (<tt>0bxxx</tt>).</li> +-</ul></li> +-<li>Improvements to the JIT compiler: +-<ul> +-<li>Add trace stitching (disabled for now).</li> +-<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li> +-<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li> +-<li>Compile <tt>BC_TSETM</tt>, e.g. 
<tt>{1,2,3,f()}</tt>.</li> +-<li>Compile string concatenations (<tt>BC_CAT</tt>).</li> +-<li>Compile <tt>__concat</tt> metamethod.</li> +-<li>Various minor optimizations.</li> +-</ul></li> +-<li>Internal Changes: +-<ul> +-<li>Add support for embedding LuaJIT bytecode for builtins.</li> +-<li>Replace various builtins with embedded bytecode.</li> +-<li>Refactor string buffers and string formatting.</li> +-<li>Remove obsolete non-truncating number to integer conversions.</li> +-</ul></li> +-<li>Ports: +-<ul> +-<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li> +-<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li> +-<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li> +-<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li> +-<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li> +-<li>PPC/e500: Drop support for this architecture.</li> +-</ul></li> +-<li>FFI library: +-<ul> +-<li>FFI: Add 64 bit bitwise operations.</li> +-<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li> +-<li>FFI: Compile conversions from functions to function pointers.</li> +-<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li> +-<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li> +-<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li> +-</ul></li> +-</ul> +-</div> +- +-<div class="major" style="background: #ffffd0;"> +-<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 — 2017-05-01</h2> +-<ul> +-<li>Add workaround for MSVC 2015 stdio changes.</li> +-<li>Limit mcode alloc probing, depending on the available pool size.</li> +-<li>Fix overly restrictive range calculation in mcode allocation.</li> +-<li>Fix out-of-scope goto handling in parser.</li> +-<li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li> +-<li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li> +-<li>Fix GC step size calculation.</li> +-<li>Initialize <tt>uv->immutable</tt> for upvalues of loaded chunks.</li> +-<li>Fix for cdata vs. 
non-cdata arithmetics/comparisons.</li> +-<li>Drop leftover regs in 'for' iterator assignment, too.</li> +-<li>Fix PHI remarking in SINK pass.</li> +-<li>Don't try to record outermost <tt>pcall()</tt> return to lower frame.</li> +-<li>Add guard for obscure aliasing between open upvalues and SSA slots.</li> +-<li>Remove assumption that <tt>lj_math_random_step()</tt> doesn't clobber FPRs.</li> +-<li>Fix handling of non-numeric strings in arithmetic coercions.</li> +-<li>Fix recording of <tt>select(n, ...)</tt> with off-trace varargs</li> +-<li>Fix install for cross-builds.</li> +-<li>Don't allocate unused 2nd result register in JIT compiler backend.</li> +-<li>Drop marks from replayed instructions when sinking.</li> +-<li>Fix unsinking check.</li> +-<li>Properly handle OOM in <tt>trace_save()</tt>.</li> +-<li>Limit number of arguments given to <tt>io.lines()</tt> and <tt>fp:lines()</tt>.</li> +-<li>Fix narrowing of <tt>TOBIT</tt>.</li> +-<li>OSX: Fix build with recent XCode.</li> +-<li>x86/x64: Don't spill an explicit <tt>REF_BASE</tt> in the IR.</li> +-<li>x86/x64: Fix instruction length decoder.</li> +-<li>x86/x64: Search for exit jumps with instruction length decoder.</li> +-<li>ARM: Fix <tt>BLX</tt> encoding for Thumb interworking calls.</li> +-<li>MIPS: Don't use <tt>RID_GP</tt> as a scratch register.</li> +-<li>MIPS: Fix emitted code for U32 to float conversion.</li> +-<li>MIPS: Backport workaround for compact unwind tables.</li> +-<li>MIPS: Fix cross-endian jit.bcsave.</li> +-<li>MIPS: Fix <tt>BC_ISNEXT</tt> fallback path.</li> +-<li>MIPS: Fix use of ffgccheck delay slots in interpreter.</li> +-<li>FFI: Fix FOLD rules for <tt>int64_t</tt> comparisons.</li> +-<li>FFI: Fix SPLIT pass for <tt>CONV i64.u64</tt>.</li> +-<li>FFI: Fix <tt>ipairs()</tt> recording.</li> +-<li>FFI: Don't propagate qualifiers into subtypes of complex.</li> +-</ul> +- +-<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 — 2015-05-14</h2> +-<ul> +-<li>Fix stack check in narrowing optimization.</li> +-<li>Fix Lua/C API typecheck error for special indexes.</li> +-<li>Fix string to number conversion.</li> +-<li>Fix lexer error for chunks without tokens.</li> +-<li>Don't compile <tt>IR_RETF</tt> after <tt>CALLT</tt> to ff with-side effects.</li> +-<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization in Lua parser.</li> +-<li>Fix corner case in string to number conversion.</li> +-<li>Gracefully handle <tt>lua_error()</tt> for a suspended coroutine.</li> +-<li>Avoid error messages when building with Clang.</li> +-<li>Fix snapshot #0 handling for traces with a stack check on entry.</li> +-<li>Fix fused constant loads under high register pressure.</li> +-<li>Invalidate backpropagation cache after DCE.</li> +-<li>Fix ABC elimination.</li> +-<li>Fix debug info for main chunk of stripped bytecode.</li> +-<li>Fix FOLD rule for <tt>string.sub(s, ...) 
== k</tt>.</li> +-<li>Fix FOLD rule for <tt>STRREF</tt> of <tt>SNEW</tt>.</li> +-<li>Fix frame traversal while searching for error function.</li> +-<li>Prevent GC estimate miscalculation due to buffer growth.</li> +-<li>Prevent adding side traces for stack checks.</li> +-<li>Fix top slot calculation for snapshots with continuations.</li> +-<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li> +-<li>Add PS Vita port.</li> +-<li>Fix compatibility issues with Illumos.</li> +-<li>Fix DragonFly build (unsupported).</li> +-<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li> +-<li>x86: Fix argument checks for <tt>ipairs()</tt> iterator.</li> +-<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX Clang.</li> +-<li>x86: Fix code generation for unused result of <tt>math.random()</tt>.</li> +-<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and <tt>LUAJIT_USE_VALGRIND</tt>.</li> +-<li>x86/x64: Fix argument check for bit shifts.</li> +-<li>x86/x64: Fix code generation for fused test/arith ops.</li> +-<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li> +-<li>PPC: Fix red zone overflow in machine code generation.</li> +-<li>PPC: Don't use <tt>mcrxr</tt> on PPE.</li> +-<li>Various archs: Fix excess stack growth in interpreter.</li> +-<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV num.u32</tt>.</li> +-<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li> +-<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li> +-<li>FFI: Fix initialization of unions of subtypes.</li> +-<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li> +-<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt> metamethod resolution for ctypes.</li> +-<li>FFI: Fix compilation of reference field access.</li> +-<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li> +-<li>FFI: Fix recording of indexing a struct pointer ctype object itself.</li> +-<li>FFI: Allow non-scalar cdata to be compared for equality by address.</li> +-<li>FFI: Fix pseudo type conversions for type punning.</li> +-</ul> +- +-<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 — 2014-03-12</h2> +-<ul> +-<li>Add PS4 port.</li> +-<li>Add support for multilib distro builds.</li> +-<li>Fix OSX build.</li> +-<li>Fix MinGW build.</li> +-<li>Fix Xbox 360 build.</li> +-<li>Improve ULOAD forwarding for open upvalues.</li> +-<li>Fix GC steps threshold handling when called by JIT-compiled code.</li> +-<li>Fix argument checks for <tt>math.deg()</tt> and <tt>math.rad()</tt>.</li> +-<li>Fix <tt>jit.flush(func|true)</tt>.</li> +-<li>Respect <tt>jit.off(func)</tt> when returning to a function, too.</li> +-<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li> +-<li>Fix line number for relocated bytecode after closure fixup</li> +-<li>Fix frame traversal for backtraces.</li> +-<li>Fix ABC elimination.</li> +-<li>Fix handling of redundant PHIs.</li> +-<li>Fix snapshot restore for exit to function header.</li> +-<li>Fix type punning alias analysis for constified pointers</li> +-<li>Fix call unroll checks in the presence of metamethod frames.</li> +-<li>Fix initial maxslot for down-recursive traces.</li> +-<li>Prevent BASE register coalescing if parent uses <tt>IR_RETF</tt>.</li> +-<li>Don't purge modified function from stack slots in <tt>BC_RET</tt>.</li> +-<li>Fix recording of <tt>BC_VARG</tt>.</li> +-<li>Don't access dangling reference to reallocated IR.</li> +-<li>Fix frame depth display for bytecode dump in <tt>-jdump</tt>.</li> +-<li>ARM: Fix register allocation when rematerializing 
FPRs.</li> +-<li>x64: Fix store to upvalue for lightuserdata values.</li> +-<li>FFI: Add missing GC steps for callback argument conversions.</li> +-<li>FFI: Properly unload loaded DLLs.</li> +-<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li> +-<li>FFI/x64: Fix passing of vector arguments to calls.</li> +-<li>FFI: Rehash finalizer table after GC cycle, if needed.</li> +-<li>FFI: Fix <tt>cts->L</tt> for cdata unsinking in snapshot restore.</li> +-</ul> +- +-<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 — 2013-06-03</h2> +-<ul> +-<li>Fix memory access check for fast string interning.</li> +-<li>Fix MSVC intrinsics for older versions.</li> +-<li>Add missing GC steps for <tt>io.*</tt> functions.</li> +-<li>Fix spurious red zone overflows in machine code generation.</li> +-<li>Fix jump-range constrained mcode allocation.</li> +-<li>Inhibit DSE for implicit loads via calls.</li> +-<li>Fix builtin string to number conversion for overflow digits.</li> +-<li>Fix optional argument handling while recording builtins.</li> +-<li>Fix optional argument handling in <tt>table.concat()</tt>.</li> +-<li>Add partial support for building with MingW64 GCC 4.8-SEH.</li> +-<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt> FOLD rule.</li> +-<li>Fix compatibility issues with Illumos.</li> +-<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li> +-<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li> +-<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler code.</li> +-<li>FFI: Fix snapshot substitution in SPLIT pass.</li> +-<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li> +-<li>FFI: Fix tailcall in lowest frame to C function with bool result.</li> +-<li>FFI: Ignore <tt>long</tt> type specifier in <tt>ffi.istype()</tt>.</li> +-<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct returns).</li> +-<li>FFI: Fix calling conventions for ARM hard-float EABI (nested structs).</li> +-<li>FFI: Improve error messages for arithmetic and comparison operators.</li> +-<li>FFI: Insert no-op type conversion for pointer to integer cast.</li> +-<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li> +-<li>FFI: Must sink <tt>XBAR</tt> together with <tt>XSTORE</tt>s.</li> +-<li>FFI: Preserve intermediate string for <tt>const char *</tt> conversion.</li> +-</ul> +- +-<h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 — 2013-02-19</h2> +-<ul> +-<li>Don't clear frame for out-of-memory error.</li> +-<li>Leave hook when resume catches error thrown from hook.</li> +-<li>Add missing GC steps for template table creation.</li> +-<li>Fix discharge order of comparisons in Lua parser.</li> +-<li>Improve buffer handling for <tt>io.read()</tt>.</li> +-<li>OSX: Add support for Mach-O object files to <tt>-b</tt> option.</li> +-<li>Fix PS3 port.</li> +-<li>Fix/enable Xbox 360 port.</li> +-<li>x86/x64: Always mark ref for shift count as non-weak.</li> +-<li>x64: Don't fuse implicitly 32-to-64 extended operands.</li> +-<li>ARM: Fix armhf call argument handling.</li> +-<li>ARM: Fix code generation for integer math.min/math.max.</li> +-<li>PPC/e500: Fix <tt>lj_vm_floor()</tt> for Inf/NaN.</li> +-<li>FFI: Change priority of table initializer variants for structs.</li> +-<li>FFI: Fix code generation for bool call result check on x86/x64.</li> +-<li>FFI: Load FFI library on-demand for bytecode with cdata literals.</li> +-<li>FFI: Fix handling of qualified transparent structs/unions.</li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0">LuaJIT 2.0.0 — 2012-11-08</h2> +-<ul> 
+-<li>Correctness and completeness: +-<ul> +- <li>Fix Android/x86 build.</li> +- <li>Fix recording of equality comparisons with <tt>__eq</tt> metamethods.</li> +- <li>Fix detection of immutable upvalues.</li> +- <li>Replace error with PANIC for callbacks from JIT-compiled code.</li> +- <li>Fix builtin string to number conversion for <tt>INT_MIN</tt>.</li> +- <li>Don't create unneeded array part for template tables.</li> +- <li>Fix <tt>CONV.num.int</tt> sinking.</li> +- <li>Don't propagate implicitly widened number to index metamethods.</li> +- <li>ARM: Fix ordered comparisons of number vs. non-number.</li> +- <li>FFI: Fix code generation for replay of sunk float fields.</li> +- <li>FFI: Fix signedness of bool.</li> +- <li>FFI: Fix recording of bool call result check on x86/x64.</li> +- <li>FFI: Fix stack-adjustment for <tt>__thiscall</tt> callbacks.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta11">LuaJIT 2.0.0-beta11 — 2012-10-16</h2> +-<ul> +-<li>New features: +-<ul> +- <li>Use ARM VFP instructions, if available (build-time detection).</li> +- <li>Add support for ARM hard-float EABI (<tt>armhf</tt>).</li> +- <li>Add PS3 port.</li> +- <li>Add many features from Lua 5.2, e.g. <tt>goto</tt>/labels. +- Refer to <a href="extensions.html#lua52">this list</a>.</li> +- <li>FFI: Add parameterized C types.</li> +- <li>FFI: Add support for copy constructors.</li> +- <li>FFI: Equality comparisons never raise an error (treat as unequal instead).</li> +- <li>FFI: Box all accessed or returned enums.</li> +- <li>FFI: Check for <tt>__new</tt> metamethod when calling a constructor.</li> +- <li>FFI: Handle <tt>__pairs</tt>/<tt>__ipairs</tt> metamethods for cdata objects.</li> +- <li>FFI: Convert <tt>io.*</tt> file handle to <tt>FILE *</tt> pointer (but as a <tt>void *</tt>).</li> +- <li>FFI: Detect and support type punning through unions.</li> +- <li>FFI: Improve various error messages.</li> +-</ul></li> +-<li>Build-system reorganization: +-<ul> +- <li>Reorganize directory layout:<br> +- <tt>lib/*</tt> → <tt>src/jit/*</tt><br> +- <tt>src/buildvm_*.dasc</tt> → <tt>src/vm_*.dasc</tt><br> +- <tt>src/buildvm_*.h</tt> → removed<br> +- <tt>src/buildvm*</tt> → <tt>src/host/*</tt></li> +- <li>Add minified Lua interpreter plus Lua BitOp (<tt>minilua</tt>) to run DynASM.</li> +- <li>Change DynASM bit operations to use Lua BitOp</li> +- <li>Translate only <tt>vm_*.dasc</tt> for detected target architecture.</li> +- <li>Improve target detection for <tt>msvcbuild.bat</tt>.</li> +- <li>Fix build issues on Cygwin and MinGW with optional MSys.</li> +- <li>Handle cross-compiles with FPU/no-FPU or hard-fp/soft-fp ABI mismatch.</li> +- <li>Remove some library functions for no-JIT/no-FFI builds.</li> +- <li>Add uninstall target to top-level Makefile.</li> +-</ul></li> +-<li>Correctness and completeness: +-<ul> +- <li>Preserve snapshot #0 PC for all traces.</li> +- <li>Fix argument checks for <tt>coroutine.create()</tt>.</li> +- <li>Command line prints version and JIT status to <tt>stdout</tt>, not <tt>stderr</tt>.</li> +- <li>Fix userdata <tt>__gc</tt> separations at Lua state close.</li> +- <li>Fix <tt>TDUP</tt> to <tt>HLOAD</tt> forwarding for <tt>LJ_DUALNUM</tt> builds.</li> +- <li>Fix buffer check in bytecode writer.</li> +- <li>Make <tt>os.date()</tt> thread-safe.</li> +- <li>Add missing declarations for MSVC intrinsics.</li> +- <li>Fix dispatch table modifications for return hooks.</li> +- <li>Workaround for MSVC conversion bug (<tt>double</tt> → <tt>uint32_t</tt> → <tt>int32_t</tt>).</li> +- <li>Fix FOLD rule 
<tt>(i-j)-i => 0-j</tt>.</li> +- <li>Never use DWARF unwinder on Windows.</li> +- <li>Fix shrinking of direct mapped blocks in builtin allocator.</li> +- <li>Limit recursion depth in <tt>string.match()</tt> et al.</li> +- <li>Fix late despecialization of <tt>ITERN</tt> after loop has been entered.</li> +- <li>Fix <tt>'f'</tt> and <tt>'L'</tt> options for <tt>debug.getinfo()</tt> and <tt>lua_getinfo()</tt>.</li> +- <li>Fix <tt>package.searchpath()</tt>.</li> +- <li>OSX: Change dylib names to be consistent with other platforms.</li> +- <li>Android: Workaround for broken <tt>sprintf("%g", -0.0)</tt>.</li> +- <li>x86: Remove support for ancient CPUs without <tt>CMOV</tt> (before Pentium Pro).</li> +- <li>x86: Fix register allocation for calls returning register pair.</li> +- <li>x86/x64: Fix fusion of unsigned byte comparisons with swapped operands.</li> +- <li>ARM: Fix <tt>tonumber()</tt> argument check.</li> +- <li>ARM: Fix modulo operator and <tt>math.floor()</tt>/<tt>math.ceil()</tt> for <tt>inf</tt>/<tt>nan</tt>.</li> +- <li>ARM: Invoke SPLIT pass for leftover <tt>IR_TOBIT</tt>.</li> +- <li>ARM: Fix BASE register coalescing.</li> +- <li>PPC: Fix interpreter state setup in callbacks.</li> +- <li>PPC: Fix <tt>string.sub()</tt> range check.</li> +- <li>MIPS: Support generation of MIPS/MIPSEL bytecode object files.</li> +- <li>MIPS: Fix calls to <tt>floor()</tt>/<tt>ceil()</tt><tt>/trunc()</tt>.</li> +- <li>ARM/PPC: Detect more target architecture variants.</li> +- <li>ARM/PPC/e500/MIPS: Fix tailcalls from fast functions, esp. <tt>tostring()</tt>.</li> +- <li>ARM/PPC/MIPS: Fix rematerialization of FP constants.</li> +- <li>FFI: Don't call <tt>FreeLibrary()</tt> on our own EXE/DLL.</li> +- <li>FFI: Resolve metamethods for constructors, too.</li> +- <li>FFI: Properly disable callbacks on iOS (would require executable memory).</li> +- <li>FFI: Fix cdecl string parsing during recording.</li> +- <li>FFI: Show address pointed to for <tt>tostring(ref)</tt>, too.</li> +- <li>FFI: Fix alignment of C call argument/return structure.</li> +- <li>FFI: Initialize all fields of standard types.</li> +- <li>FFI: Fix callback handling when new C types are declared in callback.</li> +- <li>FFI: Fix recording of constructors for pointers.</li> +- <li>FFI: Always resolve metamethods for pointers to structs.</li> +- <li>FFI: Correctly propagate alignment when interning nested types.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +- <li>Add allocation sinking and store sinking optimization.</li> +- <li>Constify immutable upvalues.</li> +- <li>Add builtin string to integer or FP number conversion. Improves cross-platform consistency and correctness.</li> +- <li>Create string hash slots in template tables for non-const values, too. 
Avoids later table resizes.</li> +- <li>Eliminate <tt>HREFK</tt> guard for template table references.</li> +- <li>Add various new FOLD rules.</li> +- <li>Don't use stack unwinding for <tt>lua_yield()</tt> (slow on x64).</li> +- <li>ARM, PPC, MIPS: Improve <tt>XLOAD</tt> operand fusion and register hinting.</li> +- <li>PPC, MIPS: Compile <tt>math.sqrt()</tt> to sqrt instruction, if available.</li> +- <li>FFI: Fold <tt>KPTR</tt> + constant offset in SPLIT pass.</li> +- <li>FFI: Optimize/inline <tt>ffi.copy()</tt> and <tt>ffi.fill()</tt>.</li> +- <li>FFI: Compile and optimize array/struct copies.</li> +- <li>FFI: Compile <tt>ffi.typeof(cdata|ctype)</tt>, <tt>ffi.sizeof()</tt>, <tt>ffi.alignof()</tt>, <tt>ffi.offsetof()</tt> and <tt>ffi.gc()</tt>.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta10">LuaJIT 2.0.0-beta10 — 2012-05-09</h2> +-<ul> +-<li>New features: +-<ul> +-<li>The MIPS of LuaJIT is complete. It requires a CPU conforming to the +-MIPS32 R1 architecture with hardware FPU. O32 hard-fp ABI, +-little-endian or big-endian.</li> +-<li>Auto-detect target arch via cross-compiler. No need for +-<tt>TARGET=arch</tt> anymore.</li> +-<li>Make DynASM compatible with Lua 5.2.</li> +-<li>From Lua 5.2: Try <tt>__tostring</tt> metamethod on non-string error +-messages..</li> +-</ul></li> +-<li>Correctness and completeness: +-<ul> +-<li>Fix parsing of hex literals with exponents.</li> +-<li>Fix bytecode dump for certain number constants.</li> +-<li>Fix argument type in error message for relative arguments.</li> +-<li>Fix argument error handling on Lua stacks without a frame.</li> +-<li>Add missing mcode limit check in assembler backend.</li> +-<li>Fix compilation on OpenBSD.</li> +-<li>Avoid recursive GC steps after GC-triggered trace exit.</li> +-<li>Replace <tt><unwind.h></tt> definitions with our own.</li> +-<li>Fix OSX build issues. Bump minimum required OSX version to 10.4.</li> +-<li>Fix discharge order of comparisons in Lua parser.</li> +-<li>Ensure running <tt>__gc</tt> of userdata created in <tt>__gc</tt> +-at state close.</li> +-<li>Limit number of userdata <tt>__gc</tt> separations at state close.</li> +-<li>Fix bytecode <tt>JMP</tt> slot range when optimizing +-<tt>and</tt>/<tt>or</tt> with constant LHS.</li> +-<li>Fix DSE of <tt>USTORE</tt>.</li> +-<li>Make <tt>lua_concat()</tt> work from C hook with partial frame.</li> +-<li>Add required PHIs for implicit conversions, e.g. 
via <tt>XREF</tt> +-forwarding.</li> +-<li>Add more comparison variants to Valgrind suppressions file.</li> +-<li>Disable loading bytecode with an extra header (BOM or <tt>#!</tt>).</li> +-<li>Fix PHI stack slot syncing.</li> +-<li>ARM: Reorder type/value tests to silence Valgrind.</li> +-<li>ARM: Fix register allocation for <tt>ldrd</tt>-optimized +-<tt>HREFK</tt>.</li> +-<li>ARM: Fix conditional branch fixup for <tt>OBAR</tt>.</li> +-<li>ARM: Invoke SPLIT pass for <tt>double</tt> args in FFI call.</li> +-<li>ARM: Handle all <tt>CALL*</tt> ops with <tt>double</tt> results in +-SPLIT pass.</li> +-<li>ARM: Fix rejoin of <tt>POW</tt> in SPLIT pass.</li> +-<li>ARM: Fix compilation of <tt>math.sinh</tt>, <tt>math.cosh</tt>, +-<tt>math.tanh</tt>.</li> +-<li>ARM, PPC: Avoid pointless arg clearing in <tt>BC_IFUNCF</tt>.</li> +-<li>PPC: Fix resume after yield from hook.</li> +-<li>PPC: Fix argument checking for <tt>rawget()</tt>.</li> +-<li>PPC: Fix fusion of floating-point <tt>XLOAD</tt>/<tt>XSTORE</tt>.</li> +-<li>PPC: Fix <tt>HREFK</tt> code generation for huge tables.</li> +-<li>PPC: Use builtin D-Cache/I-Cache sync code.</li> +-</ul></li> +-<li>FFI library: +-<ul> +-<li>Ignore empty statements in <tt>ffi.cdef()</tt>.</li> +-<li>Ignore number parsing errors while skipping definitions.</li> +-<li>Don't touch frame in callbacks with tailcalls to fast functions.</li> +-<li>Fix library unloading on POSIX systems.</li> +-<li>Finalize cdata before userdata when closing the state.</li> +-<li>Change <tt>ffi.load()</tt> library name resolution for Cygwin.</li> +-<li>Fix resolving of function name redirects on Windows/x86.</li> +-<li>Fix symbol resolving error messages on Windows.</li> +-<li>Fix blacklisting of C functions calling callbacks.</li> +-<li>Fix result type of pointer difference.</li> +-<li>Use correct PC in FFI metamethod error message.</li> +-<li>Allow <tt>'typedef _Bool int BOOL;'</tt> for the Windows API.</li> +-<li>Don't record test for bool result of call, if ignored.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta9">LuaJIT 2.0.0-beta9 — 2011-12-14</h2> +-<ul> +-<li>New features: +-<ul> +-<li>PPC port of LuaJIT is complete. Default is the dual-number port +-(usually faster). Single-number port selectable via <tt>src/Makefile</tt> +-at build time.</li> +-<li>Add FFI callback support.</li> +-<li>Extend <tt>-b</tt> to generate <tt>.c</tt>, <tt>.h</tt> or <tt>.obj/.o</tt> +-files with embedded bytecode.</li> +-<li>Allow loading embedded bytecode with <tt>require()</tt>.</li> +-<li>From Lua 5.2: Change to <tt>'\z'</tt> escape. Reject undefined escape +-sequences.</li> +-</ul></li> +-<li>Correctness and completeness: +-<ul> +-<li>Fix OSX 10.7 build. 
Fix <tt>install_name</tt> and versioning on OSX.</li> +-<li>Fix iOS build.</li> +-<li>Install <tt>dis_arm.lua</tt>, too.</li> +-<li>Mark installed shared library as executable.</li> +-<li>Add debug option to <tt>msvcbuild.bat</tt> and improve error handling.</li> +-<li>Fix data-flow analysis for iterators.</li> +-<li>Fix forced unwinding triggered by external unwinder.</li> +-<li>Record missing <tt>for</tt> loop slot loads (return to lower frame).</li> +-<li>Always use ANSI variants of Windows system functions.</li> +-<li>Fix GC barrier for multi-result table constructor (<tt>TSETM</tt>).</li> +-<li>Fix/add various FOLD rules.</li> +-<li>Add potential PHI for number conversions due to type instability.</li> +-<li>Do not eliminate PHIs only referenced from other PHIs.</li> +-<li>Correctly anchor implicit number to string conversions in Lua/C API.</li> +-<li>Fix various stack limit checks.</li> +-<li>x64: Use thread-safe exceptions for external unwinding (GCC platforms).</li> +-<li>x64: Fix result type of cdata index conversions.</li> +-<li>x64: Fix <tt>math.random()</tt> and <tt>bit.bswap()</tt> code generation.</li> +-<li>x64: Fix <tt>lightuserdata</tt> comparisons.</li> +-<li>x64: Always extend stack-passed arguments to pointer size.</li> +-<li>ARM: Many fixes to code generation backend.</li> +-<li>PPC/e500: Fix dispatch for binop metamethods.</li> +-<li>PPC/e500: Save/restore condition registers when entering/leaving the VM.</li> +-<li>PPC/e500: Fix write barrier in stores of strings to upvalues.</li> +-</ul></li> +-<li>FFI library: +-<ul> +-<li>Fix C comment parsing.</li> +-<li>Fix snapshot optimization for cdata comparisons.</li> +-<li>Fix recording of const/enum lookups in namespaces.</li> +-<li>Fix call argument and return handling for <tt>I8/U8/I16/U16</tt> types.</li> +-<li>Fix unfused loads of float fields.</li> +-<li>Fix <tt>ffi.string()</tt> recording.</li> +-<li>Save <tt>GetLastError()</tt> around <tt>ffi.load()</tt> and symbol +-resolving, too.</li> +-<li>Improve ld script detection in <tt>ffi.load()</tt>.</li> +-<li>Record loads/stores to external variables in namespaces.</li> +-<li>Compile calls to stdcall, fastcall and vararg functions.</li> +-<li>Treat function ctypes like pointers in comparisons.</li> +-<li>Resolve <tt>__call</tt> metamethod for pointers, too.</li> +-<li>Record C function calls with bool return values.</li> +-<li>Record <tt>ffi.errno()</tt>.</li> +-<li>x86: Fix number to <tt>uint32_t</tt> conversion rounding.</li> +-<li>x86: Fix 64 bit arithmetic in assembler backend.</li> +-<li>x64: Fix struct-by-value calling conventions.</li> +-<li>ARM: Ensure invocation of SPLIT pass for float conversions.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +-<li>Display trace types with <tt>-jv</tt> and <tt>-jdump</tt>.</li> +-<li>Record isolated calls. But prefer recording loops over calls.</li> +-<li>Specialize to prototype for non-monomorphic functions. Solves the +-trace-explosion problem for closure-heavy programming styles.</li> +-<li>Always generate a portable <tt>vmdef.lua</tt>. Easier for distros.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta8">LuaJIT 2.0.0-beta8 — 2011-06-23</h2> +-<ul> +-<li>New features: +-<ul> +-<li>Soft-float ARM port of LuaJIT is complete.</li> +-<li>Add support for bytecode loading/saving and <tt>-b</tt> command line +-option.</li> +-<li>From Lua 5.2: <tt>__len</tt> metamethod for tables +-(disabled by default).</li> +-</ul></li> +-<li>Correctness and completeness: +-<ul> +-<li>ARM: Misc. 
fixes for interpreter.</li> +-<li>x86/x64: Fix <tt>bit.*</tt> argument checking in interpreter.</li> +-<li>Catch early out-of-memory in memory allocator initialization.</li> +-<li>Fix data-flow analysis for paths leading to an upvalue close.</li> +-<li>Fix check for missing arguments in <tt>string.format()</tt>.</li> +-<li>Fix Solaris/x86 build (note: not a supported target).</li> +-<li>Fix recording of loops with instable directions in side traces.</li> +-<li>x86/x64: Fix fusion of comparisons with <tt>u8</tt>/<tt>u16</tt> +-<tt>XLOAD</tt>.</li> +-<li>x86/x64: Fix register allocation for variable shifts.</li> +-</ul></li> +-<li>FFI library: +-<ul> +-<li>Add <tt>ffi.errno()</tt>. Save <tt>errno</tt>/<tt>GetLastError()</tt> +-around allocations etc.</li> +-<li>Fix <tt>__gc</tt> for VLA/VLS cdata objects.</li> +-<li>Fix recording of casts from 32 bit cdata pointers to integers.</li> +-<li><tt>tonumber(cdata)</tt> returns <tt>nil</tt> for non-numbers.</li> +-<li>Show address pointed to for <tt>tostring(pointer)</tt>.</li> +-<li>Print <tt>NULL</tt> pointers as <tt>"cdata<... *>: NULL"</tt>.</li> +-<li>Support <tt>__tostring</tt> metamethod for pointers to structs, too.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +-<li>More tuning for loop unrolling heuristics.</li> +-<li>Flatten and compress in-memory debug info (saves ~70%).</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta7">LuaJIT 2.0.0-beta7 — 2011-05-05</h2> +-<ul> +-<li>New features: +-<ul> +-<li>ARM port of the LuaJIT interpreter is complete.</li> +-<li>FFI library: Add <tt>ffi.gc()</tt>, <tt>ffi.metatype()</tt>, +-<tt>ffi.istype()</tt>.</li> +-<li>FFI library: Resolve ld script redirection in <tt>ffi.load()</tt>.</li> +-<li>From Lua 5.2: <tt>package.searchpath()</tt>, <tt>fp:read("*L")</tt>, +-<tt>load(string)</tt>.</li> +-<li>From Lua 5.2, disabled by default: empty statement, +-<tt>table.unpack()</tt>, modified <tt>coroutine.running()</tt>.</li> +-</ul></li> +-<li>Correctness and completeness: +-<ul> +-<li>FFI library: numerous fixes.</li> +-<li>Fix type mismatches in store-to-load forwarding.</li> +-<li>Fix error handling within metamethods.</li> +-<li>Fix <tt>table.maxn()</tt>.</li> +-<li>Improve accuracy of <tt>x^-k</tt> on x64.</li> +-<li>Fix code generation for Intel Atom in x64 mode.</li> +-<li>Fix narrowing of POW.</li> +-<li>Fix recording of retried fast functions.</li> +-<li>Fix code generation for <tt>bit.bnot()</tt> and multiplies.</li> +-<li>Fix error location within cpcall frames.</li> +-<li>Add workaround for old libgcc unwind bug.</li> +-<li>Fix <tt>lua_yield()</tt> and <tt>getmetatable(lightuserdata)</tt> on x64.</li> +-<li>Misc. fixes for PPC/e500 interpreter.</li> +-<li>Fix stack slot updates for down-recursion.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +-<li>Add dual-number mode (int/double) for the VM. 
Enabled for ARM.</li> +-<li>Improve narrowing of arithmetic operators and <tt>for</tt> loops.</li> +-<li>Tune loop unrolling heuristics and increase trace recorder limits.</li> +-<li>Eliminate dead slots in snapshots using bytecode data-flow analysis.</li> +-<li>Avoid phantom stores to proxy tables.</li> +-<li>Optimize lookups in empty proxy tables.</li> +-<li>Improve bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta6">LuaJIT 2.0.0-beta6 — 2011-02-11</h2> +-<ul> +-<li>New features: +-<ul> +-<li>PowerPC/e500v2 port of the LuaJIT interpreter is complete.</li> +-<li>Various minor features from Lua 5.2: Hex escapes in literals, +-<tt>'*'</tt> escape, reversible <tt>string.format("%q",s)</tt>, +-<tt>"%g"</tt> pattern, <tt>table.sort</tt> checks callbacks, +-<tt>os.exit(status|true|false[,close])</tt>.</li> +-<li>Lua 5.2 <tt>__pairs</tt> and <tt>__ipairs</tt> metamethods +-(disabled by default).</li> +-<li>Initial release of the FFI library.</li> +-</ul></li> +-<li>Correctness and completeness: +-<ul> +-<li>Fix <tt>string.format()</tt> for non-finite numbers.</li> +-<li>Fix memory leak when compiled to use the built-in allocator.</li> +-<li>x86/x64: Fix unnecessary resize in <tt>TSETM</tt> bytecode.</li> +-<li>Fix various GC issues with traces and <tt>jit.flush()</tt>.</li> +-<li>x64: Fix fusion of indexes for array references.</li> +-<li>x86/x64: Fix stack overflow handling for coroutine results.</li> +-<li>Enable low-2GB memory allocation on FreeBSD/x64.</li> +-<li>Fix <tt>collectgarbage("count")</tt> result if more than 2GB is in use.</li> +-<li>Fix parsing of hex floats.</li> +-<li>x86/x64: Fix loop branch inversion with trailing +-<tt>HREF+NE/EQ</tt>.</li> +-<li>Add <tt>jit.os</tt> string.</li> +-<li><tt>coroutine.create()</tt> permits running C functions, too.</li> +-<li>Fix OSX build to work with newer ld64 versions.</li> +-<li>Fix bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +-<li>Emit specialized bytecode for <tt>pairs()</tt>/<tt>next()</tt>.</li> +-<li>Improve bytecode coalescing of <tt>nil</tt> constants.</li> +-<li>Compile calls to vararg functions.</li> +-<li>Compile <tt>select()</tt>.</li> +-<li>Improve alias analysis, esp. 
for loads from allocations.</li> +-<li>Tuning of various compiler heuristics.</li> +-<li>Refactor and extend IR conversion instructions.</li> +-<li>x86/x64: Various backend enhancements related to the FFI.</li> +-<li>Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta5">LuaJIT 2.0.0-beta5 — 2010-08-24</h2> +-<ul> +-<li>Correctness and completeness: +-<ul> +-<li>Fix trace exit dispatch to function headers.</li> +-<li>Fix Windows and OSX builds with LUAJIT_DISABLE_JIT.</li> +-<li>Reorganize and fix placement of generated machine code on x64.</li> +-<li>Fix TNEW in x64 interpreter.</li> +-<li>Do not eliminate PHIs for values only referenced from side exits.</li> +-<li>OS-independent canonicalization of strings for non-finite numbers.</li> +-<li>Fix <tt>string.char()</tt> range check on x64.</li> +-<li>Fix <tt>tostring()</tt> resolving within <tt>print()</tt>.</li> +-<li>Fix error handling for <tt>next()</tt>.</li> +-<li>Fix passing of constant arguments to external calls on x64.</li> +-<li>Fix interpreter argument check for two-argument SSE math functions.</li> +-<li>Fix C frame chain corruption caused by <tt>lua_cpcall()</tt>.</li> +-<li>Fix return from <tt>pcall()</tt> within active hook.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +-<li>Replace on-trace GC frame syncing with interpreter exit.</li> +-<li>Improve hash lookup specialization by not removing dead keys during GC.</li> +-<li>Turn traces into true GC objects.</li> +-<li>Avoid starting a GC cycle immediately after library init.</li> +-<li>Add weak guards to improve dead-code elimination.</li> +-<li>Speed up string interning.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta4">LuaJIT 2.0.0-beta4 — 2010-03-28</h2> +-<ul> +-<li>Correctness and completeness: +-<ul> +-<li>Fix precondition for on-trace creation of table keys.</li> +-<li>Fix <tt>{f()}</tt> on x64 when table is resized.</li> +-<li>Fix folding of ordered comparisons with same references.</li> +-<li>Fix snapshot restores for multi-result bytecodes.</li> +-<li>Fix potential hang when recording bytecode with nested closures.</li> +-<li>Fix recording of <tt>getmetatable()</tt>, <tt>tonumber()</tt> and bad argument types.</li> +-<li>Fix SLOAD fusion across returns to lower frames.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +-<li>Add array bounds check elimination. <tt>-Oabc</tt> is enabled by default.</li> +-<li>More tuning for x64, e.g. 
smaller table objects.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta3">LuaJIT 2.0.0-beta3 — 2010-03-07</h2> +-<ul> +-<li>LuaJIT x64 port: +-<ul> +-<li>Port integrated memory allocator to Linux/x64, Windows/x64 and OSX/x64.</li> +-<li>Port interpreter and JIT compiler to x64.</li> +-<li>Port DynASM to x64.</li> +-<li>Many 32/64 bit cleanups in the VM.</li> +-<li>Allow building the interpreter with either x87 or SSE2 arithmetics.</li> +-<li>Add external unwinding and C++ exception interop (default on x64).</li> +-</ul></li> +-<li>Correctness and completeness: +-<ul> +-<li>Fix constructor bytecode generation for certain conditional values.</li> +-<li>Fix some cases of ordered string comparisons.</li> +-<li>Fix <tt>lua_tocfunction()</tt>.</li> +-<li>Fix cutoff register in JMP bytecode for some conditional expressions.</li> +-<li>Fix PHI marking algorithm for references from variant slots.</li> +-<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li> +-<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li> +-<li>Drive the GC forward on string allocations in the parser.</li> +-<li>Implement call/return hooks (zero-cost if disabled).</li> +-<li>Implement yield from C hooks.</li> +-<li>Disable JIT compiler on older non-SSE2 CPUs instead of aborting.</li> +-</ul></li> +-<li>Structural and performance enhancements: +-<ul> +-<li>Compile recursive code (tail-, up- and down-recursion).</li> +-<li>Improve heuristics for bytecode penalties and blacklisting.</li> +-<li>Split CALL/FUNC recording and clean up fast function call semantics.</li> +-<li>Major redesign of internal function call handling.</li> +-<li>Improve FOR loop const specialization and integerness checks.</li> +-<li>Switch to pre-initialized stacks. Avoid frame-clearing.</li> +-<li>Colocation of prototypes and related data: bytecode, constants, debug info.</li> +-<li>Cleanup parser and streamline bytecode generation.</li> +-<li>Add support for weak IR references to register allocator.</li> +-<li>Switch to compressed, extensible snapshots.</li> +-<li>Compile returns to frames below the start frame.</li> +-<li>Improve alias analysis of upvalues using a disambiguation hash value.</li> +-<li>Compile floor/ceil/trunc to SSE2 helper calls or SSE4.1 instructions.</li> +-<li>Add generic C call handling to IR and backend.</li> +-<li>Improve KNUM fuse vs. load heuristics.</li> +-<li>Compile various <tt>io.*()</tt> functions.</li> +-<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>, <tt>math.tanh()</tt> +-and <tt>math.random()</tt>.</li> +-</ul></li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 — 2009-11-09</h2> +-<ul> +-<li>Reorganize build system. Build static+shared library on POSIX.</li> +-<li>Allow C++ exception conversion on all platforms +-using a wrapper function.</li> +-<li>Automatically catch C++ exceptions and rethrow Lua error +-(DWARF2 only).</li> +-<li>Check for the correct x87 FPU precision at strategic points.</li> +-<li>Always use wrappers for libm functions.</li> +-<li>Resurrect metamethod name strings before copying them.</li> +-<li>Mark current trace, even if compiler is idle.</li> +-<li>Ensure FILE metatable is created only once.</li> +-<li>Fix type comparisons when different integer types are involved.</li> +-<li>Fix <tt>getmetatable()</tt> recording.</li> +-<li>Fix TDUP with dead keys in template table.</li> +-<li><tt>jit.flush(tr)</tt> returns status. 
+-Prevent manual flush of a trace that's still linked.</li> +-<li>Improve register allocation heuristics for invariant references.</li> +-<li>Compile the push/pop variants of <tt>table.insert()</tt> and +-<tt>table.remove()</tt>.</li> +-<li>Compatibility with MSVC <tt>link /debug</tt>.</li> +-<li>Fix <tt>lua_iscfunction()</tt>.</li> +-<li>Fix <tt>math.random()</tt> when compiled with <tt>-fpic</tt> (OSX).</li> +-<li>Fix <tt>table.maxn()</tt>.</li> +-<li>Bump <tt>MACOSX_DEPLOYMENT_TARGET</tt> to <tt>10.4</tt></li> +-<li><tt>luaL_check*()</tt> and <tt>luaL_opt*()</tt> now support +-negative arguments, too.<br> +-This matches the behavior of Lua 5.1, but not the specification.</li> +-</ul> +- +-<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 — 2009-10-31</h2> +-<ul> +-<li>This is the first public release of LuaJIT 2.0.</li> +-<li>The whole VM has been rewritten from the ground up, so there's +-no point in listing differences over earlier versions.</li> +-</ul> +-</div> +-<br class="flush"> +-</div> +-<div id="foot"> +-<hr class="hide"> +-Copyright © 2005-2017 Mike Pall +-<span class="noprint"> +-· +-<a href="contact.html">Contact</a> +-</span> +-</div> +-</body> +-</html> +diff --git a/doc/contact.html b/doc/contact.html +index fe4751c0..c253a08b 100644 +--- a/doc/contact.html ++++ b/doc/contact.html +@@ -1,17 +1,16 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Contact</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Contact</h1> +@@ -20,7 +19,7 @@ + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -38,6 +37,8 @@ + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -46,28 +47,23 @@ + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> + <p> + If you want to report bugs, propose fixes or suggest enhancements, + please use the +-<a href="https://github.com/LuaJIT/LuaJIT/issues">GitHub issue tracker</a>. 
++<a href="https://github.com/LuaJIT/LuaJIT/issues"><span class="ext">»</span> GitHub issue tracker</a>. + </p> + <p> + Please send general questions to the +-<a href="http://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a>. ++<a href="https://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a>. + </p> + <p> + You can also send any questions you have directly to me: +@@ -93,7 +89,7 @@ xD("fyZKB8xv"FJytmz8.KAB0u52D") + <h2>Copyright</h2> + <p> + All documentation is +-Copyright © 2005-2017 Mike Pall. ++Copyright © 2005-2021 Mike Pall. + </p> + + +@@ -101,7 +97,7 @@ Copyright © 2005-2017 Mike Pall. + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html +new file mode 100644 +index 00000000..63c2efe3 +--- /dev/null ++++ b/doc/ext_buffer.html +@@ -0,0 +1,693 @@ ++<!DOCTYPE html> ++<html> ++<head> ++<title>String Buffer Library</title> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> ++<meta name="Language" content="en"> ++<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> ++<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> ++<style type="text/css"> ++.lib { ++ vertical-align: middle; ++ margin-left: 5px; ++ padding: 0 5px; ++ font-size: 60%; ++ border-radius: 5px; ++ background: #c5d5ff; ++ color: #000; ++} ++</style> ++</head> ++<body> ++<div id="site"> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++</div> ++<div id="head"> ++<h1>String Buffer Library</h1> ++</div> ++<div id="nav"> ++<ul><li> ++<a href="luajit.html">LuaJIT</a> ++<ul><li> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> ++</li><li> ++<a href="install.html">Installation</a> ++</li><li> ++<a href="running.html">Running</a> ++</li></ul> ++</li><li> ++<a href="extensions.html">Extensions</a> ++<ul><li> ++<a href="ext_ffi.html">FFI Library</a> ++<ul><li> ++<a href="ext_ffi_tutorial.html">FFI Tutorial</a> ++</li><li> ++<a href="ext_ffi_api.html">ffi.* API</a> ++</li><li> ++<a href="ext_ffi_semantics.html">FFI Semantics</a> ++</li></ul> ++</li><li> ++<a class="current" href="ext_buffer.html">String Buffers</a> ++</li><li> ++<a href="ext_jit.html">jit.* Library</a> ++</li><li> ++<a href="ext_c_api.html">Lua/C API</a> ++</li><li> ++<a href="ext_profiler.html">Profiler</a> ++</li></ul> ++</li><li> ++<a href="status.html">Status</a> ++</li><li> ++<a href="faq.html">FAQ</a> ++</li><li> ++<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> ++</li><li> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++</li></ul> ++</div> ++<div id="main"> ++<p> ++The string buffer library allows <b>high-performance manipulation of ++string-like data</b>. ++</p> ++<p> ++Unlike Lua strings, which are constants, string buffers are ++<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data ++can be stored, formatted and encoded into a string buffer and later ++converted, extracted or decoded. ++</p> ++<p> ++The convenient string buffer API simplifies common string manipulation ++tasks, that would otherwise require creating many intermediate strings. ++String buffers improve performance by eliminating redundant memory ++copies, object creation, string interning and garbage collection ++overhead. 
In conjunction with the FFI library, they allow zero-copy ++operations. ++</p> ++<p> ++The string buffer library also includes a high-performance ++<a href="#serialize">serializer</a> for Lua objects. ++</p> ++ ++<h2 id="wip" style="color:#ff0000">Work in Progress</h2> ++<p> ++<b style="color:#ff0000">This library is a work in progress. More ++functionality will be added soon.</b> ++</p> ++ ++<h2 id="use">Using the String Buffer Library</h2> ++<p> ++The string buffer library is built into LuaJIT by default, but it's not ++loaded by default. Add this to the start of every Lua file that needs ++one of its functions: ++</p> ++<pre class="code"> ++local buffer = require("string.buffer") ++</pre> ++<p> ++The convention for the syntax shown on this page is that <tt>buffer</tt> ++refers to the buffer library and <tt>buf</tt> refers to an individual ++buffer object. ++</p> ++<p> ++Please note the difference between a Lua function call, e.g. ++<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g. ++<tt>buf:reset()</tt> (with a colon). ++</p> ++ ++<h3 id="buffer_object">Buffer Objects</h3> ++<p> ++A buffer object is a garbage-collected Lua object. After creation with ++<tt>buffer.new()</tt>, it can (and should) be reused for many operations. ++When the last reference to a buffer object is gone, it will eventually ++be freed by the garbage collector, along with the allocated buffer ++space. ++</p> ++<p> ++Buffers operate like a FIFO (first-in first-out) data structure. Data ++can be appended (written) to the end of the buffer and consumed (read) ++from the front of the buffer. These operations may be freely mixed. ++</p> ++<p> ++The buffer space that holds the characters is managed automatically ++— it grows as needed and already consumed space is recycled. Use ++<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more ++control. ++</p> ++<p> ++The maximum size of a single buffer is the same as the maximum size of a ++Lua string, which is slightly below two gigabytes. For huge data sizes, ++neither strings nor buffers are the right data structure — use the ++FFI library to directly map memory or files up to the virtual memory ++limit of your OS. ++</p> ++ ++<h3 id="buffer_overview">Buffer Method Overview</h3> ++<ul> ++<li> ++The <tt>buf:put*()</tt>-like methods append (write) characters to the ++end of the buffer. ++</li> ++<li> ++The <tt>buf:get*()</tt>-like methods consume (read) characters from the ++front of the buffer. ++</li> ++<li> ++Other methods, like <tt>buf:tostring()</tt> only read the buffer ++contents, but don't change the buffer. ++</li> ++<li> ++The <tt>buf:set()</tt> method allows zero-copy consumption of a string ++or an FFI cdata object as a buffer. ++</li> ++<li> ++The FFI-specific methods allow zero-copy read/write-style operations or ++modifying the buffer contents in-place. Please check the ++<a href="#ffi_caveats">FFI caveats</a> below, too. ++</li> ++<li> ++Methods that don't need to return anything specific, return the buffer ++object itself as a convenience. This allows method chaining, e.g.: ++<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt> ++</li> ++</ul> ++ ++<h2 id="create">Buffer Creation and Management</h2> ++ ++<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br> ++local buf = buffer.new([options])</tt></h3> ++<p> ++Creates a new buffer object. ++</p> ++<p> ++The optional <tt>size</tt> argument ensures a minimum initial buffer ++size.
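For illustration only, a minimal sketch of the FIFO behaviour described under "Buffer Objects" above, using nothing but the documented buffer.new(), put() and get() calls; the initial size of 256 is an arbitrary hint, not a requirement:

local buffer = require("string.buffer")
local buf = buffer.new(256)        -- optional minimum initial size
buf:put("Hello", " ", "world")     -- append (write) to the end
print(buf:get(5))                  --> Hello    (consumed from the front)
print(buf:get())                   --> " world" (the remaining data)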
This is strictly an optimization when the required buffer size is ++known beforehand. The buffer space will grow as needed, in any case. ++</p> ++<p> ++The optional table <tt>options</tt> sets various ++<a href="#serialize_options">serialization options</a>. ++</p> ++ ++<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3> ++<p> ++Reset (empty) the buffer. The allocated buffer space is not freed and ++may be reused. ++</p> ++ ++<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3> ++<p> ++The buffer space of the buffer object is freed. The object itself ++remains intact, empty and may be reused. ++</p> ++<p> ++Note: you normally don't need to use this method. The garbage collector ++automatically frees the buffer space, when the buffer object is ++collected. Use this method, if you need to free the associated memory ++immediately. ++</p> ++ ++<h2 id="write">Buffer Writers</h2> ++ ++<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3> ++<p> ++Appends a string <tt>str</tt>, a number <tt>num</tt> or any object ++<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer. ++Multiple arguments are appended in the given order. ++</p> ++<p> ++Appending a buffer to a buffer is possible and short-circuited ++internally. But it still involves a copy. Better combine the buffer ++writes to use a single buffer. ++</p> ++ ++<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3> ++<p> ++Appends the formatted arguments to the buffer. The <tt>format</tt> ++string supports the same options as <tt>string.format()</tt>. ++</p> ++ ++<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3> ++<p> ++Appends the given <tt>len</tt> number of bytes from the memory pointed ++to by the FFI <tt>cdata</tt> object to the buffer. The object needs to ++be convertible to a (constant) pointer. ++</p> ++ ++<h3 id="buffer_set"><tt>buf = buf:set(str)<br> ++buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3> ++<p> ++This method allows zero-copy consumption of a string or an FFI cdata ++object as a buffer. It stores a reference to the passed string ++<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer ++space originally allocated is freed. This is <i>not</i> an append ++operation, unlike the <tt>buf:put*()</tt> methods. ++</p> ++<p> ++After calling this method, the buffer behaves as if ++<tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata, len)</tt> ++had been called. However, the data is only referenced and not copied, as ++long as the buffer is only consumed. ++</p> ++<p> ++In case the buffer is written to later on, the referenced data is copied ++and the object reference is removed (copy-on-write semantics). ++</p> ++<p> ++The stored reference is an anchor for the garbage collector and keeps the ++originally passed string or FFI cdata object alive. ++</p> ++ ++<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br> ++<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3> ++<p> ++The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of ++write space in the buffer. It returns an <tt>uint8_t *</tt> FFI ++cdata pointer <tt>ptr</tt> that points to this space. ++</p> ++<p> ++The available length in bytes is returned in <tt>len</tt>. This is at ++least <tt>size</tt> bytes, but may be more to facilitate efficient ++buffer growth. You can either make use of the additional space or ignore ++<tt>len</tt> and only use <tt>size</tt> bytes. 
++</p> ++<p> ++The <tt>commit</tt> method appends the <tt>used</tt> bytes of the ++previously returned write space to the buffer data. ++</p> ++<p> ++This pair of methods allows zero-copy use of C read-style APIs: ++</p> ++<pre class="code"> ++local MIN_SIZE = 65536 ++repeat ++ local ptr, len = buf:reserve(MIN_SIZE) ++ local n = C.read(fd, ptr, len) ++ if n == 0 then break end -- EOF. ++ if n < 0 then error("read error") end ++ buf:commit(n) ++until false ++</pre> ++<p> ++The reserved write space is <i>not</i> initialized. At least the ++<tt>used</tt> bytes <b>must</b> be written to before calling the ++<tt>commit</tt> method. There's no need to call the <tt>commit</tt> ++method, if nothing is added to the buffer (e.g. on error). ++</p> ++ ++<h2 id="read">Buffer Readers</h2> ++ ++<h3 id="buffer_length"><tt>len = #buf</tt></h3> ++<p> ++Returns the current length of the buffer data in bytes. ++</p> ++ ++<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3> ++<p> ++The Lua concatenation operator <tt>..</tt> also accepts buffers, just ++like strings or numbers. It always returns a string and not a buffer. ++</p> ++<p> ++Note that although this is supported for convenience, this thwarts one ++of the main reasons to use buffers, which is to avoid string ++allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>. ++</p> ++<p> ++Mixing this with unrelated objects that have a <tt>__concat</tt> ++metamethod may not work, since these probably only expect strings. ++</p> ++ ++<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3> ++<p> ++Skips (consumes) <tt>len</tt> bytes from the buffer up to the current ++length of the buffer data. ++</p> ++ ++<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3> ++<p> ++Consumes the buffer data and returns one or more strings. If called ++without arguments, the whole buffer data is consumed. If called with a ++number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument ++consumes the remaining buffer space (this only makes sense as the last ++argument). Multiple arguments consume the buffer data in the given ++order. ++</p> ++<p> ++Note: a zero length or no remaining buffer data returns an empty string ++and not <tt>nil</tt>. ++</p> ++ ++<h3 id="buffer_tostring"><tt>str = buf:tostring()<br> ++str = tostring(buf)</tt></h3> ++<p> ++Creates a string from the buffer data, but doesn't consume it. The ++buffer remains unchanged. ++</p> ++<p> ++Buffer objects also define a <tt>__tostring</tt> metamethod. This means ++buffers can be passed to the global <tt>tostring()</tt> function and ++many other functions that accept this in place of strings. The important ++internal uses in functions like <tt>io.write()</tt> are short-circuited ++to avoid the creation of an intermediate string object. ++</p> ++ ++<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3> ++<p> ++Returns an <tt>uint8_t *</tt> FFI cdata pointer <tt>ptr</tt> that ++points to the buffer data. The length of the buffer data in bytes is ++returned in <tt>len</tt>. ++</p> ++<p> ++The returned pointer can be directly passed to C functions that expect a ++buffer and a length. You can also do bytewise reads ++(<tt>local x = ptr[i]</tt>) or writes ++(<tt>ptr[i] = 0x40</tt>) of the buffer data. 
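Before the zero-copy write loop shown next, a plain-Lua sketch of the reader methods documented above (#buf, tostring(), skip() and get()), assuming only the behaviour described on this page:

local buffer = require("string.buffer")
local buf = buffer.new():put("abcdef")  -- put() returns the buffer, so calls chain
print(#buf)                   --> 6
print(buf:tostring())         --> abcdef   (peek only, nothing consumed)
buf:skip(2)                   -- drop "ab"
print(buf:get(3), buf:get())  --> cde   f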
++</p> ++<p> ++In conjunction with the <tt>skip</tt> method, this allows zero-copy use ++of C write-style APIs: ++</p> ++<pre class="code"> ++repeat ++ local ptr, len = buf:ref() ++ if len == 0 then break end ++ local n = C.write(fd, ptr, len) ++ if n < 0 then error("write error") end ++ buf:skip(n) ++until n >= len ++</pre> ++<p> ++Unlike Lua strings, buffer data is <i>not</i> implicitly ++zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that ++expect zero-terminated strings. If you're not using <tt>len</tt>, then ++you're doing something wrong. ++</p> ++ ++<h2 id="serialize">Serialization of Lua Objects</h2> ++<p> ++The following functions and methods allow <b>high-speed serialization</b> ++(encoding) of a Lua object into a string and decoding it back to a Lua ++object. This allows convenient storage and transport of <b>structured ++data</b>. ++</p> ++<p> ++The encoded data is in an <a href="#serialize_format">internal binary ++format</a>. The data can be stored in files, binary-transparent ++databases or transmitted to other LuaJIT instances across threads, ++processes or networks. ++</p> ++<p> ++Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or ++server-class system, even when serializing many small objects. Decoding ++speed is mostly constrained by object creation cost. ++</p> ++<p> ++The serializer handles most Lua types, common FFI number types and ++nested structures. Functions, thread objects, other FFI cdata and full ++userdata cannot be serialized (yet). ++</p> ++<p> ++The encoder serializes nested structures as trees. Multiple references ++to a single object will be stored separately and create distinct objects ++after decoding. Circular references cause an error. ++</p> ++ ++<h3 id="serialize_methods">Serialization Functions and Methods</h3> ++ ++<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br> ++buf = buf:encode(obj)</tt></h3> ++<p> ++Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone ++function returns a string <tt>str</tt>. The buffer method appends the ++encoding to the buffer. ++</p> ++<p> ++<tt>obj</tt> can be any of the supported Lua types — it doesn't ++need to be a Lua table. ++</p> ++<p> ++This function may throw an error when attempting to serialize ++unsupported object types, circular references or deeply nested tables. ++</p> ++ ++<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br> ++obj = buf:decode()</tt></h3> ++<p> ++The stand-alone function de-serializes (decodes) the string ++<tt>str</tt>, the buffer method de-serializes one object from the ++buffer. Both return a Lua object <tt>obj</tt>. ++</p> ++<p> ++The returned object may be any of the supported Lua types — ++even <tt>nil</tt>. ++</p> ++<p> ++This function may throw an error when fed with malformed or incomplete ++encoded data. The stand-alone function throws when there's left-over ++data after decoding a single top-level object. The buffer method leaves ++any left-over data in the buffer. ++</p> ++ ++<h3 id="serialize_options">Serialization Options</h3> ++<p> ++The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain ++the following members (all optional): ++</p> ++<ul> ++<li> ++<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that ++commonly occur as table keys of objects you are serializing. These keys ++are compactly encoded as indexes during serialization. A well chosen ++dictionary saves space and improves serialization performance. 
++</li> ++<li> ++<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b> ++for the table objects you are serializing. ++</li> ++</ul> ++<p> ++<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs ++to be an array of tables. Both starting at index 1 and without holes (no ++<tt>nil</tt> inbetween). The tables are anchored in the buffer object and ++internally modified into a two-way index (don't do this yourself, just pass ++a plain array). The tables must not be modified after they have been passed ++to <tt>buffer.new()</tt>. ++</p> ++<p> ++The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and ++decoder must be the same. Put the most common entries at the front. Extend ++at the end to ensure backwards-compatibility — older encodings can ++then still be read. You may also set some indexes to <tt>false</tt> to ++explicitly drop backwards-compatibility. Old encodings that use these ++indexes will throw an error when decoded. ++</p> ++<p> ++Metatables that are not found in the <tt>metatable</tt> dictionary are ++ignored when encoding. Decoding returns a table with a <tt>nil</tt> ++metatable. ++</p> ++<p> ++Note: parsing and preparation of the options table is somewhat ++expensive. Create a buffer object only once and recycle it for multiple ++uses. Avoid mixing encoder and decoder buffers, since the ++<tt>buf:set()</tt> method frees the already allocated buffer space: ++</p> ++<pre class="code"> ++local options = { ++ dict = { "commonly", "used", "string", "keys" }, ++} ++local buf_enc = buffer.new(options) ++local buf_dec = buffer.new(options) ++ ++local function encode(obj) ++ return buf_enc:reset():encode(obj):get() ++end ++ ++local function decode(str) ++ return buf_dec:set(str):decode() ++end ++</pre> ++ ++<h3 id="serialize_stream">Streaming Serialization</h3> ++<p> ++In some contexts, it's desirable to do piecewise serialization of large ++datasets, also known as <i>streaming</i>. ++</p> ++<p> ++This serialization format can be safely concatenated and supports streaming. ++Multiple encodings can simply be appended to a buffer and later decoded ++individually: ++</p> ++<pre class="code"> ++local buf = buffer.new() ++buf:encode(obj1) ++buf:encode(obj2) ++local copy1 = buf:decode() ++local copy2 = buf:decode() ++</pre> ++<p> ++Here's how to iterate over a stream: ++</p> ++<pre class="code"> ++while #buf ~= 0 do ++ local obj = buf:decode() ++ -- Do something with obj. ++end ++</pre> ++<p> ++Since the serialization format doesn't prepend a length to its encoding, ++network applications may need to transmit the length, too. ++</p> ++ ++<h3 id="serialize_format">Serialization Format Specification</h3> ++<p> ++This serialization format is designed for <b>internal use</b> by LuaJIT ++applications. Serialized data is upwards-compatible and portable across ++all supported LuaJIT platforms. ++</p> ++<p> ++It's an <b>8-bit binary format</b> and not human-readable. It uses e.g. ++embedded zeroes and stores embedded Lua string objects unmodified, which ++are 8-bit-clean, too. Encoded data can be safely concatenated for ++streaming and later decoded one top-level object at a time. ++</p> ++<p> ++The encoding is reasonably compact, but tuned for maximum performance, ++not for minimum space usage. It compresses well with any of the common ++byte-oriented data compression algorithms. 
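As a worked example, assuming the encoder behaves exactly as the grammar below specifies: a boolean serializes to a single byte, a short string to a length-prefixed byte sequence, and the two encodings can be concatenated and decoded back one object at a time:

local buffer = require("string.buffer")
local s = buffer.encode(true) .. buffer.encode("hi")
-- true -> 0x02                and  "hi" -> 0x20+2, 'h', 'i'   (per the grammar below)
print(s:byte(1, -1))               --> 2  34  104  105
local buf = buffer.new():set(s)
print(buf:decode(), buf:decode())  --> true  hi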
++</p> ++<p> ++Although documented here for reference, this format is explicitly ++<b>not</b> intended to be a 'public standard' for structured data ++interchange across computer languages (like JSON or MessagePack). Please ++do not use it as such. ++</p> ++<p> ++The specification is given below as a context-free grammar with a ++top-level <tt>object</tt> as the starting point. Alternatives are ++separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats. ++Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are ++either plain hex numbers, encoded as bytes, or have a <tt>.format</tt> ++suffix. ++</p> ++<pre> ++object → nil | false | true ++ | null | lightud32 | lightud64 ++ | int | num | tab | tab_mt ++ | int64 | uint64 | complex ++ | string ++ ++nil → 0x00 ++false → 0x01 ++true → 0x02 ++ ++null → 0x03 // NULL lightuserdata ++lightud32 → 0x04 data.I // 32 bit lightuserdata ++lightud64 → 0x05 data.L // 64 bit lightuserdata ++ ++int → 0x06 int.I // int32_t ++num → 0x07 double.L ++ ++tab → 0x08 // Empty table ++ | 0x09 h.U h*{object object} // Key/value hash ++ | 0x0a a.U a*object // 0-based array ++ | 0x0b a.U a*object h.U h*{object object} // Mixed ++ | 0x0c a.U (a-1)*object // 1-based array ++ | 0x0d a.U (a-1)*object h.U h*{object object} // Mixed ++tab_mt → 0x0e (index-1).U tab // Metatable dict entry ++ ++int64 → 0x10 int.L // FFI int64_t ++uint64 → 0x11 uint.L // FFI uint64_t ++complex → 0x12 re.L im.L // FFI complex ++ ++string → (0x20+len).U len*char.B ++ | 0x0f (index-1).U // String dict entry ++ ++.B = 8 bit ++.I = 32 bit little-endian ++.L = 64 bit little-endian ++.U = prefix-encoded 32 bit unsigned number n: ++ 0x00..0xdf → n.B ++ 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B ++ 0x1fe0.. → 0xff n.I ++</pre> ++ ++<h2 id="error">Error handling</h2> ++<p> ++Many of the buffer methods can throw an error. Out-of-memory or usage ++errors are best caught with an outer wrapper for larger parts of code. ++There's not much one can do after that, anyway. ++</p> ++<p> ++OTOH you may want to catch some errors individually. Buffer methods need ++to receive the buffer object as the first argument. The Lua colon-syntax ++<tt>obj:method()</tt> does that implicitly. But to wrap a method with ++<tt>pcall()</tt>, the arguments need to be passed like this: ++</p> ++<pre class="code"> ++local ok, err = pcall(buf.encode, buf, obj) ++if not ok then ++ -- Handle error in err. ++end ++</pre> ++ ++<h2 id="ffi_caveats">FFI caveats</h2> ++<p> ++The string buffer library has been designed to work well together with ++the FFI library. But due to the low-level nature of the FFI library, ++some care needs to be taken: ++</p> ++<p> ++First, please remember that FFI pointers are zero-indexed. The space ++returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the ++returned pointer and ends before <tt>len</tt> bytes after that. ++</p> ++<p> ++I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index ++is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid ++index at all. The returned pointer may even be <tt>NULL</tt>. ++</p> ++<p> ++The space pointed to by the returned pointer is only valid as long as ++the buffer is not modified in any way (neither append, nor consume, nor ++reset, etc.). The pointer is also not a GC anchor for the buffer object ++itself. ++</p> ++<p> ++Buffer data is only guaranteed to be byte-aligned. Casting the returned ++pointer to a data type with higher alignment may cause unaligned ++accesses. 
It depends on the CPU architecture whether this is allowed or ++not (it's always OK on x86/x64 and mostly OK on other modern ++architectures). ++</p> ++<p> ++FFI pointers or references do not count as GC anchors for an underlying ++object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is ++anchored by <tt>buf:set(array, len)</tt>, but not by ++<tt>buf:set(array+offset, len)</tt>. The addition of the offset ++creates a new pointer, even when the offset is zero. In this case, you ++need to make sure there's still a reference to the original array as ++long as its contents are in use by the buffer. ++</p> ++<p> ++Even though each LuaJIT VM instance is single-threaded (but you can ++create multiple VMs), FFI data structures can be accessed concurrently. ++Be careful when reading/writing FFI cdata from/to buffers to avoid ++concurrent accesses or modifications. In particular, the memory ++referenced by <tt>buf:set(cdata, len)</tt> must not be modified ++while buffer readers are working on it. Shared, but read-only memory ++mappings of files are OK, but only if the file does not change. ++</p> ++<br class="flush"> ++</div> ++<div id="foot"> ++<hr class="hide"> ++Copyright © 2005-2021 ++<span class="noprint"> ++· ++<a href="contact.html">Contact</a> ++</span> ++</div> ++</body> ++</html> +diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html +index ad462c63..9f1ad212 100644 +--- a/doc/ext_c_api.html ++++ b/doc/ext_c_api.html +@@ -1,17 +1,16 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Lua/C API Extensions</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Lua/C API Extensions</h1> +@@ -20,7 +19,7 @@ + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -38,6 +37,8 @@ + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a class="current" href="ext_c_api.html">Lua/C API</a> +@@ -46,17 +47,12 @@ + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -91,8 +87,8 @@ other 
Lua/C API functions). + </p> + <p> + The third argument specifies the mode, which is 'or'ed with a flag. +-The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on, +-<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or ++The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off, ++<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or + <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code. + </p> + <p> +@@ -179,7 +175,7 @@ Also note that this mechanism is not without overhead. + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html +index 5e1daaf5..b934dc78 100644 +--- a/doc/ext_ffi.html ++++ b/doc/ext_ffi.html +@@ -1,17 +1,16 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>FFI Library</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>FFI Library</h1> +@@ -20,7 +19,7 @@ + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -38,6 +37,8 @@ + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -46,17 +47,12 @@ + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -322,7 +318,7 @@ without undue conversion penalties. 
+ </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html +index 91af2e1d..061cc42a 100644 +--- a/doc/ext_ffi_api.html ++++ b/doc/ext_ffi_api.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>ffi.* API Functions</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -16,7 +15,7 @@ td.abiparam { font-weight: bold; width: 6em; } + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1><tt>ffi.*</tt> API Functions</h1> +@@ -25,7 +24,7 @@ td.abiparam { font-weight: bold; width: 6em; } + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -43,6 +42,8 @@ td.abiparam { font-weight: bold; width: 6em; } + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -51,17 +52,12 @@ td.abiparam { font-weight: bold; width: 6em; } + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -261,7 +257,7 @@ contents of an <tt>__index</tt> table (if any) may be modified + afterwards. The associated metatable automatically applies to all uses + of this type, no matter how the objects are created or where they + originate from. Note that pre-defined operations on types have +-precedence (e.g. declared field names cannot be overriden). ++precedence (e.g. declared field names cannot be overridden). + </p> + <p> + All standard Lua metamethods are implemented. These are called directly, +@@ -469,6 +465,8 @@ otherwise. The following parameters are currently defined: + <tr class="odd"> + <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> + <tr class="even"> ++<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr> ++<tr class="odd"> + <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr> + </table> + +@@ -560,7 +558,7 @@ named <tt>i</tt>. 
+ </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html +index 800b6b18..fef39c32 100644 +--- a/doc/ext_ffi_semantics.html ++++ b/doc/ext_ffi_semantics.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>FFI Semantics</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -16,7 +15,7 @@ td.convop { font-style: italic; width: 40%; } + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>FFI Semantics</h1> +@@ -25,7 +24,7 @@ td.convop { font-style: italic; width: 40%; } + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -43,6 +42,8 @@ td.convop { font-style: italic; width: 40%; } + <a class="current" href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -51,17 +52,12 @@ td.convop { font-style: italic; width: 40%; } + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -678,7 +674,7 @@ through unions is explicitly detected and allowed. + <a href="ext_ffi_api.html#ffi_new">constructor</a>. This is equivalent + to <tt>ffi.new(ct, ...)</tt>, unless a <tt>__new</tt> metamethod is + defined. The <tt>__new</tt> metamethod is called with the ctype object +-plus any other arguments passed to the contructor. Note that you have to ++plus any other arguments passed to the constructor. Note that you have to + use <tt>ffi.new</tt> inside of it, since calling <tt>ct(...)</tt> would + cause infinite recursion.</li> + +@@ -864,7 +860,7 @@ place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead. 
+ <p> + The main use for parameterized types are libraries implementing abstract + data types +-(<a href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">»</span> example</a>), ++(<a href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">»</span> example</a>), + similar to what can be achieved with C++ template metaprogramming. + Another use case are derived types of anonymous structs, which avoids + pollution of the global struct namespace. +@@ -1225,7 +1221,7 @@ suboptimal performance, especially when used in inner loops: + <li>Table initializers.</li> + <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> + <li>Non-default initialization of VLA/VLS or large C types +-(> 128 bytes or > 16 array elements.</li> ++(> 128 bytes or > 16 array elements).</li> + <li>Bitfield initializations.</li> + <li>Pointer differences for element sizes that are not a power of + two.</li> +@@ -1252,7 +1248,7 @@ compiled.</li> + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html +index 36500664..ca71be4d 100644 +--- a/doc/ext_ffi_tutorial.html ++++ b/doc/ext_ffi_tutorial.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>FFI Tutorial</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -18,7 +17,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>FFI Tutorial</h1> +@@ -27,7 +26,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -45,6 +44,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -53,17 +54,12 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing 
List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -222,7 +218,7 @@ a fascinating best-selling game is left as an exercise for the reader. + <h2 id="zlib">Accessing the zlib Compression Library</h2> + <p> + The following code shows how to access the <a +-href="http://zlib.net/"><span class="ext">»</span> zlib</a> compression library from Lua code. ++href="https://zlib.net/"><span class="ext">»</span> zlib</a> compression library from Lua code. + We'll define two convenience wrapper functions that take a string and + compress or uncompress it to another string: + </p> +@@ -305,7 +301,7 @@ comes pre-installed. Since <tt>ffi.load()</tt> automatically adds any + missing standard prefixes/suffixes, we can simply load the + <tt>"z"</tt> library. On Windows it's named <tt>zlib1.dll</tt> and + you'll have to download it first from the +-<a href="http://zlib.net/"><span class="ext">»</span> zlib site</a>. The check for ++<a href="https://zlib.net/"><span class="ext">»</span> zlib site</a>. The check for + <tt>ffi.os</tt> makes sure we pass the right name to + <tt>ffi.load()</tt>. + </p> +@@ -593,7 +589,7 @@ it to a local variable in the function scope is unnecessary. + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/ext_jit.html b/doc/ext_jit.html +index e4088bcb..6dd54c70 100644 +--- a/doc/ext_jit.html ++++ b/doc/ext_jit.html +@@ -1,17 +1,16 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>jit.* Library</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1><tt>jit.*</tt> Library</h1> +@@ -20,7 +19,7 @@ + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -38,6 +37,8 @@ + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a class="current" href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -46,17 +47,12 @@ + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div 
id="main"> +@@ -153,7 +149,7 @@ Contains the target OS name: + <h3 id="jit_arch"><tt>jit.arch</tt></h3> + <p> + Contains the target architecture name: +-"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64". ++"x86", "x64", "arm", "arm64", "arm64be", "ppc", "mips", "mipsel", "mips64", "mips64el", "mips64r6", "mips64r6el". + </p> + + <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2> +@@ -191,7 +187,7 @@ if you want to know more. + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html +index 71b8c033..2783abdb 100644 +--- a/doc/ext_profiler.html ++++ b/doc/ext_profiler.html +@@ -1,17 +1,16 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Profiler</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Profiler</h1> +@@ -20,7 +19,7 @@ + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -38,6 +37,8 @@ + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -46,17 +47,12 @@ + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -355,7 +351,7 @@ use. 
+ </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/extensions.html b/doc/extensions.html +index d7cc9693..748c1793 100644 +--- a/doc/extensions.html ++++ b/doc/extensions.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Extensions</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -28,7 +27,7 @@ td.excinterop { + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Extensions</h1> +@@ -37,7 +36,7 @@ td.excinterop { + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -55,6 +54,8 @@ td.excinterop { + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -63,25 +64,20 @@ td.excinterop { + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> + <p> + LuaJIT is fully upwards-compatible with Lua 5.1. It supports all +-<a href="http://www.lua.org/manual/5.1/manual.html#5"><span class="ext">»</span> standard Lua ++<a href="https://www.lua.org/manual/5.1/manual.html#5"><span class="ext">»</span> standard Lua + library functions</a> and the full set of +-<a href="http://www.lua.org/manual/5.1/manual.html#3"><span class="ext">»</span> Lua/C API ++<a href="https://www.lua.org/manual/5.1/manual.html#3"><span class="ext">»</span> Lua/C API + functions</a>. 
+ </p> + <p> +@@ -105,7 +101,7 @@ LuaJIT comes with several built-in extension modules: + <h3 id="bit"><tt>bit.*</tt> — Bitwise operations</h3> + <p> + LuaJIT supports all bitwise operations as defined by +-<a href="http://bitop.luajit.org"><span class="ext">»</span> Lua BitOp</a>: ++<a href="https://bitop.luajit.org"><span class="ext">»</span> Lua BitOp</a>: + </p> + <pre class="code"> + bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor +@@ -114,7 +110,7 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap + <p> + This module is a LuaJIT built-in — you don't need to download or + install Lua BitOp. The Lua BitOp site has full documentation for all +-<a href="http://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. ++<a href="https://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. + The FFI adds support for + <a href="ext_ffi_semantics.html#cdata_arith">64 bit bitwise operations</a>, + using the same API functions. +@@ -209,9 +205,8 @@ bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. + </p> + <p> + Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies +-a different, incompatible bytecode format for ports that use this mode (e.g. +-ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified +-in the future. ++a different, incompatible bytecode format for all 64 bit ports. This may be ++rectified in the future. + </p> + + <h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> +@@ -254,6 +249,10 @@ for every call. The result is uniformly distributed between 0.0 and 1.0. + It's correctly scaled up and rounded for <tt>math.random(n [,m])</tt> to + preserve uniformity. + </p> ++<p> ++Important: Neither this nor any other PRNG based on the simplistic ++<tt>math.random()</tt> API is suitable for cryptographic use. 
++</p> + + <h3 id="io"><tt>io.*</tt> functions handle 64 bit file offsets</h3> + <p> +@@ -374,6 +373,7 @@ LuaJIT supports some extensions from Lua 5.3: + <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> + <li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> + <li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li> ++<li><tt>assert()</tt> accepts any type of error object.</li> + <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li> + <li><tt>coroutine.isyieldable()</tt>.</li> + <li>Lua/C API extensions: +@@ -394,29 +394,19 @@ the toolchain used to compile LuaJIT: + <td class="excinterop">Interoperability</td> + </tr> + <tr class="odd separate"> +-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> +-<td class="exccompiler">GCC 4.3+, Clang</td> ++<td class="excplatform">External frame unwinding</td> ++<td class="exccompiler">GCC, Clang, MSVC</td> + <td class="excinterop"><b style="color: #00a000;">Full</b></td> + </tr> + <tr class="even"> +-<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td> +-<td class="exccompiler">GCC, Clang</td> +-<td class="excinterop"><b style="color: #00a000;">Full</b></td> +-</tr> +-<tr class="odd"> +-<td class="excplatform">Other platforms, DWARF2 unwinding</td> ++<td class="excplatform">Internal frame unwinding + DWARF2</td> + <td class="exccompiler">GCC, Clang</td> + <td class="excinterop"><b style="color: #c06000;">Limited</b></td> + </tr> +-<tr class="even"> +-<td class="excplatform">Windows/x64</td> +-<td class="exccompiler">MSVC or WinSDK</td> +-<td class="excinterop"><b style="color: #00a000;">Full</b></td> +-</tr> + <tr class="odd"> +-<td class="excplatform">Windows/x86</td> +-<td class="exccompiler">Any</td> +-<td class="excinterop"><b style="color: #00a000;">Full</b></td> ++<td class="excplatform">Windows 64 bit</td> ++<td class="exccompiler">non-MSVC</td> ++<td class="excinterop"><b style="color: #c06000;">Limited</b></td> + </tr> + <tr class="even"> + <td class="excplatform">Other platforms</td> +@@ -437,7 +427,9 @@ the toolchain used to compile LuaJIT: + on the C stack. The contents of the C++ exception object + pass through unmodified.</li> + <li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>. +-The corresponding Lua error message can be retrieved from the Lua stack.</li> ++The corresponding Lua error message can be retrieved from the Lua stack.<br> ++For MSVC for Windows 64 bit this requires compilation of your C++ code ++with <tt>/EHa</tt>.</li> + <li>Throwing Lua errors across C++ frames is safe. 
C++ destructors + will be called.</li> + </ul> +@@ -472,7 +464,7 @@ C++ destructors.</li> + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/faq.html b/doc/faq.html +index 2c930743..1b7cb371 100644 +--- a/doc/faq.html ++++ b/doc/faq.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Frequently Asked Questions (FAQ)</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -14,7 +13,7 @@ dd { margin-left: 1.5em; } + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Frequently Asked Questions (FAQ)</h1> +@@ -23,7 +22,7 @@ dd { margin-left: 1.5em; } + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -41,6 +40,8 @@ dd { margin-left: 1.5em; } + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -49,67 +50,60 @@ dd { margin-left: 1.5em; } + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a class="current" href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +-<dl> ++<dl id="info"> + <dt>Q: Where can I learn more about LuaJIT and Lua?</dt> + <dd> + <ul style="padding: 0;"> +-<li>The <a href="http://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a> focuses on topics ++<li>The <a href="https://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a> focuses on topics + related to LuaJIT.</li> + <li>The <a href="http://wiki.luajit.org/"><span class="ext">»</span> LuaJIT wiki</a> gathers community + resources about LuaJIT.</li> + <li>News about Lua itself can be found at the +-<a href="http://www.lua.org/lua-l.html"><span class="ext">»</span> Lua mailing list</a>. ++<a href="https://www.lua.org/lua-l.html"><span class="ext">»</span> Lua mailing list</a>. 
+ The mailing list archives are worth checking out for older postings + about LuaJIT.</li> +-<li>The <a href="http://lua.org"><span class="ext">»</span> main Lua.org site</a> has complete +-<a href="http://www.lua.org/docs.html"><span class="ext">»</span> documentation</a> of the language ++<li>The <a href="https://lua.org"><span class="ext">»</span> main Lua.org site</a> has complete ++<a href="https://www.lua.org/docs.html"><span class="ext">»</span> documentation</a> of the language + and links to books and papers about Lua.</li> + <li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">»</span> Lua Wiki</a> + has information about diverse topics.</li> +-</ul> ++</ul></dd> + </dl> + +-<dl> ++<dl id="tech"> + <dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt> + <dd> +-I'm planning to write more documentation about the internals of LuaJIT. +-In the meantime, please use the following Google Scholar searches +-to find relevant papers:<br> +-Search for: <a href="http://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">»</span> Trace Compiler</a><br> +-Search for: <a href="http://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">»</span> JIT Compiler</a><br> +-Search for: <a href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">»</span> Dynamic Language Optimizations</a><br> +-Search for: <a href="http://scholar.google.com/scholar?q=SSA+Form"><span class="ext">»</span> SSA Form</a><br> +-Search for: <a href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">»</span> Linear Scan Register Allocation</a><br> +-Here is a list of the <a href="http://article.gmane.org/gmane.comp.lang.lua.general/58908"><span class="ext">»</span> innovative features in LuaJIT</a>.<br> +-And, you know, reading the source is of course the only way to enlightenment. :-) ++Please use the following Google Scholar searches to find relevant papers:<br> ++Search for: <a href="https://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">»</span> Trace Compiler</a><br> ++Search for: <a href="https://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">»</span> JIT Compiler</a><br> ++Search for: <a href="https://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">»</span> Dynamic Language Optimizations</a><br> ++Search for: <a href="https://scholar.google.com/scholar?q=SSA+Form"><span class="ext">»</span> SSA Form</a><br> ++Search for: <a href="https://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">»</span> Linear Scan Register Allocation</a><br> ++Here is a list of the <a href="http://lua-users.org/lists/lua-l/2009-11/msg00089.html"><span class="ext">»</span> innovative features in LuaJIT</a>.<br> ++And, you know, reading the source is of course the only way to enlightenment. + </dd> + </dl> + +-<dl> ++<dl id="arg"> + <dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br> + Q: My vararg functions fail after switching to LuaJIT!</dt> + <dd>LuaJIT is compatible to the Lua 5.1 language standard. 
It doesn't + support the implicit <tt>arg</tt> parameter for old-style vararg + functions from Lua 5.0.<br>Please convert your code to the +-<a href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">»</span> Lua 5.1 ++<a href="https://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">»</span> Lua 5.1 + vararg syntax</a>.</dd> + </dl> + +-<dl> ++<dl id="x87"> + <dt>Q: Why do I get this error: "bad FPU precision"?<br> + <dt>Q: I get weird behavior after initializing Direct3D.<br> + <dt>Q: Some FPU operations crash after I load a Delphi DLL.<br> +@@ -127,56 +121,75 @@ Consider testing your application with older versions, too.<br> + + Similarly, the Borland/Delphi runtime modifies the FPU control word and + enables FP exceptions. Of course this violates the Windows ABI, too. +-Please check the Delphi docs for the Set8087CW method. +- ++Please check the Delphi docs for the Set8087CW method.</dd> + </dl> + +-<dl> ++<dl id="ctrlc"> + <dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt> + <dd>The interrupt signal handler sets a Lua debug hook. But this is +-currently ignored by compiled code (this will eventually be fixed). If +-your program is running in a tight loop and never falls back to the +-interpreter, the debug hook never runs and can't throw the +-"interrupted!" error.<br> In the meantime you have to press Ctrl-C +-twice to get stop your program. That's similar to when it's stuck +-running inside a C function under the Lua interpreter.</dd> ++ignored by compiled code. If your program is running in a tight loop ++and never falls back to the interpreter, the debug hook never runs and ++can't throw the "interrupted!" error.<br> ++You have to press Ctrl-C twice to get stop your program. That's similar ++to when it's stuck running inside a C function under the Lua interpreter.</dd> + </dl> + +-<dl> +-<dt>Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?</dt> +-<dd>Because it's a completely redesigned VM and has very little code +-in common with Lua anymore. Also, if the patch introduces changes to +-the Lua semantics, these would need to be reflected everywhere in the +-VM, from the interpreter up to all stages of the compiler.<br> Please +-use only standard Lua language constructs. For many common needs you +-can use source transformations or use wrapper or proxy functions. +-The compiler will happily optimize away such indirections.</dd> ++<dl id="order"> ++<dt>Q: Table iteration with <tt>pairs()</tt> does not result in the same order?</dt> ++<dd>The order of table iteration is explicitly <b>undefined</b> by ++the Lua language standard.<br> ++Different Lua implementations or versions may use different orders for ++otherwise identical tables. Different ways of constructing a table may ++result in different orders, too.<br> ++Due to improved VM security, LuaJIT 2.1 may even use a different order ++on separate VM invocations or when string keys are newly interned.<br><br> ++If your program relies on a deterministic order, it has a bug. Rewrite it, ++so it doesn't rely on the key order. Or sort the table keys, if you must.</dd> + </dl> + +-<dl> ++<dl id="sandbox"> ++<dt>Q: Can Lua code be safely sandboxed?</dt> ++<dd> ++Maybe for an extremly restricted subset of Lua and if you relentlessly ++scrutinize every single interface function you offer to the untrusted code.<br> ++ ++Although Lua provides some sandboxing functionality (<tt>setfenv()</tt>, hooks), ++it's very hard to get this right even for the Lua core libraries. 
Of course, ++you'll need to inspect any extension library, too. And there are libraries ++that are inherently unsafe, e.g. the <a href="ext_ffi.html">FFI library</a>.<br> ++ ++More reading material at the <a href="http://lua-users.org/wiki/SandBoxes"><span class="ext">»</span> Lua Wiki</a> and <a href="https://en.wikipedia.org/wiki/Sandbox_(computer_security)"><span class="ext">»</span> Wikipedia</a>.<br><br> ++ ++Relatedly, <b>loading untrusted bytecode is not safe!</b><br> ++ ++It's trivial to crash the Lua or LuaJIT VM with maliciously crafted bytecode. ++This is well known and there's no bytecode verification on purpose, so please ++don't report a bug about it. Check the <tt>mode</tt> parameter for the ++<tt>load*()</tt> functions to disable loading of bytecode.<br><br> ++ ++<b>In general, the only promising approach is to sandbox Lua code at the ++process level and not the VM level.</b> ++</dd> ++</dl> ++ ++<dl id="arch"> + <dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt> + <dd>Because it's a compiler — it needs to generate native + machine code. This means the code generator must be ported to each + architecture. And the fast interpreter is written in assembler and + must be ported, too. This is quite an undertaking.<br> + The <a href="install.html">install documentation</a> shows the supported +-architectures. Other architectures will follow based on sufficient user +-demand and/or sponsoring.</dd> +-</dl> +- +-<dl> +-<dt>Q: When will feature X be added? When will the next version be released?</dt> +-<dd>When it's ready.<br> +-C'mon, it's open source — I'm doing it on my own time and you're +-getting it for free. You can either contribute a patch or sponsor +-the development of certain features, if they are important to you. +-</dd> ++architectures.<br> ++Other architectures may follow based on sufficient user demand and ++market-relevance of the architecture. 
Sponsoring is required to develop ++the port itself, to integrate it and to continuously maintain it in the ++actively developed branches.</dd> + </dl> + <br class="flush"> + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/install.html b/doc/install.html +index c491c601..e4af9dde 100644 +--- a/doc/install.html ++++ b/doc/install.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Installation</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -39,7 +38,7 @@ td.compatno { + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Installation</h1> +@@ -48,7 +47,7 @@ td.compatno { + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a class="current" href="install.html">Installation</a> + </li><li> +@@ -66,6 +65,8 @@ td.compatno { + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -74,17 +75,12 @@ td.compatno { + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -109,22 +105,22 @@ operating systems, CPUs and compilers: + <td class="compatcpu">CPU / OS</td> + <td class="compatos"><a href="#posix">Linux</a> or<br><a href="#android">Android</a></td> + <td class="compatos"><a href="#posix">*BSD, Other</a></td> +-<td class="compatos"><a href="#posix">OSX 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td> +-<td class="compatos"><a href="#windows">Windows<br>XP/Vista/7</a></td> ++<td class="compatos"><a href="#posix">macOS 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td> ++<td class="compatos"><a href="#windows">Windows 7<br>or later</a></td> + </tr> + <tr class="odd separate"> + <td class="compatcpu">x86 (32 bit)</td> + <td class="compatos">GCC 4.2+</td> + <td class="compatos">GCC 4.2+</td> + <td class="compatos">XCode 5.0+<br>Clang</td> +-<td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td> ++<td class="compatos">MSVC<br>MinGW, Cygwin</td> + </tr> + <tr class="even"> + <td 
class="compatcpu">x64 (64 bit)</td> + <td class="compatos">GCC 4.2+</td> + <td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td> + <td class="compatos">XCode 5.0+<br>Clang</td> +-<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td> ++<td class="compatos">MSVC<br>Durango (<a href="#xboxone">Xbox One</a>)</td> + </tr> + <tr class="odd"> + <td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> +@@ -134,7 +130,7 @@ operating systems, CPUs and compilers: + <td class="compatos compatno"> </td> + </tr> + <tr class="even"> +-<td class="compatcpu"><a href="#cross2">ARM64</a></td> ++<td class="compatcpu"><a href="#cross2">ARM64<br>ARM64be</a></td> + <td class="compatos">GCC 4.8+</td> + <td class="compatos compatno"> </td> + <td class="compatos">XCode 6.0+<br>Clang 3.5+</td> +@@ -148,7 +144,7 @@ operating systems, CPUs and compilers: + <td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td> + </tr> + <tr class="even"> +-<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td> ++<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64<br>MIPS64r6</a></td> + <td class="compatos">GCC 4.3+</td> + <td class="compatos">GCC 4.3+</td> + <td class="compatos compatno"> </td> +@@ -169,22 +165,21 @@ only).</li> + <li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT + under POSIX, MinGW or Cygwin.</li> + <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with +-MSVC or WinSDK.</li> ++MSVC (Visual Studio).</li> + </ul> + <p> + Please read the instructions given in these files, before changing + any settings. + </p> + <p> +-LuaJIT on x64 currently uses 32 bit GC objects by default. +-<tt>LJ_GC64</tt> mode may be explicitly enabled: +-add <tt>XCFLAGS=-DLUAJIT_ENABLE_GC64</tt> to the make command or run +-<tt>msvcbuild gc64</tt> for MSVC/WinSDK. Please check the note +-about the <a href="extensions.html#string_dump">bytecode format</a> +-differences, too. ++All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>). ++For x64, you can select the old 32-on-64 bit mode by adding ++<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command. ++Please check the note about the ++<a href="extensions.html#string_dump">bytecode format</a> differences, too. + </p> + +-<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> ++<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2> + <h3>Prerequisites</h3> + <p> + Depending on your distribution, you may need to install a package for +@@ -192,14 +187,19 @@ GCC, the development headers and/or a complete SDK. E.g. on a current + Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager. + </p> + <p> +-Download the current source package of LuaJIT (pick the .tar.gz), +-if you haven't already done so. Move it to a directory of your choice, +-open a terminal window and change to this directory. Now unpack the archive +-and change to the newly created directory: ++The recommended way to fetch the latest version is to do a pull from ++the git repository. ++</p> ++<p> ++Alternatively download the latest source package of LuaJIT (pick the .tar.gz). ++Move it to a directory of your choice, open a terminal window and change ++to this directory. 
Now unpack the archive and change to the newly created ++directory (replace XX.YY.ZZ with the version you downloaded): + </p> + <pre class="code"> +-tar zxf LuaJIT-2.0.5.tar.gz +-cd LuaJIT-2.0.5</pre> ++tar zxf LuaJIT-XX.YY.ZZ.tar.gz ++cd LuaJIT-XX.YY.ZZ ++</pre> + <h3>Building LuaJIT</h3> + <p> + The supplied Makefiles try to auto-detect the settings needed for your +@@ -223,9 +223,12 @@ You can add an extra prefix to the search paths by appending the + make PREFIX=/home/myself/lj2 + </pre> + <p> +-Note for OSX: if the <tt>MACOSX_DEPLOYMENT_TARGET</tt> environment +-variable is not set, then it's forced to <tt>10.4</tt>. ++Note for macOS: you <b>must</b> set the <tt>MACOSX_DEPLOYMENT_TARGET</tt> ++environment variable to a value supported by your toolchain: + </p> ++<pre class="code"> ++MACOSX_DEPLOYMENT_TARGET=XX.YY make ++</pre> + <h3>Installing LuaJIT</h3> + <p> + The top-level Makefile installs LuaJIT by default under +@@ -252,27 +255,18 @@ Obviously the prefixes given during build and installation need to be the same. + <p> + Either install one of the open source SDKs + (<a href="http://mingw.org/"><span class="ext">»</span> MinGW</a> or +-<a href="http://www.cygwin.com/"><span class="ext">»</span> Cygwin</a>), which come with a modified ++<a href="https://www.cygwin.com/"><span class="ext">»</span> Cygwin</a>), which come with a modified + GCC plus the required development headers. ++Or install Microsoft's Visual Studio (MSVC). + </p> + <p> +-Or install Microsoft's Visual C++ (MSVC). The freely downloadable +-<a href="http://www.microsoft.com/Express/VC/"><span class="ext">»</span> Express Edition</a> +-works just fine, but only contains an x86 compiler. +-</p> +-<p> +-The freely downloadable +-<a href="http://msdn.microsoft.com/en-us/windowsserver/bb980924.aspx"><span class="ext">»</span> Windows SDK</a> +-only comes with command line tools, but this is all you need to build LuaJIT. +-It contains x86 and x64 compilers. +-</p> +-<p> +-Next, download the source package and unpack it using an archive manager +-(e.g. the Windows Explorer) to a directory of your choice. ++Next, pull from the git repository or download the source package and ++unpack it using an archive manager (e.g. the Windows Explorer) to ++a directory of your choice. + </p> + <h3>Building with MSVC</h3> + <p> +-Open a "Visual Studio .NET Command Prompt", <tt>cd</tt> to the ++Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the + directory where you've unpacked the sources and run these commands: + </p> + <pre class="code"> +@@ -280,37 +274,14 @@ cd src + msvcbuild + </pre> + <p> +-Then follow the installation instructions below. +-</p> +-<h3>Building with the Windows SDK</h3> +-<p> +-Open a "Windows SDK Command Shell" and select the x86 compiler: +-</p> +-<pre class="code"> +-setenv /release /x86 +-</pre> +-<p> +-Or select the x64 compiler: +-</p> +-<pre class="code"> +-setenv /release /x64 +-</pre> +-<p> +-Then <tt>cd</tt> to the directory where you've unpacked the sources +-and run these commands: +-</p> +-<pre class="code"> +-cd src +-msvcbuild +-</pre> +-<p> ++Check the <tt>msvcbuild.bat</tt> file for more options. + Then follow the installation instructions below. + </p> + <h3>Building with MinGW or Cygwin</h3> + <p> + Open a command prompt window and make sure the MinGW or Cygwin programs +-are in your path. Then <tt>cd</tt> to the directory where +-you've unpacked the sources and run this command for MinGW: ++are in your path. 
Then <tt>cd</tt> to the directory of the git repository ++or where you've unpacked the sources. Then run this command for MinGW: + </p> + <pre class="code"> + mingw32-make +@@ -365,7 +336,7 @@ You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the + target OS differ, or you'll get assembler or linker errors: + </p> + <ul> +-<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> ++<li>E.g. if you're compiling on a Windows or macOS host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> + <li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li> + <li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li> + </ul> +@@ -428,52 +399,31 @@ make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" + make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" + </pre> + <p> +-You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>. +-The environment variables need to match the install locations and the +-desired target platform. E.g. Android 4.0 corresponds to ABI level 14. +-For details check the folder <tt>docs</tt> in the NDK directory. +-</p> +-<p> +-Only a few common variations for the different CPUs, ABIs and platforms +-are listed. Please use your own judgement for which combination you want +-to build/deploy or which lowest common denominator you want to pick: ++You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">»</span> Android NDK</a>. ++Please adapt the environment variables to match the install locations and the ++desired target platform. E.g. Android 4.1 corresponds to ABI level 16. 
+ </p> + <pre class="code"> +-# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo) +-NDK=/opt/android/ndk +-NDKABI=8 +-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 +-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- +-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" +-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" +- +-# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS) +-NDK=/opt/android/ndk +-NDKABI=14 +-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 +-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- +-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" +-NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8" +-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH" ++# Android/ARM64, aarch64, Android 5.0+ (L) ++NDKDIR=/opt/android/ndk ++NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin ++NDKCROSS=$NDKBIN/aarch64-linux-android- ++NDKCC=$NDKBIN/aarch64-linux-android21-clang ++make CROSS=$NDKCROSS \ ++ STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \ ++ TARGET_LD=$NDKCC + +-# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS) +-NDK=/opt/android/ndk +-NDKABI=14 +-NDKVER=$NDK/toolchains/mipsel-linux-android-4.9 +-NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android- +-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips" +-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" +- +-# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS) +-NDK=/opt/android/ndk +-NDKABI=14 +-NDKVER=$NDK/toolchains/x86-4.9 +-NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android- +-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86" +-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" ++# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) ++NDKDIR=/opt/android/ndk ++NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin ++NDKCROSS=$NDKBIN/arm-linux-androideabi- ++NDKCC=$NDKBIN/armv7a-linux-androideabi16-clang ++make HOST_CC="gcc -m32" CROSS=$NDKCROSS \ ++ STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \ ++ TARGET_LD=$NDKCC + </pre> + <p> +-You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">»</span> iOS SDK</a>: ++You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">»</span> iOS SDK</a>: + </p> + <p style="font-size: 8pt;"> + Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps +@@ -483,13 +433,6 @@ much slower than the JIT compiler. Please complain to Apple, not me. + Or use Android. :-p + </p> + <pre class="code"> +-# iOS/ARM (32 bit) +-ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) +-ICC=$(xcrun --sdk iphoneos --find clang) +-ISDKF="-arch armv7 -isysroot $ISDKP" +-make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \ +- CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS +- + # iOS/ARM64 + ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) + ICC=$(xcrun --sdk iphoneos --find clang) +@@ -590,14 +533,6 @@ the DLL). You may link LuaJIT statically on Windows only if you don't + intend to load Lua/C modules at runtime. 
+ </li></ul> + </li> +-<li> +-If you're building a 64 bit application on OSX which links directly or +-indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode, +-you need to link your main executable with these flags: +-<pre class="code"> +--pagezero_size 10000 -image_base 100000000 +-</pre> +-</li> + </ul> + <p>Additional hints for initializing LuaJIT using the C API functions:</p> + <ul> +@@ -606,7 +541,7 @@ you need to link your main executable with these flags: + for embedding Lua or LuaJIT into your application.</li> + <li>Make sure you use <tt>luaL_newstate</tt>. Avoid using + <tt>lua_newstate</tt>, since this uses the (slower) default memory +-allocator from your system (no support for this on x64).</li> ++allocator from your system (no support for this on 64 bit architectures).</li> + <li>Make sure you use <tt>luaL_openlibs</tt> and not the old Lua 5.0 style + of calling <tt>luaopen_base</tt> etc. directly.</li> + <li>To change or extend the list of standard libraries to load, copy +@@ -615,7 +550,7 @@ Make sure the <tt>jit</tt> library is loaded or the JIT compiler + will not be activated.</li> + <li>The <tt>bit.*</tt> module for bitwise operations + is already built-in. There's no need to statically link +-<a href="http://bitop.luajit.org/"><span class="ext">»</span> Lua BitOp</a> to your application.</li> ++<a href="https://bitop.luajit.org/"><span class="ext">»</span> Lua BitOp</a> to your application.</li> + </ul> + + <h2 id="distro">Hints for Distribution Maintainers</h2> +@@ -682,7 +617,7 @@ to me (the upstream) and not you (the package maintainer), anyway. + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/luajit.html b/doc/luajit.html +index ef5b824c..a25267a6 100644 +--- a/doc/luajit.html ++++ b/doc/luajit.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>LuaJIT</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -96,7 +95,7 @@ table.feature small { + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>LuaJIT</h1> +@@ -105,7 +104,7 @@ table.feature small { + <ul><li> + <a class="current" href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -123,6 +122,8 @@ table.feature small { + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -131,36 +132,31 @@ table.feature small { + </li></ul> + </li><li> + <a 
href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> + <p> + LuaJIT is a <b>Just-In-Time Compiler</b> (JIT) for the +-<a href="http://www.lua.org/"><span class="ext">»</span> Lua</a> programming language. ++<a href="https://www.lua.org/"><span class="ext">»</span> Lua</a> programming language. + Lua is a powerful, dynamic and light-weight programming language. + It may be embedded or used as a general-purpose, stand-alone language. + </p> + <p> +-LuaJIT is Copyright © 2005-2017 Mike Pall, released under the +-<a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT open source license</a>. ++LuaJIT is Copyright © 2005-2021 Mike Pall, released under the ++<a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT open source license</a>. + </p> + <p> + </p> + + <h2>Compatibility</h2> + <table class="feature os os1"> +-<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr> ++<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>macOS</td><td>POSIX</td></tr> + </table> + <table class="feature os os2"> + <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> +@@ -194,7 +190,7 @@ LuaJIT has been successfully used as a <b>scripting middleware</b> in + games, appliances, network and graphics apps, numerical simulations, + trading platforms and many other specialty applications. It scales from + embedded devices, smartphones, desktops up to server farms. It combines +-high flexibility with <a href="http://luajit.org/performance.html"><span class="ext">»</span> high performance</a> ++high flexibility with high performance + and an unmatched <b>low memory footprint</b>. + </p> + <p> +@@ -226,7 +222,7 @@ Please select a sub-topic in the navigation bar to learn more about LuaJIT. 
+ </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/running.html b/doc/running.html +index 666b0abc..b55b8439 100644 +--- a/doc/running.html ++++ b/doc/running.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Running LuaJIT</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -33,7 +32,7 @@ td.param_default { + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Running LuaJIT</h1> +@@ -42,7 +41,7 @@ td.param_default { + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -60,6 +59,8 @@ td.param_default { + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -68,17 +69,12 @@ td.param_default { + </li></ul> + </li><li> + <a href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> +@@ -95,7 +91,7 @@ The <tt>luajit</tt> stand-alone executable is just a slightly modified + version of the regular <tt>lua</tt> stand-alone executable. + It supports the same basic options, too. <tt>luajit -h</tt> + prints a short list of the available options. Please have a look at the +-<a href="http://www.lua.org/manual/5.1/manual.html#6"><span class="ext">»</span> Lua manual</a> ++<a href="https://www.lua.org/manual/5.1/manual.html#6"><span class="ext">»</span> Lua manual</a> + for details. + </p> + <p> +@@ -189,8 +185,8 @@ itself. For a description of their options and output format, please + read the comment block at the start of their source. + They can be found in the <tt>lib</tt> directory of the source + distribution or installed under the <tt>jit</tt> directory. By default +-this is <tt>/usr/local/share/luajit-2.0.5/jit</tt> on POSIX +-systems. ++this is <tt>/usr/local/share/luajit-XX.YY.ZZ>/jit</tt> on POSIX ++systems (replace XX.YY.ZZ by the installed version). 
+ </p> + + <h3 id="opt_O"><tt>-O[level]</tt><br> +@@ -299,7 +295,7 @@ Here are the parameters and their default settings: + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/doc/status.html b/doc/status.html +index cad6ca65..1d3ba984 100644 +--- a/doc/status.html ++++ b/doc/status.html +@@ -1,10 +1,9 @@ +-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> ++<!DOCTYPE html> + <html> + <head> + <title>Status</title> +-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +-<meta name="Author" content="Mike Pall"> +-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall"> ++<meta charset="utf-8"> ++<meta name="Copyright" content="Copyright (C) 2005-2021"> + <meta name="Language" content="en"> + <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> + <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +@@ -14,7 +13,7 @@ ul li { padding-bottom: 0.3em; } + </head> + <body> + <div id="site"> +-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> ++<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> + </div> + <div id="head"> + <h1>Status</h1> +@@ -23,7 +22,7 @@ ul li { padding-bottom: 0.3em; } + <ul><li> + <a href="luajit.html">LuaJIT</a> + <ul><li> +-<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> ++<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> + </li><li> + <a href="install.html">Installation</a> + </li><li> +@@ -41,6 +40,8 @@ ul li { padding-bottom: 0.3em; } + <a href="ext_ffi_semantics.html">FFI Semantics</a> + </li></ul> + </li><li> ++<a href="ext_buffer.html">String Buffers</a> ++</li><li> + <a href="ext_jit.html">jit.* Library</a> + </li><li> + <a href="ext_c_api.html">Lua/C API</a> +@@ -49,24 +50,25 @@ ul li { padding-bottom: 0.3em; } + </li></ul> + </li><li> + <a class="current" href="status.html">Status</a> +-<ul><li> +-<a href="changes.html">Changes</a> +-</li></ul> + </li><li> + <a href="faq.html">FAQ</a> + </li><li> +-<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +-</li><li> + <a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> + </li><li> +-<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> ++<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> + </li></ul> + </div> + <div id="main"> + <p> +-<span style="color: #0000c0;">LuaJIT 2.0</span> is the current +-<span style="color: #0000c0;">stable branch</span>. This branch is in +-feature-freeze — new features will only be added to LuaJIT 2.1. ++This documentation is for LuaJIT 2.1.0-beta3. Please check the <tt>doc</tt> ++directory in each git branch for the version-specific documentation. ++</p> ++<p> ++The currently developed branches are LuaJIT 2.1 and LuaJIT 2.0. ++</p> ++<p> ++LuaJIT 2.0 is in feature-freeze — new features will only ++be added to LuaJIT 2.1. + </p> + + <h2>Current Status</h2> +@@ -90,30 +92,12 @@ The Lua <b>debug API</b> is missing a couple of features (return + hooks for non-Lua functions) and shows slightly different behavior + in LuaJIT (no per-coroutine hooks, no tail call counting). + </li> +-<li> +-Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not +-handled correctly. 
The error may fall through an on-trace +-<tt>pcall</tt> or it may be passed on to the function set with +-<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new +-garbage collector. +-</li> +-<li> +-LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the +-<b>legacy <tt>lightuserdata</tt></b> data type. +-This is only relevant on x64 systems which use the negative part of the +-virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems +-configured with a 48 bit or 52 bit VA. +-Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside +-of that range, e.g. variables on the stack. In general, avoid this data +-type for new code and replace it with (much more performant) FFI bindings. +-FFI cdata pointers can address the full 64 bit range. +-</li> + </ul> + <br class="flush"> + </div> + <div id="foot"> + <hr class="hide"> +-Copyright © 2005-2017 Mike Pall ++Copyright © 2005-2021 + <span class="noprint"> + · + <a href="contact.html">Contact</a> +diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h +index a43f7c66..ebcf4ac0 100644 +--- a/dynasm/dasm_arm.h ++++ b/dynasm/dasm_arm.h +@@ -1,6 +1,6 @@ + /* + ** DynASM ARM encoding engine. +-** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. + ** Released under the MIT license. See dynasm.lua for full copyright notice. + */ + +@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...) + case DASM_IMMV8: + CK((n & 3) == 0, RANGE_I); + n >>= 2; ++ /* fallthrough */ + case DASM_IMML8: + case DASM_IMML12: + CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : +@@ -293,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp) + + { /* Handle globals not defined in this translation unit. */ + int idx; +- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } +@@ -370,7 +371,11 @@ int dasm_encode(Dst_DECL, void *buffer) + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + break; + case DASM_REL_LG: +- CK(n >= 0, UNDEF_LG); ++ if (n < 0) { ++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4); ++ goto patchrel; ++ } ++ /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; +diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua +index 32f595af..0c775ae2 100644 +--- a/dynasm/dasm_arm.lua ++++ b/dynasm/dasm_arm.lua +@@ -1,7 +1,7 @@ + ------------------------------------------------------------------------------ + -- DynASM ARM module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See dynasm.lua for full copyright notice. + ------------------------------------------------------------------------------ + +@@ -9,9 +9,9 @@ + local _info = { + arch = "arm", + description = "DynASM ARM module", +- version = "1.4.0", +- vernum = 10400, +- release = "2015-10-18", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2021-05-02", + author = "Mike Pall", + license = "MIT", + } +diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h +index 47e1e074..d6da4528 100644 +--- a/dynasm/dasm_arm64.h ++++ b/dynasm/dasm_arm64.h +@@ -1,6 +1,6 @@ + /* + ** DynASM ARM64 encoding engine. +-** Copyright (C) 2005-2017 Mike Pall. All rights reserved. 
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. + ** Released under the MIT license. See dynasm.lua for full copyright notice. + */ + +@@ -21,8 +21,9 @@ enum { + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ +- DASM_REL_PC, DASM_LABEL_PC, ++ DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A, + DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML, ++ DASM_IMMV, DASM_VREG, + DASM__MAX + }; + +@@ -39,6 +40,7 @@ enum { + #define DASM_S_RANGE_LG 0x13000000 + #define DASM_S_RANGE_PC 0x14000000 + #define DASM_S_RANGE_REL 0x15000000 ++#define DASM_S_RANGE_VREG 0x16000000 + #define DASM_S_UNDEF_LG 0x21000000 + #define DASM_S_UNDEF_PC 0x22000000 + +@@ -247,7 +249,7 @@ void dasm_put(Dst_DECL, int start, ...) + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; +- case DASM_REL_EXT: break; ++ case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; +@@ -268,6 +270,11 @@ void dasm_put(Dst_DECL, int start, ...) + *pl = pos; + } + pos++; ++ if ((ins & 0x8000)) ofs += 8; ++ break; ++ case DASM_REL_A: ++ b[pos++] = n; ++ b[pos++] = va_arg(ap, int); + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; +@@ -312,13 +319,21 @@ void dasm_put(Dst_DECL, int start, ...) + } + case DASM_IMML: { + #ifdef DASM_CHECKS +- int scale = (p[-2] >> 30); ++ int scale = (ins & 3); + CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) || + (unsigned int)(n+256) < 512, RANGE_I); + #endif + b[pos++] = n; + break; + } ++ case DASM_IMMV: ++ ofs += 4; ++ b[pos++] = n; ++ break; ++ case DASM_VREG: ++ CK(n < 32, RANGE_VREG); ++ b[pos++] = n; ++ break; + } + } + } +@@ -348,7 +363,7 @@ int dasm_link(Dst_DECL, size_t *szp) + + { /* Handle globals not defined in this translation unit. */ + int idx; +- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). 
*/ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } +@@ -375,8 +390,8 @@ int dasm_link(Dst_DECL, size_t *szp) + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W: +- case DASM_IMML: pos++; break; +- case DASM_IMM13X: pos += 2; break; ++ case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break; ++ case DASM_IMM13X: case DASM_REL_A: pos += 2; break; + } + } + stop: (void)0; +@@ -426,7 +441,13 @@ int dasm_encode(Dst_DECL, void *buffer) + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + break; + case DASM_REL_LG: +- CK(n >= 0, UNDEF_LG); ++ if (n < 0) { ++ ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4; ++ n = (int)na; ++ CK((ptrdiff_t)n == na, RANGE_REL); ++ goto patchrel; ++ } ++ /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; +@@ -445,8 +466,24 @@ int dasm_encode(Dst_DECL, void *buffer) + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL); + cp[-1] |= ((n << 3) & 0x0007ffe0); ++ } else if ((ins & 0x8000)) { /* absolute */ ++ cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n); ++ cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32); ++ cp += 2; + } + break; ++ case DASM_REL_A: { ++ ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n); ++ if ((ins & 0x3000) == 0x3000) { /* ADRP */ ++ ins &= ~0x1000; ++ na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12); ++ } else { ++ na = na - (ptrdiff_t)cp + 4; ++ } ++ n = (int)na; ++ CK((ptrdiff_t)n == na, RANGE_REL); ++ goto patchrel; ++ } + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; +@@ -467,11 +504,17 @@ int dasm_encode(Dst_DECL, void *buffer) + cp[-1] |= (dasm_imm13(n, *b++) << 10); + break; + case DASM_IMML: { +- int scale = (p[-2] >> 30); ++ int scale = (ins & 3); + cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ? + ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); + break; + } ++ case DASM_IMMV: ++ *cp++ = n; ++ break; ++ case DASM_VREG: ++ cp[-1] |= (n & 0x1f) << (ins & 0x1f); ++ break; + default: *cp++ = ins; break; + } + } +diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua +index 8a5f735d..cb82dc4a 100644 +--- a/dynasm/dasm_arm64.lua ++++ b/dynasm/dasm_arm64.lua +@@ -1,7 +1,7 @@ + ------------------------------------------------------------------------------ + -- DynASM ARM64 module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See dynasm.lua for full copyright notice. 
+ ------------------------------------------------------------------------------ + +@@ -9,9 +9,9 @@ + local _info = { + arch = "arm", + description = "DynASM ARM64 module", +- version = "1.4.0", +- vernum = 10400, +- release = "2015-10-18", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2021-05-02", + author = "Mike Pall", + license = "MIT", + } +@@ -23,12 +23,12 @@ local _M = { _info = _info } + local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs + local assert, setmetatable, rawget = assert, setmetatable, rawget + local _s = string +-local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char ++local format, byte, char = _s.format, _s.byte, _s.char + local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub + local concat, sort, insert = table.concat, table.sort, table.insert + local bit = bit or require("bit") + local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +-local ror, tohex = bit.ror, bit.tohex ++local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit + + -- Inherited tables and callbacks. + local g_opt, g_arch +@@ -39,7 +39,9 @@ local wline, werror, wfatal, wwarn + local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", +- "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", ++ "REL_PC", "LABEL_PC", "REL_A", ++ "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV", ++ "VREG", + } + + -- Maximum number of section buffer positions for dasm_put(). +@@ -246,9 +248,12 @@ local map_cond = { + + local parse_reg_type + +-local function parse_reg(expr) ++local function parse_reg(expr, shift) + if not expr then werror("expected register name") end + local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$") ++ if not tname then ++ tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$") ++ end + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg +@@ -266,18 +271,28 @@ local function parse_reg(expr) + elseif parse_reg_type ~= rt then + werror("register size mismatch") + end +- return r, tp ++ return shl(r, shift), tp ++ end ++ end ++ local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$") ++ if vreg then ++ if not parse_reg_type then ++ parse_reg_type = vrt ++ elseif parse_reg_type ~= vrt then ++ werror("register size mismatch") + end ++ if shift then waction("VREG", shift, vreg) end ++ return 0 + end + werror("bad register name `"..expr.."'") + end + + local function parse_reg_base(expr) + if expr == "sp" then return 0x3e0 end +- local base, tp = parse_reg(expr) ++ local base, tp = parse_reg(expr, 5) + if parse_reg_type ~= "x" then werror("bad register type") end + parse_reg_type = false +- return shl(base, 5), tp ++ return base, tp + end + + local parse_ctx = {} +@@ -297,7 +312,7 @@ local function parse_number(n) + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) +- if ok then return y end ++ if ok and type(y) == "number" then return y end + end + return nil + end +@@ -403,7 +418,7 @@ local function parse_imm_load(imm, scale) + end + werror("out of range immediate `"..imm.."'") + else +- waction("IMML", 0, imm) ++ waction("IMML", scale, imm) + return 0 + end + end +@@ -462,6 +477,7 @@ end + + local function parse_load(params, nparams, n, op) + if params[n+2] then werror("too many operands") end ++ local scale = shr(op, 30) + local pn, p2 = params[n], params[n+1] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then +@@ -470,14 +486,13 @@ local function parse_load(params, nparams, n, op) + if reg and tailr 
~= "" then + local base, tp = parse_reg_base(reg) + if tp then +- waction("IMML", 0, format(tp.ctypefmt, tailr)) ++ waction("IMML", scale, format(tp.ctypefmt, tailr)) + return op + base + end + end + end + werror("expected address operand") + end +- local scale = shr(op, 30) + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 +@@ -494,7 +509,7 @@ local function parse_load(params, nparams, n, op) + op = op + parse_imm_load(imm, scale) + else + local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") +- op = op + shl(parse_reg(p2b), 16) + 0x00200800 ++ op = op + parse_reg(p2b, 16) + 0x00200800 + if parse_reg_type ~= "x" and parse_reg_type ~= "w" then + werror("bad index register type") + end +@@ -561,14 +576,14 @@ local function parse_load_pair(params, nparams, n, op) + end + + local function parse_label(label, def) +- local prefix = sub(label, 1, 2) ++ local prefix = label:sub(1, 2) + -- =>label (pc label reference) + if prefix == "=>" then +- return "PC", 0, sub(label, 3) ++ return "PC", 0, label:sub(3) + end + -- ->name (global label reference) + if prefix == "->" then +- return "LG", map_global[sub(label, 3)] ++ return "LG", map_global[label:sub(3)] + end + if def then + -- [1-9] (local label definition) +@@ -586,8 +601,11 @@ local function parse_label(label, def) + if extname then + return "EXT", map_extern[extname] + end ++ -- &expr (pointer) ++ if label:sub(1, 1) == "&" then ++ return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2)) ++ end + end +- werror("bad label `"..label.."'") + end + + local function branch_type(op) +@@ -620,24 +638,24 @@ local function alias_bfx(p) + end + + local function alias_bfiz(p) +- parse_reg(p[1]) ++ parse_reg(p[1], 0) + if parse_reg_type == "w" then +- p[3] = "#-("..p[3]:sub(2)..")%32" ++ p[3] = "#(32-("..p[3]:sub(2).."))%32" + p[4] = "#("..p[4]:sub(2)..")-1" + else +- p[3] = "#-("..p[3]:sub(2)..")%64" ++ p[3] = "#(64-("..p[3]:sub(2).."))%64" + p[4] = "#("..p[4]:sub(2)..")-1" + end + end + + local alias_lslimm = op_alias("ubfm_4", function(p) +- parse_reg(p[1]) ++ parse_reg(p[1], 0) + local sh = p[3]:sub(2) + if parse_reg_type == "w" then +- p[3] = "#-("..sh..")%32" ++ p[3] = "#(32-("..sh.."))%32" + p[4] = "#31-("..sh..")" + else +- p[3] = "#-("..sh..")%64" ++ p[3] = "#(64-("..sh.."))%64" + p[4] = "#63-("..sh..")" + end + end) +@@ -881,25 +899,25 @@ end + + -- Handle opcodes defined with template strings. + local function parse_template(params, template, nparams, pos) +- local op = tonumber(sub(template, 1, 8), 16) ++ local op = tonumber(template:sub(1, 8), 16) + local n = 1 + local rtt = {} + + parse_reg_type = false + + -- Process each character. 
+- for p in gmatch(sub(template, 9), ".") do ++ for p in gmatch(template:sub(9), ".") do + local q = params[n] + if p == "D" then +- op = op + parse_reg(q); n = n + 1 ++ op = op + parse_reg(q, 0); n = n + 1 + elseif p == "N" then +- op = op + shl(parse_reg(q), 5); n = n + 1 ++ op = op + parse_reg(q, 5); n = n + 1 + elseif p == "M" then +- op = op + shl(parse_reg(q), 16); n = n + 1 ++ op = op + parse_reg(q, 16); n = n + 1 + elseif p == "A" then +- op = op + shl(parse_reg(q), 10); n = n + 1 ++ op = op + parse_reg(q, 10); n = n + 1 + elseif p == "m" then +- op = op + shl(parse_reg(params[n-1]), 16) ++ op = op + parse_reg(params[n-1], 16) + + elseif p == "p" then + if q == "sp" then params[n] = "@x31" end +@@ -930,8 +948,14 @@ local function parse_template(params, template, nparams, pos) + + elseif p == "B" then + local mode, v, s = parse_label(q, false); n = n + 1 ++ if not mode then werror("bad label `"..q.."'") end + local m = branch_type(op) +- waction("REL_"..mode, v+m, s, 1) ++ if mode == "A" then ++ waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s)) ++ actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s) ++ else ++ waction("REL_"..mode, v+m, s, 1) ++ end + + elseif p == "I" then + op = op + parse_imm12(q); n = n + 1 +@@ -977,8 +1001,8 @@ function op_template(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). +- -- A single opcode needs a maximum of 3 positions. +- if secpos+3 > maxsecpos then wflush() end ++ -- A single opcode needs a maximum of 4 positions. ++ if secpos+4 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + +@@ -990,9 +1014,11 @@ function op_template(params, template, nparams) + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil ++ actlist[lpos+4] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil ++ actargs[apos+4] = nil + end + error(err, 0) + end +@@ -1036,23 +1062,50 @@ map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) +- if mode == "EXT" then werror("bad label definition") end ++ if not mode or mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) + end + + ------------------------------------------------------------------------------ + + -- Pseudo-opcodes for data storage. +-map_op[".long_*"] = function(params) ++local function op_data(params) + if not params then return "imm..." 
end ++ local sz = params.op == ".long" and 4 or 8 + for _,p in ipairs(params) do +- local n = tonumber(p) +- if not n then werror("bad immediate `"..p.."'") end +- if n < 0 then n = n + 2^32 end +- wputw(n) ++ local imm = parse_number(p) ++ if imm then ++ local n = tobit(imm) ++ if n == imm or (n < 0 and n + 2^32 == imm) then ++ wputw(n < 0 and n + 2^32 or n) ++ if sz == 8 then ++ wputw(imm < 0 and 0xffffffff or 0) ++ end ++ elseif sz == 4 then ++ werror("bad immediate `"..p.."'") ++ else ++ imm = nil ++ end ++ end ++ if not imm then ++ local mode, v, s = parse_label(p, false) ++ if sz == 4 then ++ if mode then werror("label does not fit into .long") end ++ waction("IMMV", 0, p) ++ elseif mode and mode ~= "A" then ++ waction("REL_"..mode, v+0x8000, s, 1) ++ else ++ if mode == "A" then p = s end ++ waction("IMMV", 0, format("(unsigned int)(%s)", p)) ++ waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p)) ++ end ++ end + if secpos+2 > maxsecpos then wflush() end + end + end ++map_op[".long_*"] = op_data ++map_op[".quad_*"] = op_data ++map_op[".addr_*"] = op_data + + -- Alignment pseudo-opcode. + map_op[".align_1"] = function(params) +diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h +index 4b49fd8c..b99b56b0 100644 +--- a/dynasm/dasm_mips.h ++++ b/dynasm/dasm_mips.h +@@ -1,6 +1,6 @@ + /* + ** DynASM MIPS encoding engine. +-** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. + ** Released under the MIT license. See dynasm.lua for full copyright notice. + */ + +@@ -273,7 +273,7 @@ int dasm_link(Dst_DECL, size_t *szp) + + { /* Handle globals not defined in this translation unit. */ + int idx; +- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } +@@ -349,19 +349,24 @@ int dasm_encode(Dst_DECL, void *buffer) + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: +- CK(n >= 0, UNDEF_LG); ++ if (n < 0) { ++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); ++ goto patchrel; ++ } ++ /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n); + if (ins & 2048) +- n = n - (int)((char *)cp - base); +- else + n = (n + (int)(size_t)base) & 0x0fffffff; +- patchrel: ++ else ++ n = n - (int)((char *)cp - base); ++ patchrel: { ++ unsigned int e = 16 + ((ins >> 12) & 15); + CK((n & 3) == 0 && +- ((n + ((ins & 2048) ? 0x00020000 : 0)) >> +- ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); +- cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); ++ ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); ++ cp[-1] |= ((n>>2) & ((1<<e)-1)); ++ } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); +diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua +index 78a4e34a..59147015 100644 +--- a/dynasm/dasm_mips.lua ++++ b/dynasm/dasm_mips.lua +@@ -1,19 +1,20 @@ + ------------------------------------------------------------------------------ + -- DynASM MIPS32/MIPS64 module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See dynasm.lua for full copyright notice. 
+ ------------------------------------------------------------------------------ + + local mips64 = mips64 ++local mipsr6 = _map_def.MIPSR6 + + -- Module information: + local _info = { + arch = mips64 and "mips64" or "mips", + description = "DynASM MIPS32/MIPS64 module", +- version = "1.4.0", +- vernum = 10400, +- release = "2016-05-24", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2021-05-02", + author = "Mike Pall", + license = "MIT", + } +@@ -238,7 +239,6 @@ local map_op = { + bne_3 = "14000000STB", + blez_2 = "18000000SB", + bgtz_2 = "1c000000SB", +- addi_3 = "20000000TSI", + li_2 = "24000000TI", + addiu_3 = "24000000TSI", + slti_3 = "28000000TSI", +@@ -248,40 +248,22 @@ local map_op = { + ori_3 = "34000000TSU", + xori_3 = "38000000TSU", + lui_2 = "3c000000TU", +- beqzl_2 = "50000000SB", +- beql_3 = "50000000STB", +- bnezl_2 = "54000000SB", +- bnel_3 = "54000000STB", +- blezl_2 = "58000000SB", +- bgtzl_2 = "5c000000SB", +- daddi_3 = mips64 and "60000000TSI", + daddiu_3 = mips64 and "64000000TSI", + ldl_2 = mips64 and "68000000TO", + ldr_2 = mips64 and "6c000000TO", + lb_2 = "80000000TO", + lh_2 = "84000000TO", +- lwl_2 = "88000000TO", + lw_2 = "8c000000TO", + lbu_2 = "90000000TO", + lhu_2 = "94000000TO", +- lwr_2 = "98000000TO", + lwu_2 = mips64 and "9c000000TO", + sb_2 = "a0000000TO", + sh_2 = "a4000000TO", +- swl_2 = "a8000000TO", + sw_2 = "ac000000TO", +- sdl_2 = mips64 and "b0000000TO", +- sdr_2 = mips64 and "b1000000TO", +- swr_2 = "b8000000TO", +- cache_2 = "bc000000NO", +- ll_2 = "c0000000TO", + lwc1_2 = "c4000000HO", +- pref_2 = "cc000000NO", + ldc1_2 = "d4000000HO", + ld_2 = mips64 and "dc000000TO", +- sc_2 = "e0000000TO", + swc1_2 = "e4000000HO", +- scd_2 = mips64 and "f0000000TO", + sdc1_2 = "f4000000HO", + sd_2 = mips64 and "fc000000TO", + +@@ -289,10 +271,6 @@ local map_op = { + nop_0 = "00000000", + sll_3 = "00000000DTA", + sextw_2 = "00000000DT", +- movf_2 = "00000001DS", +- movf_3 = "00000001DSC", +- movt_2 = "00010001DS", +- movt_3 = "00010001DSC", + srl_3 = "00000002DTA", + rotr_3 = "00200002DTA", + sra_3 = "00000003DTA", +@@ -301,31 +279,16 @@ local map_op = { + rotrv_3 = "00000046DTS", + drotrv_3 = mips64 and "00000056DTS", + srav_3 = "00000007DTS", +- jr_1 = "00000008S", + jalr_1 = "0000f809S", + jalr_2 = "00000009DS", +- movz_3 = "0000000aDST", +- movn_3 = "0000000bDST", + syscall_0 = "0000000c", + syscall_1 = "0000000cY", + break_0 = "0000000d", + break_1 = "0000000dY", + sync_0 = "0000000f", +- mfhi_1 = "00000010D", +- mthi_1 = "00000011S", +- mflo_1 = "00000012D", +- mtlo_1 = "00000013S", + dsllv_3 = mips64 and "00000014DTS", + dsrlv_3 = mips64 and "00000016DTS", + dsrav_3 = mips64 and "00000017DTS", +- mult_2 = "00000018ST", +- multu_2 = "00000019ST", +- div_2 = "0000001aST", +- divu_2 = "0000001bST", +- dmult_2 = mips64 and "0000001cST", +- dmultu_2 = mips64 and "0000001dST", +- ddiv_2 = mips64 and "0000001eST", +- ddivu_2 = mips64 and "0000001fST", + add_3 = "00000020DST", + move_2 = mips64 and "00000025DS" or "00000021DS", + addu_3 = "00000021DST", +@@ -369,32 +332,9 @@ local map_op = { + bgez_2 = "04010000SB", + bltzl_2 = "04020000SB", + bgezl_2 = "04030000SB", +- tgei_2 = "04080000SI", +- tgeiu_2 = "04090000SI", +- tlti_2 = "040a0000SI", +- tltiu_2 = "040b0000SI", +- teqi_2 = "040c0000SI", +- tnei_2 = "040e0000SI", +- bltzal_2 = "04100000SB", + bal_1 = "04110000B", +- bgezal_2 = "04110000SB", +- bltzall_2 = "04120000SB", +- bgezall_2 = "04130000SB", + synci_1 = "041f0000O", + +- -- Opcode SPECIAL2. 
+- madd_2 = "70000000ST", +- maddu_2 = "70000001ST", +- mul_3 = "70000002DST", +- msub_2 = "70000004ST", +- msubu_2 = "70000005ST", +- clz_2 = "70000020DS=", +- clo_2 = "70000021DS=", +- dclz_2 = mips64 and "70000024DS=", +- dclo_2 = mips64 and "70000025DS=", +- sdbbp_0 = "7000003f", +- sdbbp_1 = "7000003fY", +- + -- Opcode SPECIAL3. + ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 + dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 +@@ -445,15 +385,6 @@ local map_op = { + ctc1_2 = "44c00000TG", + mthc1_2 = "44e00000TG", + +- bc1f_1 = "45000000B", +- bc1f_2 = "45000000CB", +- bc1t_1 = "45010000B", +- bc1t_2 = "45010000CB", +- bc1fl_1 = "45020000B", +- bc1fl_2 = "45020000CB", +- bc1tl_1 = "45030000B", +- bc1tl_2 = "45030000CB", +- + ["add.s_3"] = "46000000FGH", + ["sub.s_3"] = "46000001FGH", + ["mul.s_3"] = "46000002FGH", +@@ -470,51 +401,11 @@ local map_op = { + ["trunc.w.s_2"] = "4600000dFG", + ["ceil.w.s_2"] = "4600000eFG", + ["floor.w.s_2"] = "4600000fFG", +- ["movf.s_2"] = "46000011FG", +- ["movf.s_3"] = "46000011FGC", +- ["movt.s_2"] = "46010011FG", +- ["movt.s_3"] = "46010011FGC", +- ["movz.s_3"] = "46000012FGT", +- ["movn.s_3"] = "46000013FGT", + ["recip.s_2"] = "46000015FG", + ["rsqrt.s_2"] = "46000016FG", + ["cvt.d.s_2"] = "46000021FG", + ["cvt.w.s_2"] = "46000024FG", + ["cvt.l.s_2"] = "46000025FG", +- ["cvt.ps.s_3"] = "46000026FGH", +- ["c.f.s_2"] = "46000030GH", +- ["c.f.s_3"] = "46000030VGH", +- ["c.un.s_2"] = "46000031GH", +- ["c.un.s_3"] = "46000031VGH", +- ["c.eq.s_2"] = "46000032GH", +- ["c.eq.s_3"] = "46000032VGH", +- ["c.ueq.s_2"] = "46000033GH", +- ["c.ueq.s_3"] = "46000033VGH", +- ["c.olt.s_2"] = "46000034GH", +- ["c.olt.s_3"] = "46000034VGH", +- ["c.ult.s_2"] = "46000035GH", +- ["c.ult.s_3"] = "46000035VGH", +- ["c.ole.s_2"] = "46000036GH", +- ["c.ole.s_3"] = "46000036VGH", +- ["c.ule.s_2"] = "46000037GH", +- ["c.ule.s_3"] = "46000037VGH", +- ["c.sf.s_2"] = "46000038GH", +- ["c.sf.s_3"] = "46000038VGH", +- ["c.ngle.s_2"] = "46000039GH", +- ["c.ngle.s_3"] = "46000039VGH", +- ["c.seq.s_2"] = "4600003aGH", +- ["c.seq.s_3"] = "4600003aVGH", +- ["c.ngl.s_2"] = "4600003bGH", +- ["c.ngl.s_3"] = "4600003bVGH", +- ["c.lt.s_2"] = "4600003cGH", +- ["c.lt.s_3"] = "4600003cVGH", +- ["c.nge.s_2"] = "4600003dGH", +- ["c.nge.s_3"] = "4600003dVGH", +- ["c.le.s_2"] = "4600003eGH", +- ["c.le.s_3"] = "4600003eVGH", +- ["c.ngt.s_2"] = "4600003fGH", +- ["c.ngt.s_3"] = "4600003fVGH", +- + ["add.d_3"] = "46200000FGH", + ["sub.d_3"] = "46200001FGH", + ["mul.d_3"] = "46200002FGH", +@@ -531,130 +422,410 @@ local map_op = { + ["trunc.w.d_2"] = "4620000dFG", + ["ceil.w.d_2"] = "4620000eFG", + ["floor.w.d_2"] = "4620000fFG", +- ["movf.d_2"] = "46200011FG", +- ["movf.d_3"] = "46200011FGC", +- ["movt.d_2"] = "46210011FG", +- ["movt.d_3"] = "46210011FGC", +- ["movz.d_3"] = "46200012FGT", +- ["movn.d_3"] = "46200013FGT", + ["recip.d_2"] = "46200015FG", + ["rsqrt.d_2"] = "46200016FG", + ["cvt.s.d_2"] = "46200020FG", + ["cvt.w.d_2"] = "46200024FG", + ["cvt.l.d_2"] = "46200025FG", +- ["c.f.d_2"] = "46200030GH", +- ["c.f.d_3"] = "46200030VGH", +- ["c.un.d_2"] = "46200031GH", +- ["c.un.d_3"] = "46200031VGH", +- ["c.eq.d_2"] = "46200032GH", +- ["c.eq.d_3"] = "46200032VGH", +- ["c.ueq.d_2"] = "46200033GH", +- ["c.ueq.d_3"] = "46200033VGH", +- ["c.olt.d_2"] = "46200034GH", +- ["c.olt.d_3"] = "46200034VGH", +- ["c.ult.d_2"] = "46200035GH", +- ["c.ult.d_3"] = "46200035VGH", +- ["c.ole.d_2"] = "46200036GH", +- ["c.ole.d_3"] = "46200036VGH", +- ["c.ule.d_2"] = "46200037GH", +- ["c.ule.d_3"] 
= "46200037VGH", +- ["c.sf.d_2"] = "46200038GH", +- ["c.sf.d_3"] = "46200038VGH", +- ["c.ngle.d_2"] = "46200039GH", +- ["c.ngle.d_3"] = "46200039VGH", +- ["c.seq.d_2"] = "4620003aGH", +- ["c.seq.d_3"] = "4620003aVGH", +- ["c.ngl.d_2"] = "4620003bGH", +- ["c.ngl.d_3"] = "4620003bVGH", +- ["c.lt.d_2"] = "4620003cGH", +- ["c.lt.d_3"] = "4620003cVGH", +- ["c.nge.d_2"] = "4620003dGH", +- ["c.nge.d_3"] = "4620003dVGH", +- ["c.le.d_2"] = "4620003eGH", +- ["c.le.d_3"] = "4620003eVGH", +- ["c.ngt.d_2"] = "4620003fGH", +- ["c.ngt.d_3"] = "4620003fVGH", +- +- ["add.ps_3"] = "46c00000FGH", +- ["sub.ps_3"] = "46c00001FGH", +- ["mul.ps_3"] = "46c00002FGH", +- ["abs.ps_2"] = "46c00005FG", +- ["mov.ps_2"] = "46c00006FG", +- ["neg.ps_2"] = "46c00007FG", +- ["movf.ps_2"] = "46c00011FG", +- ["movf.ps_3"] = "46c00011FGC", +- ["movt.ps_2"] = "46c10011FG", +- ["movt.ps_3"] = "46c10011FGC", +- ["movz.ps_3"] = "46c00012FGT", +- ["movn.ps_3"] = "46c00013FGT", +- ["cvt.s.pu_2"] = "46c00020FG", +- ["cvt.s.pl_2"] = "46c00028FG", +- ["pll.ps_3"] = "46c0002cFGH", +- ["plu.ps_3"] = "46c0002dFGH", +- ["pul.ps_3"] = "46c0002eFGH", +- ["puu.ps_3"] = "46c0002fFGH", +- ["c.f.ps_2"] = "46c00030GH", +- ["c.f.ps_3"] = "46c00030VGH", +- ["c.un.ps_2"] = "46c00031GH", +- ["c.un.ps_3"] = "46c00031VGH", +- ["c.eq.ps_2"] = "46c00032GH", +- ["c.eq.ps_3"] = "46c00032VGH", +- ["c.ueq.ps_2"] = "46c00033GH", +- ["c.ueq.ps_3"] = "46c00033VGH", +- ["c.olt.ps_2"] = "46c00034GH", +- ["c.olt.ps_3"] = "46c00034VGH", +- ["c.ult.ps_2"] = "46c00035GH", +- ["c.ult.ps_3"] = "46c00035VGH", +- ["c.ole.ps_2"] = "46c00036GH", +- ["c.ole.ps_3"] = "46c00036VGH", +- ["c.ule.ps_2"] = "46c00037GH", +- ["c.ule.ps_3"] = "46c00037VGH", +- ["c.sf.ps_2"] = "46c00038GH", +- ["c.sf.ps_3"] = "46c00038VGH", +- ["c.ngle.ps_2"] = "46c00039GH", +- ["c.ngle.ps_3"] = "46c00039VGH", +- ["c.seq.ps_2"] = "46c0003aGH", +- ["c.seq.ps_3"] = "46c0003aVGH", +- ["c.ngl.ps_2"] = "46c0003bGH", +- ["c.ngl.ps_3"] = "46c0003bVGH", +- ["c.lt.ps_2"] = "46c0003cGH", +- ["c.lt.ps_3"] = "46c0003cVGH", +- ["c.nge.ps_2"] = "46c0003dGH", +- ["c.nge.ps_3"] = "46c0003dVGH", +- ["c.le.ps_2"] = "46c0003eGH", +- ["c.le.ps_3"] = "46c0003eVGH", +- ["c.ngt.ps_2"] = "46c0003fGH", +- ["c.ngt.ps_3"] = "46c0003fVGH", +- + ["cvt.s.w_2"] = "46800020FG", + ["cvt.d.w_2"] = "46800021FG", +- + ["cvt.s.l_2"] = "46a00020FG", + ["cvt.d.l_2"] = "46a00021FG", +- +- -- Opcode COP1X. +- lwxc1_2 = "4c000000FX", +- ldxc1_2 = "4c000001FX", +- luxc1_2 = "4c000005FX", +- swxc1_2 = "4c000008FX", +- sdxc1_2 = "4c000009FX", +- suxc1_2 = "4c00000dFX", +- prefx_2 = "4c00000fMX", +- ["alnv.ps_4"] = "4c00001eFGHS", +- ["madd.s_4"] = "4c000020FRGH", +- ["madd.d_4"] = "4c000021FRGH", +- ["madd.ps_4"] = "4c000026FRGH", +- ["msub.s_4"] = "4c000028FRGH", +- ["msub.d_4"] = "4c000029FRGH", +- ["msub.ps_4"] = "4c00002eFRGH", +- ["nmadd.s_4"] = "4c000030FRGH", +- ["nmadd.d_4"] = "4c000031FRGH", +- ["nmadd.ps_4"] = "4c000036FRGH", +- ["nmsub.s_4"] = "4c000038FRGH", +- ["nmsub.d_4"] = "4c000039FRGH", +- ["nmsub.ps_4"] = "4c00003eFRGH", + } + ++if mipsr6 then -- Instructions added with MIPSR6. ++ ++ for k,v in pairs({ ++ ++ -- Add immediate to upper bits. ++ aui_3 = "3c000000TSI", ++ daui_3 = mips64 and "74000000TSI", ++ dahi_2 = mips64 and "04060000SI", ++ dati_2 = mips64 and "041e0000SI", ++ ++ -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc. ++ ++ -- Compact branches. ++ blezalc_2 = "18000000TB", -- rt != 0. ++ bgezalc_2 = "18000000T=SB", -- rt != 0. ++ bgtzalc_2 = "1c000000TB", -- rt != 0. ++ bltzalc_2 = "1c000000T=SB", -- rt != 0. 
++ ++ blezc_2 = "58000000TB", -- rt != 0. ++ bgezc_2 = "58000000T=SB", -- rt != 0. ++ bgec_3 = "58000000STB", -- rs != rt. ++ blec_3 = "58000000TSB", -- rt != rs. ++ ++ bgtzc_2 = "5c000000TB", -- rt != 0. ++ bltzc_2 = "5c000000T=SB", -- rt != 0. ++ bltc_3 = "5c000000STB", -- rs != rt. ++ bgtc_3 = "5c000000TSB", -- rt != rs. ++ ++ bgeuc_3 = "18000000STB", -- rs != rt. ++ bleuc_3 = "18000000TSB", -- rt != rs. ++ bltuc_3 = "1c000000STB", -- rs != rt. ++ bgtuc_3 = "1c000000TSB", -- rt != rs. ++ ++ beqzalc_2 = "20000000TB", -- rt != 0. ++ bnezalc_2 = "60000000TB", -- rt != 0. ++ beqc_3 = "20000000STB", -- rs < rt. ++ bnec_3 = "60000000STB", -- rs < rt. ++ bovc_3 = "20000000STB", -- rs >= rt. ++ bnvc_3 = "60000000STB", -- rs >= rt. ++ ++ beqzc_2 = "d8000000SK", -- rs != 0. ++ bnezc_2 = "f8000000SK", -- rs != 0. ++ jic_2 = "d8000000TI", ++ jialc_2 = "f8000000TI", ++ bc_1 = "c8000000L", ++ balc_1 = "e8000000L", ++ ++ -- Opcode SPECIAL. ++ jr_1 = "00000009S", ++ sdbbp_0 = "0000000e", ++ sdbbp_1 = "0000000eY", ++ lsa_4 = "00000005DSTA", ++ dlsa_4 = mips64 and "00000015DSTA", ++ seleqz_3 = "00000035DST", ++ selnez_3 = "00000037DST", ++ clz_2 = "00000050DS", ++ clo_2 = "00000051DS", ++ dclz_2 = mips64 and "00000052DS", ++ dclo_2 = mips64 and "00000053DS", ++ mul_3 = "00000098DST", ++ muh_3 = "000000d8DST", ++ mulu_3 = "00000099DST", ++ muhu_3 = "000000d9DST", ++ div_3 = "0000009aDST", ++ mod_3 = "000000daDST", ++ divu_3 = "0000009bDST", ++ modu_3 = "000000dbDST", ++ dmul_3 = mips64 and "0000009cDST", ++ dmuh_3 = mips64 and "000000dcDST", ++ dmulu_3 = mips64 and "0000009dDST", ++ dmuhu_3 = mips64 and "000000ddDST", ++ ddiv_3 = mips64 and "0000009eDST", ++ dmod_3 = mips64 and "000000deDST", ++ ddivu_3 = mips64 and "0000009fDST", ++ dmodu_3 = mips64 and "000000dfDST", ++ ++ -- Opcode SPECIAL3. ++ align_4 = "7c000220DSTA", ++ dalign_4 = mips64 and "7c000224DSTA", ++ bitswap_2 = "7c000020DT", ++ dbitswap_2 = mips64 and "7c000024DT", ++ ++ -- Opcode COP1. 
++ bc1eqz_2 = "45200000HB", ++ bc1nez_2 = "45a00000HB", ++ ++ ["sel.s_3"] = "46000010FGH", ++ ["seleqz.s_3"] = "46000014FGH", ++ ["selnez.s_3"] = "46000017FGH", ++ ["maddf.s_3"] = "46000018FGH", ++ ["msubf.s_3"] = "46000019FGH", ++ ["rint.s_2"] = "4600001aFG", ++ ["class.s_2"] = "4600001bFG", ++ ["min.s_3"] = "4600001cFGH", ++ ["mina.s_3"] = "4600001dFGH", ++ ["max.s_3"] = "4600001eFGH", ++ ["maxa.s_3"] = "4600001fFGH", ++ ["cmp.af.s_3"] = "46800000FGH", ++ ["cmp.un.s_3"] = "46800001FGH", ++ ["cmp.or.s_3"] = "46800011FGH", ++ ["cmp.eq.s_3"] = "46800002FGH", ++ ["cmp.une.s_3"] = "46800012FGH", ++ ["cmp.ueq.s_3"] = "46800003FGH", ++ ["cmp.ne.s_3"] = "46800013FGH", ++ ["cmp.lt.s_3"] = "46800004FGH", ++ ["cmp.ult.s_3"] = "46800005FGH", ++ ["cmp.le.s_3"] = "46800006FGH", ++ ["cmp.ule.s_3"] = "46800007FGH", ++ ["cmp.saf.s_3"] = "46800008FGH", ++ ["cmp.sun.s_3"] = "46800009FGH", ++ ["cmp.sor.s_3"] = "46800019FGH", ++ ["cmp.seq.s_3"] = "4680000aFGH", ++ ["cmp.sune.s_3"] = "4680001aFGH", ++ ["cmp.sueq.s_3"] = "4680000bFGH", ++ ["cmp.sne.s_3"] = "4680001bFGH", ++ ["cmp.slt.s_3"] = "4680000cFGH", ++ ["cmp.sult.s_3"] = "4680000dFGH", ++ ["cmp.sle.s_3"] = "4680000eFGH", ++ ["cmp.sule.s_3"] = "4680000fFGH", ++ ++ ["sel.d_3"] = "46200010FGH", ++ ["seleqz.d_3"] = "46200014FGH", ++ ["selnez.d_3"] = "46200017FGH", ++ ["maddf.d_3"] = "46200018FGH", ++ ["msubf.d_3"] = "46200019FGH", ++ ["rint.d_2"] = "4620001aFG", ++ ["class.d_2"] = "4620001bFG", ++ ["min.d_3"] = "4620001cFGH", ++ ["mina.d_3"] = "4620001dFGH", ++ ["max.d_3"] = "4620001eFGH", ++ ["maxa.d_3"] = "4620001fFGH", ++ ["cmp.af.d_3"] = "46a00000FGH", ++ ["cmp.un.d_3"] = "46a00001FGH", ++ ["cmp.or.d_3"] = "46a00011FGH", ++ ["cmp.eq.d_3"] = "46a00002FGH", ++ ["cmp.une.d_3"] = "46a00012FGH", ++ ["cmp.ueq.d_3"] = "46a00003FGH", ++ ["cmp.ne.d_3"] = "46a00013FGH", ++ ["cmp.lt.d_3"] = "46a00004FGH", ++ ["cmp.ult.d_3"] = "46a00005FGH", ++ ["cmp.le.d_3"] = "46a00006FGH", ++ ["cmp.ule.d_3"] = "46a00007FGH", ++ ["cmp.saf.d_3"] = "46a00008FGH", ++ ["cmp.sun.d_3"] = "46a00009FGH", ++ ["cmp.sor.d_3"] = "46a00019FGH", ++ ["cmp.seq.d_3"] = "46a0000aFGH", ++ ["cmp.sune.d_3"] = "46a0001aFGH", ++ ["cmp.sueq.d_3"] = "46a0000bFGH", ++ ["cmp.sne.d_3"] = "46a0001bFGH", ++ ["cmp.slt.d_3"] = "46a0000cFGH", ++ ["cmp.sult.d_3"] = "46a0000dFGH", ++ ["cmp.sle.d_3"] = "46a0000eFGH", ++ ["cmp.sule.d_3"] = "46a0000fFGH", ++ ++ }) do map_op[k] = v end ++ ++else -- Instructions removed by MIPSR6. ++ ++ for k,v in pairs({ ++ -- Traps, don't use. ++ addi_3 = "20000000TSI", ++ daddi_3 = mips64 and "60000000TSI", ++ ++ -- Branch on likely, don't use. ++ beqzl_2 = "50000000SB", ++ beql_3 = "50000000STB", ++ bnezl_2 = "54000000SB", ++ bnel_3 = "54000000STB", ++ blezl_2 = "58000000SB", ++ bgtzl_2 = "5c000000SB", ++ ++ lwl_2 = "88000000TO", ++ lwr_2 = "98000000TO", ++ swl_2 = "a8000000TO", ++ sdl_2 = mips64 and "b0000000TO", ++ sdr_2 = mips64 and "b1000000TO", ++ swr_2 = "b8000000TO", ++ cache_2 = "bc000000NO", ++ ll_2 = "c0000000TO", ++ pref_2 = "cc000000NO", ++ sc_2 = "e0000000TO", ++ scd_2 = mips64 and "f0000000TO", ++ ++ -- Opcode SPECIAL. 
++ movf_2 = "00000001DS", ++ movf_3 = "00000001DSC", ++ movt_2 = "00010001DS", ++ movt_3 = "00010001DSC", ++ jr_1 = "00000008S", ++ movz_3 = "0000000aDST", ++ movn_3 = "0000000bDST", ++ mfhi_1 = "00000010D", ++ mthi_1 = "00000011S", ++ mflo_1 = "00000012D", ++ mtlo_1 = "00000013S", ++ mult_2 = "00000018ST", ++ multu_2 = "00000019ST", ++ div_3 = "0000001aST", ++ divu_3 = "0000001bST", ++ ddiv_3 = mips64 and "0000001eST", ++ ddivu_3 = mips64 and "0000001fST", ++ dmult_2 = mips64 and "0000001cST", ++ dmultu_2 = mips64 and "0000001dST", ++ ++ -- Opcode REGIMM. ++ tgei_2 = "04080000SI", ++ tgeiu_2 = "04090000SI", ++ tlti_2 = "040a0000SI", ++ tltiu_2 = "040b0000SI", ++ teqi_2 = "040c0000SI", ++ tnei_2 = "040e0000SI", ++ bltzal_2 = "04100000SB", ++ bgezal_2 = "04110000SB", ++ bltzall_2 = "04120000SB", ++ bgezall_2 = "04130000SB", ++ ++ -- Opcode SPECIAL2. ++ madd_2 = "70000000ST", ++ maddu_2 = "70000001ST", ++ mul_3 = "70000002DST", ++ msub_2 = "70000004ST", ++ msubu_2 = "70000005ST", ++ clz_2 = "70000020D=TS", ++ clo_2 = "70000021D=TS", ++ dclz_2 = mips64 and "70000024D=TS", ++ dclo_2 = mips64 and "70000025D=TS", ++ sdbbp_0 = "7000003f", ++ sdbbp_1 = "7000003fY", ++ ++ -- Opcode COP1. ++ bc1f_1 = "45000000B", ++ bc1f_2 = "45000000CB", ++ bc1t_1 = "45010000B", ++ bc1t_2 = "45010000CB", ++ bc1fl_1 = "45020000B", ++ bc1fl_2 = "45020000CB", ++ bc1tl_1 = "45030000B", ++ bc1tl_2 = "45030000CB", ++ ++ ["movf.s_2"] = "46000011FG", ++ ["movf.s_3"] = "46000011FGC", ++ ["movt.s_2"] = "46010011FG", ++ ["movt.s_3"] = "46010011FGC", ++ ["movz.s_3"] = "46000012FGT", ++ ["movn.s_3"] = "46000013FGT", ++ ["cvt.ps.s_3"] = "46000026FGH", ++ ["c.f.s_2"] = "46000030GH", ++ ["c.f.s_3"] = "46000030VGH", ++ ["c.un.s_2"] = "46000031GH", ++ ["c.un.s_3"] = "46000031VGH", ++ ["c.eq.s_2"] = "46000032GH", ++ ["c.eq.s_3"] = "46000032VGH", ++ ["c.ueq.s_2"] = "46000033GH", ++ ["c.ueq.s_3"] = "46000033VGH", ++ ["c.olt.s_2"] = "46000034GH", ++ ["c.olt.s_3"] = "46000034VGH", ++ ["c.ult.s_2"] = "46000035GH", ++ ["c.ult.s_3"] = "46000035VGH", ++ ["c.ole.s_2"] = "46000036GH", ++ ["c.ole.s_3"] = "46000036VGH", ++ ["c.ule.s_2"] = "46000037GH", ++ ["c.ule.s_3"] = "46000037VGH", ++ ["c.sf.s_2"] = "46000038GH", ++ ["c.sf.s_3"] = "46000038VGH", ++ ["c.ngle.s_2"] = "46000039GH", ++ ["c.ngle.s_3"] = "46000039VGH", ++ ["c.seq.s_2"] = "4600003aGH", ++ ["c.seq.s_3"] = "4600003aVGH", ++ ["c.ngl.s_2"] = "4600003bGH", ++ ["c.ngl.s_3"] = "4600003bVGH", ++ ["c.lt.s_2"] = "4600003cGH", ++ ["c.lt.s_3"] = "4600003cVGH", ++ ["c.nge.s_2"] = "4600003dGH", ++ ["c.nge.s_3"] = "4600003dVGH", ++ ["c.le.s_2"] = "4600003eGH", ++ ["c.le.s_3"] = "4600003eVGH", ++ ["c.ngt.s_2"] = "4600003fGH", ++ ["c.ngt.s_3"] = "4600003fVGH", ++ ["movf.d_2"] = "46200011FG", ++ ["movf.d_3"] = "46200011FGC", ++ ["movt.d_2"] = "46210011FG", ++ ["movt.d_3"] = "46210011FGC", ++ ["movz.d_3"] = "46200012FGT", ++ ["movn.d_3"] = "46200013FGT", ++ ["c.f.d_2"] = "46200030GH", ++ ["c.f.d_3"] = "46200030VGH", ++ ["c.un.d_2"] = "46200031GH", ++ ["c.un.d_3"] = "46200031VGH", ++ ["c.eq.d_2"] = "46200032GH", ++ ["c.eq.d_3"] = "46200032VGH", ++ ["c.ueq.d_2"] = "46200033GH", ++ ["c.ueq.d_3"] = "46200033VGH", ++ ["c.olt.d_2"] = "46200034GH", ++ ["c.olt.d_3"] = "46200034VGH", ++ ["c.ult.d_2"] = "46200035GH", ++ ["c.ult.d_3"] = "46200035VGH", ++ ["c.ole.d_2"] = "46200036GH", ++ ["c.ole.d_3"] = "46200036VGH", ++ ["c.ule.d_2"] = "46200037GH", ++ ["c.ule.d_3"] = "46200037VGH", ++ ["c.sf.d_2"] = "46200038GH", ++ ["c.sf.d_3"] = "46200038VGH", ++ ["c.ngle.d_2"] = "46200039GH", ++ ["c.ngle.d_3"] = 
"46200039VGH", ++ ["c.seq.d_2"] = "4620003aGH", ++ ["c.seq.d_3"] = "4620003aVGH", ++ ["c.ngl.d_2"] = "4620003bGH", ++ ["c.ngl.d_3"] = "4620003bVGH", ++ ["c.lt.d_2"] = "4620003cGH", ++ ["c.lt.d_3"] = "4620003cVGH", ++ ["c.nge.d_2"] = "4620003dGH", ++ ["c.nge.d_3"] = "4620003dVGH", ++ ["c.le.d_2"] = "4620003eGH", ++ ["c.le.d_3"] = "4620003eVGH", ++ ["c.ngt.d_2"] = "4620003fGH", ++ ["c.ngt.d_3"] = "4620003fVGH", ++ ["add.ps_3"] = "46c00000FGH", ++ ["sub.ps_3"] = "46c00001FGH", ++ ["mul.ps_3"] = "46c00002FGH", ++ ["abs.ps_2"] = "46c00005FG", ++ ["mov.ps_2"] = "46c00006FG", ++ ["neg.ps_2"] = "46c00007FG", ++ ["movf.ps_2"] = "46c00011FG", ++ ["movf.ps_3"] = "46c00011FGC", ++ ["movt.ps_2"] = "46c10011FG", ++ ["movt.ps_3"] = "46c10011FGC", ++ ["movz.ps_3"] = "46c00012FGT", ++ ["movn.ps_3"] = "46c00013FGT", ++ ["cvt.s.pu_2"] = "46c00020FG", ++ ["cvt.s.pl_2"] = "46c00028FG", ++ ["pll.ps_3"] = "46c0002cFGH", ++ ["plu.ps_3"] = "46c0002dFGH", ++ ["pul.ps_3"] = "46c0002eFGH", ++ ["puu.ps_3"] = "46c0002fFGH", ++ ["c.f.ps_2"] = "46c00030GH", ++ ["c.f.ps_3"] = "46c00030VGH", ++ ["c.un.ps_2"] = "46c00031GH", ++ ["c.un.ps_3"] = "46c00031VGH", ++ ["c.eq.ps_2"] = "46c00032GH", ++ ["c.eq.ps_3"] = "46c00032VGH", ++ ["c.ueq.ps_2"] = "46c00033GH", ++ ["c.ueq.ps_3"] = "46c00033VGH", ++ ["c.olt.ps_2"] = "46c00034GH", ++ ["c.olt.ps_3"] = "46c00034VGH", ++ ["c.ult.ps_2"] = "46c00035GH", ++ ["c.ult.ps_3"] = "46c00035VGH", ++ ["c.ole.ps_2"] = "46c00036GH", ++ ["c.ole.ps_3"] = "46c00036VGH", ++ ["c.ule.ps_2"] = "46c00037GH", ++ ["c.ule.ps_3"] = "46c00037VGH", ++ ["c.sf.ps_2"] = "46c00038GH", ++ ["c.sf.ps_3"] = "46c00038VGH", ++ ["c.ngle.ps_2"] = "46c00039GH", ++ ["c.ngle.ps_3"] = "46c00039VGH", ++ ["c.seq.ps_2"] = "46c0003aGH", ++ ["c.seq.ps_3"] = "46c0003aVGH", ++ ["c.ngl.ps_2"] = "46c0003bGH", ++ ["c.ngl.ps_3"] = "46c0003bVGH", ++ ["c.lt.ps_2"] = "46c0003cGH", ++ ["c.lt.ps_3"] = "46c0003cVGH", ++ ["c.nge.ps_2"] = "46c0003dGH", ++ ["c.nge.ps_3"] = "46c0003dVGH", ++ ["c.le.ps_2"] = "46c0003eGH", ++ ["c.le.ps_3"] = "46c0003eVGH", ++ ["c.ngt.ps_2"] = "46c0003fGH", ++ ["c.ngt.ps_3"] = "46c0003fVGH", ++ ++ -- Opcode COP1X. 
++ lwxc1_2 = "4c000000FX", ++ ldxc1_2 = "4c000001FX", ++ luxc1_2 = "4c000005FX", ++ swxc1_2 = "4c000008FX", ++ sdxc1_2 = "4c000009FX", ++ suxc1_2 = "4c00000dFX", ++ prefx_2 = "4c00000fMX", ++ ["alnv.ps_4"] = "4c00001eFGHS", ++ ["madd.s_4"] = "4c000020FRGH", ++ ["madd.d_4"] = "4c000021FRGH", ++ ["madd.ps_4"] = "4c000026FRGH", ++ ["msub.s_4"] = "4c000028FRGH", ++ ["msub.d_4"] = "4c000029FRGH", ++ ["msub.ps_4"] = "4c00002eFRGH", ++ ["nmadd.s_4"] = "4c000030FRGH", ++ ["nmadd.d_4"] = "4c000031FRGH", ++ ["nmadd.ps_4"] = "4c000036FRGH", ++ ["nmsub.s_4"] = "4c000038FRGH", ++ ["nmsub.d_4"] = "4c000039FRGH", ++ ["nmsub.ps_4"] = "4c00003eFRGH", ++ ++ }) do map_op[k] = v end ++ ++end ++ + ------------------------------------------------------------------------------ + + local function parse_gpr(expr) +@@ -808,10 +979,12 @@ map_op[".template__"] = function(params, template, nparams) + op = op + parse_disp(params[n]); n = n + 1 + elseif p == "X" then + op = op + parse_index(params[n]); n = n + 1 +- elseif p == "B" or p == "J" then +- local mode, n, s = parse_label(params[n], false) +- if p == "B" then n = n + 2048 end +- waction("REL_"..mode, n, s, 1) ++ elseif p == "B" or p == "J" or p == "K" or p == "L" then ++ local mode, m, s = parse_label(params[n], false) ++ if p == "J" then m = m + 0xa800 ++ elseif p == "K" then m = m + 0x5000 ++ elseif p == "L" then m = m + 0xa000 end ++ waction("REL_"..mode, m, s, 1) + n = n + 1 + elseif p == "A" then + op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 +@@ -833,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams) + elseif p == "Z" then + op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 + elseif p == "=" then +- op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. ++ n = n - 1 -- Re-use previous parameter for next template char. + else + assert(false) + end +diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua +index 5636b23a..8ab5d33a 100644 +--- a/dynasm/dasm_mips64.lua ++++ b/dynasm/dasm_mips64.lua +@@ -1,7 +1,7 @@ + ------------------------------------------------------------------------------ + -- DynASM MIPS64 module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See dynasm.lua for full copyright notice. + ------------------------------------------------------------------------------ + -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. +diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h +index 3a7ee9b0..35264f2e 100644 +--- a/dynasm/dasm_ppc.h ++++ b/dynasm/dasm_ppc.h +@@ -1,6 +1,6 @@ + /* + ** DynASM PPC/PPC64 encoding engine. +-** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. + ** Released under the MIT license. See dynasm.lua for full copyright notice. + */ + +@@ -277,7 +277,7 @@ int dasm_link(Dst_DECL, size_t *szp) + + { /* Handle globals not defined in this translation unit. */ + int idx; +- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). 
*/ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } +@@ -353,7 +353,11 @@ int dasm_encode(Dst_DECL, void *buffer) + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: +- CK(n >= 0, UNDEF_LG); ++ if (n < 0) { ++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); ++ goto patchrel; ++ } ++ /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); +diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua +index f73974dd..ee2afb2e 100644 +--- a/dynasm/dasm_ppc.lua ++++ b/dynasm/dasm_ppc.lua +@@ -1,7 +1,7 @@ + ------------------------------------------------------------------------------ + -- DynASM PPC/PPC64 module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See dynasm.lua for full copyright notice. + -- + -- Support for various extensions contributed by Caio Souza Oliveira. +@@ -11,9 +11,9 @@ + local _info = { + arch = "ppc", + description = "DynASM PPC module", +- version = "1.4.0", +- vernum = 10400, +- release = "2015-10-18", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2021-05-02", + author = "Mike Pall", + license = "MIT", + } +@@ -1722,9 +1722,9 @@ op_template = function(params, template, nparams) + elseif p == "M" then + op = op + parse_shiftmask(params[n], false); n = n + 1 + elseif p == "J" or p == "K" then +- local mode, n, s = parse_label(params[n], false) +- if p == "K" then n = n + 2048 end +- waction("REL_"..mode, n, s, 1) ++ local mode, m, s = parse_label(params[n], false) ++ if p == "K" then m = m + 2048 end ++ waction("REL_"..mode, m, s, 1) + n = n + 1 + elseif p == "0" then + if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end +diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h +index 59d9e2b2..8914596a 100644 +--- a/dynasm/dasm_proto.h ++++ b/dynasm/dasm_proto.h +@@ -1,6 +1,6 @@ + /* + ** DynASM encoding engine prototypes. +-** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. + ** Released under the MIT license. See dynasm.lua for full copyright notice. + */ + +@@ -10,8 +10,8 @@ + #include <stddef.h> + #include <stdarg.h> + +-#define DASM_IDENT "DynASM 1.4.0" +-#define DASM_VERSION 10400 /* 1.4.0 */ ++#define DASM_IDENT "DynASM 1.5.0" ++#define DASM_VERSION 10500 /* 1.5.0 */ + + #ifndef Dst_DECL + #define Dst_DECL dasm_State **Dst +diff --git a/dynasm/dasm_x64.lua b/dynasm/dasm_x64.lua +index e8bdeb37..2c0a0e86 100644 +--- a/dynasm/dasm_x64.lua ++++ b/dynasm/dasm_x64.lua +@@ -1,7 +1,7 @@ + ------------------------------------------------------------------------------ + -- DynASM x64 module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See dynasm.lua for full copyright notice. + ------------------------------------------------------------------------------ + -- This module just sets 64 bit mode for the combined x86/x64 module. +diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h +index bc636357..d8d4928c 100644 +--- a/dynasm/dasm_x86.h ++++ b/dynasm/dasm_x86.h +@@ -1,6 +1,6 @@ + /* + ** DynASM x86 encoding engine. +-** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. + ** Released under the MIT license. See dynasm.lua for full copyright notice. + */ + +@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...) 
+ switch (action) { + case DASM_DISP: + if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } +- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; ++ /* fallthrough */ ++ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ + case DASM_IMM_D: ofs += 4; break; + case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; + case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; +- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; ++ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; + case DASM_SPACE: p++; ofs += n; break; + case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ +@@ -207,8 +208,8 @@ void dasm_put(Dst_DECL, int start, ...) + if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; + if (*p < 0x20 && (n&7) == 4) ofs++; + switch ((*p++ >> 3) & 3) { +- case 3: n |= b[pos-3]; +- case 2: n |= b[pos-2]; ++ case 3: n |= b[pos-3]; /* fallthrough */ ++ case 2: n |= b[pos-2]; /* fallthrough */ + case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } + } + continue; +@@ -238,8 +239,11 @@ void dasm_put(Dst_DECL, int start, ...) + } + pos++; + ofs += 4; /* Maximum offset needed. */ +- if (action == DASM_REL_LG || action == DASM_REL_PC) ++ if (action == DASM_REL_LG || action == DASM_REL_PC) { + b[pos++] = ofs; /* Store pass1 offset estimate. */ ++ } else if (sizeof(ptrdiff_t) == 8) { ++ ofs += 4; ++ } + break; + case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); +@@ -304,11 +308,13 @@ int dasm_link(Dst_DECL, size_t *szp) + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; ++ int op = 0; + while (1) { +- int op, action = *p++; ++ int action = *p++; + switch (action) { +- case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; +- case DASM_REL_PC: op = p[-2]; rel_pc: { ++ case DASM_REL_LG: p++; ++ /* fallthrough */ ++ case DASM_REL_PC: { + int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); + if (shrink) { /* Shrinkable branch opcode? */ + int lofs, lpos = b[pos]; +@@ -329,17 +335,21 @@ int dasm_link(Dst_DECL, size_t *szp) + pos += 2; + break; + } ++ /* fallthrough */ + case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; ++ /* fallthrough */ + case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: + case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: + case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; + case DASM_LABEL_LG: p++; ++ /* fallthrough */ + case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ + case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. 
*/ + case DASM_EXTERN: p += 2; break; +- case DASM_ESC: p++; break; ++ case DASM_ESC: op = *p++; break; + case DASM_MARK: break; + case DASM_SECTION: case DASM_STOP: goto stop; ++ default: op = action; break; + } + } + stop: (void)0; +@@ -358,10 +368,22 @@ int dasm_link(Dst_DECL, size_t *szp) + do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) + #define dasmd(x) \ + do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) ++#define dasmq(x) \ ++ do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0) + #else + #define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) + #define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) ++#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0) + #endif ++static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x) ++{ ++ if (sizeof(ptrdiff_t) == 8) ++ dasmq((unsigned long long)x); ++ else ++ dasmd((unsigned int)x); ++ return cp; ++} ++#define dasma(x) (cp = dasma_(cp, (x))) + + /* Pass 3: Encode sections. */ + int dasm_encode(Dst_DECL, void *buffer) +@@ -391,12 +413,15 @@ int dasm_encode(Dst_DECL, void *buffer) + if (mrm != 5) { mm[-1] -= 0x80; break; } } + if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; + } ++ /* fallthrough */ + case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; + case DASM_IMM_DB: if (((n+128)&-256) == 0) { + db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; + } else mark = NULL; ++ /* fallthrough */ + case DASM_IMM_D: wd: dasmd(n); break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; ++ /* fallthrough */ + case DASM_IMM_W: dasmw(n); break; + case DASM_VREG: { + int t = *p++; +@@ -421,7 +446,9 @@ int dasm_encode(Dst_DECL, void *buffer) + } + case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; + b++; n = (int)(ptrdiff_t)D->globals[-n]; +- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ ++ /* fallthrough */ ++ case DASM_REL_A: rel_a: ++ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_PC: rel_pc: { + int shrink = *b++; + int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } +@@ -431,11 +458,13 @@ int dasm_encode(Dst_DECL, void *buffer) + goto wb; + } + case DASM_IMM_LG: +- p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } ++ p++; ++ if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; } ++ /* fallthrough */ + case DASM_IMM_PC: { + int *pb = DASM_POS2PTR(D, n); +- n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); +- goto wd; ++ dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base)); ++ break; + } + case DASM_LABEL_LG: { + int idx = *p++; +@@ -452,6 +481,7 @@ int dasm_encode(Dst_DECL, void *buffer) + case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; + case DASM_MARK: mark = cp; break; + case DASM_ESC: action = *p++; ++ /* fallthrough */ + default: *cp++ = action; break; + case DASM_SECTION: case DASM_STOP: goto stop; + } +diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua +index 4c031e2c..fe2cf579 100644 +--- a/dynasm/dasm_x86.lua ++++ b/dynasm/dasm_x86.lua +@@ -1,7 +1,7 @@ + ------------------------------------------------------------------------------ + -- DynASM x86/x64 module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See dynasm.lua for full copyright notice. 
+ ------------------------------------------------------------------------------ + +@@ -11,9 +11,9 @@ local x64 = x64 + local _info = { + arch = x64 and "x64" or "x86", + description = "DynASM x86/x64 module", +- version = "1.4.0", +- vernum = 10400, +- release = "2015-10-18", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2021-05-02", + author = "Mike Pall", + license = "MIT", + } +@@ -484,6 +484,22 @@ local function wputdarg(n) + end + end + ++-- Put signed or unsigned qword or arg. ++local function wputqarg(n) ++ local tn = type(n) ++ if tn == "number" then -- This is only used for numbers from -2^31..2^32-1. ++ wputb(band(n, 255)) ++ wputb(band(shr(n, 8), 255)) ++ wputb(band(shr(n, 16), 255)) ++ wputb(shr(n, 24)) ++ local sign = n < 0 and 255 or 0 ++ wputb(sign); wputb(sign); wputb(sign); wputb(sign) ++ else ++ waction("IMM_D", format("(unsigned int)(%s)", n)) ++ waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) ++ end ++end ++ + -- Put operand-size dependent number or arg (defaults to dword). + local function wputszarg(sz, n) + if not sz or sz == "d" or sz == "q" then wputdarg(n) +@@ -663,10 +679,16 @@ local function opmodestr(op, args) + end + + -- Convert number to valid integer or nil. +-local function toint(expr) ++local function toint(expr, isqword) + local n = tonumber(expr) + if n then +- if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then ++ if n % 1 ~= 0 then ++ werror("not an integer number `"..expr.."'") ++ elseif isqword then ++ if n < -2147483648 or n > 2147483647 then ++ n = nil -- Handle it as an expression to avoid precision loss. ++ end ++ elseif n < -2147483648 or n > 4294967295 then + werror("bad integer number `"..expr.."'") + end + return n +@@ -749,7 +771,7 @@ local function rtexpr(expr) + end + + -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. +-local function parseoperand(param) ++local function parseoperand(param, isqword) + local t = {} + + local expr = param +@@ -810,7 +832,7 @@ local function parseoperand(param) + if t.disp then break end + + -- [reg+xreg...] +- local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$") ++ local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$") + xreg, t.xreg, tp = rtexpr(xreg) + if not t.xreg then + -- [reg+-expr] +@@ -837,7 +859,7 @@ local function parseoperand(param) + t.disp = dispexpr(tailx) + else + -- imm or opsize*imm +- local imm = toint(expr) ++ local imm = toint(expr, isqword) + if not imm and sub(expr, 1, 1) == "*" and t.opsize then + imm = toint(sub(expr, 2)) + if imm then +@@ -955,6 +977,7 @@ end + -- "u" Use VEX encoding, vvvv unused. + -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is + -- removed from the list used by future characters). ++-- "w" Use VEX encoding, vvvv from 3rd operand. 
+ -- "L" Force VEX.L + -- + -- All of the following characters force a flush of the opcode: +@@ -1536,8 +1559,8 @@ local map_op = { + vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", + vrsqrtps_2 = "rmoy:0Fu52rM", + vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", +- vroundpd_3 = "rmioy:660F3AV09rMU", +- vroundps_3 = "rmioy:660F3AV08rMU", ++ vroundpd_3 = "rmioy:660F3Au09rMU", ++ vroundps_3 = "rmioy:660F3Au08rMU", + vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", + vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", + vshufpd_4 = "rrmioy:660FVC6rMU", +@@ -1677,6 +1700,91 @@ local map_op = { + -- Intel ADX + adcx_2 = "rmqd:660F38F6rM", + adox_2 = "rmqd:F30F38F6rM", ++ ++ -- BMI1 ++ andn_3 = "rrmqd:0F38VF2rM", ++ bextr_3 = "rmrqd:0F38wF7rM", ++ blsi_2 = "rmqd:0F38vF33m", ++ blsmsk_2 = "rmqd:0F38vF32m", ++ blsr_2 = "rmqd:0F38vF31m", ++ tzcnt_2 = "rmqdw:F30FBCrM", ++ ++ -- BMI2 ++ bzhi_3 = "rmrqd:0F38wF5rM", ++ mulx_3 = "rrmqd:F20F38VF6rM", ++ pdep_3 = "rrmqd:F20F38VF5rM", ++ pext_3 = "rrmqd:F30F38VF5rM", ++ rorx_3 = "rmSqd:F20F3AuF0rMS", ++ sarx_3 = "rmrqd:F30F38wF7rM", ++ shrx_3 = "rmrqd:F20F38wF7rM", ++ shlx_3 = "rmrqd:660F38wF7rM", ++ ++ -- FMA3 ++ vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", ++ vfmaddsub132ps_3 = "rrmoy:660F38V96rM", ++ vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", ++ vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", ++ vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", ++ vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", ++ ++ vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", ++ vfmsubadd132ps_3 = "rrmoy:660F38V97rM", ++ vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", ++ vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", ++ vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", ++ vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", ++ ++ vfmadd132pd_3 = "rrmoy:660F38VX98rM", ++ vfmadd132ps_3 = "rrmoy:660F38V98rM", ++ vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", ++ vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", ++ vfmadd213pd_3 = "rrmoy:660F38VXA8rM", ++ vfmadd213ps_3 = "rrmoy:660F38VA8rM", ++ vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", ++ vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", ++ vfmadd231pd_3 = "rrmoy:660F38VXB8rM", ++ vfmadd231ps_3 = "rrmoy:660F38VB8rM", ++ vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", ++ vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", ++ ++ vfmsub132pd_3 = "rrmoy:660F38VX9ArM", ++ vfmsub132ps_3 = "rrmoy:660F38V9ArM", ++ vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", ++ vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", ++ vfmsub213pd_3 = "rrmoy:660F38VXAArM", ++ vfmsub213ps_3 = "rrmoy:660F38VAArM", ++ vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", ++ vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", ++ vfmsub231pd_3 = "rrmoy:660F38VXBArM", ++ vfmsub231ps_3 = "rrmoy:660F38VBArM", ++ vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", ++ vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", ++ ++ vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", ++ vfnmadd132ps_3 = "rrmoy:660F38V9CrM", ++ vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", ++ vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", ++ vfnmadd213pd_3 = "rrmoy:660F38VXACrM", ++ vfnmadd213ps_3 = "rrmoy:660F38VACrM", ++ vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", ++ vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", ++ vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", ++ vfnmadd231ps_3 = "rrmoy:660F38VBCrM", ++ vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", ++ vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", ++ ++ vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", ++ vfnmsub132ps_3 = "rrmoy:660F38V9ErM", ++ vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", ++ vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", ++ vfnmsub213pd_3 = "rrmoy:660F38VXAErM", ++ vfnmsub213ps_3 = "rrmoy:660F38VAErM", ++ 
vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", ++ vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", ++ vfnmsub231pd_3 = "rrmoy:660F38VXBErM", ++ vfnmsub231ps_3 = "rrmoy:660F38VBErM", ++ vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", ++ vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", + } + + ------------------------------------------------------------------------------ +@@ -1766,7 +1874,7 @@ end + + ------------------------------------------------------------------------------ + +-local map_vexarg = { u = false, v = 1, V = 2 } ++local map_vexarg = { u = false, v = 1, V = 2, w = 3 } + + -- Process pattern string. + local function dopattern(pat, args, sz, op, needrex) +@@ -1866,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex) + local a = args[narg] + narg = narg + 1 + local mode, imm = a.mode, a.imm +- if mode == "iJ" and not match("iIJ", c) then ++ if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then + werror("bad operand size for label") + end + if c == "S" then +@@ -2058,14 +2166,16 @@ end + local function op_data(params) + if not params then return "imm..." end + local sz = sub(params.op, 2, 2) +- if sz == "a" then sz = addrsize end ++ if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end + for _,p in ipairs(params) do +- local a = parseoperand(p) ++ local a = parseoperand(p, sz == "q") + if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then + werror("bad mode or size in `"..p.."'") + end + if a.mode == "iJ" then + wputlabel("IMM_", a.imm, 1) ++ elseif sz == "q" then ++ wputqarg(a.imm) + else + wputszarg(sz, a.imm) + end +@@ -2077,7 +2187,11 @@ map_op[".byte_*"] = op_data + map_op[".sbyte_*"] = op_data + map_op[".word_*"] = op_data + map_op[".dword_*"] = op_data ++map_op[".qword_*"] = op_data + map_op[".aword_*"] = op_data ++map_op[".long_*"] = op_data ++map_op[".quad_*"] = op_data ++map_op[".addr_*"] = op_data + + ------------------------------------------------------------------------------ + +diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua +index 5ec21a79..f4e71eca 100644 +--- a/dynasm/dynasm.lua ++++ b/dynasm/dynasm.lua +@@ -2,7 +2,7 @@ + -- DynASM. A dynamic assembler for code generation engines. + -- Originally designed and implemented for LuaJIT. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- See below for full copyright notice. + ------------------------------------------------------------------------------ + +@@ -10,14 +10,14 @@ + local _info = { + name = "DynASM", + description = "A dynamic assembler for code generation engines", +- version = "1.4.0", +- vernum = 10400, +- release = "2015-10-18", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2021-05-02", + author = "Mike Pall", +- url = "http://luajit.org/dynasm.html", ++ url = "https://luajit.org/dynasm.html", + license = "MIT", + copyright = [[ +-Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++Copyright (C) 2005-2021 Mike Pall. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the +@@ -38,7 +38,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +-[ MIT license: http://www.opensource.org/licenses/mit-license.php ] ++[ MIT license: https://www.opensource.org/licenses/mit-license.php ] + ]], + } + +@@ -630,6 +630,7 @@ end + -- Load architecture-specific module. + local function loadarch(arch) + if not match(arch, "^[%w_]+$") then return "bad arch name" end ++ _G._map_def = map_def + local ok, m_arch = pcall(require, "dasm_"..arch) + if not ok then return "cannot load module: "..m_arch end + g_arch = m_arch +diff --git a/etc/luajit.1 b/etc/luajit.1 +index 0d263db7..2846d073 100644 +--- a/etc/luajit.1 ++++ b/etc/luajit.1 +@@ -6,7 +6,7 @@ luajit - Just-In-Time Compiler for the Lua Language + .B luajit + [\fIoptions\fR]... [\fIscript\fR [\fIargs\fR]...] + .SH "WEB SITE" +-.IR http://luajit.org ++.IR https://luajit.org + .SH DESCRIPTION + .PP + This is the command-line program to run Lua programs with \fBLuaJIT\fR. +@@ -74,15 +74,15 @@ luajit -jv -e "for i=1,10 do for j=1,10 do for k=1,100 do end end end" + Runs some nested loops and shows the resulting traces. + .SH COPYRIGHT + .PP +-\fBLuaJIT\fR is Copyright (co 2005-2017 Mike Pall. ++\fBLuaJIT\fR is Copyright (co 2005-2021 Mike Pall. + .br + \fBLuaJIT\fR is open source software, released under the MIT license. + .SH SEE ALSO + .PP + More details in the provided HTML docs or at: +-.IR http://luajit.org ++.IR https://luajit.org + .br + More about the Lua language can be found at: +-.IR http://lua.org/docs.html ++.IR https://lua.org/docs.html + .PP + lua(1) +diff --git a/etc/luajit.pc b/etc/luajit.pc +index a78f1746..39e1e577 100644 +--- a/etc/luajit.pc ++++ b/etc/luajit.pc +@@ -17,7 +17,7 @@ INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver} + + Name: LuaJIT + Description: Just-in-time compiler for Lua +-URL: http://luajit.org ++URL: https://luajit.org + Version: ${version} + Requires: + Libs: -L${libdir} -l${libname} +diff --git a/src/Makefile b/src/Makefile +index f56465d1..2538503f 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -7,7 +7,7 @@ + # Also works with MinGW and Cygwin on Windows. + # Please check msvcbuild.bat for building with MSVC on Windows. + # +-# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ############################################################################## + + MAJVER= 2 +@@ -110,8 +110,8 @@ XCFLAGS= + #XCFLAGS+= -DLUAJIT_NUMMODE=1 + #XCFLAGS+= -DLUAJIT_NUMMODE=2 + # +-# Enable GC64 mode for x64. +-#XCFLAGS+= -DLUAJIT_ENABLE_GC64 ++# Disable LJ_GC64 mode for x64. ++#XCFLAGS+= -DLUAJIT_DISABLE_GC64 + # + ############################################################################## + +@@ -132,7 +132,6 @@ XCFLAGS= + # + # This define is required to run LuaJIT under Valgrind. The Valgrind + # header files must be installed. You should enable debug information, too. +-# Use --suppressions=lj.supp to avoid some false positives. + #XCFLAGS+= -DLUAJIT_USE_VALGRIND + # + # This is the client for the GDB JIT API. 
GDB 7.0 or higher is required +@@ -158,13 +157,16 @@ XCFLAGS= + + ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) + HOST_SYS= Windows +- HOST_RM= del + else + HOST_SYS:= $(shell uname -s) + ifneq (,$(findstring MINGW,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= mingw + endif ++ ifneq (,$(findstring MSYS,$(HOST_SYS))) ++ HOST_SYS= Windows ++ HOST_MSYS= mingw ++ endif + ifneq (,$(findstring CYGWIN,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= cygwin +@@ -191,7 +193,7 @@ CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) + LDOPTIONS= $(CCDEBUG) $(LDFLAGS) + + HOST_CC= $(CC) +-HOST_RM= rm -f ++HOST_RM?= rm -f + # If left blank, minilua is built and used. You can supply an installed + # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua + HOST_LUA= +@@ -209,7 +211,7 @@ TARGET_CC= $(STATIC_CC) + TARGET_STCC= $(STATIC_CC) + TARGET_DYNCC= $(DYNAMIC_CC) + TARGET_LD= $(CROSS)$(CC) +-TARGET_AR= $(CROSS)ar rcus 2>/dev/null ++TARGET_AR= $(CROSS)ar rcus + TARGET_STRIP= $(CROSS)strip + + TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +@@ -217,6 +219,7 @@ TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) + TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib + TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) + TARGET_DLLNAME= lua$(NODOTABIVER).dll ++TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a + TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) + TARGET_DYNXLDOPTS= + +@@ -305,24 +308,22 @@ endif + TARGET_SYS?= $(HOST_SYS) + ifeq (Windows,$(TARGET_SYS)) + TARGET_STRIP+= --strip-unneeded +- TARGET_XSHLDFLAGS= -shared ++ TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME) + TARGET_DYNXLDOPTS= + else ++ TARGET_AR+= 2>/dev/null + ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) + TARGET_XCFLAGS+= -fno-stack-protector + endif + ifeq (Darwin,$(TARGET_SYS)) + ifeq (,$(MACOSX_DEPLOYMENT_TARGET)) +- export MACOSX_DEPLOYMENT_TARGET=10.4 ++ $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) + endif + TARGET_STRIP+= -x ++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL + TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC + TARGET_DYNXLDOPTS= + TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) +- ifeq (x64,$(TARGET_LJARCH)) +- TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000 +- TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000 +- endif + else + ifeq (iOS,$(TARGET_SYS)) + TARGET_STRIP+= -x +@@ -333,6 +334,13 @@ ifeq (iOS,$(TARGET_SYS)) + TARGET_XCFLAGS+= -fno-omit-frame-pointer + endif + else ++ ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) ++ # Find out whether the target toolchain always generates unwind tables. 
++ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o) ++ ifneq (,$(findstring E,$(TARGET_TESTUNWIND))) ++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL ++ endif ++ endif + ifneq (SunOS,$(TARGET_SYS)) + ifneq (PS3,$(TARGET_SYS)) + TARGET_XLDFLAGS+= -Wl,-E +@@ -359,7 +367,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS)) + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX + else + ifeq (iOS,$(TARGET_SYS)) +- HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX ++ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1 + else + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER + endif +@@ -439,6 +447,9 @@ ifeq (arm,$(TARGET_LJARCH)) + DASM_AFLAGS+= -D IOS + endif + else ++ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D MIPSR6 ++endif + ifeq (ppc,$(TARGET_LJARCH)) + ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SQRT +@@ -452,9 +463,6 @@ ifeq (ppc,$(TARGET_LJARCH)) + ifeq (PS3,$(TARGET_SYS)) + DASM_AFLAGS+= -D PPE -D TOC + endif +- ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH))) +- DASM_ARCH= ppc64 +- endif + endif + endif + endif +@@ -476,13 +484,15 @@ LJVM_BOUT= $(LJVM_S) + LJVM_MODE= elfasm + + LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ +- lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o ++ lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \ ++ lib_buffer.o + LJLIB_C= $(LJLIB_O:.o=.c) + +-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ ++LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ + lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ +- lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ +- lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ ++ lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ ++ lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \ ++ lj_api.o lj_profile.o \ + lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ + lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ + lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ +@@ -557,6 +567,7 @@ ifeq (Windows,$(HOST_SYS)) + MINILUA_X= host\minilua + BUILDVM_X= host\buildvm + ALL_RM:= $(subst /,,$(ALL_RM)) ++ HOST_RM= del + endif + endif + +@@ -597,7 +608,6 @@ E= @echo + default all: $(TARGET_T) + + amalg: +- @grep "^[+|]" ljamalg.c + $(MAKE) all "LJCORE_O=ljamalg.o" + + clean: +@@ -631,7 +641,7 @@ $(MINILUA_T): $(MINILUA_O) + $(E) "HOSTLINK $@" + $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) + +-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua ++host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h + $(E) "DYNASM $@" + $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) + +diff --git a/src/Makefile.dep b/src/Makefile.dep +index 2b1cb5ef..1ad6701a 100644 +--- a/src/Makefile.dep ++++ b/src/Makefile.dep +@@ -1,15 +1,19 @@ + lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ +- lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h ++ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h + lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ +- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ +- lj_tab.h lj_meta.h lj_state.h 
lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ +- lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ +- lj_strfmt.h lj_lib.h lj_libdef.h ++ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \ ++ lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ ++ lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \ ++ lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h + lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ + lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ + lj_ffdef.h lj_lib.h lj_libdef.h ++lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ ++ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ ++ lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \ ++ lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h + lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ + lj_libdef.h +@@ -28,7 +32,7 @@ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ + lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ + lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h + lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ +- lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h ++ lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h + lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ + lj_libdef.h +@@ -41,16 +45,18 @@ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ + lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h +-lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h ++lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \ ++ lj_prng.h + lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ + lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ + lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h + lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ +- lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ +- lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ +- lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ +- lj_asm_*.h ++ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \ ++ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ ++ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ ++ lj_emit_*.h lj_asm_*.h ++lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h + lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ + lj_bcdef.h + lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ +@@ -75,8 +81,8 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h + lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ +- lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ +- lj_ccallback.h ++ lj_err.h lj_errmsg.h lj_buf.h 
lj_gc.h lj_str.h lj_tab.h lj_ctype.h \ ++ lj_cdata.h lj_cconv.h lj_ccallback.h + lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h + lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h +@@ -108,10 +114,10 @@ lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ + lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_strfmt.h + lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ +- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ +- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ +- lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ +- lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h ++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \ ++ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ ++ lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \ ++ lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h + lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ + lj_traceerr.h lj_vm.h +@@ -125,21 +131,21 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ +- lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h ++ lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h + lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ + lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ + lj_strfmt.h + lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ +- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \ +- lj_bcdump.h lj_lib.h ++ lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \ ++ lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h + lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ + lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h + lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ +- lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h ++ lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h + lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ + lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h +@@ -155,7 +161,7 @@ lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ + lj_vm.h + lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ +- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h ++ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h + lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_strscan.h +@@ -168,6 +174,7 @@ lj_parse.o: 
lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ + lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ + lj_vm.h lj_vmevent.h ++lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h + lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ + lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h +@@ -175,7 +182,10 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ + lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ + lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ +- lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h ++ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h ++lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \ ++ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ ++ lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h + lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ +@@ -183,11 +193,13 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ + lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ +- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h ++ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \ ++ lj_alloc.h luajit.h + lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ +- lj_err.h lj_errmsg.h lj_str.h lj_char.h ++ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h + lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ +- lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h ++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \ ++ lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h + lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h + lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ +@@ -198,36 +210,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ + lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ +- lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h ++ lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h + lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ +- lj_gc.h lj_udata.h ++ lj_gc.h lj_err.h lj_errmsg.h lj_udata.h + lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ + lj_vm.h lj_vmevent.h + lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_ir.h lj_vm.h +-ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ +- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \ +- lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ +- lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \ +- lj_vm.h 
lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \ +- lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \ +- lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ +- lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ +- lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ +- lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ +- lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ +- lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ +- lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ +- lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ +- lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ +- lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ +- lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ +- lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ +- lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ +- lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ +- lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ +- lib_ffi.c lib_init.c ++ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \ ++ lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \ ++ lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \ ++ lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ ++ lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \ ++ lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \ ++ lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \ ++ lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \ ++ lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \ ++ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \ ++ lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ ++ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ ++ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ ++ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ ++ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ ++ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ ++ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ ++ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ ++ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ ++ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ ++ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ ++ lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ ++ lib_buffer.c lib_init.c + luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h + host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ + lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ +diff --git a/src/host/buildvm.c b/src/host/buildvm.c +index de23fabd..9dc328fc 100644 +--- a/src/host/buildvm.c ++++ b/src/host/buildvm.c +@@ -1,6 +1,6 @@ + /* + ** LuaJIT VM builder. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** This is a tool to build the hand-tuned assembler code required for + ** LuaJIT's bytecode interpreter. 
It supports a variety of output formats +diff --git a/src/host/buildvm.h b/src/host/buildvm.h +index b90428dc..f81ef7e0 100644 +--- a/src/host/buildvm.h ++++ b/src/host/buildvm.h +@@ -1,6 +1,6 @@ + /* + ** LuaJIT VM builder. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _BUILDVM_H +diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c +index ffd14903..01a1ba06 100644 +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -1,6 +1,6 @@ + /* + ** LuaJIT VM builder: Assembler source code emitter. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "buildvm.h" +@@ -144,14 +144,6 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", + (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); + } else if ((ins >> 26) == 18) { +-#if LJ_ARCH_PPC64 +- const char *suffix = strchr(sym, '@'); +- if (suffix && suffix[1] == 'h') { +- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym); +- } else if (suffix && suffix[1] == 'l') { +- fprintf(ctx->fp, "\tld 12, %s\n", sym); +- } else +-#endif + fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym); + } else { + fprintf(stderr, +@@ -250,9 +242,6 @@ void emit_asm(BuildCtx *ctx) + int i, rel; + + fprintf(ctx->fp, "\t.file "buildvm_%s.dasc"\n", ctx->dasm_arch); +-#if LJ_ARCH_PPC64 +- fprintf(ctx->fp, "\t.abiversion 2\n"); +-#endif + fprintf(ctx->fp, "\t.text\n"); + emit_asm_align(ctx, 4); + +@@ -338,7 +327,7 @@ void emit_asm(BuildCtx *ctx) + #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) + fprintf(ctx->fp, "\t.section .note.GNU-stack,""," ELFASM_PX "progbits\n"); + #endif +-#if LJ_TARGET_PPC && !LJ_TARGET_PS3 ++#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP + /* Hard-float ABI. */ + fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); + #endif +diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c +index d579f4d4..02b51c4e 100644 +--- a/src/host/buildvm_fold.c ++++ b/src/host/buildvm_fold.c +@@ -1,6 +1,6 @@ + /* + ** LuaJIT VM builder: IR folding hash table generator. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "buildvm.h" +diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c +index 2956fdb6..20bb77cd 100644 +--- a/src/host/buildvm_lib.c ++++ b/src/host/buildvm_lib.c +@@ -1,6 +1,6 @@ + /* + ** LuaJIT VM builder: library definition compiler. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "buildvm.h" +@@ -385,6 +385,8 @@ void emit_lib(BuildCtx *ctx) + ok = LJ_HASJIT; + else if (!strcmp(buf, "#if LJ_HASFFI\n")) + ok = LJ_HASFFI; ++ else if (!strcmp(buf, "#if LJ_HASBUFFER\n")) ++ ok = LJ_HASBUFFER; + if (!ok) { + int lvl = 1; + while (fgets(buf, sizeof(buf), fp) != NULL) { +diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c +index 2eb2bb7b..aa061e6e 100644 +--- a/src/host/buildvm_peobj.c ++++ b/src/host/buildvm_peobj.c +@@ -1,6 +1,6 @@ + /* + ** LuaJIT VM builder: PE object emitter. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + ** + ** Only used for building on Windows, since we cannot assume the presence + ** of a suitable assembler. The host and target byte order must match. +@@ -9,7 +9,7 @@ + #include "buildvm.h" + #include "lj_bc.h" + +-#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC ++#if LJ_TARGET_X86ORX64 + + /* Context for PE object emitter. */ + static char *strtab; +@@ -93,12 +93,6 @@ typedef struct PEsymaux { + #define PEOBJ_RELOC_ADDR32NB 0x03 + #define PEOBJ_RELOC_OFS 0 + #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ +-#elif LJ_TARGET_PPC +-#define PEOBJ_ARCH_TARGET 0x01f2 +-#define PEOBJ_RELOC_REL32 0x06 +-#define PEOBJ_RELOC_DIR32 0x02 +-#define PEOBJ_RELOC_OFS (-4) +-#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */ + #endif + + /* Section numbers (0-based). */ +@@ -251,15 +245,8 @@ void emit_peobj(BuildCtx *ctx) + /* Write .text section. */ + host_endian.u = 1; + if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { +-#if LJ_TARGET_PPC +- uint32_t *p = (uint32_t *)ctx->code; +- int n = (int)(ctx->codesz >> 2); +- for (i = 0; i < n; i++, p++) +- *p = lj_bswap(*p); /* Byteswap .text section. */ +-#else + fprintf(stderr, "Error: different byte order for host and target\n"); + exit(1); +-#endif + } + owrite(ctx, ctx->code, ctx->codesz); + for (i = 0; i < ctx->nreloc; i++) { +diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua +index 6f5a05cc..921769f7 100644 +--- a/src/host/genlibbc.lua ++++ b/src/host/genlibbc.lua +@@ -2,7 +2,7 @@ + -- Lua script to dump the bytecode of the library functions written in Lua. + -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. + ---------------------------------------------------------------------------- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + +diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua +index 50feff01..c05ed63c 100644 +--- a/src/host/genminilua.lua ++++ b/src/host/genminilua.lua +@@ -2,7 +2,7 @@ + -- Lua script to generate a customized, minified version of Lua. + -- The resulting 'minilua' is used for the build process of LuaJIT. + ---------------------------------------------------------------------------- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + +diff --git a/src/host/minilua.c b/src/host/minilua.c +index 79150286..cfc7491d 100644 +--- a/src/host/minilua.c ++++ b/src/host/minilua.c +@@ -1134,7 +1134,7 @@ if(!cl->isC){ + CallInfo*ci; + StkId st,base; + Proto*p=cl->p; +-luaD_checkstack(L,p->maxstacksize); ++luaD_checkstack(L,p->maxstacksize+p->numparams); + func=restorestack(L,funcr); + if(!p->is_vararg){ + base=func+1; +diff --git a/src/jit/bc.lua b/src/jit/bc.lua +index 193cf01f..e58a3fef 100644 +--- a/src/jit/bc.lua ++++ b/src/jit/bc.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT bytecode listing module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. 
See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- +diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua +index c17c88e0..ab13667a 100644 +--- a/src/jit/bcsave.lua ++++ b/src/jit/bcsave.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT module to save/list bytecode. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- +@@ -17,6 +17,10 @@ local bit = require("bit") + -- Symbol name prefix for LuaJIT bytecode. + local LJBC_PREFIX = "luaJIT_BC_" + ++local type, assert = type, assert ++local format = string.format ++local tremove, tconcat = table.remove, table.concat ++ + ------------------------------------------------------------------------------ + + local function usage() +@@ -63,8 +67,18 @@ local map_type = { + } + + local map_arch = { +- x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, +- ppc = true, mips = true, mipsel = true, ++ x86 = { e = "le", b = 32, m = 3, p = 0x14c, }, ++ x64 = { e = "le", b = 64, m = 62, p = 0x8664, }, ++ arm = { e = "le", b = 32, m = 40, p = 0x1c0, }, ++ arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, }, ++ arm64be = { e = "be", b = 64, m = 183, }, ++ ppc = { e = "be", b = 32, m = 20, }, ++ mips = { e = "be", b = 32, m = 8, f = 0x50001006, }, ++ mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, }, ++ mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, }, ++ mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, ++ mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, ++ mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, + } + + local map_os = { +@@ -73,33 +87,33 @@ local map_os = { + } + + local function checkarg(str, map, err) +- str = string.lower(str) ++ str = str:lower() + local s = check(map[str], "unknown ", err) +- return s == true and str or s ++ return type(s) == "string" and s or str + end + + local function detecttype(str) +- local ext = string.match(string.lower(str), "%.(%a+)$") ++ local ext = str:lower():match("%.(%a+)$") + return map_type[ext] or "raw" + end + + local function checkmodname(str) +- check(string.match(str, "^[%w_.%-]+$"), "bad module name") +- return string.gsub(str, "[%.%-]", "_") ++ check(str:match("^[%w_.%-]+$"), "bad module name") ++ return str:gsub("[%.%-]", "_") + end + + local function detectmodname(str) + if type(str) == "string" then +- local tail = string.match(str, "[^/\]+$") ++ local tail = str:match("[^/\]+$") + if tail then str = tail end +- local head = string.match(str, "^(.*)%.[^.]*$") ++ local head = str:match("^(.*)%.[^.]*$") + if head then str = head end +- str = string.match(str, "^[%w_.%-]+") ++ str = str:match("^[%w_.%-]+") + else + str = nil + end + check(str, "cannot derive module name, use -n name") +- return string.gsub(str, "[%.%-]", "_") ++ return str:gsub("[%.%-]", "_") + end + + ------------------------------------------------------------------------------ +@@ -118,8 +132,8 @@ end + local function bcsave_c(ctx, output, s) + local fp = savefile(output, "w") + if ctx.type == "c" then +- fp:write(string.format([[ +-#ifdef _cplusplus ++ fp:write(format([[ ++#ifdef __cplusplus + extern "C" + #endif + #ifdef _WIN32 +@@ -128,7 +142,7 @@ __declspec(dllexport) + const unsigned char %s%s[] = { + ]], LJBC_PREFIX, ctx.modname)) + 
else +- fp:write(string.format([[ ++ fp:write(format([[ + #define %s%s_SIZE %d + static const unsigned char %s%s[] = { + ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) +@@ -138,13 +152,13 @@ static const unsigned char %s%s[] = { + local b = tostring(string.byte(s, i)) + m = m + #b + 1 + if m > 78 then +- fp:write(table.concat(t, ",", 1, n), ",\n") ++ fp:write(tconcat(t, ",", 1, n), ",\n") + n, m = 0, #b + 1 + end + n = n + 1 + t[n] = b + end +- bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") ++ bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n") + end + + local function bcsave_elfobj(ctx, output, s, ffi) +@@ -199,12 +213,8 @@ typedef struct { + } ELF64obj; + ]] + local symname = LJBC_PREFIX..ctx.modname +- local is64, isbe = false, false +- if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then +- is64 = true +- elseif ctx.arch == "ppc" or ctx.arch == "mips" then +- isbe = true +- end ++ local ai = assert(map_arch[ctx.arch]) ++ local is64, isbe = ai.b == 64, ai.e == "be" + + -- Handle different host/target endianess. + local function f32(x) return x end +@@ -237,10 +247,8 @@ typedef struct { + hdr.eendian = isbe and 2 or 1 + hdr.eversion = 1 + hdr.type = f16(1) +- hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) +- if ctx.arch == "mips" or ctx.arch == "mipsel" then +- hdr.flags = f32(0x50001006) +- end ++ hdr.machine = f16(ai.m) ++ hdr.flags = f32(ai.f or 0) + hdr.version = f32(1) + hdr.shofs = fofs(ffi.offsetof(o, "sect")) + hdr.ehsize = f16(ffi.sizeof(hdr)) +@@ -275,7 +283,7 @@ typedef struct { + o.sect[2].size = fofs(ofs) + o.sect[3].type = f32(3) -- .strtab + o.sect[3].ofs = fofs(sofs + ofs) +- o.sect[3].size = fofs(#symname+1) ++ o.sect[3].size = fofs(#symname+2) + ffi.copy(o.space+ofs+1, symname) + ofs = ofs + #symname + 2 + o.sect[4].type = f32(1) -- .rodata +@@ -336,12 +344,8 @@ typedef struct { + } PEobj; + ]] + local symname = LJBC_PREFIX..ctx.modname +- local is64 = false +- if ctx.arch == "x86" then +- symname = "_"..symname +- elseif ctx.arch == "x64" then +- is64 = true +- end ++ local ai = assert(map_arch[ctx.arch]) ++ local is64 = ai.b == 64 + local symexport = " /EXPORT:"..symname..",DATA " + + -- The file format is always little-endian. Swap if the host is big-endian. +@@ -355,7 +359,7 @@ typedef struct { + -- Create PE object and fill in header. + local o = ffi.new("PEobj") + local hdr = o.hdr +- hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) ++ hdr.arch = f16(assert(ai.p)) + hdr.nsects = f16(2) + hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) + hdr.nsyms = f32(6) +@@ -605,16 +609,16 @@ local function docmd(...) + local n = 1 + local list = false + local ctx = { +- strip = true, arch = jit.arch, os = string.lower(jit.os), ++ strip = true, arch = jit.arch, os = jit.os:lower(), + type = false, modname = false, + } + while n <= #arg do + local a = arg[n] +- if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then +- table.remove(arg, n) ++ if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then ++ tremove(arg, n) + if a == "--" then break end + for m=2,#a do +- local opt = string.sub(a, m, m) ++ local opt = a:sub(m, m) + if opt == "l" then + list = true + elseif opt == "s" then +@@ -627,13 +631,13 @@ local function docmd(...) 
+ if n ~= 1 then usage() end + arg[1] = check(loadstring(arg[1])) + elseif opt == "n" then +- ctx.modname = checkmodname(table.remove(arg, n)) ++ ctx.modname = checkmodname(tremove(arg, n)) + elseif opt == "t" then +- ctx.type = checkarg(table.remove(arg, n), map_type, "file type") ++ ctx.type = checkarg(tremove(arg, n), map_type, "file type") + elseif opt == "a" then +- ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") ++ ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture") + elseif opt == "o" then +- ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") ++ ctx.os = checkarg(tremove(arg, n), map_os, "OS name") + else + usage() + end +diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua +index c2dd7769..ba79c47e 100644 +--- a/src/jit/dis_arm.lua ++++ b/src/jit/dis_arm.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT ARM disassembler module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This is a helper module used by the LuaJIT machine code dumper module. +diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua +index a7173326..ad909fbd 100644 +--- a/src/jit/dis_arm64.lua ++++ b/src/jit/dis_arm64.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT ARM64 disassembler module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + -- + -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +@@ -1089,7 +1089,7 @@ local function disass_ins(ctx) + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = x + 1 +- elseif x >= immr then ++ else + name = a2 + x = x - immr + 1 + end +diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua +index 7eb389e2..edcbffa8 100644 +--- a/src/jit/dis_arm64be.lua ++++ b/src/jit/dis_arm64be.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT ARM64BE disassembler wrapper module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- ARM64 instructions are always little-endian. So just forward to the +diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua +index a12b8e62..6ad17f54 100644 +--- a/src/jit/dis_mips.lua ++++ b/src/jit/dis_mips.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT MIPS disassembler module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT/X license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This is a helper module used by the LuaJIT machine code dumper module. 
+@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex + local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift + + ------------------------------------------------------------------------------ +--- Primary and extended opcode maps ++-- Extended opcode maps common to all MIPS releases + ------------------------------------------------------------------------------ + +-local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } + local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } + local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } + ++local map_cop0 = { ++ shift = 25, mask = 1, ++ [0] = { ++ shift = 21, mask = 15, ++ [0] = "mfc0TDW", [4] = "mtc0TDW", ++ [10] = "rdpgprDT", ++ [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, ++ [14] = "wrpgprDT", ++ }, { ++ shift = 0, mask = 63, ++ [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", ++ [24] = "eret", [31] = "deret", ++ [32] = "wait", ++ }, ++} ++ ++------------------------------------------------------------------------------ ++-- Primary and extended opcode maps for MIPS R1-R5 ++------------------------------------------------------------------------------ ++ ++local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } ++ + local map_special = { + shift = 0, mask = 63, + [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, +@@ -87,22 +108,6 @@ local map_regimm = { + false, false, false, "synciSO", + } + +-local map_cop0 = { +- shift = 25, mask = 1, +- [0] = { +- shift = 21, mask = 15, +- [0] = "mfc0TDW", [4] = "mtc0TDW", +- [10] = "rdpgprDT", +- [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, +- [14] = "wrpgprDT", +- }, { +- shift = 0, mask = 63, +- [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", +- [24] = "eret", [31] = "deret", +- [32] = "wait", +- }, +-} +- + local map_cop1s = { + shift = 0, mask = 63, + [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", +@@ -233,6 +238,208 @@ local map_pri = { + false, "sdc1HSO", "sdc2TSO", "sdTSO", + } + ++------------------------------------------------------------------------------ ++-- Primary and extended opcode maps for MIPS R6 ++------------------------------------------------------------------------------ ++ ++local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" } ++local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" } ++local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" } ++local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" } ++local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" } ++local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" } ++local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" } ++local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" } ++ ++local map_special_r6 = { ++ shift = 0, mask = 63, ++ [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, ++ false, map_srl, "sraDTA", ++ "sllvDTS", false, map_srlv, "sravDTS", ++ "jrS", "jalrD1S", false, false, ++ "syscallY", "breakY", false, "sync", ++ "clzDS", "cloDS", "dclzDS", "dcloDS", ++ "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST", ++ map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6, ++ map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6, ++ "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", ++ "andDST", "or|moveDST0", "xorDST", "nor|notDST0", ++ false, false, "sltDST", "sltuDST", ++ "daddDST", "dadduDST", 
"dsubDST", "dsubuDST", ++ "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", ++ "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST", ++ "dsllDTA", false, "dsrlDTA", "dsraDTA", ++ "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", ++} ++ ++local map_bshfl_r6 = { ++ shift = 9, mask = 3, ++ [1] = "alignDSTa", ++ _ = { ++ shift = 6, mask = 31, ++ [0] = "bitswapDT", ++ [2] = "wsbhDT", ++ [16] = "sebDT", ++ [24] = "sehDT", ++ } ++} ++ ++local map_dbshfl_r6 = { ++ shift = 9, mask = 3, ++ [1] = "dalignDSTa", ++ _ = { ++ shift = 6, mask = 31, ++ [0] = "dbitswapDT", ++ [2] = "dsbhDT", ++ [5] = "dshdDT", ++ } ++} ++ ++local map_special3_r6 = { ++ shift = 0, mask = 63, ++ [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", ++ [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", ++ [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD", ++} ++ ++local map_regimm_r6 = { ++ shift = 16, mask = 31, ++ [0] = "bltzSB", [1] = "bgezSB", ++ [6] = "dahiSI", [30] = "datiSI", ++ [23] = "sigrieI", [31] = "synciSO", ++} ++ ++local map_pcrel_r6 = { ++ shift = 19, mask = 3, ++ [0] = "addiupcS2", "lwpcS2", "lwupcS2", { ++ shift = 18, mask = 1, ++ [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" } ++ } ++} ++ ++local map_cop1s_r6 = { ++ shift = 0, mask = 63, ++ [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", ++ "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG", ++ "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG", ++ "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG", ++ "sel.sFGH", false, false, false, ++ "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH", ++ "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG", ++ "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH", ++ false, "cvt.d.sFG", false, false, ++ "cvt.w.sFG", "cvt.l.sFG", ++} ++ ++local map_cop1d_r6 = { ++ shift = 0, mask = 63, ++ [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH", ++ "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG", ++ "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG", ++ "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG", ++ "sel.dFGH", false, false, false, ++ "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH", ++ "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG", ++ "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH", ++ "cvt.s.dFG", false, false, false, ++ "cvt.w.dFG", "cvt.l.dFG", ++} ++ ++local map_cop1w_r6 = { ++ shift = 0, mask = 63, ++ [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH", ++ "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH", ++ "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH", ++ "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH", ++ false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH", ++ false, false, false, false, ++ false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH", ++ false, false, false, false, ++ "cvt.s.wFG", "cvt.d.wFG", ++} ++ ++local map_cop1l_r6 = { ++ shift = 0, mask = 63, ++ [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH", ++ "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH", ++ "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH", ++ "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH", ++ false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH", ++ false, false, false, false, ++ false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH", ++ false, false, false, false, ++ "cvt.s.lFG", "cvt.d.lFG", ++} ++ ++local map_cop1_r6 = { ++ shift = 21, mask = 31, ++ [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", ++ "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", ++ false, 
"bc1eqzHB", false, false, ++ false, "bc1nezHB", false, false, ++ map_cop1s_r6, map_cop1d_r6, false, false, ++ map_cop1w_r6, map_cop1l_r6, ++} ++ ++local function maprs_popTS(rs, rt) ++ if rt == 0 then return 0 elseif rs == 0 then return 1 ++ elseif rs == rt then return 2 else return 3 end ++end ++ ++local map_pop06_r6 = { ++ maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB" ++} ++local map_pop07_r6 = { ++ maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB" ++} ++local map_pop26_r6 = { ++ maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB" ++} ++local map_pop27_r6 = { ++ maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB" ++} ++ ++local function maprs_popS(rs, rt) ++ if rs == 0 then return 0 else return 1 end ++end ++ ++local map_pop66_r6 = { ++ maprs = maprs_popS, [0] = "jicTI", "beqzcSb" ++} ++local map_pop76_r6 = { ++ maprs = maprs_popS, [0] = "jialcTI", "bnezcSb" ++} ++ ++local function maprs_popST(rs, rt) ++ if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end ++end ++ ++local map_pop10_r6 = { ++ maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB" ++} ++local map_pop30_r6 = { ++ maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB" ++} ++ ++local map_pri_r6 = { ++ [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ", ++ "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6, ++ map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI", ++ "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U", ++ map_cop0, map_cop1_r6, false, false, ++ false, false, map_pop26_r6, map_pop27_r6, ++ map_pop30_r6, "daddiuTSI", false, false, ++ false, "dauiTSI", false, map_special3_r6, ++ "lbTSO", "lhTSO", false, "lwTSO", ++ "lbuTSO", "lhuTSO", false, false, ++ "sbTSO", "shTSO", false, "swTSO", ++ false, false, false, false, ++ false, "lwc1HSO", "bc#", false, ++ false, "ldc1HSO", map_pop66_r6, "ldTSO", ++ false, "swc1HSO", "balc#", map_pcrel_r6, ++ false, "sdc1HSO", map_pop76_r6, "sdTSO", ++} ++ + ------------------------------------------------------------------------------ + + local map_gpr = { +@@ -287,10 +494,14 @@ local function disass_ins(ctx) + ctx.op = op + ctx.rel = nil + +- local opat = map_pri[rshift(op, 26)] ++ local opat = ctx.map_pri[rshift(op, 26)] + while type(opat) ~= "string" do + if not opat then return unknown(ctx) end +- opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ ++ if opat.maprs then ++ opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))] ++ else ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ ++ end + end + local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") + local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") +@@ -314,6 +525,8 @@ local function disass_ins(ctx) + x = "f"..band(rshift(op, 21), 31) + elseif p == "A" then + x = band(rshift(op, 6), 31) ++ elseif p == "a" then ++ x = band(rshift(op, 6), 7) + elseif p == "E" then + x = band(rshift(op, 6), 31) + 32 + elseif p == "M" then +@@ -333,6 +546,10 @@ local function disass_ins(ctx) + x = band(rshift(op, 11), 31) - last + 33 + elseif p == "I" then + x = arshift(lshift(op, 16), 16) ++ elseif p == "2" then ++ x = arshift(lshift(op, 13), 11) ++ elseif p == "3" then ++ x = arshift(lshift(op, 14), 11) + elseif p == "U" then + x = band(op, 0xffff) + elseif p == "O" then +@@ -342,7 +559,15 @@ local function disass_ins(ctx) + local index = map_gpr[band(rshift(op, 16), 31)] + operands[#operands] = format("%s(%s)", index, last) + elseif p == "B" then +- x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 
+ 4 ++ x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4 ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "b" then ++ x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4 ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "#" then ++ x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4 + ctx.rel = x + x = format("0x%08x", x) + elseif p == "J" then +@@ -408,6 +633,7 @@ local function create(code, addr, out) + ctx.disass = disass_block + ctx.hexdump = 8 + ctx.get = get_be ++ ctx.map_pri = map_pri + return ctx + end + +@@ -417,6 +643,19 @@ local function create_el(code, addr, out) + return ctx + end + ++local function create_r6(code, addr, out) ++ local ctx = create(code, addr, out) ++ ctx.map_pri = map_pri_r6 ++ return ctx ++end ++ ++local function create_r6_el(code, addr, out) ++ local ctx = create(code, addr, out) ++ ctx.get = get_le ++ ctx.map_pri = map_pri_r6 ++ return ctx ++end ++ + -- Simple API: disassemble code (a string) at address and output via out. + local function disass(code, addr, out) + create(code, addr, out):disass() +@@ -426,6 +665,14 @@ local function disass_el(code, addr, out) + create_el(code, addr, out):disass() + end + ++local function disass_r6(code, addr, out) ++ create_r6(code, addr, out):disass() ++end ++ ++local function disass_r6_el(code, addr, out) ++ create_r6_el(code, addr, out):disass() ++end ++ + -- Return register name for RID. + local function regname(r) + if r < 32 then return map_gpr[r] end +@@ -436,8 +683,12 @@ end + return { + create = create, + create_el = create_el, ++ create_r6 = create_r6, ++ create_r6_el = create_r6_el, + disass = disass, + disass_el = disass_el, ++ disass_r6 = disass_r6, ++ disass_r6_el = disass_r6_el, + regname = regname + } + +diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua +index c4374928..5ad48f8f 100644 +--- a/src/jit/dis_mips64.lua ++++ b/src/jit/dis_mips64.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT MIPS64 disassembler wrapper module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This module just exports the big-endian functions from the +diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua +index 2b1470af..d50e3a18 100644 +--- a/src/jit/dis_mips64el.lua ++++ b/src/jit/dis_mips64el.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT MIPS64EL disassembler wrapper module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This module just exports the little-endian functions from the +diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64r6.lua +similarity index 67% +copy from src/jit/dis_mips64el.lua +copy to src/jit/dis_mips64r6.lua +index 2b1470af..921b3cbe 100644 +--- a/src/jit/dis_mips64el.lua ++++ b/src/jit/dis_mips64r6.lua +@@ -1,17 +1,17 @@ + ---------------------------------------------------------------------------- +--- LuaJIT MIPS64EL disassembler wrapper module. ++-- LuaJIT MIPS64R6 disassembler wrapper module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. 
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- +--- This module just exports the little-endian functions from the ++-- This module just exports the r6 big-endian functions from the + -- MIPS disassembler module. All the interesting stuff is there. + ------------------------------------------------------------------------------ + + local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") + return { +- create = dis_mips.create_el, +- disass = dis_mips.disass_el, ++ create = dis_mips.create_r6, ++ disass = dis_mips.disass_r6, + regname = dis_mips.regname + } + +diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64r6el.lua +similarity index 66% +copy from src/jit/dis_mips64el.lua +copy to src/jit/dis_mips64r6el.lua +index 2b1470af..aadef9f3 100644 +--- a/src/jit/dis_mips64el.lua ++++ b/src/jit/dis_mips64r6el.lua +@@ -1,17 +1,17 @@ + ---------------------------------------------------------------------------- +--- LuaJIT MIPS64EL disassembler wrapper module. ++-- LuaJIT MIPS64R6EL disassembler wrapper module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- +--- This module just exports the little-endian functions from the ++-- This module just exports the r6 little-endian functions from the + -- MIPS disassembler module. All the interesting stuff is there. + ------------------------------------------------------------------------------ + + local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") + return { +- create = dis_mips.create_el, +- disass = dis_mips.disass_el, ++ create = dis_mips.create_r6_el, ++ disass = dis_mips.disass_r6_el, + regname = dis_mips.regname + } + +diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua +index f69b11f0..52cebefb 100644 +--- a/src/jit/dis_mipsel.lua ++++ b/src/jit/dis_mipsel.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT MIPSEL disassembler wrapper module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This module just exports the little-endian functions from the +diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua +index 2aeb1b29..08d742f1 100644 +--- a/src/jit/dis_ppc.lua ++++ b/src/jit/dis_ppc.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT PPC disassembler module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT/X license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This is a helper module used by the LuaJIT machine code dumper module. 
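The dis_mips64r6.lua and dis_mips64r6el.lua wrappers added above expose the new R6 decoder through the same create/disass interface as the existing MIPS wrapper modules. A minimal usage sketch follows (illustrative only, not part of this commit's diff; it assumes a LuaJIT build that already contains this patch, and the encoded word is a made-up placeholder):

    -- Load the new little-endian MIPS64 R6 wrapper added by this commit.
    local dis = require("jit.dis_mips64r6el")
    -- Placeholder machine code: a single all-zero word, which map_special_r6
    -- decodes as "nop". Real callers pass a string of raw machine code bytes.
    local mcode = string.char(0, 0, 0, 0)
    -- disass(code, addr, out) creates an R6 little-endian context internally
    -- (create_r6_el) and writes the disassembly through the out function.
    dis.disass(mcode, 0, io.write)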
+diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua +index d5714ee1..2d37423e 100644 +--- a/src/jit/dis_x64.lua ++++ b/src/jit/dis_x64.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT x64 disassembler wrapper module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This module just exports the 64 bit functions from the combined +diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua +index 4371233d..5480854c 100644 +--- a/src/jit/dis_x86.lua ++++ b/src/jit/dis_x86.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT x86/x64 disassembler module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- This is a helper module used by the LuaJIT machine code dumper module. +@@ -239,6 +239,24 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", + --8x + [0x8c] = "||pmaskmovXrvVSm", + [0x8e] = "||pmaskmovVSmXvr", ++--9x ++[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm", ++[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm", ++[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm", ++[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm", ++[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm", ++--Ax ++[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm", ++[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm", ++[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm", ++[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm", ++[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm", ++--Bx ++[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm", ++[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm", ++[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm", ++[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm", ++[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm", + --Dx + [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", + [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", +@@ -483,7 +501,7 @@ local function putpat(ctx, name, pat) + local operands, regs, sz, mode, sp, rm, sc, rx, sdisp + local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl + +- -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz ++ -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz + for p in gmatch(pat, ".") do + local x = nil + if p == "V" or p == "U" then +@@ -506,6 +524,9 @@ local function putpat(ctx, name, pat) + sz = ctx.o16 and "X" or "M"; ctx.o16 = false + if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end + regs = map_regs[sz] ++ elseif p == "H" then ++ name = name..(ctx.rexw and "d" or "s") ++ ctx.rexw = false + elseif p == "S" then + name = name..lower(sz) + elseif p == "s" then +@@ -735,6 +756,7 @@ map_act = { + V = putpat, U = putpat, T = putpat, + M = putpat, X = putpat, P = putpat, + F = putpat, G = putpat, Y = putpat, ++ H = putpat, + + -- Collect prefixes. 
+ [":"] = function(ctx, name, pat) +diff --git a/src/jit/dump.lua b/src/jit/dump.lua +index 2bea652b..9eda08c4 100644 +--- a/src/jit/dump.lua ++++ b/src/jit/dump.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT compiler dump module. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- +@@ -102,10 +102,12 @@ end + local function fillsymtab(tr, nexit) + local t = symtab + if nexitsym == 0 then ++ local maskaddr = jit.arch == "arm" and -2 + local ircall = vmdef.ircall + for i=0,#ircall do + local addr = ircalladdr(i) + if addr ~= 0 then ++ if maskaddr then addr = band(addr, maskaddr) end + if addr < 0 then addr = addr + 2^32 end + t[addr] = ircall[i] + end +@@ -217,8 +219,10 @@ local function colorize_text(s) + return s + end + +-local function colorize_ansi(s, t) +- return format(colortype_ansi[t], s) ++local function colorize_ansi(s, t, extra) ++ local out = format(colortype_ansi[t], s) ++ if extra then out = "\027[3m"..out end ++ return out + end + + local irtype_ansi = setmetatable({}, +@@ -227,9 +231,10 @@ local irtype_ansi = setmetatable({}, + + local html_escape = { ["<"] = "<", [">"] = ">", ["&"] = "&", } + +-local function colorize_html(s, t) ++local function colorize_html(s, t, extra) + s = gsub(s, "[<>&]", html_escape) +- return format('<span class="irt_%s">%s</span>', irtype_text[t], s) ++ return format('<span class="irt_%s%s">%s</span>', ++ irtype_text[t], extra and " irt_extra" or "", s) + end + + local irtype_html = setmetatable({}, +@@ -254,6 +259,7 @@ span.irt_tab { color: #c00000; } + span.irt_udt, span.irt_lud { color: #00c0c0; } + span.irt_num { color: #4040c0; } + span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } ++span.irt_extra { font-style: italic; } + </style> + ]] + +@@ -269,6 +275,7 @@ local litname = { + if band(mode, 8) ~= 0 then s = s.."C" end + if band(mode, 16) ~= 0 then s = s.."R" end + if band(mode, 32) ~= 0 then s = s.."I" end ++ if band(mode, 64) ~= 0 then s = s.."K" end + t[mode] = s + return s + end}), +@@ -277,15 +284,18 @@ local litname = { + local s = irtype[band(mode, 31)] + s = irtype[band(shr(mode, 5), 31)].."."..s + if band(mode, 0x800) ~= 0 then s = s.." sext" end +- local c = shr(mode, 14) +- if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end ++ local c = shr(mode, 12) ++ if c == 1 then s = s.." none" ++ elseif c == 2 then s = s.." index" ++ elseif c == 3 then s = s.." check" end + t[mode] = s + return s + end}), + ["FLOAD "] = vmdef.irfield, + ["FREF "] = vmdef.irfield, + ["FPMATH"] = vmdef.irfpm, +- ["BUFHDR"] = { [0] = "RESET", "APPEND" }, ++ ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" }, ++ ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" }, + ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" }, + } + +@@ -315,7 +325,9 @@ local function formatk(tr, idx, sn) + local tn = type(k) + local s + if tn == "number" then +- if band(sn or 0, 0x30000) ~= 0 then ++ if t < 12 then ++ s = k == 0 and "NULL" or format("[0x%08x]", k) ++ elseif band(sn or 0, 0x30000) ~= 0 then + s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" + elseif k == 2^52+2^51 then + s = "bias" +@@ -343,7 +355,7 @@ local function formatk(tr, idx, sn) + else + s = tostring(k) -- For primitives. 
+ end +- s = colorize(format("%-4s", s), t) ++ s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0) + if slot then + s = format("%s @%d", s, slot) + end +@@ -363,7 +375,7 @@ local function printsnap(tr, snap) + out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) + else + local m, ot, op1, op2 = traceir(tr, ref) +- out:write(colorize(format("%04d", ref), band(ot, 31))) ++ out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0)) + end + out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME + else +@@ -582,7 +594,7 @@ local function dump_trace(what, tr, func, pc, otr, oex) + end + + -- Dump recorded bytecode. +-local function dump_record(tr, func, pc, depth, callee) ++local function dump_record(tr, func, pc, depth) + if depth ~= recdepth then + recdepth = depth + recprefix = rep(" .", depth) +@@ -593,7 +605,6 @@ local function dump_record(tr, func, pc, depth, callee) + if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end + else + line = "0000 "..recprefix.." FUNCC \n" +- callee = func + end + if pc <= 0 then + out:write(sub(line, 1, -2), " ; ", fmtfunc(func), "\n") +@@ -607,12 +618,15 @@ end + + ------------------------------------------------------------------------------ + ++local gpr64 = jit.arch:match("64") ++local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel" ++ + -- Dump taken trace exits. + local function dump_texit(tr, ex, ngpr, nfpr, ...) + out:write("---- TRACE ", tr, " exit ", ex, "\n") + if dumpmode.X then + local regs = {...} +- if jit.arch == "x64" then ++ if gpr64 then + for i=1,ngpr do + out:write(format(" %016x", regs[i])) + if i % 4 == 0 then out:write("\n") end +@@ -623,7 +637,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...) + if i % 8 == 0 then out:write("\n") end + end + end +- if jit.arch == "mips" or jit.arch == "mipsel" then ++ if fprmips32 then + for i=1,nfpr,2 do + out:write(format(" %+17.14g", regs[ngpr+i])) + if i % 8 == 7 then out:write("\n") end +diff --git a/src/jit/p.lua b/src/jit/p.lua +index 7be10586..c9ec1d8b 100644 +--- a/src/jit/p.lua ++++ b/src/jit/p.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT profiler. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- +@@ -238,6 +238,7 @@ local function prof_finish() + prof_count1 = nil + prof_count2 = nil + prof_ud = nil ++ if out ~= stdout then out:close() end + end + end + +diff --git a/src/jit/v.lua b/src/jit/v.lua +index 934de985..83589143 100644 +--- a/src/jit/v.lua ++++ b/src/jit/v.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- Verbose mode of the LuaJIT compiler. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- +diff --git a/src/jit/zone.lua b/src/jit/zone.lua +index fa702c4e..94357854 100644 +--- a/src/jit/zone.lua ++++ b/src/jit/zone.lua +@@ -1,7 +1,7 @@ + ---------------------------------------------------------------------------- + -- LuaJIT profiler zones. + -- +--- Copyright (C) 2005-2017 Mike Pall. All rights reserved. 
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. + -- Released under the MIT license. See Copyright Notice in luajit.h + ---------------------------------------------------------------------------- + -- +diff --git a/src/lib_aux.c b/src/lib_aux.c +index c40565c3..4ef55581 100644 +--- a/src/lib_aux.c ++++ b/src/lib_aux.c +@@ -1,6 +1,6 @@ + /* + ** Auxiliary library for the Lua/C API. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major parts taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -218,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) + + LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) + { +- while (l--) +- luaL_addchar(B, *s++); ++ if (l <= bufffree(B)) { ++ memcpy(B->p, s, l); ++ B->p += l; ++ } else { ++ emptybuffer(B); ++ lua_pushlstring(B->L, s, l); ++ B->lvl++; ++ adjuststack(B); ++ } + } + + LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) +@@ -338,17 +345,13 @@ LUALIB_API lua_State *luaL_newstate(void) + + #else + +-#include "lj_alloc.h" +- + LUALIB_API lua_State *luaL_newstate(void) + { + lua_State *L; +- void *ud = lj_alloc_create(); +- if (ud == NULL) return NULL; + #if LJ_64 && !LJ_GC64 +- L = lj_state_newstate(lj_alloc_f, ud); ++ L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL); + #else +- L = lua_newstate(lj_alloc_f, ud); ++ L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL); + #endif + if (L) G(L)->panic = panic; + return L; +diff --git a/src/lib_base.c b/src/lib_base.c +index 3a757870..55e3c6b8 100644 +--- a/src/lib_base.c ++++ b/src/lib_base.c +@@ -1,6 +1,6 @@ + /* + ** Base and coroutine library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -19,6 +19,7 @@ + #include "lj_gc.h" + #include "lj_err.h" + #include "lj_debug.h" ++#include "lj_buf.h" + #include "lj_str.h" + #include "lj_tab.h" + #include "lj_meta.h" +@@ -42,13 +43,13 @@ + + LJLIB_ASM(assert) LJLIB_REC(.) + { +- GCstr *s; + lj_lib_checkany(L, 1); +- s = lj_lib_optstr(L, 2); +- if (s) +- lj_err_callermsg(L, strdata(s)); +- else ++ if (L->top == L->base+1) + lj_err_caller(L, LJ_ERR_ASSERT); ++ else if (tvisstr(L->base+1) || tvisnumber(L->base+1)) ++ lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2))); ++ else ++ lj_err_run(L); + return FFH_UNREACHABLE; + } + +@@ -75,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.) + /* This solves a circular dependency problem -- change FF_next_N as needed. */ + LJ_STATIC_ASSERT((int)FF_next == FF_next_N); + +-LJLIB_ASM(next) ++LJLIB_ASM(next) LJLIB_REC(.) + { + lj_lib_checktab(L, 1); ++ lj_err_msg(L, LJ_ERR_NEXTIDX); + return FFH_UNREACHABLE; + } + +@@ -224,9 +226,11 @@ LJLIB_CF(unpack) + int32_t n, i = lj_lib_optint(L, 2, 1); + int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ? + lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t); ++ uint32_t nu; + if (i > e) return 0; +- n = e - i + 1; +- if (n <= 0 || !lua_checkstack(L, n)) ++ nu = (uint32_t)e - (uint32_t)i; ++ n = (int32_t)(nu+1); ++ if (nu >= LUAI_MAXCSTACK || !lua_checkstack(L, n)) + lj_err_caller(L, LJ_ERR_UNPACK); + do { + cTValue *tv = lj_tab_getint(t, i); +@@ -287,18 +291,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) 
+ } else { + const char *p = strdata(lj_lib_checkstr(L, 1)); + char *ep; ++ unsigned int neg = 0; + unsigned long ul; + if (base < 2 || base > 36) + lj_err_arg(L, 2, LJ_ERR_BASERNG); +- ul = strtoul(p, &ep, base); +- if (p != ep) { +- while (lj_char_isspace((unsigned char)(*ep))) ep++; +- if (*ep == '\0') { +- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) +- setintV(L->base-1-LJ_FR2, (int32_t)ul); +- else +- setnumV(L->base-1-LJ_FR2, (lua_Number)ul); +- return FFH_RES(1); ++ while (lj_char_isspace((unsigned char)(*p))) p++; ++ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } ++ if (lj_char_isalnum((unsigned char)(*p))) { ++ ul = strtoul(p, &ep, base); ++ if (p != ep) { ++ while (lj_char_isspace((unsigned char)(*ep))) ep++; ++ if (*ep == '\0') { ++ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { ++ if (neg) ul = (unsigned long)-(long)ul; ++ setintV(L->base-1-LJ_FR2, (int32_t)ul); ++ } else { ++ lua_Number n = (lua_Number)ul; ++ if (neg) n = -n; ++ setnumV(L->base-1-LJ_FR2, n); ++ } ++ return FFH_RES(1); ++ } + } + } + } +@@ -395,10 +408,22 @@ LJLIB_CF(load) + GCstr *name = lj_lib_optstr(L, 2); + GCstr *mode = lj_lib_optstr(L, 3); + int status; +- if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) { +- GCstr *s = lj_lib_checkstr(L, 1); ++ if (L->base < L->top && ++ (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) { ++ const char *s; ++ MSize len; ++ if (tvisbuf(L->base)) { ++ SBufExt *sbx = bufV(L->base); ++ s = sbx->r; ++ len = sbufxlen(sbx); ++ if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */ ++ } else { ++ GCstr *str = lj_lib_checkstr(L, 1); ++ s = strdata(str); ++ len = str->len; ++ } + lua_settop(L, 4); /* Ensure env arg exists. */ +- status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s), ++ status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s, + mode ? strdata(mode) : NULL); + } else { + lj_lib_checkfunc(L, 1); +@@ -493,7 +518,8 @@ LJLIB_CF(print) + lua_gettable(L, LUA_GLOBALSINDEX); + tv = L->top-1; + } +- shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); ++ shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) && ++ !gcrefu(basemt_it(G(L), LJ_TNUMX)); + for (i = 0; i < nargs; i++) { + cTValue *o = &L->base[i]; + const char *str; +diff --git a/src/lib_bit.c b/src/lib_bit.c +index c979a448..6fb8ad47 100644 +--- a/src/lib_bit.c ++++ b/src/lib_bit.c +@@ -1,6 +1,6 @@ + /* + ** Bit manipulation library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lib_bit_c +diff --git a/src/lib_buffer.c b/src/lib_buffer.c +new file mode 100644 +index 00000000..2e364861 +--- /dev/null ++++ b/src/lib_buffer.c +@@ -0,0 +1,356 @@ ++/* ++** Buffer library. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#define lib_buffer_c ++#define LUA_LIB ++ ++#include "lua.h" ++#include "lauxlib.h" ++#include "lualib.h" ++ ++#include "lj_obj.h" ++ ++#if LJ_HASBUFFER ++#include "lj_gc.h" ++#include "lj_err.h" ++#include "lj_buf.h" ++#include "lj_str.h" ++#include "lj_tab.h" ++#include "lj_udata.h" ++#include "lj_meta.h" ++#if LJ_HASFFI ++#include "lj_ctype.h" ++#include "lj_cdata.h" ++#include "lj_cconv.h" ++#endif ++#include "lj_strfmt.h" ++#include "lj_serialize.h" ++#include "lj_lib.h" ++ ++/* -- Helper functions ---------------------------------------------------- */ ++ ++/* Check that the first argument is a string buffer. 
*/ ++static SBufExt *buffer_tobuf(lua_State *L) ++{ ++ if (!(L->base < L->top && tvisbuf(L->base))) ++ lj_err_argtype(L, 1, "buffer"); ++ return bufV(L->base); ++} ++ ++/* Ditto, but for writers. */ ++static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ setsbufXL_(sbx, L); ++ return sbx; ++} ++ ++#define buffer_toudata(sbx) ((GCudata *)(sbx)-1) ++ ++/* -- Buffer methods ------------------------------------------------------ */ ++ ++#define LJLIB_MODULE_buffer_method ++ ++LJLIB_CF(buffer_method_free) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ lj_bufx_free(L, sbx); ++ L->top = L->base+1; /* Chain buffer object. */ ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_reset) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ lj_bufx_reset(sbx); ++ L->top = L->base+1; /* Chain buffer object. */ ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_skip) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); ++ MSize len = sbufxlen(sbx); ++ if (n < len) { ++ sbx->r += n; ++ } else { ++ sbx->r = sbx->w = sbx->b; ++ } ++ L->top = L->base+1; /* Chain buffer object. */ ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_set) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ GCobj *ref; ++ const char *p; ++ MSize len; ++#if LJ_HASFFI ++ if (tviscdata(L->base+1)) { ++ CTState *cts = ctype_cts(L); ++ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, ++ L->base+1, CCF_ARG(2)); ++ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); ++ } else ++#endif ++ { ++ GCstr *str = lj_lib_checkstrx(L, 2); ++ p = strdata(str); ++ len = str->len; ++ } ++ lj_bufx_free(L, sbx); ++ lj_bufx_set_cow(L, sbx, p, len); ++ ref = gcV(L->base+1); ++ setgcref(sbx->cowref, ref); ++ lj_gc_objbarrier(L, buffer_toudata(sbx), ref); ++ L->top = L->base+1; /* Chain buffer object. */ ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_put) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobufw(L); ++ ptrdiff_t arg, narg = L->top - L->base; ++ for (arg = 1; arg < narg; arg++) { ++ cTValue *o = &L->base[arg], *mo = NULL; ++ retry: ++ if (tvisstr(o)) { ++ lj_buf_putstr((SBuf *)sbx, strV(o)); ++ } else if (tvisint(o)) { ++ lj_strfmt_putint((SBuf *)sbx, intV(o)); ++ } else if (tvisnum(o)) { ++ lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o)); ++ } else if (tvisbuf(o)) { ++ SBufExt *sbx2 = bufV(o); ++ if (sbx2 == sbx) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF); ++ lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2)); ++ } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { ++ /* Call __tostring metamethod inline. */ ++ copyTV(L, L->top++, mo); ++ copyTV(L, L->top++, o); ++ lua_call(L, 1, 1); ++ o = &L->base[arg]; /* The stack may have been reallocated. */ ++ copyTV(L, &L->base[arg], L->top-1); ++ L->top = L->base + narg; ++ goto retry; /* Retry with the result. */ ++ } else { ++ lj_err_argtype(L, arg+1, "string/number/__tostring"); ++ } ++ /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */ ++ } ++ L->top = L->base+1; /* Chain buffer object. */ ++ lj_gc_check(L); ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_putf) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobufw(L); ++ lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2); ++ L->top = L->base+1; /* Chain buffer object. */ ++ lj_gc_check(L); ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_get) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ ptrdiff_t arg, narg = L->top - L->base; ++ if (narg == 1) { ++ narg++; ++ setnilV(L->top++); /* get() is the same as get(nil). 
*/ ++ } ++ for (arg = 1; arg < narg; arg++) { ++ TValue *o = &L->base[arg]; ++ MSize n = tvisnil(o) ? LJ_MAX_BUF : ++ (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF); ++ MSize len = sbufxlen(sbx); ++ if (n > len) n = len; ++ setstrV(L, o, lj_str_new(L, sbx->r, n)); ++ sbx->r += n; ++ } ++ if (sbx->r == sbx->w) sbx->r = sbx->w = sbx->b; ++ lj_gc_check(L); ++ return narg-1; ++} ++ ++#if LJ_HASFFI ++LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobufw(L); ++ const char *p; ++ MSize len; ++ if (tviscdata(L->base+1)) { ++ CTState *cts = ctype_cts(L); ++ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, ++ L->base+1, CCF_ARG(2)); ++ } else { ++ lj_err_argtype(L, 2, "cdata"); ++ } ++ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); ++ lj_buf_putmem((SBuf *)sbx, p, len); ++ L->top = L->base+1; /* Chain buffer object. */ ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_reserve) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobufw(L); ++ MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); ++ GCcdata *cd; ++ lj_buf_more((SBuf *)sbx, sz); ++ ctype_loadffi(L); ++ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); ++ *(void **)cdataptr(cd) = sbx->w; ++ setcdataV(L, L->top++, cd); ++ setintV(L->top++, sbufleft(sbx)); ++ return 2; ++} ++ ++LJLIB_CF(buffer_method_commit) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); ++ if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG); ++ sbx->w += len; ++ L->top = L->base+1; /* Chain buffer object. */ ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_ref) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ GCcdata *cd; ++ ctype_loadffi(L); ++ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); ++ *(void **)cdataptr(cd) = sbx->r; ++ setcdataV(L, L->top++, cd); ++ setintV(L->top++, sbufxlen(sbx)); ++ return 2; ++} ++#endif ++ ++LJLIB_CF(buffer_method_encode) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobufw(L); ++ cTValue *o = lj_lib_checkany(L, 2); ++ lj_serialize_put(sbx, o); ++ lj_gc_check(L); ++ L->top = L->base+1; /* Chain buffer object. */ ++ return 1; ++} ++ ++LJLIB_CF(buffer_method_decode) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobufw(L); ++ setnilV(L->top++); ++ sbx->r = lj_serialize_get(sbx, L->top-1); ++ lj_gc_check(L); ++ return 1; ++} ++ ++LJLIB_CF(buffer_method___gc) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ lj_bufx_free(L, sbx); ++ return 0; ++} ++ ++LJLIB_CF(buffer_method___tostring) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx))); ++ lj_gc_check(L); ++ return 1; ++} ++ ++LJLIB_CF(buffer_method___len) LJLIB_REC(.) ++{ ++ SBufExt *sbx = buffer_tobuf(L); ++ setintV(L->top-1, (int32_t)sbufxlen(sbx)); ++ return 1; ++} ++ ++LJLIB_PUSH("buffer") LJLIB_SET(__metatable) ++LJLIB_PUSH(top-1) LJLIB_SET(__index) ++ ++/* -- Buffer library functions -------------------------------------------- */ ++ ++#define LJLIB_MODULE_buffer ++ ++LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. 
*/ ++ ++LJLIB_CF(buffer_new) ++{ ++ MSize sz = 0; ++ int targ = 1; ++ GCtab *env, *dict_str = NULL, *dict_mt = NULL; ++ GCudata *ud; ++ SBufExt *sbx; ++ if (L->base < L->top && !tvistab(L->base)) { ++ targ = 2; ++ if (!tvisnil(L->base)) ++ sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF); ++ } ++ if (L->base+targ-1 < L->top) { ++ GCtab *options = lj_lib_checktab(L, targ); ++ cTValue *opt_dict, *opt_mt; ++ opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict")); ++ if (opt_dict && tvistab(opt_dict)) { ++ dict_str = tabV(opt_dict); ++ lj_serialize_dict_prep_str(L, dict_str); ++ } ++ opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable")); ++ if (opt_mt && tvistab(opt_mt)) { ++ dict_mt = tabV(opt_mt); ++ lj_serialize_dict_prep_mt(L, dict_mt); ++ } ++ } ++ env = tabref(curr_func(L)->c.env); ++ ud = lj_udata_new(L, sizeof(SBufExt), env); ++ ud->udtype = UDTYPE_BUFFER; ++ /* NOBARRIER: The GCudata is new (marked white). */ ++ setgcref(ud->metatable, obj2gco(env)); ++ setudataV(L, L->top++, ud); ++ sbx = (SBufExt *)uddata(ud); ++ lj_bufx_init(L, sbx); ++ setgcref(sbx->dict_str, obj2gco(dict_str)); ++ setgcref(sbx->dict_mt, obj2gco(dict_mt)); ++ if (sz > 0) lj_buf_need2((SBuf *)sbx, sz); ++ return 1; ++} ++ ++LJLIB_CF(buffer_encode) LJLIB_REC(.) ++{ ++ cTValue *o = lj_lib_checkany(L, 1); ++ setstrV(L, L->top++, lj_serialize_encode(L, o)); ++ lj_gc_check(L); ++ return 1; ++} ++ ++LJLIB_CF(buffer_decode) LJLIB_REC(.) ++{ ++ GCstr *str = lj_lib_checkstrx(L, 1); ++ setnilV(L->top++); ++ lj_serialize_decode(L, L->top-1, str); ++ return 1; ++} ++ ++/* ------------------------------------------------------------------------ */ ++ ++#include "lj_libdef.h" ++ ++int luaopen_string_buffer(lua_State *L) ++{ ++ LJ_LIB_REG(L, NULL, buffer_method); ++ lua_getfield(L, -1, "__tostring"); ++ lua_setfield(L, -2, "tostring"); ++ LJ_LIB_REG(L, NULL, buffer); ++ return 1; ++} ++ ++#endif +diff --git a/src/lib_debug.c b/src/lib_debug.c +index f112b5bc..a6acc6f2 100644 +--- a/src/lib_debug.c ++++ b/src/lib_debug.c +@@ -1,6 +1,6 @@ + /* + ** Debug library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid) + int32_t n = lj_lib_checkint(L, 2) - 1; + if ((uint32_t)n >= fn->l.nupvalues) + lj_err_arg(L, 2, LJ_ERR_IDXRNG); +- setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : +- (void *)&fn->c.upvalue[n]); ++ lua_pushlightuserdata(L, isluafunc(fn) ? 
(void *)gcref(fn->l.uvptr[n]) : ++ (void *)&fn->c.upvalue[n]); + return 1; + } + +@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue) + + /* ------------------------------------------------------------------------ */ + +-#define KEY_HOOK ((void *)0x3004) ++#define KEY_HOOK (U64x(80000000,00000000)|'h') + + static void hookf(lua_State *L, lua_Debug *ar) + { + static const char *const hooknames[] = + {"call", "return", "line", "count", "tail return"}; +- lua_pushlightuserdata(L, KEY_HOOK); ++ (L->top++)->u64 = KEY_HOOK; + lua_rawget(L, LUA_REGISTRYINDEX); + if (lua_isfunction(L, -1)) { + lua_pushstring(L, hooknames[(int)ar->event]); +@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook) + count = luaL_optint(L, arg+3, 0); + func = hookf; mask = makemask(smask, count); + } +- lua_pushlightuserdata(L, KEY_HOOK); ++ (L->top++)->u64 = KEY_HOOK; + lua_pushvalue(L, arg+1); + lua_rawset(L, LUA_REGISTRYINDEX); + lua_sethook(L, func, mask, count); +@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook) + if (hook != NULL && hook != hookf) { /* external hook? */ + lua_pushliteral(L, "external hook"); + } else { +- lua_pushlightuserdata(L, KEY_HOOK); ++ (L->top++)->u64 = KEY_HOOK; + lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ + } + lua_pushstring(L, unmakemask(mask, buff)); +@@ -369,7 +369,8 @@ LJLIB_CF(debug_debug) + return 0; + if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") || + lua_pcall(L, 0, 0, 0)) { +- fputs(lua_tostring(L, -1), stderr); ++ const char *s = lua_tostring(L, -1); ++ fputs(s ? s : "(error object is not a string)", stderr); + fputs("\n", stderr); + } + lua_settop(L, 0); /* remove eventual returns */ +diff --git a/src/lib_ffi.c b/src/lib_ffi.c +index 136e98e8..b4321048 100644 +--- a/src/lib_ffi.c ++++ b/src/lib_ffi.c +@@ -1,6 +1,6 @@ + /* + ** FFI library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lib_ffi_c +@@ -573,6 +573,7 @@ LJLIB_CF(ffi_typeinfo) + setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib); + if (gcref(ct->name)) { + GCstr *s = gco2str(gcref(ct->name)); ++ if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s)); + setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s); + } + lj_gc_check(L); +@@ -720,47 +721,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.) + return 0; + } + +-#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) +- + /* Test ABI string. */ + LJLIB_CF(ffi_abi) LJLIB_REC(.) 
+ { + GCstr *s = lj_lib_checkstr(L, 1); +- int b = 0; +- switch (s->hash) { ++ int b = lj_cparse_case(s, + #if LJ_64 +- case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ ++ "\00564bit" + #else +- case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ ++ "\00532bit" + #endif + #if LJ_ARCH_HASFPU +- case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ ++ "\003fpu" + #endif + #if LJ_ABI_SOFTFP +- case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ ++ "\006softfp" + #else +- case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ ++ "\006hardfp" + #endif + #if LJ_ABI_EABI +- case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ ++ "\004eabi" + #endif + #if LJ_ABI_WIN +- case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ ++ "\003win" ++#endif ++#if LJ_TARGET_UWP ++ "\003uwp" ++#endif ++#if LJ_LE ++ "\002le" ++#else ++ "\002be" + #endif +- case H_(3af93066,1f001464): b = 1; break; /* le/be */ + #if LJ_GC64 +- case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */ ++ "\004gc64" + #endif +- default: +- break; +- } ++ ) >= 0; + setboolV(L->top-1, b); + setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ + return 1; + } + +-#undef H_ +- + LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ + + LJLIB_CF(ffi_metatype) +diff --git a/src/lib_init.c b/src/lib_init.c +index 2ed370e9..56e0619a 100644 +--- a/src/lib_init.c ++++ b/src/lib_init.c +@@ -1,6 +1,6 @@ + /* + ** Library initialization. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major parts taken verbatim from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +diff --git a/src/lib_io.c b/src/lib_io.c +index 9763ed46..b9d8cc75 100644 +--- a/src/lib_io.c ++++ b/src/lib_io.c +@@ -1,6 +1,6 @@ + /* + ** I/O library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +@@ -60,12 +60,12 @@ static IOFileUD *io_tofile(lua_State *L) + return iof; + } + +-static FILE *io_stdfile(lua_State *L, ptrdiff_t id) ++static IOFileUD *io_stdfile(lua_State *L, ptrdiff_t id) + { + IOFileUD *iof = IOSTDF_IOF(L, id); + if (iof->fp == NULL) + lj_err_caller(L, LJ_ERR_IOSTDCL); +- return iof->fp; ++ return iof; + } + + static IOFileUD *io_file_new(lua_State *L) +@@ -99,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) + int stat = -1; + #if LJ_TARGET_POSIX + stat = pclose(iof->fp); +-#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE ++#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP + stat = _pclose(iof->fp); +-#else +- lua_assert(0); +- return 0; + #endif + #if LJ_52 + iof->fp = NULL; +@@ -112,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) + ok = (stat != -1); + #endif + } else { +- lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); ++ lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF, ++ "close of unknown FILE* type"); + setnilV(L->top++); + lua_pushliteral(L, "cannot close standard file"); + return 2; +@@ -180,7 +178,7 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m) + MSize n = (MSize)fread(buf, 1, m, fp); + setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); + lj_gc_check(L); +- return (n > 0 || m == 0); ++ return n > 0; + } else { + int c = getc(fp); + ungetc(c, fp); +@@ -189,8 +187,9 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m) + } + } + +-static int io_file_read(lua_State *L, FILE *fp, int start) ++static int io_file_read(lua_State *L, IOFileUD *iof, int start) + { ++ FILE *fp = iof->fp; + int ok, n, nargs = (int)(L->top - L->base) - start; + clearerr(fp); + if (nargs == 0) { +@@ -226,8 +225,9 @@ static int io_file_read(lua_State *L, FILE *fp, int start) + return n - start; + } + +-static int io_file_write(lua_State *L, FILE *fp, int start) ++static int io_file_write(lua_State *L, IOFileUD *iof, int start) + { ++ FILE *fp = iof->fp; + cTValue *tv; + int status = 1; + for (tv = L->base+start; tv < L->top; tv++) { +@@ -255,13 +255,11 @@ static int io_file_iter(lua_State *L) + lj_err_caller(L, LJ_ERR_IOCLFL); + L->top = L->base; + if (n) { /* Copy upvalues with options to stack. */ +- if (n > LUAI_MAXCSTACK) +- lj_err_caller(L, LJ_ERR_STKOV); + lj_state_checkstack(L, (MSize)n); + memcpy(L->top, &fn->c.upvalue[1], n*sizeof(TValue)); + L->top += n; + } +- n = io_file_read(L, iof->fp, 0); ++ n = io_file_read(L, iof, 0); + if (ferror(iof->fp)) + lj_err_callermsg(L, strVdata(L->top-2)); + if (tvisnil(L->base) && (iof->type & IOFILE_FLAG_CLOSE)) { +@@ -286,19 +284,25 @@ static int io_file_lines(lua_State *L) + + LJLIB_CF(io_method_close) + { +- IOFileUD *iof = L->base < L->top ? 
io_tofile(L) : +- IOSTDF_IOF(L, GCROOT_IO_OUTPUT); ++ IOFileUD *iof; ++ if (L->base < L->top) { ++ iof = io_tofile(L); ++ } else { ++ iof = IOSTDF_IOF(L, GCROOT_IO_OUTPUT); ++ if (iof->fp == NULL) ++ lj_err_caller(L, LJ_ERR_IOCLFL); ++ } + return io_file_close(L, iof); + } + + LJLIB_CF(io_method_read) + { +- return io_file_read(L, io_tofile(L)->fp, 1); ++ return io_file_read(L, io_tofile(L), 1); + } + + LJLIB_CF(io_method_write) LJLIB_REC(io_write 0) + { +- return io_file_write(L, io_tofile(L)->fp, 1); ++ return io_file_write(L, io_tofile(L), 1); + } + + LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) +@@ -306,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) + return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); + } + ++#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24 ++/* The Android NDK is such an unmatched marvel of engineering. */ ++extern int fseeko32(FILE *, long int, int) __asm__("fseeko"); ++extern long int ftello32(FILE *) __asm__("ftello"); ++#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence))) ++#define ftello(fp) (ftello32((fp))) ++#endif ++ + LJLIB_CF(io_method_seek) + { + FILE *fp = io_tofile(L)->fp; +@@ -406,7 +418,7 @@ LJLIB_CF(io_open) + + LJLIB_CF(io_popen) + { +-#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE) ++#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP) + const char *fname = strdata(lj_lib_checkstr(L, 1)); + GCstr *s = lj_lib_optstr(L, 2); + const char *mode = s ? strdata(s) : "r"; +@@ -452,7 +464,7 @@ LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT) + + LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT) + { +- return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL); ++ return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)->fp) == 0, NULL); + } + + static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode) +diff --git a/src/lib_jit.c b/src/lib_jit.c +index 22ca0a1a..817c2967 100644 +--- a/src/lib_jit.c ++++ b/src/lib_jit.c +@@ -1,6 +1,6 @@ + /* + ** JIT library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lib_jit_c +@@ -104,8 +104,8 @@ LJLIB_CF(jit_status) + jit_State *J = L2J(L); + L->top = L->base; + setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); +- flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); +- flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); ++ flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); ++ flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); + return (int)(L->top - L->base); + #else + setboolV(L->top++, 0); +@@ -113,6 +113,13 @@ LJLIB_CF(jit_status) + #endif + } + ++LJLIB_CF(jit_security) ++{ ++ int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING); ++ setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3)); ++ return 1; ++} ++ + LJLIB_CF(jit_attach) + { + #ifdef LUAJIT_DISABLE_VMEVENT +@@ -227,7 +234,7 @@ LJLIB_CF(jit_util_funcbc) + if (pc < pt->sizebc) { + BCIns ins = proto_bc(pt)[pc]; + BCOp op = bc_op(ins); +- lua_assert(op < BC__MAX); ++ lj_assertL(op < BC__MAX, "bad bytecode op %d", op); + setintV(L->top, ins); + setintV(L->top+1, lj_bc_mode[op]); + L->top += 2; +@@ -339,11 +346,7 @@ LJLIB_CF(jit_util_tracek) + ir = &T->ir[ir->op1]; + } + #if LJ_HASFFI +- if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) { +- ptrdiff_t oldtop = savestack(L, L->top); +- luaopen_ffi(L); /* Load FFI library on-demand. 
*/ +- L->top = restorestack(L, oldtop); +- } ++ if (ir->o == IR_KINT64) ctype_loadffi(L); + #endif + lj_ir_kvalue(L, L->top-2, ir); + setintV(L->top-1, (int32_t)irt_type(ir->t)); +@@ -471,7 +474,7 @@ static int jitopt_flag(jit_State *J, const char *str) + str += str[2] == '-' ? 3 : 2; + set = 0; + } +- for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { ++ for (opt = JIT_F_OPT; ; opt <<= 1) { + size_t len = *(const uint8_t *)lst; + if (len == 0) + break; +@@ -491,7 +494,7 @@ static int jitopt_param(jit_State *J, const char *str) + int i; + for (i = 0; i < JIT_P__MAX; i++) { + size_t len = *(const uint8_t *)lst; +- lua_assert(len != 0); ++ lj_assertJ(len != 0, "bad JIT_P_STRING"); + if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { + int32_t n = 0; + const char *p = &str[len+1]; +@@ -540,15 +543,15 @@ LJLIB_CF(jit_opt_start) + + /* Not loaded by default, use: local profile = require("jit.profile") */ + +-static const char KEY_PROFILE_THREAD = 't'; +-static const char KEY_PROFILE_FUNC = 'f'; ++#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t') ++#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f') + + static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, + int vmstate) + { + TValue key; + cTValue *tv; +- setlightudV(&key, (void *)&KEY_PROFILE_FUNC); ++ key.u64 = KEY_PROFILE_FUNC; + tv = lj_tab_get(L, tabV(registry(L)), &key); + if (tvisfunc(tv)) { + char vmst = (char)vmstate; +@@ -575,9 +578,9 @@ LJLIB_CF(jit_profile_start) + lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ + TValue key; + /* Anchor thread and function in registry. */ +- setlightudV(&key, (void *)&KEY_PROFILE_THREAD); ++ key.u64 = KEY_PROFILE_THREAD; + setthreadV(L, lj_tab_set(L, registry, &key), L2); +- setlightudV(&key, (void *)&KEY_PROFILE_FUNC); ++ key.u64 = KEY_PROFILE_FUNC; + setfuncV(L, lj_tab_set(L, registry, &key), func); + lj_gc_anybarriert(L, registry); + luaJIT_profile_start(L, mode ? strdata(mode) : "", +@@ -592,9 +595,9 @@ LJLIB_CF(jit_profile_stop) + TValue key; + luaJIT_profile_stop(L); + registry = tabV(registry(L)); +- setlightudV(&key, (void *)&KEY_PROFILE_THREAD); ++ key.u64 = KEY_PROFILE_THREAD; + setnilV(lj_tab_set(L, registry, &key)); +- setlightudV(&key, (void *)&KEY_PROFILE_FUNC); ++ key.u64 = KEY_PROFILE_FUNC; + setnilV(lj_tab_set(L, registry, &key)); + lj_gc_anybarriert(L, registry); + return 0; +@@ -640,59 +643,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) + #undef JIT_PARAMINIT + 0 + }; +-#endif + + #if LJ_TARGET_ARM && LJ_TARGET_LINUX + #include <sys/utsname.h> + #endif + +-/* Arch-dependent CPU detection. */ +-static uint32_t jit_cpudetect(lua_State *L) ++/* Arch-dependent CPU feature detection. */ ++static uint32_t jit_cpudetect(void) + { + uint32_t flags = 0; + #if LJ_TARGET_X86ORX64 ++ + uint32_t vendor[4]; + uint32_t features[4]; + if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { +-#if !LJ_HASJIT +-#define JIT_F_SSE2 2 +-#endif +- flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; +-#if LJ_HASJIT + flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; + flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; +- if (vendor[2] == 0x6c65746e) { /* Intel. */ +- if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ +- flags |= JIT_F_LEA_AGU; +- } else if (vendor[2] == 0x444d4163) { /* AMD. */ +- uint32_t fam = (features[0] & 0x0ff00f00); +- if (fam >= 0x00000f00) /* K8, K10. 
*/ +- flags |= JIT_F_PREFER_IMUL; +- } + if (vendor[0] >= 7) { + uint32_t xfeatures[4]; + lj_vm_cpuid(7, xfeatures); + flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; + } +-#endif + } +- /* Check for required instruction set support on x86 (unnecessary on x64). */ +-#if LJ_TARGET_X86 +- if (!(flags & JIT_F_SSE2)) +- luaL_error(L, "CPU with SSE2 required"); +-#endif ++ /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ ++ + #elif LJ_TARGET_ARM +-#if LJ_HASJIT ++ + int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ + #if LJ_TARGET_LINUX + if (ver < 70) { /* Runtime ARM CPU detection. */ + struct utsname ut; + uname(&ut); + if (strncmp(ut.machine, "armv", 4) == 0) { +- if (ut.machine[4] >= '7') +- ver = 70; +- else if (ut.machine[4] == '6') +- ver = 60; ++ if (ut.machine[4] >= '8') ver = 80; ++ else if (ut.machine[4] == '7') ver = 70; ++ else if (ut.machine[4] == '6') ver = 60; + } + } + #endif +@@ -700,20 +685,22 @@ static uint32_t jit_cpudetect(lua_State *L) + ver >= 61 ? JIT_F_ARMV6T2_ : + ver >= 60 ? JIT_F_ARMV6_ : 0; + flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; +-#endif ++ + #elif LJ_TARGET_ARM64 ++ + /* No optional CPU features to detect (for now). */ ++ + #elif LJ_TARGET_PPC +-#if LJ_HASJIT ++ + #if LJ_ARCH_SQRT + flags |= JIT_F_SQRT; + #endif + #if LJ_ARCH_ROUND + flags |= JIT_F_ROUND; + #endif +-#endif ++ + #elif LJ_TARGET_MIPS +-#if LJ_HASJIT ++ + /* Compile-time MIPS CPU detection. */ + #if LJ_ARCH_VERSION >= 20 + flags |= JIT_F_MIPSXXR2; +@@ -731,31 +718,28 @@ static uint32_t jit_cpudetect(lua_State *L) + if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ + } + #endif +-#endif ++ + #else + #error "Missing CPU detection for this architecture" + #endif +- UNUSED(L); + return flags; + } + + /* Initialize JIT compiler. */ + static void jit_init(lua_State *L) + { +- uint32_t flags = jit_cpudetect(L); +-#if LJ_HASJIT + jit_State *J = L2J(L); +- J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; ++ J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; + memcpy(J->param, jit_param_default, sizeof(J->param)); + lj_dispatch_update(G(L)); +-#else +- UNUSED(flags); +-#endif + } ++#endif + + LUALIB_API int luaopen_jit(lua_State *L) + { ++#if LJ_HASJIT + jit_init(L); ++#endif + lua_pushliteral(L, LJ_OS_NAME); + lua_pushliteral(L, LJ_ARCH_NAME); + lua_pushinteger(L, LUAJIT_VERSION_NUM); +diff --git a/src/lib_math.c b/src/lib_math.c +index ef9dda2d..e9a0b597 100644 +--- a/src/lib_math.c ++++ b/src/lib_math.c +@@ -1,6 +1,6 @@ + /* + ** Math library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #include <math.h> +@@ -15,6 +15,7 @@ + #include "lj_obj.h" + #include "lj_lib.h" + #include "lj_vm.h" ++#include "lj_prng.h" + + /* ------------------------------------------------------------------------ */ + +@@ -33,19 +34,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT) + lj_lib_checknum(L, 1); + return FFH_RETRY; + } +-LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) +-LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) +-LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) +-LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) +-LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) +-LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) +-LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) +-LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) +-LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) +-LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) +-LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) ++LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10) ++LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp) ++LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin) ++LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos) ++LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan) ++LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin) ++LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos) ++LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan) ++LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh) ++LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh) ++LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh) + LJLIB_ASM_(math_frexp) +-LJLIB_ASM_(math_modf) LJLIB_REC(.) ++LJLIB_ASM_(math_modf) + + LJLIB_ASM(math_log) LJLIB_REC(math_log) + { +@@ -105,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge) + ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. + */ + +-/* PRNG state. */ +-struct RandomState { +- uint64_t gen[4]; /* State of the 4 LFSR generators. */ +- int valid; /* State is valid. */ +-}; +- + /* Union needed for bit-pattern conversion between uint64_t and double. */ + typedef union { uint64_t u64; double d; } U64double; + +-/* Update generator i and compute a running xor of all states. */ +-#define TW223_GEN(i, k, q, s) \ +- z = rs->gen[i]; \ +- z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ +- r ^= z; rs->gen[i] = z; +- +-/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ +-LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) +-{ +- uint64_t z, r = 0; +- TW223_GEN(0, 63, 31, 18) +- TW223_GEN(1, 58, 19, 28) +- TW223_GEN(2, 55, 24, 7) +- TW223_GEN(3, 47, 21, 8) +- return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); +-} +- +-/* PRNG initialization function. */ +-static void random_init(RandomState *rs, double d) ++/* PRNG seeding function. */ ++static void random_seed(PRNGState *rs, double d) + { + uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ + int i; +@@ -141,24 +119,22 @@ static void random_init(RandomState *rs, double d) + uint32_t m = 1u << (r&255); + r >>= 8; + u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; +- if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ +- rs->gen[i] = u.u64; ++ if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */ ++ rs->u[i] = u.u64; + } +- rs->valid = 1; + for (i = 0; i < 10; i++) +- lj_math_random_step(rs); ++ (void)lj_prng_u64(rs); + } + + /* PRNG extract function. 
*/ +-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ ++LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ + LJLIB_CF(math_random) LJLIB_REC(.) + { + int n = (int)(L->top - L->base); +- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); ++ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + U64double u; + double d; +- if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); +- u.u64 = lj_math_random_step(rs); ++ u.u64 = lj_prng_u64d(rs); + d = u.d - 1.0; + if (n > 0) { + #if LJ_DUALNUM +@@ -203,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.) + } + + /* PRNG seed function. */ +-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ ++LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ + LJLIB_CF(math_randomseed) + { +- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); +- random_init(rs, lj_lib_checknum(L, 1)); ++ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); ++ random_seed(rs, lj_lib_checknum(L, 1)); + return 0; + } + +@@ -217,9 +193,8 @@ LJLIB_CF(math_randomseed) + + LUALIB_API int luaopen_math(lua_State *L) + { +- RandomState *rs; +- rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); +- rs->valid = 0; /* Use lazy initialization to save some time on startup. */ ++ PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState)); ++ lj_prng_seed_fixed(rs); + LJ_LIB_REG(L, LUA_MATHLIBNAME, math); + return 1; + } +diff --git a/src/lib_os.c b/src/lib_os.c +index 9e78d49a..f19b831c 100644 +--- a/src/lib_os.c ++++ b/src/lib_os.c +@@ -1,6 +1,6 @@ + /* + ** OS library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -205,12 +205,12 @@ LJLIB_CF(os_date) + setboolfield(L, "isdst", stm->tm_isdst); + } else if (*s) { + SBuf *sb = &G(L)->tmpbuf; +- MSize sz = 0; ++ MSize sz = 0, retry = 4; + const char *q; + for (q = s; *q; q++) + sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */ + setsbufL(sb, L); +- for (;;) { ++ while (retry--) { /* Limit growth for invalid format or empty result. */ + char *buf = lj_buf_need(sb, sz); + size_t len = strftime(buf, sbufsz(sb), s, stm); + if (len) { +diff --git a/src/lib_package.c b/src/lib_package.c +index 6fac43ec..2068a098 100644 +--- a/src/lib_package.c ++++ b/src/lib_package.c +@@ -1,6 +1,6 @@ + /* + ** Package library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym) + BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); + #endif + ++#if LJ_TARGET_UWP ++void *LJ_WIN_LOADLIBA(const char *path) ++{ ++ DWORD err = GetLastError(); ++ wchar_t wpath[256]; ++ HANDLE lib = NULL; ++ if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) { ++ lib = LoadPackagedLibrary(wpath, 0); ++ } ++ SetLastError(err); ++ return lib; ++} ++#endif ++ + #undef setprogdir + + static void setprogdir(lua_State *L) +@@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib) + + static void *ll_load(lua_State *L, const char *path, int gl) + { +- HINSTANCE lib = LoadLibraryExA(path, NULL, 0); ++ HINSTANCE lib = LJ_WIN_LOADLIBA(path); + if (lib == NULL) pusherror(L); + UNUSED(gl); + return lib; +@@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) + return f; + } + ++#if LJ_TARGET_UWP ++EXTERN_C IMAGE_DOS_HEADER __ImageBase; ++#endif ++ + static const char *ll_bcsym(void *lib, const char *sym) + { + if (lib) { + return (const char *)GetProcAddress((HINSTANCE)lib, sym); + } else { ++#if LJ_TARGET_UWP ++ return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym); ++#else + HINSTANCE h = GetModuleHandleA(NULL); + const char *p = (const char *)GetProcAddress(h, sym); + if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (const char *)ll_bcsym, &h)) + p = (const char *)GetProcAddress(h, sym); + return p; ++#endif + } + } + +@@ -215,7 +237,12 @@ static const char *mksymname(lua_State *L, const char *modname, + + static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r) + { +- void **reg = ll_register(L, path); ++ void **reg; ++ if (strlen(path) >= 4096) { ++ lua_pushliteral(L, "path too long"); ++ return PACKAGE_ERR_LIB; ++ } ++ reg = ll_register(L, path); + if (*reg == NULL) *reg = ll_load(L, path, (*name == '*')); + if (*reg == NULL) { + return PACKAGE_ERR_LIB; /* Unable to load library. */ +@@ -233,7 +260,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r) + const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC)); + lua_pop(L, 1); + if (bcdata) { +- if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0) ++ if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) + return PACKAGE_ERR_LOAD; + return 0; + } +@@ -390,7 +417,7 @@ static int lj_cf_package_loader_preload(lua_State *L) + if (lua_isnil(L, -1)) { /* Not found? */ + const char *bcname = mksymname(L, name, SYMPREFIX_BC); + const char *bcdata = ll_bcsym(NULL, bcname); +- if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0) ++ if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0) + lua_pushfstring(L, "\n\tno field package.preload['%s']", name); + } + return 1; +@@ -398,7 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L) + + /* ------------------------------------------------------------------------ */ + +-#define sentinel ((void *)0x4004) ++#define KEY_SENTINEL (U64x(80000000,00000000)|'s') + + static int lj_cf_package_require(lua_State *L) + { +@@ -408,7 +435,7 @@ static int lj_cf_package_require(lua_State *L) + lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); + lua_getfield(L, 2, name); + if (lua_toboolean(L, -1)) { /* is it there? 
*/ +- if (lua_touserdata(L, -1) == sentinel) /* check loops */ ++ if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */ + luaL_error(L, "loop or previous error loading module " LUA_QS, name); + return 1; /* package is already loaded */ + } +@@ -431,14 +458,14 @@ static int lj_cf_package_require(lua_State *L) + else + lua_pop(L, 1); + } +- lua_pushlightuserdata(L, sentinel); ++ (L->top++)->u64 = KEY_SENTINEL; + lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ + lua_pushstring(L, name); /* pass name as argument to module */ + lua_call(L, 1, 1); /* run loaded module */ + if (!lua_isnil(L, -1)) /* non-nil return? */ + lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ + lua_getfield(L, 2, name); +- if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */ ++ if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */ + lua_pushboolean(L, 1); /* use true as result */ + lua_pushvalue(L, -1); /* extra copy to be returned */ + lua_setfield(L, 2, name); /* _LOADED[name] = true */ +diff --git a/src/lib_string.c b/src/lib_string.c +index 76b0730a..75d855d6 100644 +--- a/src/lib_string.c ++++ b/src/lib_string.c +@@ -1,6 +1,6 @@ + /* + ** String library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -136,7 +136,7 @@ LJLIB_CF(string_dump) + /* ------------------------------------------------------------------------ */ + + /* macro to `unsign' a character */ +-#define uchar(c) ((unsigned char)(c)) ++#define uchar(c) ((unsigned char)(c)) + + #define CAP_UNFINISHED (-1) + #define CAP_POSITION (-2) +@@ -640,89 +640,14 @@ LJLIB_CF(string_gsub) + + /* ------------------------------------------------------------------------ */ + +-/* Emulate tostring() inline. */ +-static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry) +-{ +- TValue *o = L->base+arg-1; +- cTValue *mo; +- lua_assert(o < L->top); /* Caller already checks for existence. */ +- if (LJ_LIKELY(tvisstr(o))) +- return strV(o); +- if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { +- copyTV(L, L->top++, mo); +- copyTV(L, L->top++, o); +- lua_call(L, 1, 1); +- copyTV(L, L->base+arg-1, --L->top); +- return NULL; /* Buffer may be overwritten, retry. */ +- } +- return lj_strfmt_obj(L, o); +-} +- + LJLIB_CF(string_format) LJLIB_REC(.) + { +- int arg, top = (int)(L->top - L->base); +- GCstr *fmt; +- SBuf *sb; +- FormatState fs; +- SFormat sf; + int retry = 0; +-again: +- arg = 1; +- sb = lj_buf_tmp_(L); +- fmt = lj_lib_checkstr(L, arg); +- lj_strfmt_init(&fs, strdata(fmt), fmt->len); +- while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { +- if (sf == STRFMT_LIT) { +- lj_buf_putmem(sb, fs.str, fs.len); +- } else if (sf == STRFMT_ERR) { +- lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len))); +- } else { +- if (++arg > top) +- luaL_argerror(L, arg, lj_obj_typename[0]); +- switch (STRFMT_TYPE(sf)) { +- case STRFMT_INT: +- if (tvisint(L->base+arg-1)) { +- int32_t k = intV(L->base+arg-1); +- if (sf == STRFMT_INT) +- lj_strfmt_putint(sb, k); /* Shortcut for plain %d. 
*/ +- else +- lj_strfmt_putfxint(sb, sf, k); +- } else { +- lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); +- } +- break; +- case STRFMT_UINT: +- if (tvisint(L->base+arg-1)) +- lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1)); +- else +- lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); +- break; +- case STRFMT_NUM: +- lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); +- break; +- case STRFMT_STR: { +- GCstr *str = string_fmt_tostring(L, arg, retry); +- if (str == NULL) +- retry = 1; +- else if ((sf & STRFMT_T_QUOTED)) +- lj_strfmt_putquoted(sb, str); /* No formatting. */ +- else +- lj_strfmt_putfstr(sb, sf, str); +- break; +- } +- case STRFMT_CHAR: +- lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); +- break; +- case STRFMT_PTR: /* No formatting. */ +- lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1)); +- break; +- default: +- lua_assert(0); +- break; +- } +- } +- } +- if (retry++ == 1) goto again; ++ SBuf *sb; ++ do { ++ sb = lj_buf_tmp_(L); ++ retry = lj_strfmt_putarg(L, sb, 1, -retry); ++ } while (retry > 0); + setstrV(L, L->top-1, lj_buf_str(L, sb)); + lj_gc_check(L); + return 1; +@@ -743,6 +668,9 @@ LUALIB_API int luaopen_string(lua_State *L) + setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); + settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); + mt->nomm = (uint8_t)(~(1u<<MM_index)); ++#if LJ_HASBUFFER ++ lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1)); ++#endif + return 1; + } + +diff --git a/src/lib_table.c b/src/lib_table.c +index 0450f1f6..0214bb40 100644 +--- a/src/lib_table.c ++++ b/src/lib_table.c +@@ -1,6 +1,6 @@ + /* + ** Table library. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -159,7 +159,7 @@ LJLIB_CF(table_concat) LJLIB_REC(.) + SBuf *sb = lj_buf_tmp_(L); + SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e); + if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */ +- int32_t idx = (int32_t)(intptr_t)sbufP(sb); ++ int32_t idx = (int32_t)(intptr_t)sb->w; + cTValue *o = lj_tab_getint(t, idx); + lj_err_callerv(L, LJ_ERR_TABCAT, + lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx); +diff --git a/src/lj.supp b/src/lj.supp +deleted file mode 100644 +index 217f7c89..00000000 +--- a/src/lj.supp ++++ /dev/null +@@ -1,41 +0,0 @@ +-# Valgrind suppression file for LuaJIT 2.0. +-{ +- Optimized string compare +- Memcheck:Addr4 +- fun:lj_str_cmp +-} +-{ +- Optimized string compare +- Memcheck:Addr1 +- fun:lj_str_cmp +-} +-{ +- Optimized string compare +- Memcheck:Addr4 +- fun:lj_str_new +-} +-{ +- Optimized string compare +- Memcheck:Addr1 +- fun:lj_str_new +-} +-{ +- Optimized string compare +- Memcheck:Cond +- fun:lj_str_new +-} +-{ +- Optimized string compare +- Memcheck:Addr4 +- fun:str_fastcmp +-} +-{ +- Optimized string compare +- Memcheck:Addr1 +- fun:str_fastcmp +-} +-{ +- Optimized string compare +- Memcheck:Cond +- fun:str_fastcmp +-} +diff --git a/src/lj_alloc.c b/src/lj_alloc.c +index 95d15d04..165203fa 100644 +--- a/src/lj_alloc.c ++++ b/src/lj_alloc.c +@@ -6,7 +6,7 @@ + ** + ** This is a version (aka dlmalloc) of malloc/free/realloc written by + ** Doug Lea and released to the public domain, as explained at +-** http://creativecommons.org/licenses/publicdomain. ++** https://creativecommons.org/licenses/publicdomain. 
+ ** + ** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee) + ** +@@ -16,8 +16,8 @@ + ** If you want to use dlmalloc in another project, you should get + ** the original from: ftp://gee.cs.oswego.edu/pub/misc/ + ** For thread-safe derivatives, take a look at: +-** - ptmalloc: http://www.malloc.de/ +-** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/ ++** - ptmalloc: https://www.malloc.de/ ++** - nedmalloc: https://www.nedprod.com/programs/portable/nedmalloc/ + */ + + #define lj_alloc_c +@@ -31,6 +31,7 @@ + #include "lj_def.h" + #include "lj_arch.h" + #include "lj_alloc.h" ++#include "lj_prng.h" + + #ifndef LUAJIT_USE_SYSMALLOC + +@@ -123,7 +124,7 @@ + + #if LJ_ALLOC_NTAVM + /* Undocumented, but hey, that's what we all love so much about Windows. */ +-typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, ++typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits, + size_t *size, ULONG alloctype, ULONG prot); + static PNTAVM ntavm; + +@@ -140,7 +141,7 @@ static void init_mmap(void) + #define INIT_MMAP() init_mmap() + + /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ +-static void *CALL_MMAP(size_t size) ++static void *mmap_plain(size_t size) + { + DWORD olderr = GetLastError(); + void *ptr = NULL; +@@ -151,7 +152,7 @@ static void *CALL_MMAP(size_t size) + } + + /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +-static void *DIRECT_MMAP(size_t size) ++static void *direct_mmap(size_t size) + { + DWORD olderr = GetLastError(); + void *ptr = NULL; +@@ -164,26 +165,29 @@ static void *DIRECT_MMAP(size_t size) + #else + + /* Win32 MMAP via VirtualAlloc */ +-static void *CALL_MMAP(size_t size) ++static void *mmap_plain(size_t size) + { + DWORD olderr = GetLastError(); +- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); ++ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + SetLastError(olderr); + return ptr ? ptr : MFAIL; + } + + /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +-static void *DIRECT_MMAP(size_t size) ++static void *direct_mmap(size_t size) + { + DWORD olderr = GetLastError(); +- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, +- PAGE_READWRITE); ++ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, ++ PAGE_READWRITE); + SetLastError(olderr); + return ptr ? ptr : MFAIL; + } + + #endif + ++#define CALL_MMAP(prng, size) mmap_plain(size) ++#define DIRECT_MMAP(prng, size) direct_mmap(size) ++ + /* This function supports releasing coalesed segments */ + static int CALL_MUNMAP(void *ptr, size_t size) + { +@@ -226,36 +230,17 @@ static int CALL_MUNMAP(void *ptr, size_t size) + + #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) + +-/* No point in a giant ifdef mess. Just try to open /dev/urandom. +-** It doesn't really matter if this fails, since we get some ASLR bits from +-** every unsuitable allocation, too. And we prefer linear allocation, anyway. +-*/ +-#include <fcntl.h> +-#include <unistd.h> +- +-static uintptr_t mmap_probe_seed(void) +-{ +- uintptr_t val; +- int fd = open("/dev/urandom", O_RDONLY); +- if (fd != -1) { +- int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); +- (void)close(fd); +- if (ok) return val; +- } +- return 1; /* Punt. */ +-} +- +-static void *mmap_probe(size_t size) ++static void *mmap_probe(PRNGState *rs, size_t size) + { + /* Hint for next allocation. Doesn't need to be thread-safe. 
*/ + static uintptr_t hint_addr = 0; +- static uintptr_t hint_prng = 0; + int olderr = errno; + int retry; + for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { + void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); + uintptr_t addr = (uintptr_t)p; +- if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) { ++ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER && ++ ((addr + size) >> LJ_ALLOC_MBITS) == 0) { + /* We got a suitable address. Bump the hint address. */ + hint_addr = addr + size; + errno = olderr; +@@ -280,15 +265,8 @@ static void *mmap_probe(size_t size) + } + } + /* Finally, try pseudo-random probing. */ +- if (LJ_UNLIKELY(hint_prng == 0)) { +- hint_prng = mmap_probe_seed(); +- } +- /* The unsuitable address we got has some ASLR PRNG bits. */ +- hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); +- do { /* The PRNG itself is very weak, but see above. */ +- hint_prng = hint_prng * 1103515245 + 12345; +- hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; +- hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); ++ do { ++ hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE); + } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); + } + errno = olderr; +@@ -299,18 +277,22 @@ static void *mmap_probe(size_t size) + + #if LJ_ALLOC_MMAP32 + +-#if defined(__sun__) ++#if LJ_TARGET_SOLARIS + #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) + #else + #define LJ_ALLOC_MMAP32_START ((uintptr_t)0) + #endif + ++#if LJ_ALLOC_MMAP_PROBE ++static void *mmap_map32(PRNGState *rs, size_t size) ++#else + static void *mmap_map32(size_t size) ++#endif + { + #if LJ_ALLOC_MMAP_PROBE + static int fallback = 0; + if (fallback) +- return mmap_probe(size); ++ return mmap_probe(rs, size); + #endif + { + int olderr = errno; +@@ -320,7 +302,7 @@ static void *mmap_map32(size_t size) + #if LJ_ALLOC_MMAP_PROBE + if (ptr == MFAIL) { + fallback = 1; +- return mmap_probe(size); ++ return mmap_probe(rs, size); + } + #endif + return ptr; +@@ -330,20 +312,25 @@ static void *mmap_map32(size_t size) + #endif + + #if LJ_ALLOC_MMAP32 +-#define CALL_MMAP(size) mmap_map32(size) ++#if LJ_ALLOC_MMAP_PROBE ++#define CALL_MMAP(prng, size) mmap_map32(prng, size) ++#else ++#define CALL_MMAP(prng, size) mmap_map32(size) ++#endif + #elif LJ_ALLOC_MMAP_PROBE +-#define CALL_MMAP(size) mmap_probe(size) ++#define CALL_MMAP(prng, size) mmap_probe(prng, size) + #else +-static void *CALL_MMAP(size_t size) ++static void *mmap_plain(size_t size) + { + int olderr = errno; + void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); + errno = olderr; + return ptr; + } ++#define CALL_MMAP(prng, size) mmap_plain(size) + #endif + +-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 ++#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 + + #include <sys/resource.h> + +@@ -378,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) + #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) + #define CALL_MREMAP_NOMOVE 0 + #define CALL_MREMAP_MAYMOVE 1 +-#if LJ_64 && !LJ_GC64 ++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) + #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE + #else + #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE +@@ -393,7 +380,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) + #endif + + #ifndef DIRECT_MMAP +-#define DIRECT_MMAP(s) CALL_MMAP(s) ++#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s) + 
#endif + + #ifndef CALL_MREMAP +@@ -552,6 +539,7 @@ struct malloc_state { + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + msegment seg; ++ PRNGState *prng; + }; + + typedef struct malloc_state *mstate; +@@ -609,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss) + noncontiguous segments are added. + */ + #define TOP_FOOT_SIZE\ +- (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) ++ (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + + /* ---------------------------- Indexing Bins ---------------------------- */ + +@@ -834,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss) + + /* ----------------------- Direct-mmapping chunks ----------------------- */ + +-static void *direct_alloc(size_t nb) ++static void *direct_alloc(mstate m, size_t nb) + { + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ +- char *mm = (char *)(DIRECT_MMAP(mmsize)); ++ char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - DIRECT_FOOT_PAD; +@@ -850,6 +838,7 @@ static void *direct_alloc(size_t nb) + return chunk2mem(p); + } + } ++ UNUSED(m); + return NULL; + } + +@@ -998,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb) + + /* Directly map large chunks */ + if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { +- void *mem = direct_alloc(nb); ++ void *mem = direct_alloc(m, nb); + if (mem != 0) + return mem; + } +@@ -1007,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb) + size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; + size_t rsize = granularity_align(req); + if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ +- char *mp = (char *)(CALL_MMAP(rsize)); ++ char *mp = (char *)(CALL_MMAP(m->prng, rsize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = rsize; +@@ -1234,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb) + + /* ----------------------------------------------------------------------- */ + +-void *lj_alloc_create(void) ++void *lj_alloc_create(PRNGState *rs) + { + size_t tsize = DEFAULT_GRANULARITY; + char *tbase; + INIT_MMAP(); +- tbase = (char *)(CALL_MMAP(tsize)); ++ UNUSED(rs); ++ tbase = (char *)(CALL_MMAP(rs, tsize)); + if (tbase != CMFAIL) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; +@@ -1258,6 +1248,12 @@ void *lj_alloc_create(void) + return NULL; + } + ++void lj_alloc_setprng(void *msp, PRNGState *rs) ++{ ++ mstate ms = (mstate)msp; ++ ms->prng = rs; ++} ++ + void lj_alloc_destroy(void *msp) + { + mstate ms = (mstate)msp; +diff --git a/src/lj_alloc.h b/src/lj_alloc.h +index f87a7cf3..669f50b7 100644 +--- a/src/lj_alloc.h ++++ b/src/lj_alloc.h +@@ -9,7 +9,8 @@ + #include "lj_def.h" + + #ifndef LUAJIT_USE_SYSMALLOC +-LJ_FUNC void *lj_alloc_create(void); ++LJ_FUNC void *lj_alloc_create(PRNGState *rs); ++LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs); + LJ_FUNC void lj_alloc_destroy(void *msp); + LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); + #endif +diff --git a/src/lj_api.c b/src/lj_api.c +index d17a5754..8c60c058 100644 +--- a/src/lj_api.c ++++ b/src/lj_api.c +@@ -1,6 +1,6 @@ + /* + ** Public Lua/C API. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -28,8 +28,8 @@ + + /* -- Common helper functions --------------------------------------------- */ + +-#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) +-#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) ++#define lj_checkapi_slot(idx) \ ++ lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx)) + + static TValue *index2adr(lua_State *L, int idx) + { +@@ -37,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx) + TValue *o = L->base + (idx - 1); + return o < L->top ? o : niltv(L); + } else if (idx > LUA_REGISTRYINDEX) { +- api_check(L, idx != 0 && -idx <= L->top - L->base); ++ lj_checkapi(idx != 0 && -idx <= L->top - L->base, ++ "bad stack slot %d", idx); + return L->top + idx; + } else if (idx == LUA_GLOBALSINDEX) { + TValue *o = &G(L)->tmptv; +@@ -47,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx) + return registry(L); + } else { + GCfunc *fn = curr_func(L); +- api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); ++ lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn), ++ "calling frame is not a C function"); + if (idx == LUA_ENVIRONINDEX) { + TValue *o = &G(L)->tmptv; + settabV(L, o, tabref(fn->c.env)); +@@ -59,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx) + } + } + +-static TValue *stkindex2adr(lua_State *L, int idx) ++static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx) ++{ ++ TValue *o = index2adr(L, idx); ++ lj_checkapi(o != niltv(L), "invalid stack slot %d", idx); ++ return o; ++} ++ ++static TValue *index2adr_stack(lua_State *L, int idx) + { + if (idx > 0) { + TValue *o = L->base + (idx - 1); ++ if (o < L->top) { ++ return o; ++ } else { ++ lj_checkapi(0, "invalid stack slot %d", idx); ++ return niltv(L); ++ } + return o < L->top ? 
o : niltv(L); + } else { +- api_check(L, idx != 0 && -idx <= L->top - L->base); ++ lj_checkapi(idx != 0 && -idx <= L->top - L->base, ++ "invalid stack slot %d", idx); + return L->top + idx; + } + } +@@ -99,17 +115,17 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg) + lj_err_callerv(L, LJ_ERR_STKOVM, msg); + } + +-LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) ++LUA_API void lua_xmove(lua_State *L, lua_State *to, int n) + { + TValue *f, *t; +- if (from == to) return; +- api_checknelems(from, n); +- api_check(from, G(from) == G(to)); ++ if (L == to) return; ++ lj_checkapi_slot(n); ++ lj_checkapi(G(L) == G(to), "move across global states"); + lj_state_checkstack(to, (MSize)n); +- f = from->top; ++ f = L->top; + t = to->top = to->top + n; + while (--n >= 0) copyTV(to, --t, --f); +- from->top = f; ++ L->top = f; + } + + LUA_API const lua_Number *lua_version(lua_State *L) +@@ -129,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L) + LUA_API void lua_settop(lua_State *L, int idx) + { + if (idx >= 0) { +- api_check(L, idx <= tvref(L->maxstack) - L->base); ++ lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx); + if (L->base + idx > L->top) { + if (L->base + idx >= tvref(L->maxstack)) + lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); +@@ -138,23 +154,21 @@ LUA_API void lua_settop(lua_State *L, int idx) + L->top = L->base + idx; + } + } else { +- api_check(L, -(idx+1) <= (L->top - L->base)); ++ lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx); + L->top += idx+1; /* Shrinks top (idx < 0). */ + } + } + + LUA_API void lua_remove(lua_State *L, int idx) + { +- TValue *p = stkindex2adr(L, idx); +- api_checkvalidindex(L, p); ++ TValue *p = index2adr_stack(L, idx); + while (++p < L->top) copyTV(L, p-1, p); + L->top--; + } + + LUA_API void lua_insert(lua_State *L, int idx) + { +- TValue *q, *p = stkindex2adr(L, idx); +- api_checkvalidindex(L, p); ++ TValue *q, *p = index2adr_stack(L, idx); + for (q = L->top; q > p; q--) copyTV(L, q, q-1); + copyTV(L, p, L->top); + } +@@ -162,19 +176,18 @@ LUA_API void lua_insert(lua_State *L, int idx) + static void copy_slot(lua_State *L, TValue *f, int idx) + { + if (idx == LUA_GLOBALSINDEX) { +- api_check(L, tvistab(f)); ++ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); + /* NOBARRIER: A thread (i.e. L) is never black. */ + setgcref(L->env, obj2gco(tabV(f))); + } else if (idx == LUA_ENVIRONINDEX) { + GCfunc *fn = curr_func(L); + if (fn->c.gct != ~LJ_TFUNC) + lj_err_msg(L, LJ_ERR_NOENV); +- api_check(L, tvistab(f)); ++ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); + setgcref(fn->c.env, obj2gco(tabV(f))); + lj_gc_barrier(L, fn, f); + } else { +- TValue *o = index2adr(L, idx); +- api_checkvalidindex(L, o); ++ TValue *o = index2adr_check(L, idx); + copyTV(L, o, f); + if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ + lj_gc_barrier(L, curr_func(L), f); +@@ -183,7 +196,7 @@ static void copy_slot(lua_State *L, TValue *f, int idx) + + LUA_API void lua_replace(lua_State *L, int idx) + { +- api_checknelems(L, 1); ++ lj_checkapi_slot(1); + copy_slot(L, L->top - 1, idx); + L->top--; + } +@@ -219,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx) + #else + int tt = (int)(((t < 8 ? 
0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); + #endif +- lua_assert(tt != LUA_TNIL || tvisnil(o)); ++ lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion"); + return tt; + } + } +@@ -595,7 +608,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx) + if (tvisudata(o)) + return uddata(udataV(o)); + else if (tvislightud(o)) +- return lightudV(o); ++ return lightudV(G(L), o); + else + return NULL; + } +@@ -608,7 +621,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx) + + LUA_API const void *lua_topointer(lua_State *L, int idx) + { +- return lj_obj_ptr(index2adr(L, idx)); ++ return lj_obj_ptr(G(L), index2adr(L, idx)); + } + + /* -- Stack setters (object creation) ------------------------------------- */ +@@ -677,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n) + { + GCfunc *fn; + lj_gc_check(L); +- api_checknelems(L, n); ++ lj_checkapi_slot(n); + fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); + fn->c.f = f; + L->top -= n; + while (n--) + copyTV(L, &fn->c.upvalue[n], L->top+n); + setfuncV(L, L->top, fn); +- lua_assert(iswhite(obj2gco(fn))); ++ lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white"); + incr_top(L); + } + +@@ -696,7 +709,10 @@ LUA_API void lua_pushboolean(lua_State *L, int b) + + LUA_API void lua_pushlightuserdata(lua_State *L, void *p) + { +- setlightudV(L->top, checklightudptr(L, p)); ++#if LJ_64 ++ p = lj_lightud_intern(L, p); ++#endif ++ setrawlightudV(L->top, p); + incr_top(L); + } + +@@ -754,7 +770,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size) + + LUA_API void lua_concat(lua_State *L, int n) + { +- api_checknelems(L, n); ++ lj_checkapi_slot(n); + if (n >= 2) { + n--; + do { +@@ -780,9 +796,8 @@ LUA_API void lua_concat(lua_State *L, int n) + + LUA_API void lua_gettable(lua_State *L, int idx) + { +- cTValue *v, *t = index2adr(L, idx); +- api_checkvalidindex(L, t); +- v = lj_meta_tget(L, t, L->top-1); ++ cTValue *t = index2adr_check(L, idx); ++ cTValue *v = lj_meta_tget(L, t, L->top-1); + if (v == NULL) { + L->top += 2; + lj_vm_call(L, L->top-2, 1+1); +@@ -794,9 +809,8 @@ LUA_API void lua_gettable(lua_State *L, int idx) + + LUA_API void lua_getfield(lua_State *L, int idx, const char *k) + { +- cTValue *v, *t = index2adr(L, idx); ++ cTValue *v, *t = index2adr_check(L, idx); + TValue key; +- api_checkvalidindex(L, t); + setstrV(L, &key, lj_str_newz(L, k)); + v = lj_meta_tget(L, t, &key); + if (v == NULL) { +@@ -812,14 +826,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k) + LUA_API void lua_rawget(lua_State *L, int idx) + { + cTValue *t = index2adr(L, idx); +- api_check(L, tvistab(t)); ++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); + copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); + } + + LUA_API void lua_rawgeti(lua_State *L, int idx, int n) + { + cTValue *v, *t = index2adr(L, idx); +- api_check(L, tvistab(t)); ++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); + v = lj_tab_getint(tabV(t), n); + if (v) { + copyTV(L, L->top, v); +@@ -861,8 +875,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field) + + LUA_API void lua_getfenv(lua_State *L, int idx) + { +- cTValue *o = index2adr(L, idx); +- api_checkvalidindex(L, o); ++ cTValue *o = index2adr_check(L, idx); + if (tvisfunc(o)) { + settabV(L, L->top, tabref(funcV(o)->c.env)); + } else if (tvisudata(o)) { +@@ -879,12 +892,14 @@ LUA_API int lua_next(lua_State *L, int idx) + { + cTValue *t = index2adr(L, idx); + int more; +- api_check(L, tvistab(t)); +- more = lj_tab_next(L, 
tabV(t), L->top-1); +- if (more) { ++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); ++ more = lj_tab_next(tabV(t), L->top-1, L->top-1); ++ if (more > 0) { + incr_top(L); /* Return new key and value slot. */ +- } else { /* End of traversal. */ ++ } else if (!more) { /* End of traversal. */ + L->top--; /* Remove key slot. */ ++ } else { ++ lj_err_msg(L, LJ_ERR_NEXTIDX); + } + return more; + } +@@ -892,7 +907,8 @@ LUA_API int lua_next(lua_State *L, int idx) + LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n) + { + TValue *val; +- const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val); ++ GCobj *o; ++ const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val, &o); + if (name) { + copyTV(L, L->top, val); + incr_top(L); +@@ -904,7 +920,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n) + { + GCfunc *fn = funcV(index2adr(L, idx)); + n--; +- api_check(L, (uint32_t)n < fn->l.nupvalues); ++ lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n); + return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : + (void *)&fn->c.upvalue[n]; + } +@@ -914,8 +930,10 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) + GCfunc *fn1 = funcV(index2adr(L, idx1)); + GCfunc *fn2 = funcV(index2adr(L, idx2)); + n1--; n2--; +- api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues); +- api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues); ++ lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1); ++ lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2); ++ lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1); ++ lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1); + setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); + lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); + } +@@ -944,9 +962,8 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) + LUA_API void lua_settable(lua_State *L, int idx) + { + TValue *o; +- cTValue *t = index2adr(L, idx); +- api_checknelems(L, 2); +- api_checkvalidindex(L, t); ++ cTValue *t = index2adr_check(L, idx); ++ lj_checkapi_slot(2); + o = lj_meta_tset(L, t, L->top-2); + if (o) { + /* NOBARRIER: lj_meta_tset ensures the table is not black. 
*/ +@@ -965,9 +982,8 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k) + { + TValue *o; + TValue key; +- cTValue *t = index2adr(L, idx); +- api_checknelems(L, 1); +- api_checkvalidindex(L, t); ++ cTValue *t = index2adr_check(L, idx); ++ lj_checkapi_slot(1); + setstrV(L, &key, lj_str_newz(L, k)); + o = lj_meta_tset(L, t, &key); + if (o) { +@@ -986,7 +1002,7 @@ LUA_API void lua_rawset(lua_State *L, int idx) + { + GCtab *t = tabV(index2adr(L, idx)); + TValue *dst, *key; +- api_checknelems(L, 2); ++ lj_checkapi_slot(2); + key = L->top-2; + dst = lj_tab_set(L, t, key); + copyTV(L, dst, key+1); +@@ -998,7 +1014,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n) + { + GCtab *t = tabV(index2adr(L, idx)); + TValue *dst, *src; +- api_checknelems(L, 1); ++ lj_checkapi_slot(1); + dst = lj_tab_setint(L, t, n); + src = L->top-1; + copyTV(L, dst, src); +@@ -1010,13 +1026,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) + { + global_State *g; + GCtab *mt; +- cTValue *o = index2adr(L, idx); +- api_checknelems(L, 1); +- api_checkvalidindex(L, o); ++ cTValue *o = index2adr_check(L, idx); ++ lj_checkapi_slot(1); + if (tvisnil(L->top-1)) { + mt = NULL; + } else { +- api_check(L, tvistab(L->top-1)); ++ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); + mt = tabV(L->top-1); + } + g = G(L); +@@ -1053,11 +1068,10 @@ LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) + + LUA_API int lua_setfenv(lua_State *L, int idx) + { +- cTValue *o = index2adr(L, idx); ++ cTValue *o = index2adr_check(L, idx); + GCtab *t; +- api_checknelems(L, 1); +- api_checkvalidindex(L, o); +- api_check(L, tvistab(L->top-1)); ++ lj_checkapi_slot(1); ++ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); + t = tabV(L->top-1); + if (tvisfunc(o)) { + setgcref(funcV(o)->c.env, obj2gco(t)); +@@ -1078,13 +1092,14 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) + { + cTValue *f = index2adr(L, idx); + TValue *val; ++ GCobj *o; + const char *name; +- api_checknelems(L, 1); +- name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val); ++ lj_checkapi_slot(1); ++ name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); + if (name) { + L->top--; + copyTV(L, val, L->top); +- lj_gc_barrier(L, funcV(f), L->top); ++ lj_gc_barrier(L, o, L->top); + } + return name; + } +@@ -1106,8 +1121,9 @@ static TValue *api_call_base(lua_State *L, int nargs) + + LUA_API void lua_call(lua_State *L, int nargs, int nresults) + { +- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); +- api_checknelems(L, nargs+1); ++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, ++ "thread called in wrong state %d", L->status); ++ lj_checkapi_slot(nargs+1); + lj_vm_call(L, api_call_base(L, nargs), nresults+1); + } + +@@ -1117,13 +1133,13 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) + uint8_t oldh = hook_save(g); + ptrdiff_t ef; + int status; +- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); +- api_checknelems(L, nargs+1); ++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, ++ "thread called in wrong state %d", L->status); ++ lj_checkapi_slot(nargs+1); + if (errfunc == 0) { + ef = 0; + } else { +- cTValue *o = stkindex2adr(L, errfunc); +- api_checkvalidindex(L, o); ++ cTValue *o = index2adr_stack(L, errfunc); + ef = savestack(L, o); + } + status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef); +@@ -1138,7 +1154,10 @@ static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) + fn->c.f = func; + setfuncV(L, 
top++, fn); + if (LJ_FR2) setnilV(top++); +- setlightudV(top++, checklightudptr(L, ud)); ++#if LJ_64 ++ ud = lj_lightud_intern(L, ud); ++#endif ++ setrawlightudV(top++, ud); + cframe_nres(L->cframe) = 1+0; /* Zero results. */ + L->top = top; + return top-1; /* Now call the newly allocated C function. */ +@@ -1149,7 +1168,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) + global_State *g = G(L); + uint8_t oldh = hook_save(g); + int status; +- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); ++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, ++ "thread called in wrong state %d", L->status); + status = lj_vm_cpcall(L, func, ud, cpcall); + if (status) hook_restore(g, oldh); + return status; +@@ -1198,11 +1218,12 @@ LUA_API int lua_yield(lua_State *L, int nresults) + setcont(top, lj_cont_hook); + if (LJ_FR2) top++; + setframe_pc(top, cframe_pc(cf)-1); +- if (LJ_FR2) top++; ++ top++; + setframe_gc(top, obj2gco(L), LJ_TTHREAD); ++ if (LJ_FR2) top++; + setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); + L->top = L->base = top+1; +-#if LJ_TARGET_X64 ++#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS + lj_err_throw(L, LUA_YIELD); + #else + L->cframe = NULL; +diff --git a/src/lj_arch.h b/src/lj_arch.h +index c8d7138e..ae999467 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -1,6 +1,6 @@ + /* + ** Target architecture selection. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_ARCH_H +@@ -8,6 +8,8 @@ + + #include "lua.h" + ++/* -- Target definitions -------------------------------------------------- */ ++ + /* Target endianess. */ + #define LUAJIT_LE 0 + #define LUAJIT_BE 1 +@@ -38,6 +40,14 @@ + #define LUAJIT_OS_BSD 4 + #define LUAJIT_OS_POSIX 5 + ++/* Number mode. */ ++#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ ++#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ ++#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ ++#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ ++ ++/* -- Target detection ---------------------------------------------------- */ ++ + /* Select native target if no target defined. 
*/ + #ifndef LUAJIT_TARGET + +@@ -69,12 +79,16 @@ + #elif defined(__linux__) + #define LUAJIT_OS LUAJIT_OS_LINUX + #elif defined(__MACH__) && defined(__APPLE__) ++#include "TargetConditionals.h" + #define LUAJIT_OS LUAJIT_OS_OSX + #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ + defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__DragonFly__)) && !defined(__ORBIS__) + #define LUAJIT_OS LUAJIT_OS_BSD +-#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__) ++#elif (defined(__sun__) && defined(__svr4__)) ++#define LJ_TARGET_SOLARIS 1 ++#define LUAJIT_OS LUAJIT_OS_POSIX ++#elif defined(__HAIKU__) + #define LUAJIT_OS LUAJIT_OS_POSIX + #elif defined(__CYGWIN__) + #define LJ_TARGET_CYGWIN 1 +@@ -103,10 +117,16 @@ + #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) + #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) + #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) +-#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64)) ++#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD) + #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) + #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX + ++#if TARGET_OS_IPHONE ++#define LJ_TARGET_IOS 1 ++#else ++#define LJ_TARGET_IOS 0 ++#endif ++ + #ifdef __CELLOS_LV2__ + #define LJ_TARGET_PS3 1 + #define LJ_TARGET_CONSOLE 1 +@@ -135,10 +155,14 @@ + #define LJ_TARGET_GC64 1 + #endif + +-#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ +-#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ +-#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ +-#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ ++#ifdef _UWP ++#define LJ_TARGET_UWP 1 ++#if LUAJIT_TARGET == LUAJIT_ARCH_X64 ++#define LJ_TARGET_GC64 1 ++#endif ++#endif ++ ++/* -- Arch-specific settings ---------------------------------------------- */ + + /* Set target architecture properties. */ + #if LUAJIT_TARGET == LUAJIT_ARCH_X86 +@@ -146,14 +170,10 @@ + #define LJ_ARCH_NAME "x86" + #define LJ_ARCH_BITS 32 + #define LJ_ARCH_ENDIAN LUAJIT_LE +-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN +-#define LJ_ABI_WIN 1 +-#else +-#define LJ_ABI_WIN 0 +-#endif + #define LJ_TARGET_X86 1 + #define LJ_TARGET_X86ORX64 1 + #define LJ_TARGET_EHRETREG 0 ++#define LJ_TARGET_EHRAREG 8 + #define LJ_TARGET_MASKSHIFT 1 + #define LJ_TARGET_MASKROT 1 + #define LJ_TARGET_UNALIGNED 1 +@@ -164,21 +184,19 @@ + #define LJ_ARCH_NAME "x64" + #define LJ_ARCH_BITS 64 + #define LJ_ARCH_ENDIAN LUAJIT_LE +-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN +-#define LJ_ABI_WIN 1 +-#else +-#define LJ_ABI_WIN 0 +-#endif + #define LJ_TARGET_X64 1 + #define LJ_TARGET_X86ORX64 1 + #define LJ_TARGET_EHRETREG 0 ++#define LJ_TARGET_EHRAREG 16 + #define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */ + #define LJ_TARGET_MASKSHIFT 1 + #define LJ_TARGET_MASKROT 1 + #define LJ_TARGET_UNALIGNED 1 + #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL +-#ifdef LUAJIT_ENABLE_GC64 ++#ifndef LUAJIT_DISABLE_GC64 + #define LJ_TARGET_GC64 1 ++#elif LJ_TARGET_OSX ++#error "macOS requires GC64 -- don't disable it" + #endif + + #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM +@@ -195,19 +213,20 @@ + #define LJ_ABI_EABI 1 + #define LJ_TARGET_ARM 1 + #define LJ_TARGET_EHRETREG 0 ++#define LJ_TARGET_EHRAREG 14 + #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ + #define LJ_TARGET_MASKSHIFT 0 + #define LJ_TARGET_MASKROT 1 + #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + +-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ ++#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ + #define LJ_ARCH_VERSION 80 +-#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ ++#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ + #define LJ_ARCH_VERSION 70 + #elif __ARM_ARCH_6T2__ + #define LJ_ARCH_VERSION 61 +-#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ ++#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ + #define LJ_ARCH_VERSION 60 + #else + #define LJ_ARCH_VERSION 50 +@@ -225,6 +244,7 @@ + #endif + #define LJ_TARGET_ARM64 1 + #define LJ_TARGET_EHRETREG 0 ++#define LJ_TARGET_EHRAREG 30 + #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ + #define LJ_TARGET_MASKSHIFT 1 + #define LJ_TARGET_MASKROT 1 +@@ -254,23 +274,43 @@ + #else + #define LJ_ARCH_BITS 32 + #define LJ_ARCH_NAME "ppc" ++ ++#if !defined(LJ_ARCH_HASFPU) ++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) ++#define LJ_ARCH_HASFPU 0 ++#else ++#define LJ_ARCH_HASFPU 1 ++#endif ++#endif ++ ++#if !defined(LJ_ABI_SOFTFP) ++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) ++#define LJ_ABI_SOFTFP 1 ++#else ++#define LJ_ABI_SOFTFP 0 ++#endif ++#endif ++#endif ++ ++#if LJ_ABI_SOFTFP ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL ++#else ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE + #endif + + #define LJ_TARGET_PPC 1 + #define LJ_TARGET_EHRETREG 3 ++#define LJ_TARGET_EHRAREG 65 + #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ + #define LJ_TARGET_MASKSHIFT 0 + #define LJ_TARGET_MASKROT 1 + #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. 
*/ +-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE + + #if LJ_TARGET_CONSOLE + #define LJ_ARCH_PPC32ON64 1 + #define LJ_ARCH_NOFFI 1 + #elif LJ_ARCH_BITS == 64 +-#define LJ_ARCH_PPC64 1 +-#define LJ_TARGET_GC64 1 +-#define LJ_ARCH_NOJIT 1 /* NYI */ ++#error "No support for PPC64" + #endif + + #if _ARCH_PWR7 +@@ -302,18 +342,38 @@ + #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 + + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) ++#if __mips_isa_rev >= 6 ++#define LJ_TARGET_MIPSR6 1 ++#define LJ_TARGET_UNALIGNED 1 ++#endif + #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 ++#if LJ_TARGET_MIPSR6 ++#define LJ_ARCH_NAME "mips32r6el" ++#else + #define LJ_ARCH_NAME "mipsel" ++#endif ++#else ++#if LJ_TARGET_MIPSR6 ++#define LJ_ARCH_NAME "mips64r6el" + #else + #define LJ_ARCH_NAME "mips64el" + #endif ++#endif + #define LJ_ARCH_ENDIAN LUAJIT_LE + #else + #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 ++#if LJ_TARGET_MIPSR6 ++#define LJ_ARCH_NAME "mips32r6" ++#else + #define LJ_ARCH_NAME "mips" ++#endif ++#else ++#if LJ_TARGET_MIPSR6 ++#define LJ_ARCH_NAME "mips64r6" + #else + #define LJ_ARCH_NAME "mips64" + #endif ++#endif + #define LJ_ARCH_ENDIAN LUAJIT_BE + #endif + +@@ -337,22 +397,22 @@ + #define LJ_ARCH_BITS 32 + #define LJ_TARGET_MIPS32 1 + #else +-#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU +-#define LJ_ARCH_NOJIT 1 /* NYI */ +-#endif + #define LJ_ARCH_BITS 64 + #define LJ_TARGET_MIPS64 1 + #define LJ_TARGET_GC64 1 + #endif + #define LJ_TARGET_MIPS 1 + #define LJ_TARGET_EHRETREG 4 ++#define LJ_TARGET_EHRAREG 31 + #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ + #define LJ_TARGET_MASKSHIFT 1 + #define LJ_TARGET_MASKROT 1 + #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + +-#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 ++#if LJ_TARGET_MIPSR6 ++#define LJ_ARCH_VERSION 60 ++#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 + #define LJ_ARCH_VERSION 20 + #else + #define LJ_ARCH_VERSION 10 +@@ -362,9 +422,7 @@ + #error "No target architecture defined" + #endif + +-#ifndef LJ_PAGESIZE +-#define LJ_PAGESIZE 4096 +-#endif ++/* -- Checks for requirements --------------------------------------------- */ + + /* Check for minimum required compiler versions. */ + #if defined(__GNUC__) +@@ -418,29 +476,30 @@ + #error "No support for ILP32 model on ARM64" + #endif + #elif LJ_TARGET_PPC +-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) +-#error "No support for PowerPC CPUs without double-precision FPU" +-#endif +-#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE ++#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) + #error "No support for little-endian PPC32" + #endif +-#if LJ_ARCH_PPC64 +-#error "No support for PowerPC 64 bit mode (yet)" +-#endif +-#ifdef __NO_FPRS__ ++#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) + #error "No support for PPC/e500 anymore (use LuaJIT 2.0)" + #endif + #elif LJ_TARGET_MIPS32 + #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) + #error "Only o32 ABI supported for MIPS32" + #endif ++#if LJ_TARGET_MIPSR6 ++/* Not that useful, since most available r6 CPUs are 64 bit. */ ++#error "No support for MIPS32R6" ++#endif + #elif LJ_TARGET_MIPS64 + #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) ++/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. 
*/ + #error "Only n64 ABI supported for MIPS64" + #endif + #endif + #endif + ++/* -- Derived defines ----------------------------------------------------- */ ++ + /* Enable or disable the dual-number mode for the VM. */ + #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ + (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) +@@ -490,6 +549,13 @@ + #define LJ_HASFFI 1 + #endif + ++/* Disable or enable the string buffer extension. */ ++#if defined(LUAJIT_DISABLE_BUFFER) ++#define LJ_HASBUFFER 0 ++#else ++#define LJ_HASBUFFER 1 ++#endif ++ + #if defined(LUAJIT_DISABLE_PROFILE) + #define LJ_HASPROFILE 0 + #elif LJ_TARGET_POSIX +@@ -512,6 +578,7 @@ + #define LJ_ABI_SOFTFP 0 + #endif + #define LJ_SOFTFP (!LJ_ARCH_HASFPU) ++#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) + + #if LJ_ARCH_ENDIAN == LUAJIT_BE + #define LJ_LE 0 +@@ -537,26 +604,52 @@ + #define LJ_TARGET_UNALIGNED 0 + #endif + ++#ifndef LJ_PAGESIZE ++#define LJ_PAGESIZE 4096 ++#endif ++ + /* Various workarounds for embedded operating systems or weak C runtimes. */ + #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS + #define LUAJIT_NO_LOG2 + #endif +-#if defined(__symbian__) || LJ_TARGET_WINDOWS +-#define LUAJIT_NO_EXP2 +-#endif + #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) + #define LJ_NO_SYSTEM 1 + #endif + +-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ +-/* NYI: no support for compact unwind specification, yet. */ +-#define LUAJIT_NO_UNWIND 1 ++#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN ++#define LJ_ABI_WIN 1 ++#else ++#define LJ_ABI_WIN 0 + #endif + +-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 ++#if LJ_TARGET_WINDOWS ++#if LJ_TARGET_UWP ++#define LJ_WIN_VALLOC VirtualAllocFromApp ++#define LJ_WIN_VPROTECT VirtualProtectFromApp ++extern void *LJ_WIN_LOADLIBA(const char *path); ++#else ++#define LJ_WIN_VALLOC VirtualAlloc ++#define LJ_WIN_VPROTECT VirtualProtect ++#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0) ++#endif ++#endif ++ ++#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 + #define LJ_NO_UNWIND 1 + #endif + ++#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__)))) ++#define LJ_UNWIND_EXT 1 ++#else ++#define LJ_UNWIND_EXT 0 ++#endif ++ ++#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86) ++#define LJ_UNWIND_JIT 1 ++#else ++#define LJ_UNWIND_JIT 0 ++#endif ++ + /* Compatibility with Lua 5.1 vs. 5.2. */ + #ifdef LUAJIT_ENABLE_LUA52COMPAT + #define LJ_52 1 +@@ -564,4 +657,46 @@ + #define LJ_52 0 + #endif + ++/* -- VM security --------------------------------------------------------- */ ++ ++/* Don't make any changes here. Instead build with: ++** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value" ++** ++** Important note to distro maintainers: DO NOT change the defaults for a ++** regular distro build -- neither upwards, nor downwards! ++** These build-time configurable security flags are intended for embedders ++** who may have specific needs wrt. security vs. performance. ++*/ ++ ++/* Security defaults. */ ++#ifndef LUAJIT_SECURITY_PRNG ++/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */ ++#define LUAJIT_SECURITY_PRNG 1 ++#endif ++ ++#ifndef LUAJIT_SECURITY_STRHASH ++/* String hash: 0 = sparse only, 1 = sparse + dense. 
*/ ++#define LUAJIT_SECURITY_STRHASH 1 ++#endif ++ ++#ifndef LUAJIT_SECURITY_STRID ++/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */ ++#define LUAJIT_SECURITY_STRID 1 ++#endif ++ ++#ifndef LUAJIT_SECURITY_MCODE ++/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */ ++#define LUAJIT_SECURITY_MCODE 1 ++#endif ++ ++#define LJ_SECURITY_MODE \ ++ ( 0u \ ++ | ((LUAJIT_SECURITY_PRNG & 3) << 0) \ ++ | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \ ++ | ((LUAJIT_SECURITY_STRID & 3) << 4) \ ++ | ((LUAJIT_SECURITY_MCODE & 3) << 6) \ ++ ) ++#define LJ_SECURITY_MODESTRING \ ++ "\004prng\007strhash\005strid\005mcode" ++ + #endif +diff --git a/src/lj_asm.c b/src/lj_asm.c +index c2cf5a95..5968c5e3 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -1,6 +1,6 @@ + /* + ** IR assembler (SSA IR -> machine code). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_asm_c +@@ -11,6 +11,7 @@ + #if LJ_HASJIT + + #include "lj_gc.h" ++#include "lj_buf.h" + #include "lj_str.h" + #include "lj_tab.h" + #include "lj_frame.h" +@@ -22,7 +23,6 @@ + #include "lj_ircall.h" + #include "lj_iropt.h" + #include "lj_mcode.h" +-#include "lj_iropt.h" + #include "lj_trace.h" + #include "lj_snap.h" + #include "lj_asm.h" +@@ -72,6 +72,8 @@ typedef struct ASMState { + IRRef snaprename; /* Rename highwater mark for snapshot check. */ + SnapNo snapno; /* Current snapshot number. */ + SnapNo loopsnapno; /* Loop snapshot number. */ ++ int snapalloc; /* Current snapshot needs allocation. */ ++ BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ + + IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ + IRRef sectref; /* Section base reference (loopref or 0). */ +@@ -85,6 +87,7 @@ typedef struct ASMState { + + MCode *mcbot; /* Bottom of reserved MCode. */ + MCode *mctop; /* Top of generated MCode. */ ++ MCode *mctoporig; /* Original top of generated MCode. */ + MCode *mcloop; /* Pointer to loop MCode (or NULL). */ + MCode *invmcp; /* Points to invertible loop branch (or NULL). */ + MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ +@@ -97,6 +100,12 @@ typedef struct ASMState { + uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ + } ASMState; + ++#ifdef LUA_USE_ASSERT ++#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__) ++#else ++#define lj_assertA(c, ...) ((void)as) ++#endif ++ + #define IR(ref) (&as->ir[(ref)]) + + #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ +@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as) + #ifdef LUA_USE_ASSERT + if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { + IRIns *ir = IR(as->curins+1); +- fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, +- as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); +- lua_assert(0); ++ lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp, ++ as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); + } + #endif + if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); +@@ -244,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) + *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; + } else { + *p++ = '?'; +- lua_assert(0); ++ lj_assertA(0, "bad register %d for debug format "%s"", r, fmt); + } + } else if (e[1] == 'f' || e[1] == 'i') { + IRRef ref; +@@ -262,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) 
+ } else if (e[1] == 'x') { + p += sprintf(p, "%08x", va_arg(argp, int32_t)); + } else { +- lua_assert(0); ++ lj_assertA(0, "bad debug format code"); + } + fmt = e+2; + } +@@ -321,7 +329,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) + Reg r; + if (ra_iskref(ref)) { + r = ra_krefreg(ref); +- lua_assert(!rset_test(as->freeset, r)); ++ lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r); + ra_free(as, r); + ra_modified(as, r); + #if LJ_64 +@@ -333,12 +341,14 @@ static Reg ra_rematk(ASMState *as, IRRef ref) + } + ir = IR(ref); + r = ir->r; +- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); ++ lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref); ++ lj_assertA(!ra_hasspill(ir->s), ++ "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s); + ra_free(as, r); + ra_modified(as, r); + ir->r = RID_INIT; /* Do not keep any hint. */ + RA_DBGX((as, "remat $i $r", ir, r)); +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (ir->o == IR_KNUM) { + emit_loadk64(as, r, ir); + } else +@@ -347,7 +357,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) + ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ + emit_getgl(as, r, jit_base); + } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { +- lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ ++ /* REF_NIL stores ASMREF_L register. */ ++ lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L"); + emit_getgl(as, r, cur_L); + #if LJ_64 + } else if (ir->o == IR_KINT64) { +@@ -360,8 +371,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref) + #endif + #endif + } else { +- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || +- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); ++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || ++ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, ++ "rematk of bad IR op %d", ir->o); + emit_loadi(as, r, ir->i); + } + return r; +@@ -371,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) + static int32_t ra_spill(ASMState *as, IRIns *ir) + { + int32_t slot = ir->s; +- lua_assert(ir >= as->ir + REF_TRUE); ++ lj_assertA(ir >= as->ir + REF_TRUE, ++ "spill of K%03d", REF_BIAS - (int)(ir - as->ir)); + if (!ra_hasspill(slot)) { + if (irt_is64(ir->t)) { + slot = as->evenspill; +@@ -396,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) + { + IRIns *ir = IR(ref); + Reg r = ir->r; +- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); ++ lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1); ++ lj_assertA(!ra_hasspill(ir->s), ++ "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s); + ra_free(as, r); + ra_modified(as, r); + ir->r = RID_INIT; +@@ -412,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref) + IRIns *ir = IR(ref); + int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ + Reg r = ir->r; +- lua_assert(ra_hasreg(r)); ++ lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS); + ra_sethint(ir->r, r); /* Keep hint. */ + ra_free(as, r); + if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. 
*/ +@@ -441,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow) + { + IRRef ref; + RegCost cost = ~(RegCost)0; +- lua_assert(allow != RSET_EMPTY); ++ lj_assertA(allow != RSET_EMPTY, "evict from empty set"); + if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { + GPRDEF(MINCOST) + } else { + FPRDEF(MINCOST) + } + ref = regcost_ref(cost); +- lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); ++ lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins), ++ "evict of out-of-range IR %04d", ref - REF_BIAS); + /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ + if (!irref_isk(ref) && (as->weakset & allow)) { + IRIns *ir = IR(ref); +@@ -606,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) + IRIns *ir = IR(ref); + RegSet pick = as->freeset & allow; + Reg r; +- lua_assert(ra_noreg(ir->r)); ++ lj_assertA(ra_noreg(ir->r), ++ "IR %04d already has reg %d", ref - REF_BIAS, ir->r); + if (pick) { + /* First check register hint from propagation or PHI. */ + if (ra_hashint(ir->r)) { +@@ -670,8 +687,10 @@ static void ra_rename(ASMState *as, Reg down, Reg up) + IRIns *ir = IR(ref); + ir->r = (uint8_t)up; + as->cost[down] = 0; +- lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); +- lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); ++ lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR), ++ "rename between GPR/FPR %d and %d", down, up); ++ lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down); ++ lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up); + ra_free(as, down); /* 'down' is free ... */ + ra_modified(as, down); + rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ +@@ -679,7 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up) + RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); + emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ + if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ +- ra_addrename(as, down, ref, as->snapno); ++ /* ++ ** The rename is effective at the subsequent (already emitted) exit ++ ** branch. This is for the current snapshot (as->snapno). Except if we ++ ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1), ++ ** then it belongs to the next snapshot. ++ ** See also the discussion at asm_snap_checkrename(). ++ */ ++ ra_addrename(as, down, ref, as->snapno + as->snapalloc); + } + } + +@@ -712,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) + { + Reg dest = ra_dest(as, ir, RID2RSET(r)); + if (dest != r) { +- lua_assert(rset_test(as->freeset, r)); ++ lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r); + ra_modified(as, r); + emit_movrr(as, ir, dest, r); + } +@@ -745,8 +771,9 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) + #endif + #endif + } else if (ir->o != IR_KPRI) { +- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || +- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); ++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || ++ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, ++ "K%03d has bad IR op %d", REF_BIAS - lref, ir->o); + emit_loadi(as, dest, ir->i); + return; + } +@@ -791,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref) + } + #endif + +-#if !LJ_64 + /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). 
*/ + static void ra_destpair(ASMState *as, IRIns *ir) + { + Reg destlo = ir->r, desthi = (ir+1)->r; ++ IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir; + /* First spill unrelated refs blocking the destination registers. */ + if (!rset_test(as->freeset, RID_RETLO) && + destlo != RID_RETLO && desthi != RID_RETLO) +@@ -819,29 +846,28 @@ static void ra_destpair(ASMState *as, IRIns *ir) + /* Check for conflicts and shuffle the registers as needed. */ + if (destlo == RID_RETHI) { + if (desthi == RID_RETLO) { +-#if LJ_TARGET_X86 +- *--as->mcp = XI_XCHGa + RID_RETHI; ++#if LJ_TARGET_X86ORX64 ++ *--as->mcp = REX_64IR(irx, XI_XCHGa + RID_RETHI); + #else +- emit_movrr(as, ir, RID_RETHI, RID_TMP); +- emit_movrr(as, ir, RID_RETLO, RID_RETHI); +- emit_movrr(as, ir, RID_TMP, RID_RETLO); ++ emit_movrr(as, irx, RID_RETHI, RID_TMP); ++ emit_movrr(as, irx, RID_RETLO, RID_RETHI); ++ emit_movrr(as, irx, RID_TMP, RID_RETLO); + #endif + } else { +- emit_movrr(as, ir, RID_RETHI, RID_RETLO); +- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); ++ emit_movrr(as, irx, RID_RETHI, RID_RETLO); ++ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); + } + } else if (desthi == RID_RETLO) { +- emit_movrr(as, ir, RID_RETLO, RID_RETHI); +- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); ++ emit_movrr(as, irx, RID_RETLO, RID_RETHI); ++ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); + } else { +- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); +- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); ++ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); ++ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); + } + /* Restore spill slots (if any). */ + if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); + if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); + } +-#endif + + /* -- Snapshot handling --------- ----------------------------------------- */ + +@@ -876,7 +902,10 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs) + static void asm_snap_alloc1(ASMState *as, IRRef ref) + { + IRIns *ir = IR(ref); +- if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) { ++ if (!irref_isk(ref) && ir->r != RID_SUNK) { ++ bloomset(as->snapfilt1, ref); ++ bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS)); ++ if (ra_used(ir)) return; + if (ir->r == RID_SINK) { + ir->r = RID_SUNK; + #if LJ_HASFFI +@@ -888,11 +917,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) + #endif + { /* Allocate stored values for TNEW, TDUP and CNEW. */ + IRIns *irs; +- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); ++ lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW, ++ "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o); + for (irs = IR(as->snapref-1); irs > ir; irs--) + if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { +- lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || +- irs->o == IR_FSTORE || irs->o == IR_XSTORE); ++ lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE || ++ irs->o == IR_FSTORE || irs->o == IR_XSTORE, ++ "sunk store IR %04d has bad op %d", ++ (int)(irs - as->ir) - REF_BIAS, irs->o); + asm_snap_alloc1(as, irs->op2); + if (LJ_32 && (irs+1)->o == IR_HIOP) + asm_snap_alloc1(as, (irs+1)->op2); +@@ -928,18 +960,21 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) + } + + /* Allocate refs escaping to a snapshot. 
*/ +-static void asm_snap_alloc(ASMState *as) ++static void asm_snap_alloc(ASMState *as, int snapno) + { +- SnapShot *snap = &as->T->snap[as->snapno]; ++ SnapShot *snap = &as->T->snap[snapno]; + SnapEntry *map = &as->T->snapmap[snap->mapofs]; + MSize n, nent = snap->nent; ++ as->snapfilt1 = as->snapfilt2 = 0; + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + IRRef ref = snap_ref(sn); + if (!irref_isk(ref)) { + asm_snap_alloc1(as, ref); + if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { +- lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); ++ lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP, ++ "snap %d[%d] points to bad SOFTFP IR %04d", ++ snapno, n, ref - REF_BIAS); + asm_snap_alloc1(as, ref+1); + } + } +@@ -955,35 +990,26 @@ static void asm_snap_alloc(ASMState *as) + */ + static int asm_snap_checkrename(ASMState *as, IRRef ren) + { +- SnapShot *snap = &as->T->snap[as->snapno]; +- SnapEntry *map = &as->T->snapmap[snap->mapofs]; +- MSize n, nent = snap->nent; +- for (n = 0; n < nent; n++) { +- SnapEntry sn = map[n]; +- IRRef ref = snap_ref(sn); +- if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) { +- IRIns *ir = IR(ref); +- ra_spill(as, ir); /* Register renamed, so force a spill slot. */ +- RA_DBGX((as, "snaprensp $f $s", ref, ir->s)); +- return 1; /* Found. */ +- } ++ if (bloomtest(as->snapfilt1, ren) && ++ bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) { ++ IRIns *ir = IR(ren); ++ ra_spill(as, ir); /* Register renamed, so force a spill slot. */ ++ RA_DBGX((as, "snaprensp $f $s", ren, ir->s)); ++ return 1; /* Found. */ + } + return 0; /* Not found. */ + } + +-/* Prepare snapshot for next guard instruction. */ ++/* Prepare snapshot for next guard or throwing instruction. */ + static void asm_snap_prep(ASMState *as) + { +- if (as->curins < as->snapref) { +- do { +- if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ +- as->snapno--; +- as->snapref = as->T->snap[as->snapno].ref; +- } while (as->curins < as->snapref); +- asm_snap_alloc(as); ++ if (as->snapalloc) { ++ /* Alloc on first invocation for each snapshot. */ ++ as->snapalloc = 0; ++ asm_snap_alloc(as, as->snapno); + as->snaprename = as->T->nins; + } else { +- /* Process any renames above the highwater mark. */ ++ /* Check any renames above the highwater mark. */ + for (; as->snaprename < as->T->nins; as->snaprename++) { + IRIns *ir = &as->T->ir[as->snaprename]; + if (asm_snap_checkrename(as, ir->op1)) +@@ -992,6 +1018,35 @@ static void asm_snap_prep(ASMState *as) + } + } + ++/* Move to previous snapshot when we cross the current snapshot ref. */ ++static void asm_snap_prev(ASMState *as) ++{ ++ if (as->curins < as->snapref) { ++ uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp); ++ if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV); ++ do { ++ if (as->snapno == 0) return; ++ as->snapno--; ++ as->snapref = as->T->snap[as->snapno].ref; ++ as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */ ++ } while (as->curins < as->snapref); /* May have no ins inbetween. */ ++ as->snapalloc = 1; ++ } ++} ++ ++/* Fixup snapshot mcode offsetst. */ ++static void asm_snap_fixup_mcofs(ASMState *as) ++{ ++ uint32_t sz = (uint32_t)(as->mctoporig - as->mcp); ++ SnapShot *snap = as->T->snap; ++ SnapNo i; ++ for (i = as->T->nsnap-1; i > 0; i--) { ++ /* Compute offset from mcode start and store in correct snapshot. 
*/ ++ snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs); ++ } ++ snap[0].mcofs = 0; ++} ++ + /* -- Miscellaneous helpers ----------------------------------------------- */ + + /* Calculate stack adjustment. */ +@@ -1003,21 +1058,26 @@ static int32_t asm_stack_adjust(ASMState *as) + } + + /* Must match with hash*() in lj_tab.c. */ +-static uint32_t ir_khash(IRIns *ir) ++static uint32_t ir_khash(ASMState *as, IRIns *ir) + { + uint32_t lo, hi; ++ UNUSED(as); + if (irt_isstr(ir->t)) { +- return ir_kstr(ir)->hash; ++ return ir_kstr(ir)->sid; + } else if (irt_isnum(ir->t)) { + lo = ir_knum(ir)->u32.lo; + hi = ir_knum(ir)->u32.hi << 1; + } else if (irt_ispri(ir->t)) { +- lua_assert(!irt_isnil(ir->t)); ++ lj_assertA(!irt_isnil(ir->t), "hash of nil key"); + return irt_type(ir->t)-IRT_FALSE; + } else { +- lua_assert(irt_isgcv(ir->t)); ++ lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t)); + lo = u32ptr(ir_kgc(ir)); ++#if LJ_GC64 ++ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); ++#else + hi = lo + HASH_BIAS; ++#endif + } + return hashrot(lo, hi); + } +@@ -1031,6 +1091,7 @@ static void asm_snew(ASMState *as, IRIns *ir) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; + IRRef args[3]; ++ asm_snap_prep(as); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* const char *str */ + args[2] = ir->op2; /* size_t len */ +@@ -1043,6 +1104,7 @@ static void asm_tnew(ASMState *as, IRIns *ir) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; + IRRef args[2]; ++ asm_snap_prep(as); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* uint32_t ahsize */ + as->gcsteps++; +@@ -1055,6 +1117,7 @@ static void asm_tdup(ASMState *as, IRIns *ir) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; + IRRef args[2]; ++ asm_snap_prep(as); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* const GCtab *kt */ + as->gcsteps++; +@@ -1080,28 +1143,43 @@ static void asm_gcstep(ASMState *as, IRIns *ir) + + /* -- Buffer operations --------------------------------------------------- */ + +-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode); ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb); ++#endif + + static void asm_bufhdr(ASMState *as, IRIns *ir) + { + Reg sb = ra_dest(as, ir, RSET_GPR); +- if ((ir->op2 & IRBUFHDR_APPEND)) { ++ switch (ir->op2) { ++ case IRBUFHDR_RESET: { ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irbp; ++ irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */ ++ emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w)); ++ emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b)); ++ break; ++ } ++ case IRBUFHDR_APPEND: { + /* Rematerialize const buffer pointer instead of likely spill. */ + IRIns *irp = IR(ir->op1); + if (!(ra_hasreg(irp->r) || irp == ir-1 || + (irp == ir-2 && !ra_used(ir-1)))) { +- while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND))) ++ while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET)) + irp = IR(irp->op1); + if (irref_isk(irp->op1)) { + ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); + ir = irp; + } + } +- } else { +- Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); +- /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. 
*/ +- emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); +- emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); ++ break; ++ } ++#if LJ_HASBUFFER ++ case IRBUFHDR_WRITE: ++ asm_bufhdr_write(as, sb); ++ break; ++#endif ++ default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break; + } + #if LJ_TARGET_X86ORX64 + ra_left(as, sb, ir->op1); +@@ -1115,15 +1193,16 @@ static void asm_bufput(ASMState *as, IRIns *ir) + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; + IRRef args[3]; + IRIns *irs; +- int kchar = -1; ++ int kchar = -129; + args[0] = ir->op1; /* SBuf * */ + args[1] = ir->op2; /* GCstr * */ + irs = IR(ir->op2); +- lua_assert(irt_isstr(irs->t)); ++ lj_assertA(irt_isstr(irs->t), ++ "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS); + if (irs->o == IR_KGC) { + GCstr *s = ir_kstr(irs); + if (s->len == 1) { /* Optimize put of single-char string constant. */ +- kchar = strdata(s)[0]; ++ kchar = (int8_t)strdata(s)[0]; /* Signed! */ + args[1] = ASMREF_TMP1; /* int, truncated to char */ + ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; + } +@@ -1133,7 +1212,8 @@ static void asm_bufput(ASMState *as, IRIns *ir) + args[1] = ASMREF_TMP1; /* TValue * */ + ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; + } else { +- lua_assert(irt_isinteger(IR(irs->op1)->t)); ++ lj_assertA(irt_isinteger(IR(irs->op1)->t), ++ "TOSTR of non-numeric IR %04d", irs->op1); + args[1] = irs->op1; /* int */ + if (irs->op2 == IRTOSTR_INT) + ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; +@@ -1150,8 +1230,8 @@ static void asm_bufput(ASMState *as, IRIns *ir) + asm_gencall(as, ci, args); + if (args[1] == ASMREF_TMP1) { + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); +- if (kchar == -1) +- asm_tvptr(as, tmp, irs->op1); ++ if (kchar == -129) ++ asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1); + else + ra_allockreg(as, kchar, tmp); + } +@@ -1173,6 +1253,7 @@ static void asm_tostr(ASMState *as, IRIns *ir) + { + const CCallInfo *ci; + IRRef args[2]; ++ asm_snap_prep(as); + args[0] = ASMREF_L; + as->gcsteps++; + if (ir->op2 == IRTOSTR_NUM) { +@@ -1188,7 +1269,7 @@ static void asm_tostr(ASMState *as, IRIns *ir) + asm_setupresult(as, ir, ci); /* GCstr * */ + asm_gencall(as, ci, args); + if (ir->op2 == IRTOSTR_NUM) +- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); ++ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1); + } + + #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 +@@ -1198,7 +1279,8 @@ static void asm_conv64(ASMState *as, IRIns *ir) + IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); + IRCallID id; + IRRef args[2]; +- lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); ++ lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, ++ "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); + args[LJ_BE] = (ir-1)->op1; + args[LJ_LE] = ir->op1; + if (st == IRT_NUM || st == IRT_FLOAT) { +@@ -1228,12 +1310,19 @@ static void asm_newref(ASMState *as, IRIns *ir) + IRRef args[3]; + if (ir->r == RID_SINK) + return; ++ asm_snap_prep(as); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* GCtab *t */ + args[2] = ASMREF_TMP1; /* cTValue *key */ + asm_setupresult(as, ir, ci); /* TValue * */ + asm_gencall(as, ci, args); +- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); ++ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1); ++} ++ ++static void asm_tmpref(ASMState *as, IRIns *ir) ++{ ++ Reg r = ra_dest(as, ir, RSET_GPR); ++ asm_tvptr(as, r, ir->op1, ir->op2); + } + + static void asm_lref(ASMState *as, IRIns *ir) +@@ -1253,15 +1342,16 @@ static void 
asm_collectargs(ASMState *as, IRIns *ir, + const CCallInfo *ci, IRRef *args) + { + uint32_t n = CCI_XNARGS(ci); +- lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ ++ /* Account for split args. */ ++ lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n); + if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } + while (n-- > 1) { + ir = IR(ir->op1); +- lua_assert(ir->o == IR_CARG); ++ lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree"); + args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; + } + args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; +- lua_assert(IR(ir->op1)->o != IR_CARG); ++ lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree"); + } + + /* Reconstruct CCallInfo flags for CALLX*. */ +@@ -1305,32 +1395,6 @@ static void asm_call(ASMState *as, IRIns *ir) + asm_gencall(as, ci, args); + } + +-#if !LJ_SOFTFP +-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) +-{ +- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; +- IRRef args[2]; +- args[0] = lref; +- args[1] = rref; +- asm_setupresult(as, ir, ci); +- asm_gencall(as, ci, args); +-} +- +-static int asm_fpjoin_pow(ASMState *as, IRIns *ir) +-{ +- IRIns *irp = IR(ir->op1); +- if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { +- IRIns *irpp = IR(irp->op1); +- if (irpp == ir-2 && irpp->o == IR_FPMATH && +- irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { +- asm_fppow(as, ir, irpp->op1, irp->op2); +- return 1; +- } +- } +- return 0; +-} +-#endif +- + /* -- PHI and loop handling ----------------------------------------------- */ + + /* Break a PHI cycle by renaming to a free register (evict if needed). */ +@@ -1601,6 +1665,68 @@ static void asm_loop(ASMState *as) + #error "Missing assembler for target CPU" + #endif + ++/* -- Common instruction helpers ------------------------------------------ */ ++ ++#if !LJ_SOFTFP32 ++#if !LJ_TARGET_X86ORX64 ++#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) ++#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) ++#endif ++ ++static void asm_pow(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : ++ IRCALL_lj_carith_powu64); ++ else ++#endif ++ if (irt_isnum(IR(ir->op2)->t)) ++ asm_callid(as, ir, IRCALL_pow); ++ else ++ asm_fppowi(as, ir); ++} ++ ++static void asm_div(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : ++ IRCALL_lj_carith_divu64); ++ else ++#endif ++ asm_fpdiv(as, ir); ++} ++#endif ++ ++static void asm_mod(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isint(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : ++ IRCALL_lj_carith_modu64); ++ else ++#endif ++ asm_callid(as, ir, IRCALL_lj_vm_modi); ++} ++ ++static void asm_fuseequal(ASMState *as, IRIns *ir) ++{ ++ /* Fuse HREF + EQ/NE. */ ++ if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { ++ as->curins--; ++ asm_href(as, ir-1, (IROp)ir->o); ++ } else { ++ asm_equal(as, ir); ++ } ++} ++ ++static void asm_alen(ASMState *as, IRIns *ir) ++{ ++ asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len : ++ IRCALL_lj_tab_len_hint); ++} ++ + /* -- Instruction dispatch ------------------------------------------------ */ + + /* Assemble a single instruction. */ +@@ -1609,7 +1735,10 @@ static void asm_ir(ASMState *as, IRIns *ir) + switch ((IROp)ir->o) { + /* Miscellaneous ops. 
*/ + case IR_LOOP: asm_loop(as); break; +- case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; ++ case IR_NOP: case IR_XBAR: ++ lj_assertA(!ra_used(ir), ++ "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS); ++ break; + case IR_USE: + ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; + case IR_PHI: asm_phi(as, ir); break; +@@ -1623,14 +1752,7 @@ static void asm_ir(ASMState *as, IRIns *ir) + case IR_ABC: + asm_comp(as, ir); + break; +- case IR_EQ: case IR_NE: +- if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { +- as->curins--; +- asm_href(as, ir-1, (IROp)ir->o); +- } else { +- asm_equal(as, ir); +- } +- break; ++ case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break; + + case IR_RETF: asm_retf(as, ir); break; + +@@ -1652,16 +1774,17 @@ static void asm_ir(ASMState *as, IRIns *ir) + case IR_MUL: asm_mul(as, ir); break; + case IR_MOD: asm_mod(as, ir); break; + case IR_NEG: asm_neg(as, ir); break; +-#if LJ_SOFTFP ++#if LJ_SOFTFP32 + case IR_DIV: case IR_POW: case IR_ABS: +- case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: +- lua_assert(0); /* Unused for LJ_SOFTFP. */ ++ case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: ++ /* Unused for LJ_SOFTFP32. */ ++ lj_assertA(0, "IR %04d with unused op %d", ++ (int)(ir - as->ir) - REF_BIAS, ir->o); + break; + #else + case IR_DIV: asm_div(as, ir); break; + case IR_POW: asm_pow(as, ir); break; + case IR_ABS: asm_abs(as, ir); break; +- case IR_ATAN2: asm_atan2(as, ir); break; + case IR_LDEXP: asm_ldexp(as, ir); break; + case IR_FPMATH: asm_fpmath(as, ir); break; + case IR_TOBIT: asm_tobit(as, ir); break; +@@ -1681,6 +1804,7 @@ static void asm_ir(ASMState *as, IRIns *ir) + case IR_NEWREF: asm_newref(as, ir); break; + case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; + case IR_FREF: asm_fref(as, ir); break; ++ case IR_TMPREF: asm_tmpref(as, ir); break; + case IR_STRREF: asm_strref(as, ir); break; + case IR_LREF: asm_lref(as, ir); break; + +@@ -1691,6 +1815,7 @@ static void asm_ir(ASMState *as, IRIns *ir) + case IR_FLOAD: asm_fload(as, ir); break; + case IR_XLOAD: asm_xload(as, ir); break; + case IR_SLOAD: asm_sload(as, ir); break; ++ case IR_ALEN: asm_alen(as, ir); break; + + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; + case IR_FSTORE: asm_fstore(as, ir); break; +@@ -1700,7 +1825,14 @@ static void asm_ir(ASMState *as, IRIns *ir) + case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; + case IR_TNEW: asm_tnew(as, ir); break; + case IR_TDUP: asm_tdup(as, ir); break; +- case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; ++ case IR_CNEW: case IR_CNEWI: ++#if LJ_HASFFI ++ asm_cnew(as, ir); ++#else ++ lj_assertA(0, "IR %04d with unused op %d", ++ (int)(ir - as->ir) - REF_BIAS, ir->o); ++#endif ++ break; + + /* Buffer operations. */ + case IR_BUFHDR: asm_bufhdr(as, ir); break; +@@ -1767,8 +1899,7 @@ static void asm_head_side(ASMState *as) + + if (as->snapno && as->topslot > as->parent->topslot) { + /* Force snap #0 alloc to prevent register overwrite in stack check. 
*/ +- as->snapno = 0; +- asm_snap_alloc(as); ++ asm_snap_alloc(as, 0); + } + allow = asm_head_side_base(as, irp, allow); + +@@ -1776,8 +1907,10 @@ static void asm_head_side(ASMState *as) + for (i = as->stopins; i > REF_BASE; i--) { + IRIns *ir = IR(i); + RegSP rs; +- lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || +- (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); ++ lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || ++ (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL, ++ "IR %04d has bad parent op %d", ++ (int)(ir - as->ir) - REF_BIAS, ir->o); + rs = as->parentmap[i - REF_FIRST]; + if (ra_hasreg(ir->r)) { + rset_clear(allow, ir->r); +@@ -2005,12 +2138,16 @@ static void asm_setup_regsp(ASMState *as) + #endif + + ra_setup(as); ++#if LJ_TARGET_ARM64 ++ ra_setkref(as, RID_GL, (intptr_t)J2G(as->J)); ++#endif + + /* Clear reg/sp for constants. */ + for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { + ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) { + #if LJ_GC64 ++ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ + ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ + #else + /* Make life easier for backends by putting address of constant in i. */ +@@ -2026,6 +2163,7 @@ static void asm_setup_regsp(ASMState *as) + as->snaprename = nins; + as->snapref = nins; + as->snapno = T->nsnap; ++ as->snapalloc = 0; + + as->stopins = REF_BASE; + as->orignins = nins; +@@ -2035,7 +2173,7 @@ static void asm_setup_regsp(ASMState *as) + ir = IR(REF_FIRST); + if (as->parent) { + uint16_t *p; +- lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); ++ lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir); + if (lastir - ir > LJ_MAX_JSLOTS) + lj_trace_err(as->J, LJ_TRERR_NYICOAL); + as->stopins = (IRRef)((lastir-1) - as->ir); +@@ -2074,6 +2212,10 @@ static void asm_setup_regsp(ASMState *as) + ir->prev = (uint16_t)REGSP_HINT((rload & 15)); + rload = lj_ror(rload, 4); + continue; ++ case IR_TMPREF: ++ if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4) ++ as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */ ++ break; + #endif + case IR_CALLXS: { + CCallInfo ci; +@@ -2083,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as) + as->modset |= RSET_SCRATCH; + continue; + } +- case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: { ++ case IR_CALLL: ++ /* lj_vm_next needs two TValues on the stack. 
*/ ++#if LJ_TARGET_X64 && LJ_ABI_WIN ++ if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4) ++ as->evenspill = SPS_FIRST + 4; ++#else ++ if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4) ++ as->evenspill = 4; ++#endif ++ /* fallthrough */ ++ case IR_CALLN: case IR_CALLA: case IR_CALLS: { + const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; + ir->prev = asm_setup_call_slots(as, ir, ci); + if (inloop) +@@ -2091,7 +2243,6 @@ static void asm_setup_regsp(ASMState *as) + (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; + continue; + } +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) + case IR_HIOP: + switch ((ir-1)->o) { + #if LJ_SOFTFP && LJ_TARGET_ARM +@@ -2102,15 +2253,15 @@ static void asm_setup_regsp(ASMState *as) + } + break; + #endif +-#if !LJ_SOFTFP && LJ_NEED_FP64 ++#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI + case IR_CONV: + if (irt_isfp((ir-1)->t)) { + ir->prev = REGSP_HINT(RID_FPRET); + continue; + } +- /* fallthrough */ + #endif +- case IR_CALLN: case IR_CALLXS: ++ /* fallthrough */ ++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + #if LJ_SOFTFP + case IR_MIN: case IR_MAX: + #endif +@@ -2121,12 +2272,11 @@ static void asm_setup_regsp(ASMState *as) + break; + } + break; +-#endif + #if LJ_SOFTFP + case IR_MIN: case IR_MAX: + if ((ir+1)->o != IR_HIOP) break; +- /* fallthrough */ + #endif ++ /* fallthrough */ + /* C calls evict all scratch regs and return results in RID_RET. */ + case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: + if (REGARG_NUMGPR < 3 && as->evenspill < 3) +@@ -2137,9 +2287,12 @@ static void asm_setup_regsp(ASMState *as) + if (ir->op2 != REF_NIL && as->evenspill < 4) + as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ + } ++ /* fallthrough */ + #else ++ /* fallthrough */ + case IR_CNEW: + #endif ++ /* fallthrough */ + case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: + case IR_BUFSTR: + ir->prev = REGSP_HINT(RID_RET); +@@ -2151,35 +2304,45 @@ static void asm_setup_regsp(ASMState *as) + as->modset = RSET_SCRATCH; + break; + #if !LJ_SOFTFP +- case IR_ATAN2: +-#if LJ_TARGET_X86 +- if (as->evenspill < 4) /* Leave room to call atan2(). */ +- as->evenspill = 4; +-#endif + #if !LJ_TARGET_X86ORX64 + case IR_LDEXP: + #endif + #endif ++ /* fallthrough */ + case IR_POW: + if (!LJ_SOFTFP && irt_isnum(ir->t)) { + if (inloop) + as->modset |= RSET_SCRATCH; + #if LJ_TARGET_X86 ++ if (irt_isnum(IR(ir->op2)->t)) { ++ if (as->evenspill < 4) /* Leave room to call pow(). */ ++ as->evenspill = 4; ++ } + break; + #else + ir->prev = REGSP_HINT(RID_FPRET); + continue; + #endif + } +- /* fallthrough for integer POW */ ++ /* fallthrough */ /* for integer POW */ + case IR_DIV: case IR_MOD: +- if (!irt_isnum(ir->t)) { ++ if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) { ++ ir->prev = REGSP_HINT(RID_RET); ++ if (inloop) ++ as->modset |= (RSET_SCRATCH & RSET_GPR); ++ continue; ++ } ++ break; ++#if LJ_64 && LJ_SOFTFP ++ case IR_ADD: case IR_SUB: case IR_MUL: ++ if (irt_isnum(ir->t)) { + ir->prev = REGSP_HINT(RID_RET); + if (inloop) + as->modset |= (RSET_SCRATCH & RSET_GPR); + continue; + } + break; ++#endif + case IR_FPMATH: + #if LJ_TARGET_X86ORX64 + if (ir->op2 <= IRFPM_TRUNC) { +@@ -2190,9 +2353,6 @@ static void asm_setup_regsp(ASMState *as) + continue; + } + break; +- } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) { +- if (as->evenspill < 4) /* Leave room to call pow(). 
*/ +- as->evenspill = 4; + } + #endif + if (inloop) +@@ -2208,6 +2368,7 @@ static void asm_setup_regsp(ASMState *as) + case IR_BSHL: case IR_BSHR: case IR_BSAR: + if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ + break; ++ /* fallthrough */ + case IR_BROL: case IR_BROR: + if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { + IR(ir->op2)->r = REGSP_HINT(RID_ECX); +@@ -2252,7 +2413,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T) + { + ASMState as_; + ASMState *as = &as_; +- MCode *origtop; + + /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ + { +@@ -2267,7 +2427,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) + /* Ensure an initialized instruction beyond the last one for HIOP checks. */ + /* This also allows one RENAME to be added without reallocating curfinal. */ + as->orignins = lj_ir_nextins(J); +- J->cur.ir[as->orignins].o = IR_NOP; ++ lj_ir_nop(&J->cur.ir[as->orignins]); + + /* Setup initial state. Copy some fields to reduce indirections. */ + as->J = J; +@@ -2280,7 +2440,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) + as->parent = J->parent ? traceref(J, J->parent) : NULL; + + /* Reserve MCode memory. */ +- as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); ++ as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot); + as->mcp = as->mctop; + as->mclim = as->mcbot + MCLIM_REDZONE; + asm_setup_target(as); +@@ -2338,7 +2498,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T) + /* Assemble a trace in linear backwards order. */ + for (as->curins--; as->curins > as->stopins; as->curins--) { + IRIns *ir = IR(as->curins); +- lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ ++ /* 64 bit types handled by SPLIT for 32 bit archs. */ ++ lj_assertA(!(LJ_32 && irt_isint64(ir->t)), ++ "IR %04d has unsplit 64 bit type", ++ (int)(ir - as->ir) - REF_BIAS); ++ asm_snap_prev(as); + if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) + continue; /* Dead-code elimination can be soooo easy. */ + if (irt_isguard(ir->t)) +@@ -2368,10 +2532,13 @@ void lj_asm_trace(jit_State *J, GCtrace *T) + asm_phi_fixup(as); + + if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ +- lua_assert(J->curfinal->nk == T->nk); ++ lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth"); + memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, + (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ + T->nins = J->curfinal->nins; ++ /* Fill mcofs of any unprocessed snapshots. */ ++ as->curins = REF_FIRST; ++ asm_snap_prev(as); + break; /* Done. */ + } + +@@ -2390,13 +2557,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T) + /* Set trace entry point before fixing up tail to allow link to self. */ + T->mcode = as->mcp; + T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; +- if (!as->loopref) ++ if (as->loopref) ++ asm_loop_tail_fixup(as); ++ else + asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ + T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); ++ asm_snap_fixup_mcofs(as); + #if LJ_TARGET_MCODE_FIXUP + asm_mcode_fixup(T->mcode, T->szmcode); + #endif +- lj_mcode_sync(T->mcode, origtop); ++ lj_mcode_sync(T->mcode, as->mctoporig); + } + + #undef IR +diff --git a/src/lj_asm.h b/src/lj_asm.h +index 2819481b..624da844 100644 +--- a/src/lj_asm.h ++++ b/src/lj_asm.h +@@ -1,6 +1,6 @@ + /* + ** IR assembler (SSA IR -> machine code). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_ASM_H +diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h +index 37bfa40f..cc608c0d 100644 +--- a/src/lj_asm_arm.h ++++ b/src/lj_asm_arm.h +@@ -1,6 +1,6 @@ + /* + ** ARM IR assembler (SSA IR -> machine code). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + /* -- Register allocator extensions --------------------------------------- */ +@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow) + } + } + } +- lua_assert(rset_test(RSET_GPREVEN, r)); ++ lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r); + ra_modified(as, r); + ra_modified(as, r+1); + RA_DBGX((as, "scratchpair $r $r", r, r+1)); +@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, + *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ + return ra_allock(as, (ofs & ~255), allow); + } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = 0; ++ return RID_SP; + } + } + *ofsp = 0; +@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, + return; + } + } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { +- lua_assert(ofs == 0); ++ lj_assertA(ofs == 0, "bad usage"); + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { + ofs += IR(ir->op2)->i; +@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); + if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; + if (gpr <= REGARG_LASTGPR) { +- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ + if (irt_isnum(ir->t)) { +- lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ ++ lj_assertA(rset_test(as->freeset, gpr+1), ++ "reg %d not free", gpr+1); /* Ditto. */ + emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); + gpr += 2; + } else { +@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + #endif + { + if (gpr <= REGARG_LASTGPR) { +- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ + if (ref) ra_leftov(as, gpr, ref); + gpr++; + } else { +@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { +- lua_assert(!irt_ispri(ir->t)); ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (!LJ_SOFTFP && irt_isfp(ir->t)) { + if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { + Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); +@@ -495,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir) + emit_lso(as, ARMI_LDR, RID_TMP, base, -4); + } + ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L); ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ if ((as->flags & JIT_F_ARMV6T2)) { ++ emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp); ++ } else { ++ emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp); ++ emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp); ++ } ++ emit_lso(as, ARMI_LDR, RID_TMP, ++ ra_allock(as, (addr & ~4095), ++ rset_exclude(rset_exclude(RSET_GPR, sb), tmp)), ++ (addr & 4095)); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ + /* -- Type conversions ---------------------------------------------------- */ + + #if !LJ_SOFTFP +@@ -530,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir) + #endif + IRRef lref = ir->op1; + /* 64 bit integer conversions are handled by SPLIT. */ +- lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); ++ lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64), ++ "IR %04d has unsplit 64 bit type", ++ (int)(ir - as->ir) - REF_BIAS); + #if LJ_SOFTFP + /* FP conversions are handled by SPLIT. */ +- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); ++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), ++ "IR %04d has FP type", ++ (int)(ir - as->ir) - REF_BIAS); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ + #else +- lua_assert(irt_type(ir->t) != st); ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ +@@ -553,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir) + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ +- lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); +@@ -572,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir) + Reg dest = ra_dest(as, ir, RSET_GPR); + if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + if ((as->flags & JIT_F_ARMV6)) { + ARMIns ai = st == IRT_I8 ? ARMI_SXTB : + st == IRT_U8 ? ARMI_UXTB : +@@ -658,35 +693,55 @@ static void asm_strto(ASMState *as, IRIns *ir) + /* -- Memory references --------------------------------------------------- */ + + /* Get pointer to TValue. */ +-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) + { +- IRIns *ir = IR(ref); +- if (irt_isnum(ir->t)) { +- if (irref_isk(ref)) { +- /* Use the number constant itself as a TValue. 
*/ +- ra_allockreg(as, i32ptr(ir_knum(ir)), dest); +- } else { ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if ((mode & IRTMPREF_OUT1)) { ++#if LJ_SOFTFP ++ lj_assertA(irref_isk(ref), "unsplit FP op"); ++ emit_dm(as, ARMI_MOV, dest, RID_SP); ++ emit_lso(as, ARMI_STR, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), ++ RID_SP, 0); ++ emit_lso(as, ARMI_STR, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), ++ RID_SP, 4); ++#else ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_dm(as, ARMI_MOV, dest, RID_SP); ++ emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0); ++#endif ++ } else if (irref_isk(ref)) { ++ /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, i32ptr(ir_knum(ir)), dest); ++ } else { + #if LJ_SOFTFP +- lua_assert(0); ++ lj_assertA(0, "unsplit FP op"); + #else +- /* Otherwise force a spill and use the spill slot. */ +- emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); ++ /* Otherwise force a spill and use the spill slot. */ ++ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); + #endif ++ } ++ } else { ++ /* Otherwise use [sp] and [sp+4] to hold the TValue. ++ ** This assumes the following call has max. 4 args. ++ */ ++ Reg type; ++ emit_dm(as, ARMI_MOV, dest, RID_SP); ++ if (!irt_ispri(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_GPR); ++ emit_lso(as, ARMI_STR, src, RID_SP, 0); ++ } ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) ++ type = ra_alloc1(as, ref+1, RSET_GPR); ++ else ++ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); ++ emit_lso(as, ARMI_STR, type, RID_SP, 4); + } + } else { +- /* Otherwise use [sp] and [sp+4] to hold the TValue. */ +- RegSet allow = rset_exclude(RSET_GPR, dest); +- Reg type; + emit_dm(as, ARMI_MOV, dest, RID_SP); +- if (!irt_ispri(ir->t)) { +- Reg src = ra_alloc1(as, ref, allow); +- emit_lso(as, ARMI_STR, src, RID_SP, 0); +- } +- if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) +- type = ra_alloc1(as, ref+1, allow); +- else +- type = ra_allock(as, irt_toitype(ir->t), allow); +- emit_lso(as, ARMI_STR, type, RID_SP, 4); + } + } + +@@ -811,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); + + /* Load main position relative to tab->node into dest. */ +- khash = irref_isk(refkey) ? ir_khash(irkey) : 1; ++ khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { + emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); + emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); +- if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ ++ if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. 
*/ + emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); + emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); +- emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); ++ emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid)); + emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); + } else if (irref_isk(refkey)) { + emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, +@@ -867,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + Reg key = RID_NONE, type = RID_TMP, idx = node; + RegSet allow = rset_exclude(RSET_GPR, node); +- lua_assert(ofs % sizeof(Node) == 0); ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (ofs > 4095) { + idx = dest; + rset_clear(allow, dest); +@@ -934,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir) + static void asm_fref(ASMState *as, IRIns *ir) + { + UNUSED(as); UNUSED(ir); +- lua_assert(!ra_used(ir)); ++ lj_assertA(!ra_used(ir), "unfused FREF"); + } + + static void asm_strref(ASMState *as, IRIns *ir) +@@ -971,39 +1026,43 @@ static void asm_strref(ASMState *as, IRIns *ir) + + /* -- Loads and stores ---------------------------------------------------- */ + +-static ARMIns asm_fxloadins(IRIns *ir) ++static ARMIns asm_fxloadins(ASMState *as, IRIns *ir) + { ++ UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: return ARMI_LDRSB; + case IRT_U8: return ARMI_LDRB; + case IRT_I16: return ARMI_LDRSH; + case IRT_U16: return ARMI_LDRH; +- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; +- case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; ++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ + default: return ARMI_LDR; + } + } + +-static ARMIns asm_fxstoreins(IRIns *ir) ++static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) + { ++ UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return ARMI_STRB; + case IRT_I16: case IRT_U16: return ARMI_STRH; +- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; +- case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; ++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ + default: return ARMI_STR; + } + } + + static void asm_fload(ASMState *as, IRIns *ir) + { +- if (ir->op1 == REF_NIL) { +- lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ ARMIns ai = asm_fxloadins(as, ir); ++ Reg idx; ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ ++ idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR); ++ ofs = 0; + } else { +- Reg dest = ra_dest(as, ir, RSET_GPR); +- Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); +- ARMIns ai = asm_fxloadins(ir); +- int32_t ofs; ++ idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. 
*/ +@@ -1012,11 +1071,11 @@ static void asm_fload(ASMState *as, IRIns *ir) + } + } + ofs = field_ofs[ir->op2]; +- if ((ai & 0x04000000)) +- emit_lso(as, ai, dest, idx, ofs); +- else +- emit_lsox(as, ai, dest, idx, ofs); + } ++ if ((ai & 0x04000000)) ++ emit_lso(as, ai, dest, idx, ofs); ++ else ++ emit_lsox(as, ai, dest, idx, ofs); + } + + static void asm_fstore(ASMState *as, IRIns *ir) +@@ -1026,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; +- ARMIns ai = asm_fxstoreins(ir); ++ ARMIns ai = asm_fxstoreins(as, ir); + if ((ai & 0x04000000)) + emit_lso(as, ai, src, idx, ofs); + else +@@ -1038,8 +1097,8 @@ static void asm_xload(ASMState *as, IRIns *ir) + { + Reg dest = ra_dest(as, ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); +- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); +- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); ++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); ++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); + } + + static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +@@ -1047,7 +1106,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); +- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, ++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } + } +@@ -1066,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + rset_clear(allow, type); + } + if (ra_used(ir)) { +- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || +- irt_isint(ir->t) || irt_isaddr(ir->t)); ++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); + rset_clear(allow, dest); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow, + (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); ++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + if (!hiop || type == RID_NONE) { + rset_clear(allow, idx); + if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && +@@ -1133,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir) + IRType t = hiop ? IRT_NUM : irt_type(ir->t); + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; +- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ +- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); + #if LJ_SOFTFP +- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ ++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), ++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); +@@ -1152,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir) + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); +- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || +- irt_isint(ir->t) || irt_isaddr(ir->t)); ++ lj_assertA((LJ_SOFTFP ? 
0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad SLOAD type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); +@@ -1218,7 +1283,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) + IRRef args[4]; + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + RegSet drop = RSET_SCRATCH; +- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) +@@ -1230,10 +1296,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + int32_t ofs = sizeof(GCcdata); +- lua_assert(sz == 4 || sz == 8); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + if (sz == 8) { + ofs += 4; ir++; +- lua_assert(ir->o == IR_HIOP); ++ lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI"); + } + for (;;) { + Reg r = ra_alloc1(as, ir->op2, allow); +@@ -1268,8 +1334,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); + } +-#else +-#define asm_cnew(as, ir) ((void)0) + #endif + + /* -- Write barriers ------------------------------------------------------ */ +@@ -1301,7 +1365,7 @@ static void asm_obar(ASMState *as, IRIns *ir) + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ +- lua_assert(IR(ir->op1)->o == IR_UREFC); ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ +@@ -1364,8 +1428,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id) + + static void asm_fpmath(ASMState *as, IRIns *ir) + { +- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) +- return; + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, ir->op2); + else if (ir->op2 == IRFPM_SQRT) +@@ -1412,14 +1474,29 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) + emit_dn(as, ai^m, dest, left); + } + +-static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) ++/* Try to drop cmp r, #0. */ ++static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai) + { +- if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ ++ if (as->flagmcp == as->mcp) { ++ uint32_t cc = (as->mcp[1] >> 28); + as->flagmcp = NULL; +- as->mcp++; +- ai |= ARMI_S; ++ if (cc <= CC_NE) { ++ as->mcp++; ++ ai |= ARMI_S; ++ } else if (cc == CC_GE) { ++ *++as->mcp ^= ((CC_GE^CC_PL) << 28); ++ ai |= ARMI_S; ++ } else if (cc == CC_LT) { ++ *++as->mcp ^= ((CC_LT^CC_MI) << 28); ++ ai |= ARMI_S; ++ } /* else: other conds don't work in general. 
*/ + } +- asm_intop(as, ir, ai); ++ return ai; ++} ++ ++static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) ++{ ++ asm_intop(as, ir, asm_drop_cmp0(as, ai)); + } + + static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) +@@ -1492,15 +1569,10 @@ static void asm_mul(ASMState *as, IRIns *ir) + #define asm_mulov(as, ir) asm_mul(as, ir) + + #if !LJ_SOFTFP +-#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) +-#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) ++#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) + #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) +-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + #endif + +-#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) +- + static void asm_neg(ASMState *as, IRIns *ir) + { + #if !LJ_SOFTFP +@@ -1514,20 +1586,7 @@ static void asm_neg(ASMState *as, IRIns *ir) + + static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) + { +- if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ +- uint32_t cc = (as->mcp[1] >> 28); +- as->flagmcp = NULL; +- if (cc <= CC_NE) { +- as->mcp++; +- ai |= ARMI_S; +- } else if (cc == CC_GE) { +- *++as->mcp ^= ((CC_GE^CC_PL) << 28); +- ai |= ARMI_S; +- } else if (cc == CC_LT) { +- *++as->mcp ^= ((CC_LT^CC_MI) << 28); +- ai |= ARMI_S; +- } /* else: other conds don't work with bit ops. */ +- } ++ ai = asm_drop_cmp0(as, ai); + if (ir->op2 == 0) { + Reg dest = ra_dest(as, ir, RSET_GPR); + uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); +@@ -1582,7 +1641,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) + #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) + #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) + #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) +-#define asm_brol(as, ir) lua_assert(0) ++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") + + static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) + { +@@ -1657,8 +1716,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) + asm_intmin_max(as, ir, cc); + } + +-#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) +-#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) ++#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) ++#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) + + /* -- Comparisons --------------------------------------------------------- */ + +@@ -1733,7 +1792,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir) + Reg left; + uint32_t m; + int cmpprev0 = 0; +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), ++ "bad comparison data type %d", irt_type(ir->t)); + if (asm_swapops(as, lref, rref)) { + Reg tmp = lref; lref = rref; rref = tmp; + if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ +@@ -1825,15 +1885,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir) + } + #endif + +-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++/* -- Split register ops -------------------------------------------------- */ + +-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */ + static void asm_hiop(ASMState *as, IRIns *ir) + { +-#if LJ_HASFFI || LJ_SOFTFP + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? 
*/ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++#if LJ_HASFFI || LJ_SOFTFP + if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ + as->curins--; /* Always skip the loword comparison. */ + #if LJ_SOFTFP +@@ -1850,7 +1910,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) + } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { + as->curins--; /* Always skip the loword min/max. */ + if (uselo || usehi) +- asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); ++ asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); + return; + #elif LJ_HASFFI + } else if ((ir-1)->o == IR_CONV) { +@@ -1864,6 +1924,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) + asm_xstore_(as, ir, 4); + return; + } ++#endif + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { + #if LJ_HASFFI +@@ -1882,6 +1943,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) + asm_intneg(as, ir, ARMI_RSC); + asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); + break; ++ case IR_CNEWI: ++ /* Nothing to do here. Handled by lo op itself. */ ++ break; + #endif + #if LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: +@@ -1889,24 +1953,16 @@ static void asm_hiop(ASMState *as, IRIns *ir) + if (!uselo) + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ + break; ++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: ++ /* Nothing to do here. Handled by lo op itself. */ ++ break; + #endif +- case IR_CALLN: +- case IR_CALLS: +- case IR_CALLXS: ++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; +-#if LJ_SOFTFP +- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: +-#endif +- case IR_CNEWI: +- /* Nothing to do here. Handled by lo op itself. */ +- break; +- default: lua_assert(0); break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +-#else +- UNUSED(as); UNUSED(ir); lua_assert(0); +-#endif + } + + /* -- Profiling ----------------------------------------------------------- */ +@@ -1930,7 +1986,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, + if (irp) { + if (!ra_hasspill(irp->s)) { + pbase = irp->r; +- lua_assert(ra_hasreg(pbase)); ++ lj_assertA(ra_hasreg(pbase), "base reg lost"); + } else if (allow) { + pbase = rset_pickbot(allow); + } else { +@@ -1942,7 +1998,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, + } + emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); + k = emit_isk12(0, (int32_t)(8*topslot)); +- lua_assert(k); ++ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); + emit_n(as, ARMI_CMP^k, RID_TMP); + emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); + emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, +@@ -1979,7 +2035,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + #if LJ_SOFTFP + RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); + Reg tmp; +- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ ++ /* LJ_SOFTFP: must be a number constant. 
*/ ++ lj_assertA(irref_isk(ref), "unsplit FP op"); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, + rset_exclude(RSET_GPREVEN, RID_BASE)); + emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); +@@ -1993,7 +2050,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } else { + RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); + Reg type; +- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "restore of IR type %d", irt_type(ir->t)); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); + emit_lso(as, ARMI_STR, src, RID_BASE, ofs); +@@ -2006,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); + #endif ++ } else if ((sn & SNAP_KEYINDEX)) { ++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); + } else { + type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); + } +@@ -2013,11 +2073,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } + checkmclim(as); + } +- lua_assert(map + nent == flinks); ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); + } + + /* -- GC handling --------------------------------------------------------- */ + ++/* Marker to prevent patching the GC check exit. */ ++#define ARM_NOPATCH_GC_CHECK (ARMI_BIC|ARMI_K12) ++ + /* Check GC threshold and do one or more GC steps. */ + static void asm_gc_check(ASMState *as) + { +@@ -2029,6 +2092,7 @@ static void asm_gc_check(ASMState *as) + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ ++ *--as->mcp = ARM_NOPATCH_GC_CHECK; + emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ +@@ -2063,6 +2127,12 @@ static void asm_loop_fixup(ASMState *as) + } + } + ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do. */ ++} ++ + /* -- Head of trace ------------------------------------------------------- */ + + /* Reload L register from g->cur_L. */ +@@ -2099,7 +2169,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) + rset_clear(allow, ra_dest(as, ir, allow)); + } else { + Reg r = irp->r; +- lua_assert(ra_hasreg(r)); ++ lj_assertA(ra_hasreg(r), "base reg lost"); + rset_clear(allow, r); + if (r != ir->r && !rset_test(as->freeset, r)) + ra_restore(as, regcost_ref(as->cost[r])); +@@ -2121,7 +2191,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) + } else { + /* Patch stack adjustment. */ + uint32_t k = emit_isk12(ARMI_ADD, spadj); +- lua_assert(k); ++ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); + p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + } + /* Patch exit branch. */ +@@ -2197,13 +2267,14 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + /* Look for bl_cc exitstub, replace with b_cc target. 
*/ + uint32_t ins = *p; + if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u && +- ((ins ^ (px-p)) & 0x00ffffffu) == 0) { ++ ((ins ^ (px-p)) & 0x00ffffffu) == 0 && ++ p[-1] != ARM_NOPATCH_GC_CHECK) { + *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu); + cend = p+1; + if (!cstart) cstart = p; + } + } +- lua_assert(cstart != NULL); ++ lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); + lj_mcode_sync(cstart, cend); + lj_mcode_patch(J, mcarea, 1); + } +diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h +index 8fd92e76..67c53ee2 100644 +--- a/src/lj_asm_arm64.h ++++ b/src/lj_asm_arm64.h +@@ -1,6 +1,6 @@ + /* + ** ARM64 IR assembler (SSA IR -> machine code). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. + ** Sponsored by Cisco Systems, Inc. +@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) + asm_mclimit(as); + /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + for (i = nexits-1; (int32_t)i >= 0; i--) +- *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu)); +- *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno)); ++ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); ++ *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); + mxp--; +- *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu)); +- *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP)); ++ *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); ++ *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); + as->mctop = mxp; + } + +@@ -77,7 +77,7 @@ static void asm_guardcc(ASMState *as, A64CC cc) + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; +- *p = A64I_B | ((target-p) & 0x03ffffffu); ++ *p = A64I_B | A64F_S26(target-p); + emit_cond_branch(as, cc^1, p-1); + return; + } +@@ -91,7 +91,7 @@ static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; +- *p = A64I_B | ((target-p) & 0x03ffffffu); ++ *p = A64I_B | A64F_S26(target-p); + emit_tnb(as, ai^0x01000000u, r, bit, p-1); + return; + } +@@ -105,7 +105,7 @@ static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; +- *p = A64I_B | ((target-p) & 0x03ffffffu); ++ *p = A64I_B | A64F_S26(target-p); + emit_cnb(as, ai^0x01000000u, r, p-1); + return; + } +@@ -198,6 +198,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, + return RID_GL; + } + } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv); ++ return RID_GL; + } + } + *ofsp = 0; +@@ -213,7 +216,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) + return A64F_M(ir->r); + } else if (irref_isk(ref)) { + uint32_t m; +- int64_t k = get_k64val(ir); ++ int64_t k = get_k64val(as, ref); + if ((ai & 0x1f000000) == 0x0a000000) + m = emit_isk13(k, irt_is64(ir->t)); + else +@@ -295,8 +298,10 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, + } else if (asm_isk32(as, ir->op1, &ofs)) { + ref = ir->op2; + } else { +- Reg rn = ra_alloc1(as, ir->op1, allow); +- IRIns *irr = IR(ir->op2); ++ Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2; ++ Reg refv = irref_isk(ir->op1) ? 
ir->op2 : ir->op1; ++ Reg rn = ra_alloc1(as, refv, allow); ++ IRIns *irr = IR(refk); + uint32_t m; + if (irr+1 == ir && !ra_used(irr) && + irr->o == IR_ADD && irref_isk(irr->op2)) { +@@ -307,7 +312,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, + goto skipopm; + } + } +- m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); ++ m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn)); + ofs = sizeof(GCstr); + skipopm: + emit_lso(as, ai, rd, rd, ofs); +@@ -352,9 +357,9 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) + static int asm_fuseandshift(ASMState *as, IRIns *ir) + { + IRIns *irl = IR(ir->op1); +- lua_assert(ir->o == IR_BAND); ++ lj_assertA(ir->o == IR_BAND, "bad usage"); + if (canfuse(as, irl) && irref_isk(ir->op2)) { +- uint64_t mask = get_k64val(IR(ir->op2)); ++ uint64_t mask = get_k64val(as, ir->op2); + if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { + int32_t shmask = irt_is64(irl->t) ? 63 : 31; + int32_t shift = (IR(irl->op2)->i & shmask); +@@ -382,7 +387,7 @@ static int asm_fuseandshift(ASMState *as, IRIns *ir) + static int asm_fuseorshift(ASMState *as, IRIns *ir) + { + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); +- lua_assert(ir->o == IR_BOR); ++ lj_assertA(ir->o == IR_BOR, "bad usage"); + if (canfuse(as, irl) && canfuse(as, irr) && + ((irl->o == IR_BSHR && irr->o == IR_BSHL) || + (irl->o == IR_BSHL && irr->o == IR_BSHR))) { +@@ -426,7 +431,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + if (ref) { + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { +- lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ ++ lj_assertA(rset_test(as->freeset, fpr), ++ "reg %d not free", fpr); /* Must have been evicted. */ + ra_leftov(as, fpr, ref); + fpr++; + } else { +@@ -436,7 +442,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + } + } else { + if (gpr <= REGARG_LASTGPR) { +- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; + } else { +@@ -453,11 +460,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + { + RegSet drop = RSET_SCRATCH; ++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ if (hiop && ra_hasreg((ir+1)->r)) ++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { +- lua_assert(!irt_ispri(ir->t)); ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (irt_isfp(ir->t)) { + if (ci->flags & CCI_CASTU64) { + Reg dest = ra_dest(as, ir, RSET_FPR) & 31; +@@ -466,6 +476,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + } else { + ra_destreg(as, ir, RID_FPRET); + } ++ } else if (hiop) { ++ ra_destpair(as, ir); + } else { + ra_destreg(as, ir, RID_RET); + } +@@ -515,6 +527,21 @@ static void asm_retf(ASMState *as, IRIns *ir) + emit_lso(as, A64I_LDRx, RID_TMP, base, -8); + } + ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp); ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ + /* -- Type conversions ---------------------------------------------------- */ + + static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +@@ -544,7 +571,7 @@ static void asm_conv(ASMState *as, IRIns *ir) + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + IRRef lref = ir->op1; +- lua_assert(irt_type(ir->t) != st); ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ +@@ -564,7 +591,8 @@ static void asm_conv(ASMState *as, IRIns *ir) + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ +- lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); +@@ -584,7 +612,7 @@ static void asm_conv(ASMState *as, IRIns *ir) + A64Ins ai = st == IRT_I8 ? A64I_SXTBw : + st == IRT_U8 ? A64I_UXTBw : + st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + emit_dn(as, ai, dest, left); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); +@@ -597,7 +625,7 @@ static void asm_conv(ASMState *as, IRIns *ir) + emit_dn(as, A64I_SXTW, dest, left); + } + } else { +- if (st64) { ++ if (st64 && !(ir->op2 & IRCONV_NONE)) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ +@@ -648,7 +676,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) + { + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); +- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); +@@ -669,22 +698,23 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) + } + + /* Get pointer to TValue. */ +-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) + { +- IRIns *ir = IR(ref); +- if (irt_isnum(ir->t)) { +- if (irref_isk(ref)) { +- /* Use the number constant itself as a TValue. */ +- ra_allockreg(as, i64ptr(ir_knum(ir)), dest); ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { ++ /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, i64ptr(ir_knum(ir)), dest); ++ return; ++ } ++ emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0); + } else { +- /* Otherwise force a spill and use the spill slot. */ +- emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); ++ asm_tvstore64(as, dest, 0, ref); + } +- } else { +- /* Otherwise use g->tmptv to hold the TValue. */ +- asm_tvstore64(as, dest, 0, ref); +- ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); + } ++ /* g->tmptv holds the TValue(s). 
*/ ++ emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL); + } + + static void asm_aref(ASMState *as, IRIns *ir) +@@ -722,6 +752,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = 0, tmp = RID_TMP; ++ Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(ir->op2); +@@ -751,6 +782,28 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + } + } + ++ /* Allocate constants early. */ ++ if (irt_isnum(kt)) { ++ if (!isk) { ++ tisnum = ra_allock(as, LJ_TISNUM << 15, allow); ++ ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ rset_clear(allow, tisnum); ++ } ++ } else if (irt_isaddr(kt)) { ++ if (isk) { ++ int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; ++ scr = ra_allock(as, kk, allow); ++ } else { ++ scr = ra_scratch(as, allow); ++ } ++ rset_clear(allow, scr); ++ } else { ++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); ++ type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow); ++ scr = ra_scratch(as, rset_clear(allow, type)); ++ rset_clear(allow, scr); ++ } ++ + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + as->invmcp = NULL; +@@ -780,9 +833,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_nm(as, A64I_CMPx, key, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { +- Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow); +- Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); +- rset_clear(allow, tisnum); + emit_nm(as, A64I_FCMPd, key, ftmp); + emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); + emit_cond_branch(as, CC_LO, l_next); +@@ -790,36 +840,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); + } + } else if (irt_isaddr(kt)) { +- Reg scr; + if (isk) { +- int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; +- scr = ra_allock(as, kk, allow); + emit_nm(as, A64I_CMPx, scr, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { +- scr = ra_scratch(as, allow); + emit_nm(as, A64I_CMPx, tmp, scr); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); + } +- rset_clear(allow, scr); + } else { +- Reg type, scr; +- lua_assert(irt_ispri(kt) && !irt_isnil(kt)); +- type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); +- scr = ra_scratch(as, rset_clear(allow, type)); +- rset_clear(allow, scr); +- emit_nm(as, A64I_CMPw, scr, type); ++ emit_nm(as, A64I_CMPx, scr, type); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); + } + + *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; + if (!isk && irt_isaddr(kt)) { +- Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow); ++ type = ra_allock(as, (int32_t)irt_toitype(kt), allow); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); + rset_clear(allow, type); + } + /* Load main position relative to tab->node into dest. */ +- khash = isk ? ir_khash(irkey) : 1; ++ khash = isk ? 
ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); + } else { +@@ -831,9 +871,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_dnm(as, A64I_ANDw, dest, dest, tmphash); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else if (irt_isstr(kt)) { +- /* Fetch of str->hash is cheaper than ra_allock. */ ++ /* Fetch of str->sid is cheaper than ra_allock. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); +- emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); ++ emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); +@@ -869,14 +909,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !emit_checkofs(A64I_LDRx, ofs); +- RegSet allow = RSET_GPR; + Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; +- Reg node = ra_alloc1(as, ir->op1, allow); +- Reg key = ra_scratch(as, rset_clear(allow, node)); +- Reg idx = node; ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg key, idx = node; ++ RegSet allow = rset_exclude(RSET_GPR, node); + uint64_t k; +- lua_assert(ofs % sizeof(Node) == 0); +- rset_clear(allow, key); ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); +@@ -892,7 +930,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + } else { + k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); + } +- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); ++ key = ra_scratch(as, allow); ++ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); + emit_lso(as, A64I_LDRx, key, idx, kofs); + if (bigofs) + emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); +@@ -925,7 +964,7 @@ static void asm_uref(ASMState *as, IRIns *ir) + static void asm_fref(ASMState *as, IRIns *ir) + { + UNUSED(as); UNUSED(ir); +- lua_assert(!ra_used(ir)); ++ lj_assertA(!ra_used(ir), "unfused FREF"); + } + + static void asm_strref(ASMState *as, IRIns *ir) +@@ -977,7 +1016,7 @@ static void asm_fload(ASMState *as, IRIns *ir) + Reg idx; + A64Ins ai = asm_fxloadins(ir); + int32_t ofs; +- if (ir->op1 == REF_NIL) { ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = RID_GL; + ofs = (ir->op2 << 2) - GG_OFS(g); + } else { +@@ -1008,7 +1047,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) + static void asm_xload(ASMState *as, IRIns *ir) + { + Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); +- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); ++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); + } + +@@ -1026,8 +1065,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + Reg idx, tmp, type; + int32_t ofs = 0; + RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; +- lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || +- irt_isint(ir->t)); ++ lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || ++ irt_isint(ir->t), ++ "bad load type %d", irt_type(ir->t)); + if (ra_used(ir)) { + Reg dest = ra_dest(as, ir, allow); + tmp = irt_isnum(ir->t) ? 
ra_scratch(as, rset_clear(gpr, dest)) : dest; +@@ -1043,10 +1083,12 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + } + type = ra_scratch(as, rset_clear(gpr, tmp)); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); ++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + /* Always do the type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); + if (irt_type(ir->t) >= IRT_NUM) { +- lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); ++ lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), ++ "bad load type %d", irt_type(ir->t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); + } else if (irt_isaddr(ir->t)) { +@@ -1056,7 +1098,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); + } else { + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), +- ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); ++ ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp); + } + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); +@@ -1111,8 +1153,10 @@ static void asm_sload(ASMState *as, IRIns *ir) + IRType1 t = ir->t; + Reg dest = RID_NONE, base; + RegSet allow = RSET_GPR; +- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ +- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); +@@ -1121,7 +1165,8 @@ static void asm_sload(ASMState *as, IRIns *ir) + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); +- lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); ++ lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), ++ "bad SLOAD type %d", irt_type(t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); + if (irt_isaddr(t)) { +@@ -1161,7 +1206,8 @@ dotypecheck: + /* Need type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); + if (irt_type(t) >= IRT_NUM) { +- lua_assert(irt_isinteger(t) || irt_isnum(t)); ++ lj_assertA(irt_isinteger(t) || irt_isnum(t), ++ "bad SLOAD type %d", irt_type(t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, LJ_TISNUM << 15, allow), tmp); + } else if (irt_isnil(t)) { +@@ -1196,7 +1242,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); +- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); + + as->gcsteps++; + asm_setupresult(as, ir, ci); /* GCcdata * */ +@@ -1204,7 +1251,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) + if (ir->o == IR_CNEWI) { + int32_t ofs = sizeof(GCcdata); + Reg r = ra_alloc1(as, ir->op2, allow); +- lua_assert(sz == 4 || sz == 8); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + emit_lso(as, sz == 8 ? 
A64I_STRx : A64I_STRw, r, RID_RET, ofs); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; +@@ -1231,8 +1278,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); + } +-#else +-#define asm_cnew(as, ir) ((void)0) + #endif + + /* -- Write barriers ------------------------------------------------------ */ +@@ -1241,17 +1286,13 @@ static void asm_tbar(ASMState *as, IRIns *ir) + { + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); +- Reg gr = ra_allock(as, i64ptr(J2G(as->J)), +- rset_exclude(rset_exclude(RSET_GPR, tab), link)); + Reg mark = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); +- emit_lso(as, A64I_STRx, tab, gr, +- (int32_t)offsetof(global_State, gc.grayagain)); ++ emit_setgl(as, tab, gc.grayagain); + emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); +- emit_lso(as, A64I_LDRx, link, gr, +- (int32_t)offsetof(global_State, gc.grayagain)); ++ emit_getgl(as, link, gc.grayagain); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); + emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); +@@ -1265,13 +1306,13 @@ static void asm_obar(ASMState *as, IRIns *ir) + RegSet allow = RSET_GPR; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ +- lua_assert(IR(ir->op1)->o == IR_UREFC); ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); +- ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); ++ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(allow, obj)); + emit_cond_branch(as, CC_EQ, l_end); +@@ -1309,8 +1350,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir) + } else if (fpm <= IRFPM_TRUNC) { + asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : + fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); +- } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { +- return; + } else { + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); + } +@@ -1417,46 +1456,12 @@ static void asm_mul(ASMState *as, IRIns *ir) + asm_intmul(as, ir); + } + +-static void asm_div(ASMState *as, IRIns *ir) +-{ +-#if LJ_HASFFI +- if (!irt_isnum(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : +- IRCALL_lj_carith_divu64); +- else +-#endif +- asm_fparith(as, ir, A64I_FDIVd); +-} +- +-static void asm_pow(ASMState *as, IRIns *ir) +-{ +-#if LJ_HASFFI +- if (!irt_isnum(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_powi64 : +- IRCALL_lj_carith_powu64); +- else +-#endif +- asm_callid(as, ir, IRCALL_lj_vm_powi); +-} +- + #define asm_addov(as, ir) asm_add(as, ir) + #define asm_subov(as, ir) asm_sub(as, ir) + #define asm_mulov(as, ir) asm_mul(as, ir) + ++#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd) + #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) +-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) +- +-static void asm_mod(ASMState *as, IRIns *ir) +-{ +-#if LJ_HASFFI +- if (!irt_isint(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : +- IRCALL_lj_carith_modu64); +- else +-#endif +- asm_callid(as, ir, IRCALL_lj_vm_modi); +-} + + static void asm_neg(ASMState *as, IRIns *ir) + { +@@ -1571,7 +1576,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) + #define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) + #define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) + #define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) +-#define asm_brol(as, ir) lua_assert(0) ++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") + + static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) + { +@@ -1587,7 +1592,7 @@ static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) + Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = ((left >> 8) & 31); left &= 31; +- emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); ++ emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left); + emit_nm(as, A64I_FCMPd, left, right); + } + +@@ -1599,8 +1604,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) + asm_intmin_max(as, ir, cc); + } + +-#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) +-#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) ++#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL) ++#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE) + + /* -- Comparisons --------------------------------------------------------- */ + +@@ -1652,15 +1657,16 @@ static void asm_intcomp(ASMState *as, IRIns *ir) + Reg left; + uint32_t m; + int cmpprev0 = 0; +- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || +- irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); ++ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || ++ irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), ++ "bad comparison data type %d", irt_type(ir->t)); + if (asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ + else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ + } + oldcc = cc; +- if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { ++ if (irref_isk(rref) && get_k64val(as, rref) == 0) { + IRIns *irl = IR(lref); + if (cc == CC_GE) cc = CC_PL; + else if (cc == CC_LT) cc = CC_MI; +@@ -1675,7 +1681,7 @@ static void asm_intcomp(ASMState *as, IRIns *ir) + Reg tmp = blref; blref = brref; brref = tmp; + } + if (irref_isk(brref)) { +- uint64_t k = get_k64val(IR(brref)); ++ uint64_t k = get_k64val(as, brref); + if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { + asm_guardtnb(as, cc == CC_EQ ? 
A64I_TBZ : A64I_TBNZ, + ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); +@@ -1719,12 +1725,25 @@ static void asm_comp(ASMState *as, IRIns *ir) + + #define asm_equal(as, ir) asm_comp(as, ir) + +-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++/* -- Split register ops -------------------------------------------------- */ + +-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */ + static void asm_hiop(ASMState *as, IRIns *ir) + { +- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ ++ /* HIOP is marked as a store because it needs its own DCE logic. */ ++ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ ++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ ++ switch ((ir-1)->o) { ++ case IR_CALLN: ++ case IR_CALLL: ++ case IR_CALLS: ++ case IR_CALLXS: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ ++ break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; ++ } + } + + /* -- Profiling ----------------------------------------------------------- */ +@@ -1732,7 +1751,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) + static void asm_prof(ASMState *as, IRIns *ir) + { + uint32_t k = emit_isk13(HOOK_PROFILE, 0); +- lua_assert(k != 0); ++ lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13"); + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_n(as, A64I_TSTw^k, RID_TMP); +@@ -1750,7 +1769,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, + if (irp) { + if (!ra_hasspill(irp->s)) { + pbase = irp->r; +- lua_assert(ra_hasreg(pbase)); ++ lj_assertA(ra_hasreg(pbase), "base reg lost"); + } else if (allow) { + pbase = rset_pickbot(allow); + } else { +@@ -1762,7 +1781,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, + } + emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); + k = emit_isk12((8*topslot)); +- lua_assert(k); ++ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); + emit_n(as, A64I_CMPx^k, RID_TMP); + emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); + emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, +@@ -1795,7 +1814,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; +- if (irt_isnum(ir->t)) { ++ if ((sn & SNAP_KEYINDEX)) { ++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); ++ Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) : ++ ra_alloc1(as, ref, allow); ++ rset_clear(allow, r); ++ emit_lso(as, A64I_STRw, r, RID_BASE, ofs); ++ emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4); ++ } else if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); + } else { +@@ -1803,36 +1829,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } + checkmclim(as); + } +- lua_assert(map + nent == flinks); ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); + } + + /* -- GC handling --------------------------------------------------------- */ + ++/* Marker to prevent patching the GC check exit. */ ++#define ARM64_NOPATCH_GC_CHECK \ ++ (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP)) ++ + /* Check GC threshold and do one or more GC steps. 
*/ + static void asm_gc_check(ASMState *as) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; +- Reg tmp1, tmp2; ++ Reg tmp2; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ ++ *--as->mcp = ARM64_NOPATCH_GC_CHECK; + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); +- tmp1 = ra_releasetmp(as, ASMREF_TMP1); ++ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + tmp2 = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp2, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_cond_branch(as, CC_LS, l_end); + emit_nm(as, A64I_CMPx, RID_TMP, tmp2); +- emit_lso(as, A64I_LDRx, tmp2, tmp1, +- (int32_t)offsetof(global_State, gc.threshold)); +- emit_lso(as, A64I_LDRx, RID_TMP, tmp1, +- (int32_t)offsetof(global_State, gc.total)); +- ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); ++ emit_getgl(as, tmp2, gc.threshold); ++ emit_getgl(as, RID_TMP, gc.total); + as->gcsteps = 0; + checkmclim(as); + } +@@ -1851,10 +1879,16 @@ static void asm_loop_fixup(ASMState *as) + p[-2] |= ((uint32_t)delta & mask) << 5; + } else { + ptrdiff_t delta = target - (p - 1); +- p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); ++ p[-1] = A64I_B | A64F_S26(delta); + } + } + ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do. */ ++} ++ + /* -- Head of trace ------------------------------------------------------- */ + + /* Reload L register from g->cur_L. */ +@@ -1891,7 +1925,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) + rset_clear(allow, ra_dest(as, ir, allow)); + } else { + Reg r = irp->r; +- lua_assert(ra_hasreg(r)); ++ lj_assertA(ra_hasreg(r), "base reg lost"); + rset_clear(allow, r); + if (r != ir->r && !rset_test(as->freeset, r)) + ra_restore(as, regcost_ref(as->cost[r])); +@@ -1915,12 +1949,12 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) + } else { + /* Patch stack adjustment. */ + uint32_t k = emit_isk12(spadj); +- lua_assert(k); ++ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); + p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + } + /* Patch exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; +- p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); ++ p[-1] = A64I_B | A64F_S26((target-p)+1); + } + + /* Prepare tail of code. */ +@@ -1983,40 +2017,54 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + { + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); +- MCode *cstart = NULL, *cend = p; ++ MCode *cstart = NULL; + MCode *mcarea = lj_mcode_patch(J, p, 0); + MCode *px = exitstub_trace_addr(T, exitno); ++ int patchlong = 1; ++ /* Note: this assumes a trace exit is only ever patched once. */ + for (; p < pe; p++) { + /* Look for exitstub branch, replace with branch to target. */ ++ ptrdiff_t delta = target - p; + MCode ins = A64I_LE(*p); + if ((ins & 0xff000000u) == 0x54000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { +- /* Patch bcc exitstub. */ +- *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u)); +- cend = p+1; +- if (!cstart) cstart = p; ++ /* Patch bcc, if within range. 
*/ ++ if (A64F_S_OK(delta, 19)) { ++ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); ++ if (!cstart) cstart = p; ++ } + } else if ((ins & 0xfc000000u) == 0x14000000u && + ((ins ^ (px-p)) & 0x03ffffffu) == 0) { +- /* Patch b exitstub. */ +- *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu)); +- cend = p+1; ++ /* Patch b. */ ++ lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); ++ *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta)); + if (!cstart) cstart = p; + } else if ((ins & 0x7e000000u) == 0x34000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { +- /* Patch cbz/cbnz exitstub. */ +- *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u)); +- cend = p+1; +- if (!cstart) cstart = p; ++ /* Patch cbz/cbnz, if within range. */ ++ if (p[-1] == ARM64_NOPATCH_GC_CHECK) { ++ patchlong = 0; ++ } else if (A64F_S_OK(delta, 19)) { ++ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); ++ if (!cstart) cstart = p; ++ } + } else if ((ins & 0x7e000000u) == 0x36000000u && + ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { +- /* Patch tbz/tbnz exitstub. */ +- *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u)); +- cend = p+1; +- if (!cstart) cstart = p; ++ /* Patch tbz/tbnz, if within range. */ ++ if (A64F_S_OK(delta, 14)) { ++ *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta)); ++ if (!cstart) cstart = p; ++ } + } + } +- lua_assert(cstart != NULL); +- lj_mcode_sync(cstart, cend); ++ /* Always patch long-range branch in exit stub itself. Except, if we can't. */ ++ if (patchlong) { ++ ptrdiff_t delta = target - px; ++ lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); ++ *px = A64I_B | A64F_S26(delta); ++ if (!cstart) cstart = px; ++ } ++ if (cstart) lj_mcode_sync(cstart, px+1); + lj_mcode_patch(J, mcarea, 1); + } + +diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h +index affe7d89..1497ad9c 100644 +--- a/src/lj_asm_mips.h ++++ b/src/lj_asm_mips.h +@@ -1,6 +1,6 @@ + /* + ** MIPS IR assembler (SSA IR -> machine code). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + /* -- Register allocator extensions --------------------------------------- */ +@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) + { + Reg r = IR(ref)->r; + if (ra_noreg(r)) { +- if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) ++ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) + return RID_ZERO; + r = ra_allocref(as, ref, allow); + } else { +@@ -64,18 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) + /* Setup spare long-range jump slots per mcarea. */ + static void asm_sparejump_setup(ASMState *as) + { +- MCode *mxp = as->mcbot; +- /* Assumes sizeof(MCLink) == 8. 
*/ +- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { +- lua_assert(MIPSI_NOP == 0); +- memset(mxp+2, 0, MIPS_SPAREJUMP*8); +- mxp += MIPS_SPAREJUMP*2; +- lua_assert(mxp < as->mctop); +- lj_mcode_sync(as->mcbot, mxp); +- lj_mcode_commitbot(as->J, mxp); +- as->mcbot = mxp; +- as->mclim = as->mcbot + MCLIM_REDZONE; ++ MCode *mxp = as->mctop; ++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { ++ mxp -= MIPS_SPAREJUMP*2; ++ lj_assertA(MIPSI_NOP == 0, "bad NOP"); ++ memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); ++ as->mctop = mxp; ++ } ++} ++ ++static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) ++{ ++ MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); ++ int slot = MIPS_SPAREJUMP; ++ while (slot--) { ++ mxp -= 2; ++ if (*mxp == tjump) { ++ return mxp; ++ } else if (*mxp == MIPSI_NOP) { ++ *mxp = tjump; ++ return mxp; ++ } + } ++ return NULL; + } + + /* Setup exit stub after the end of each trace. */ +@@ -85,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as) + /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ + *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; + *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); +- lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); ++ lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, ++ "branch target out of range"); + *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; + as->mctop = mxp; + } +@@ -102,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) + as->invmcp = NULL; + as->loopinv = 1; + as->mcp = p+1; ++#if !LJ_TARGET_MIPSR6 + mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ ++#else ++ mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : ++ (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */ ++#endif + target = p; /* Patch target later in asm_loop_fixup. */ + } + emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); +@@ -176,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) + return ra_allock(as, ofs-(int16_t)ofs, allow); + } + } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); ++ return RID_JGL; + } + } + *ofsp = 0; +@@ -191,20 +211,20 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, + if (ra_noreg(ir->r) && canfuse(as, ir)) { + if (ir->o == IR_ADD) { + intptr_t ofs2; +- if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), ++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), + checki16(ofs2))) { + ref = ir->op1; + ofs = (int32_t)ofs2; + } + } else if (ir->o == IR_STRREF) { + intptr_t ofs2 = 65536; +- lua_assert(ofs == 0); ++ lj_assertA(ofs == 0, "bad usage"); + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { +- ofs2 = ofs + get_kval(IR(ir->op2)); ++ ofs2 = ofs + get_kval(as, ir->op2); + ref = ir->op1; + } else if (irref_isk(ir->op1)) { +- ofs2 = ofs + get_kval(IR(ir->op1)); ++ ofs2 = ofs + get_kval(as, ir->op1); + ref = ir->op2; + } + if (!checki16(ofs2)) { +@@ -248,7 +268,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + #if !LJ_SOFTFP + if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && + !(ci->flags & CCI_VARARG)) { +- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ ++ lj_assertA(rset_test(as->freeset, fpr), ++ "reg %d not free", fpr); /* Already evicted. */ + ra_leftov(as, fpr, ref); + fpr += LJ_32 ? 2 : 1; + gpr += (LJ_32 && irt_isnum(ir->t)) ? 
2 : 1; +@@ -260,7 +281,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + #endif + if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; + if (gpr <= REGARG_LASTGPR) { +- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Already evicted. */ + #if !LJ_SOFTFP + if (irt_isfp(ir->t)) { + RegSet of = as->freeset; +@@ -273,7 +295,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + #if LJ_32 + emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); + emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); +- lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ ++ lj_assertA(rset_test(as->freeset, gpr+1), ++ "reg %d not free", gpr+1); /* Already evicted. */ + gpr += 2; + #else + emit_tg(as, MIPSI_DMFC1, gpr, r); +@@ -291,7 +314,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + { + ra_leftov(as, gpr, ref); + gpr++; +-#if LJ_64 ++#if LJ_64 && !LJ_SOFTFP + fpr++; + #endif + } +@@ -302,7 +325,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + emit_spstore(as, ir, r, ofs); + ofs += irt_isnum(ir->t) ? 8 : 4; + #else +- emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); ++ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0)); + ofs += 8; + #endif + } +@@ -313,7 +336,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + #endif + if (gpr <= REGARG_LASTGPR) { + gpr++; +-#if LJ_64 ++#if LJ_64 && !LJ_SOFTFP + fpr++; + #endif + } else { +@@ -328,22 +351,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + { + RegSet drop = RSET_SCRATCH; +-#if LJ_32 + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); +-#endif + #if !LJ_SOFTFP + if ((ci->flags & CCI_NOFPRCLOBBER)) + drop &= ~RSET_FPR; + #endif + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ +-#if LJ_32 + if (hiop && ra_hasreg((ir+1)->r)) + rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ +-#endif + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { +- lua_assert(!irt_ispri(ir->t)); ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (!LJ_SOFTFP && irt_isfp(ir->t)) { + if ((ci->flags & CCI_CASTU64)) { + int32_t ofs = sps_scale(ir->s); +@@ -369,10 +388,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + } else { + ra_destreg(as, ir, RID_FPRET); + } +-#if LJ_32 + } else if (hiop) { + ra_destpair(as, ir); +-#endif + } else { + ra_destreg(as, ir, RID_RET); + } +@@ -391,7 +408,7 @@ static void asm_callx(ASMState *as, IRIns *ir) + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { /* Call to constant address. */ +- ci.func = (ASMFunction)(void *)get_kval(irf); ++ ci.func = (ASMFunction)(void *)get_kval(as, func); + } else { /* Need specific register for indirect calls. */ + Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); + MCode *p = as->mcp; +@@ -411,7 +428,11 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) + { + /* The modified regs must match with the *.dasc implementation. 
*/ + RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| +- RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); ++ RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) ++#if LJ_TARGET_MIPSR6 ++ |RID2RSET(RID_F21) ++#endif ++ ; + if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); + ra_evictset(as, drop); + ra_destreg(as, ir, RID_FPRET); +@@ -438,6 +459,27 @@ static void asm_retf(ASMState *as, IRIns *ir) + emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); + } + ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ if ((as->flags & JIT_F_MIPSXXR2)) { ++ emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp, ++ lj_fls(SBUF_MASK_FLAG), 0); ++ } else { ++ emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp); ++ emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG); ++ } ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ + /* -- Type conversions ---------------------------------------------------- */ + + #if !LJ_SOFTFP +@@ -445,8 +487,13 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) + { + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); ++#if !LJ_TARGET_MIPSR6 + asm_guard(as, MIPSI_BC1F, 0, 0); + emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); ++#else ++ asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31)); ++ emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left); ++#endif + emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); + emit_tg(as, MIPSI_MFC1, dest, tmp); + emit_fg(as, MIPSI_CVT_W_D, tmp, left); +@@ -462,12 +509,36 @@ static void asm_tobit(ASMState *as, IRIns *ir) + emit_tg(as, MIPSI_MFC1, dest, tmp); + emit_fgh(as, MIPSI_ADD_D, tmp, left, right); + } ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg r) ++{ ++ /* The modified regs must match with the *.dasc implementation. */ ++ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| ++ RID2RSET(RID_R1)|RID2RSET(RID_R12); ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ ra_evictset(as, drop); ++ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ ++ ra_destreg(as, ir, RID_RET); ++ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); ++ if (r == RID_NONE) ++ ra_leftov(as, REGARG_FIRSTGPR, ir->op1); ++ else if (r != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, r); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ emit_dta(as, MIPSI_SLL, dest, dest, 0); ++ asm_callid(as, ir, IRCALL_lj_vm_tobit); ++} + #endif + + static void asm_conv(ASMState *as, IRIns *ir) + { + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + #endif + #if LJ_64 +@@ -475,15 +546,20 @@ static void asm_conv(ASMState *as, IRIns *ir) + #endif + IRRef lref = ir->op1; + #if LJ_32 +- lua_assert(!(irt_isint64(ir->t) || +- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ ++ /* 64 bit integer conversions are handled by SPLIT. 
*/ ++ lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), ++ "IR %04d has unsplit 64 bit type", ++ (int)(ir - as->ir) - REF_BIAS); + #endif +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP32 + /* FP conversions are handled by SPLIT. */ +- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); ++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), ++ "IR %04d has FP type", ++ (int)(ir - as->ir) - REF_BIAS); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ + #else +- lua_assert(irt_type(ir->t) != st); ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); ++#if !LJ_SOFTFP + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ +@@ -541,7 +617,8 @@ static void asm_conv(ASMState *as, IRIns *ir) + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ +- lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); +@@ -575,8 +652,13 @@ static void asm_conv(ASMState *as, IRIns *ir) + (void *)&as->J->k64[LJ_K64_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ +- emit_branch(as, MIPSI_BC1T, 0, 0, l_end); +- emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); ++#if !LJ_TARGET_MIPSR6 ++ emit_branch(as, MIPSI_BC1T, 0, 0, l_end); ++ emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); ++#else ++ emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); ++ emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp); ++#endif + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_2P63], + rset_exclude(RSET_GPR, dest)); +@@ -587,8 +669,13 @@ static void asm_conv(ASMState *as, IRIns *ir) + (void *)&as->J->k32[LJ_K32_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ +- emit_branch(as, MIPSI_BC1T, 0, 0, l_end); +- emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); ++#if !LJ_TARGET_MIPSR6 ++ emit_branch(as, MIPSI_BC1T, 0, 0, l_end); ++ emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); ++#else ++ emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); ++ emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp); ++#endif + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), + (void *)&as->J->k32[LJ_K32_2P63], + rset_exclude(RSET_GPR, dest)); +@@ -609,12 +696,49 @@ static void asm_conv(ASMState *as, IRIns *ir) + } + } + } else ++#else ++ if (irt_isfp(ir->t)) { ++#if LJ_64 && LJ_HASFFI ++ if (stfp) { /* FP to FP conversion. */ ++ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : ++ IRCALL_softfp_d2f); ++ } else { /* Integer to FP conversion. */ ++ IRCallID cid = ((IRT_IS64 >> st) & 1) ? ++ (irt_isnum(ir->t) ? ++ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : ++ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : ++ (irt_isnum(ir->t) ? ++ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : ++ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); ++ asm_callid(as, ir, cid); ++ } ++#else ++ asm_callid(as, ir, IRCALL_softfp_i2d); ++#endif ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); ++ asm_tointg(as, ir, RID_NONE); ++ } else { ++ IRCallID cid = irt_is64(ir->t) ? 
++ ((st == IRT_NUM) ? ++ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : ++ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : ++ ((st == IRT_NUM) ? ++ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : ++ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); ++ asm_callid(as, ir, cid); ++ } ++ } else ++#endif + #endif + { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + if ((ir->op2 & IRCONV_SEXT)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { + emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); +@@ -645,7 +769,7 @@ static void asm_conv(ASMState *as, IRIns *ir) + } + } + } else { +- if (st64) { ++ if (st64 && !(ir->op2 & IRCONV_NONE)) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ +@@ -666,7 +790,7 @@ static void asm_strto(ASMState *as, IRIns *ir) + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + int32_t ofs = 0; +-#if LJ_SOFTFP ++#if LJ_SOFTFP32 + ra_evictset(as, RSET_SCRATCH); + if (ra_used(ir)) { + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && +@@ -711,7 +835,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) + { + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); +- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); +@@ -732,34 +857,63 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) + #endif + + /* Get pointer to TValue. */ +-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) + { +- IRIns *ir = IR(ref); +- if (irt_isnum(ir->t)) { +- if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ +- ra_allockreg(as, igcptr(ir_knum(ir)), dest); +- else /* Otherwise force a spill and use the spill slot. */ +- emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); +- } else { +- /* Otherwise use g->tmptv to hold the TValue. */ ++ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if ((mode & IRTMPREF_OUT1)) { ++#if LJ_SOFTFP ++ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); ++#if LJ_64 ++ emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64); ++#else ++ lj_assertA(irref_isk(ref), "unsplit FP op"); ++ emit_setgl(as, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), ++ tmptv.u32.lo); ++ emit_setgl(as, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), ++ tmptv.u32.hi); ++#endif ++#else ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); ++ emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs); ++#endif ++ } else if (irref_isk(ref)) { ++ /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, igcptr(ir_knum(ir)), dest); ++ } else { ++#if LJ_SOFTFP32 ++ lj_assertA(0, "unsplit FP op"); ++#else ++ /* Otherwise force a spill and use the spill slot. 
*/ ++ emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); ++#endif ++ } ++ } else { ++ /* Otherwise use g->tmptv to hold the TValue. */ + #if LJ_32 +- RegSet allow = rset_exclude(RSET_GPR, dest); +- Reg type; +- emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); +- if (!irt_ispri(ir->t)) { +- Reg src = ra_alloc1(as, ref, allow); +- emit_setgl(as, src, tmptv.gcr); +- } +- if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) +- type = ra_alloc1(as, ref+1, allow); +- else +- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); +- emit_setgl(as, type, tmptv.it); ++ Reg type; ++ emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs); ++ if (!irt_ispri(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_GPR); ++ emit_setgl(as, src, tmptv.gcr); ++ } ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) ++ type = ra_alloc1(as, ref+1, RSET_GPR); ++ else ++ type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR); ++ emit_setgl(as, type, tmptv.it); + #else +- asm_tvstore64(as, dest, 0, ref); +- emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, +- (int32_t)(offsetof(global_State, tmptv)-32768)); ++ asm_tvstore64(as, dest, 0, ref); ++ emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs); + #endif ++ } ++ } else { ++ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); + } + } + +@@ -780,8 +934,12 @@ static void asm_aref(ASMState *as, IRIns *ir) + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++#if !LJ_TARGET_MIPSR6 + emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); + emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); ++#else ++ emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base); ++#endif + } + + /* Inlined hash lookup. Specialized for key type and for const keys. +@@ -799,6 +957,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; ++#if LJ_64 ++ Reg cmp64 = RID_NONE; ++#endif + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); +@@ -807,11 +968,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + MCLabel l_end, l_loop, l_next; + + rset_clear(allow, tab); +-#if LJ_32 && LJ_SOFTFP +- if (!isk) { +- key = ra_alloc1(as, refkey, allow); +- rset_clear(allow, key); +- if (irkey[1].o == IR_HIOP) { ++ if (!LJ_SOFTFP && irt_isnum(kt)) { ++ key = ra_alloc1(as, refkey, RSET_FPR); ++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ } else { ++ if (!irt_ispri(kt)) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++#if LJ_32 ++ if (LJ_SOFTFP && irkey[1].o == IR_HIOP) { + if (ra_hasreg((irkey+1)->r)) { + type = tmpnum = (irkey+1)->r; + tmp1 = ra_scratch(as, allow); +@@ -822,25 +988,33 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + } + rset_clear(allow, tmpnum); + } else { +- type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); ++ type = ra_allock(as, (int32_t)irt_toitype(kt), allow); + rset_clear(allow, type); + } +- } +-#else +- if (irt_isnum(kt)) { +- key = ra_alloc1(as, refkey, RSET_FPR); +- tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); +- } else if (!irt_ispri(kt)) { +- key = ra_alloc1(as, refkey, allow); +- rset_clear(allow, key); +-#if LJ_32 +- type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); +- rset_clear(allow, type); + #endif + } +-#endif + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); ++#if LJ_64 ++ if 
(LJ_SOFTFP || !irt_isnum(kt)) { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if (LJ_SOFTFP && irt_isnum(kt)) { ++ cmp64 = key; ++ } else if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; ++ } else { ++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); ++ k = ~((int64_t)~irt_toitype(kt) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ } ++#endif + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); +@@ -861,8 +1035,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + l_end = asm_exitstub_addr(as); + } + if (!LJ_SOFTFP && irt_isnum(kt)) { ++#if !LJ_TARGET_MIPSR6 + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); ++#else ++ emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end); ++ emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key); ++#endif + *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ + emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); + emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); +@@ -883,21 +1062,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); + emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); +- } else if (irt_isaddr(kt)) { +- Reg refk = tmp2; +- if (isk) { +- int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; +- refk = ra_allock(as, k, allow); +- rset_clear(allow, refk); +- } +- emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); +- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + } else { +- Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); +- rset_clear(allow, pri); +- lua_assert(irt_ispri(kt) && !irt_isnil(kt)); +- emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); +- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); ++ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); ++ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + } + *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); + if (!isk && irt_isaddr(kt)) { +@@ -908,7 +1075,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + #endif + + /* Load main position relative to tab->node into dest. */ +- khash = isk ? ir_khash(irkey) : 1; ++ khash = isk ? ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { +@@ -916,7 +1083,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + if (isk) + tmphash = ra_allock(as, khash, allow); + emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); +- lua_assert(sizeof(Node) == 24); ++ lj_assertA(sizeof(Node) == 24, "bad Node size"); + emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); + emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); + emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); +@@ -926,7 +1093,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + if (isk) { + /* Nothing to do. */ + } else if (irt_isstr(kt)) { +- emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); ++ emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); + } else { /* Must match with hash*() in lj_tab.c. 
*/ + emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); + emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); +@@ -961,7 +1128,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); + if (irt_isnum(kt)) { + emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); +- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); ++ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); + #if !LJ_SOFTFP + emit_tg(as, MIPSI_DMFC1, tmp1, key); +@@ -994,7 +1161,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + Reg key = ra_scratch(as, allow); + int64_t k; + #endif +- lua_assert(ofs % sizeof(Node) == 0); ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (ofs > 32736) { + idx = dest; + rset_clear(allow, dest); +@@ -1023,7 +1190,7 @@ nolo: + emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); + #else + if (irt_ispri(irkey->t)) { +- lua_assert(!irt_isnil(irkey->t)); ++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; +@@ -1062,7 +1229,7 @@ static void asm_uref(ASMState *as, IRIns *ir) + static void asm_fref(ASMState *as, IRIns *ir) + { + UNUSED(as); UNUSED(ir); +- lua_assert(!ra_used(ir)); ++ lj_assertA(!ra_used(ir), "unfused FREF"); + } + + static void asm_strref(ASMState *as, IRIns *ir) +@@ -1117,26 +1284,36 @@ static void asm_strref(ASMState *as, IRIns *ir) + + /* -- Loads and stores ---------------------------------------------------- */ + +-static MIPSIns asm_fxloadins(IRIns *ir) ++static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir) + { ++ UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: return MIPSI_LB; + case IRT_U8: return MIPSI_LBU; + case IRT_I16: return MIPSI_LH; + case IRT_U16: return MIPSI_LHU; +- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; ++ case IRT_NUM: ++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); ++ if (!LJ_SOFTFP) return MIPSI_LDC1; ++ /* fallthrough */ + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; ++ /* fallthrough */ + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; + } + } + +-static MIPSIns asm_fxstoreins(IRIns *ir) ++static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir) + { ++ UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return MIPSI_SB; + case IRT_I16: case IRT_U16: return MIPSI_SH; +- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; ++ case IRT_NUM: ++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); ++ if (!LJ_SOFTFP) return MIPSI_SDC1; ++ /* fallthrough */ + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; ++ /* fallthrough */ + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; + } + } +@@ -1144,10 +1321,10 @@ static MIPSIns asm_fxstoreins(IRIns *ir) + static void asm_fload(ASMState *as, IRIns *ir) + { + Reg dest = ra_dest(as, ir, RSET_GPR); +- MIPSIns mi = asm_fxloadins(ir); ++ MIPSIns mi = asm_fxloadins(as, ir); + Reg idx; + int32_t ofs; +- if (ir->op1 == REF_NIL) { ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. 
*/ + idx = RID_JGL; + ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); + } else { +@@ -1161,7 +1338,7 @@ static void asm_fload(ASMState *as, IRIns *ir) + } + ofs = field_ofs[ir->op2]; + } +- lua_assert(!irt_isfp(ir->t)); ++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + emit_tsi(as, mi, dest, idx, ofs); + } + +@@ -1172,8 +1349,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; +- MIPSIns mi = asm_fxstoreins(ir); +- lua_assert(!irt_isfp(ir->t)); ++ MIPSIns mi = asm_fxstoreins(as, ir); ++ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); + emit_tsi(as, mi, src, idx, ofs); + } + } +@@ -1182,8 +1359,9 @@ static void asm_xload(ASMState *as, IRIns *ir) + { + Reg dest = ra_dest(as, ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); +- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); +- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); ++ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), ++ "unaligned XLOAD"); ++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); + } + + static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +@@ -1191,7 +1369,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); +- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, ++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } + } +@@ -1200,7 +1378,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) + + static void asm_ahuvload(ASMState *as, IRIns *ir) + { +- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); ++ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); + Reg dest = RID_NONE, type = RID_TMP, idx; + RegSet allow = RSET_GPR; + int32_t ofs = 0; +@@ -1213,8 +1391,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + } + } + if (ra_used(ir)) { +- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || +- irt_isint(ir->t) || irt_isaddr(ir->t)); ++ lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); + #if LJ_64 +@@ -1225,6 +1404,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + #endif + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + rset_clear(allow, idx); + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); +@@ -1262,10 +1442,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) + int32_t ofs = 0; + if (ir->r == RID_SINK) + return; +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { +- src = ra_alloc1(as, ir->op2, RSET_FPR); ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); +- emit_hsi(as, MIPSI_SDC1, src, idx, ofs); ++ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); + } else { + #if LJ_32 + if (!irt_ispri(ir->t)) { +@@ -1313,45 +1493,64 @@ static void asm_sload(ASMState *as, IRIns *ir) + IRType1 t = ir->t; + #if LJ_32 + int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 
4 : 0); +- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); ++ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; + #else + int32_t ofs = 8*((int32_t)ir->op1-2); + #endif +- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ +- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); +-#if LJ_32 && LJ_SOFTFP +- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); ++#if LJ_SOFTFP32 ++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), ++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } + #else + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { +- dest = ra_scratch(as, RSET_FPR); ++ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else + #endif + if (ra_used(ir)) { +- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || +- irt_isint(ir->t) || irt_isaddr(ir->t)); ++ lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad SLOAD type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +- if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { ++ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { + if (irt_isint(t)) { +- Reg tmp = ra_scratch(as, RSET_FPR); ++ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); ++ if (tmp != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, tmp); ++#else + emit_tg(as, MIPSI_MFC1, dest, tmp); + emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); ++#endif + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + Reg tmp = ra_scratch(as, RSET_GPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); ++ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); ++#else + emit_fg(as, MIPSI_CVT_D_W, dest, dest); + emit_tg(as, MIPSI_MTC1, tmp, dest); ++#endif + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } +@@ -1400,7 +1599,7 @@ dotypecheck: + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); +- if (ra_hasreg(dest)) ++ if (!LJ_SOFTFP && ra_hasreg(dest)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + } else { + asm_guard(as, MIPSI_BNE, RID_TMP, +@@ -1410,7 +1609,7 @@ dotypecheck: + } + emit_tsi(as, MIPSI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { +- if (irt_isnum(t)) ++ if (!LJ_SOFTFP && irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + else + emit_tsi(as, irt_isint(t) ? 
MIPSI_LW : MIPSI_LD, dest, base, +@@ -1431,7 +1630,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet drop = RSET_SCRATCH; +- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) +@@ -1447,7 +1647,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) + int32_t ofs = sizeof(GCcdata); + if (sz == 8) { + ofs += 4; +- lua_assert((ir+1)->o == IR_HIOP); ++ lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); + if (LJ_LE) ir++; + } + for (;;) { +@@ -1458,10 +1658,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) + ofs -= 4; if (LJ_BE) ir++; else ir--; + } + #else +- emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), ++ emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow), + RID_RET, sizeof(GCcdata)); + #endif +- lua_assert(sz == 4 || sz == 8); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ +@@ -1484,8 +1684,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); + } +-#else +-#define asm_cnew(as, ir) ((void)0) + #endif + + /* -- Write barriers ------------------------------------------------------ */ +@@ -1513,7 +1711,7 @@ static void asm_obar(ASMState *as, IRIns *ir) + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ +- lua_assert(IR(ir->op1)->o == IR_UREFC); ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ +@@ -1549,33 +1747,46 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_fg(as, mi, dest, left); + } ++#endif + ++#if !LJ_SOFTFP32 + static void asm_fpmath(ASMState *as, IRIns *ir) + { +- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) +- return; ++#if !LJ_SOFTFP + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); + else if (ir->op2 == IRFPM_SQRT) + asm_fpunary(as, ir, MIPSI_SQRT_D); + else ++#endif + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); + } + #endif + ++#if !LJ_SOFTFP ++#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) ++#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) ++#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) ++#elif LJ_64 /* && LJ_SOFTFP */ ++#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) ++#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) ++#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) ++#endif ++ + static void asm_add(ASMState *as, IRIns *ir) + { + IRType1 t = ir->t; +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (irt_isnum(t)) { +- asm_fparith(as, ir, MIPSI_ADD_D); ++ asm_fpadd(as, ir); + } else + #endif + { ++ /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (irref_isk(ir->op2)) { +- intptr_t k = get_kval(IR(ir->op2)); ++ intptr_t k = get_kval(as, ir->op2); + if (checki16(k)) { + emit_tsi(as, (LJ_64 && irt_is64(t)) ? 
MIPSI_DADDIU : MIPSI_ADDIU, dest, + left, k); +@@ -1590,9 +1801,9 @@ static void asm_add(ASMState *as, IRIns *ir) + + static void asm_sub(ASMState *as, IRIns *ir) + { +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (irt_isnum(ir->t)) { +- asm_fparith(as, ir, MIPSI_SUB_D); ++ asm_fpsub(as, ir); + } else + #endif + { +@@ -1606,9 +1817,9 @@ static void asm_sub(ASMState *as, IRIns *ir) + + static void asm_mul(ASMState *as, IRIns *ir) + { +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + if (irt_isnum(ir->t)) { +- asm_fparith(as, ir, MIPSI_MUL_D); ++ asm_fpmul(as, ir); + } else + #endif + { +@@ -1616,46 +1827,26 @@ static void asm_mul(ASMState *as, IRIns *ir) + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + if (LJ_64 && irt_is64(ir->t)) { ++#if !LJ_TARGET_MIPSR6 + emit_dst(as, MIPSI_MFLO, dest, 0, 0); + emit_dst(as, MIPSI_DMULT, 0, left, right); ++#else ++ emit_dst(as, MIPSI_DMUL, dest, left, right); ++#endif + } else { + emit_dst(as, MIPSI_MUL, dest, left, right); + } + } + } + +-static void asm_mod(ASMState *as, IRIns *ir) ++#if !LJ_SOFTFP32 ++static void asm_fpdiv(ASMState *as, IRIns *ir) + { +-#if LJ_64 && LJ_HASFFI +- if (!irt_isint(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : +- IRCALL_lj_carith_modu64); +- else +-#endif +- asm_callid(as, ir, IRCALL_lj_vm_modi); +-} +- + #if !LJ_SOFTFP +-static void asm_pow(ASMState *as, IRIns *ir) +-{ +-#if LJ_64 && LJ_HASFFI +- if (!irt_isnum(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : +- IRCALL_lj_carith_powu64); +- else +-#endif +- asm_callid(as, ir, IRCALL_lj_vm_powi); +-} +- +-static void asm_div(ASMState *as, IRIns *ir) +-{ +-#if LJ_64 && LJ_HASFFI +- if (!irt_isnum(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : +- IRCALL_lj_carith_divu64); +- else +-#endif + asm_fparith(as, ir, MIPSI_DIV_D); ++#else ++ asm_callid(as, ir, IRCALL_softfp_div); ++#endif + } + #endif + +@@ -1665,6 +1856,13 @@ static void asm_neg(ASMState *as, IRIns *ir) + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, MIPSI_NEG_D); + } else ++#elif LJ_64 /* && LJ_SOFTFP */ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_dst(as, MIPSI_XOR, dest, left, ++ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); ++ } else + #endif + { + Reg dest = ra_dest(as, ir, RSET_GPR); +@@ -1674,14 +1872,22 @@ static void asm_neg(ASMState *as, IRIns *ir) + } + } + ++#if !LJ_SOFTFP + #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) +-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_abs(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); ++} ++#endif + + static void asm_arithov(ASMState *as, IRIns *ir) + { ++ /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. 
*/ + Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); +- lua_assert(!irt_is64(ir->t)); ++ lj_assertA(!irt_is64(ir->t), "bad usage"); + if (irref_isk(ir->op2)) { + int k = IR(ir->op2)->i; + if (ir->o == IR_SUBOV) k = -k; +@@ -1724,9 +1930,14 @@ static void asm_mulov(ASMState *as, IRIns *ir) + right), dest)); + asm_guard(as, MIPSI_BNE, RID_TMP, tmp); + emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); ++#if !LJ_TARGET_MIPSR6 + emit_dst(as, MIPSI_MFHI, tmp, 0, 0); + emit_dst(as, MIPSI_MFLO, dest, 0, 0); + emit_dst(as, MIPSI_MULT, 0, left, right); ++#else ++ emit_dst(as, MIPSI_MUL, dest, left, right); ++ emit_dst(as, MIPSI_MUH, tmp, left, right); ++#endif + } + + #if LJ_32 && LJ_HASFFI +@@ -1863,7 +2074,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (irref_isk(ir->op2)) { +- intptr_t k = get_kval(IR(ir->op2)); ++ intptr_t k = get_kval(as, ir->op2); + if (checku16(k)) { + emit_tsi(as, mik, dest, left, k); + return; +@@ -1896,7 +2107,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) + #define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) + #define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) + #define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) +-#define asm_brol(as, ir) lua_assert(0) ++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") + + static void asm_bror(ASMState *as, IRIns *ir) + { +@@ -1919,15 +2130,21 @@ static void asm_bror(ASMState *as, IRIns *ir) + } + } + +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP + static void asm_sfpmin_max(ASMState *as, IRIns *ir) + { + CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; ++#if LJ_64 ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++#else + IRRef args[4]; + args[0^LJ_BE] = ir->op1; + args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; + args[3^LJ_BE] = (ir+1)->op2; ++#endif + asm_setupresult(as, ir, &ci); + emit_call(as, (void *)ci.func, 0); + ci.func = NULL; +@@ -1937,29 +2154,52 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir) + + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + { +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpmin_max(as, ir); ++#else + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; ++#if !LJ_TARGET_MIPSR6 + if (dest == left) { +- emit_fg(as, MIPSI_MOVT_D, dest, right); ++ emit_fg(as, MIPSI_MOVF_D, dest, right); + } else { +- emit_fg(as, MIPSI_MOVF_D, dest, left); ++ emit_fg(as, MIPSI_MOVT_D, dest, left); + if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); + } +- emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); ++ emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right); ++#else ++ emit_fgh(as, ismax ? 
MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); ++#endif ++#endif + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; +- if (dest == left) { +- emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); ++ if (left == right) { ++ if (dest != left) emit_move(as, dest, left); + } else { +- emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); +- if (dest != right) emit_move(as, dest, right); ++#if !LJ_TARGET_MIPSR6 ++ if (dest == left) { ++ emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); ++ } else { ++ emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); ++ if (dest != right) emit_move(as, dest, right); ++ } ++#else ++ emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); ++ if (dest != right) { ++ emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP); ++ emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP); ++ } else { ++ emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP); ++ emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP); ++ } ++#endif ++ emit_dst(as, MIPSI_SLT, RID_TMP, ++ ismax ? left : right, ismax ? right : left); + } +- emit_dst(as, MIPSI_SLT, RID_TMP, +- ismax ? left : right, ismax ? right : left); + } + } + +@@ -1968,18 +2208,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + + /* -- Comparisons --------------------------------------------------------- */ + +-#if LJ_32 && LJ_SOFTFP ++#if LJ_SOFTFP + /* SFP comparisons. */ + static void asm_sfpcomp(ASMState *as, IRIns *ir) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; ++#if LJ_64 ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++#else + IRRef args[4]; + args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; + args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; ++#endif + +- for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { ++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) + rset_clear(drop, r); +@@ -2033,21 +2279,33 @@ static void asm_comp(ASMState *as, IRIns *ir) + { + /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ + IROp op = ir->o; +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#else ++#if !LJ_TARGET_MIPSR6 + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); + emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); ++#else ++ Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); ++ asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); ++ emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); ++#endif ++#endif + } else { + Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); + if (op == IR_ABC) op = IR_UGT; +- if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { ++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { + MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : + ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); + asm_guard(as, mi, left, 0); + } else { + if (irref_isk(ir->op2)) { +- intptr_t k = get_kval(IR(ir->op2)); ++ intptr_t k = get_kval(as, ir->op2); + if ((op&2)) k++; + if (checki16(k)) { + asm_guard(as, (op&1) ? 
MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); +@@ -2069,9 +2327,17 @@ static void asm_equal(ASMState *as, IRIns *ir) + Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? + RSET_FPR : RSET_GPR); + right = (left >> 8); left &= 255; +- if (!LJ_SOFTFP && irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#elif !LJ_TARGET_MIPSR6 + asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); + emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); ++#else ++ Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); ++ asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); ++ emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right); ++#endif + } else { + asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); + } +@@ -2114,15 +2380,15 @@ static void asm_comp64eq(ASMState *as, IRIns *ir) + } + #endif + +-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++/* -- Split register ops -------------------------------------------------- */ + +-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */ + static void asm_hiop(ASMState *as, IRIns *ir) + { +-#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) + if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ + as->curins--; /* Always skip the CONV. */ + #if LJ_HASFFI && !LJ_SOFTFP +@@ -2169,37 +2435,33 @@ static void asm_hiop(ASMState *as, IRIns *ir) + } + return; + } ++#endif + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { +-#if LJ_HASFFI ++#if LJ_32 && LJ_HASFFI + case IR_ADD: as->curins--; asm_add64(as, ir); break; + case IR_SUB: as->curins--; asm_sub64(as, ir); break; + case IR_NEG: as->curins--; asm_neg64(as, ir); break; ++ case IR_CNEWI: ++ /* Nothing to do here. Handled by lo op itself. */ ++ break; + #endif +-#if LJ_SOFTFP ++#if LJ_32 && LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_STRTO: + if (!uselo) + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ + break; ++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: ++ /* Nothing to do here. Handled by lo op itself. */ ++ break; + #endif +- case IR_CALLN: +- case IR_CALLS: +- case IR_CALLXS: ++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; +-#if LJ_SOFTFP +- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: +-#endif +- case IR_CNEWI: +- /* Nothing to do here. Handled by lo op itself. */ +- break; +- default: lua_assert(0); break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +-#else +- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. 
*/ +-#endif + } + + /* -- Profiling ----------------------------------------------------------- */ +@@ -2264,15 +2526,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { +-#if LJ_SOFTFP ++#if LJ_SOFTFP32 + Reg tmp; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); +- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ ++ /* LJ_SOFTFP: must be a number constant. */ ++ lj_assertA(irref_isk(ref), "unsplit FP op"); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); + emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); + emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); ++#elif LJ_SOFTFP /* && LJ_64 */ ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); ++ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); + #else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); +@@ -2281,7 +2547,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + #if LJ_32 + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + Reg type; +- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "restore of IR type %d", irt_type(ir->t)); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); +@@ -2294,6 +2561,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); + #endif ++ } else if ((sn & SNAP_KEYINDEX)) { ++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); + } else { + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + } +@@ -2304,11 +2573,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } + checkmclim(as); + } +- lua_assert(map + nent == flinks); ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); + } + + /* -- GC handling --------------------------------------------------------- */ + ++/* Marker to prevent patching the GC check exit. */ ++#define MIPS_NOPATCH_GC_CHECK MIPSI_OR ++ + /* Check GC threshold and do one or more GC steps. */ + static void asm_gc_check(ASMState *as) + { +@@ -2324,6 +2596,7 @@ static void asm_gc_check(ASMState *as) + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); ++ l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + tmp = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp, as->gcsteps); +@@ -2352,6 +2625,12 @@ static void asm_loop_fixup(ASMState *as) + } + } + ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ if (as->loopinv) as->mctop--; ++} ++ + /* -- Head of trace ------------------------------------------------------- */ + + /* Coalesce BASE register for a root trace. 
*/ +@@ -2359,7 +2638,6 @@ static void asm_head_root_base(ASMState *as) + { + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; +- if (as->loopinv) as->mctop--; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) +@@ -2374,7 +2652,6 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) + { + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; +- if (as->loopinv) as->mctop--; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) +@@ -2466,32 +2743,39 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); + for (p++; p < pe; p++) { + if (*p == exitload) { /* Look for load of exit number. */ +- if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ ++ /* Look for exitstub branch. Yes, this covers all used branch variants. */ ++ if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && ++ ((p[-1] & 0xf0000000u) == MIPSI_BEQ || ++ (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || ++#if !LJ_TARGET_MIPSR6 ++ (p[-1] & 0xffe00000u) == MIPSI_BC1F ++#else ++ (p[-1] & 0xff600000u) == MIPSI_BC1EQZ ++#endif ++ ) && p[-2] != MIPS_NOPATCH_GC_CHECK) { + ptrdiff_t delta = target - p; + if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ + patchbranch: + p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); + *p = MIPSI_NOP; /* Replace the load of the exit number. */ +- cstop = p; ++ cstop = p+1; + if (!cstart) cstart = p-1; + } else { /* Branch out of range. Use spare jump slot in mcarea. */ +- int i; +- for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { +- if (mcarea[i] == tjump) { +- delta = mcarea+i - p; +- goto patchbranch; +- } else if (mcarea[i] == MIPSI_NOP) { +- mcarea[i] = tjump; +- cstart = mcarea+i; +- delta = mcarea+i - p; ++ MCode *mcjump = asm_sparejump_use(mcarea, tjump); ++ if (mcjump) { ++ lj_mcode_sync(mcjump, mcjump+1); ++ delta = mcjump - p; ++ if (((delta + 0x8000) >> 16) == 0) { + goto patchbranch; ++ } else { ++ lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); + } + } + /* Ignore jump slot overflow. Child trace is simply not attached. */ + } + } else if (p+1 == pe) { + /* Patch NOP after code for inverted loop branch. Use of J is ok. */ +- lua_assert(p[1] == MIPSI_NOP); ++ lj_assertJ(p[1] == MIPSI_NOP, "expected NOP"); + p[1] = tjump; + *p = MIPSI_NOP; /* Replace the load of the exit number. */ + cstop = p+2; +diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h +index 6daa861b..ac5d88ce 100644 +--- a/src/lj_asm_ppc.h ++++ b/src/lj_asm_ppc.h +@@ -1,6 +1,6 @@ + /* + ** PPC IR assembler (SSA IR -> machine code). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + /* -- Register allocator extensions --------------------------------------- */ +@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) + return ra_allock(as, ofs-(int16_t)ofs, allow); + } + } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); ++ return RID_JGL; + } + } + *ofsp = 0; +@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, + return; + } + } else if (ir->o == IR_STRREF) { +- lua_assert(ofs == 0); ++ lj_assertA(ofs == 0, "bad usage"); + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { + ofs += IR(ir->op2)->i; +@@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, + emit_tab(as, pi, rt, left, right); + } + ++#if !LJ_SOFTFP + /* Fuse to multiply-add/sub instruction. */ + static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) + { +@@ -245,6 +249,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) + } + return 0; + } ++#endif + + /* -- Calls --------------------------------------------------------------- */ + +@@ -253,16 +258,21 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + { + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 8; +- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; ++ Reg gpr = REGARG_FIRSTGPR; ++#if !LJ_SOFTFP ++ Reg fpr = REGARG_FIRSTFPR; ++#endif + if ((void *)ci->func) + emit_call(as, (void *)ci->func); + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + if (ref) { + IRIns *ir = IR(ref); ++#if !LJ_SOFTFP + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { +- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ ++ lj_assertA(rset_test(as->freeset, fpr), ++ "reg %d not free", fpr); /* Already evicted. */ + ra_leftov(as, fpr, ref); + fpr++; + } else { +@@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + emit_spstore(as, ir, r, ofs); + ofs += irt_isnum(ir->t) ? 8 : 4; + } +- } else { ++ } else ++#endif ++ { + if (gpr <= REGARG_LASTGPR) { +- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Already evicted. */ + ra_leftov(as, gpr, ref); + gpr++; + } else { +@@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + } + checkmclim(as); + } ++#if !LJ_SOFTFP + if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ + emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); ++#endif + } + + /* Setup result reg/sp for call. Evict scratch regs. */ +@@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + { + RegSet drop = RSET_SCRATCH; + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); ++#if !LJ_SOFTFP + if ((ci->flags & CCI_NOFPRCLOBBER)) + drop &= ~RSET_FPR; ++#endif + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + if (hiop && ra_hasreg((ir+1)->r)) + rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { +- lua_assert(!irt_ispri(ir->t)); +- if (irt_isfp(ir->t)) { ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); ++ if (!LJ_SOFTFP && irt_isfp(ir->t)) { + if ((ci->flags & CCI_CASTU64)) { + /* Use spill slot or temp slots. */ + int32_t ofs = ir->s ? 
sps_scale(ir->s) : SPOFS_TMP; +@@ -323,10 +340,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + } else { + ra_destreg(as, ir, RID_FPRET); + } +-#if LJ_32 + } else if (hiop) { + ra_destpair(as, ir); +-#endif + } else { + ra_destreg(as, ir, RID_RET); + } +@@ -375,8 +390,24 @@ static void asm_retf(ASMState *as, IRIns *ir) + emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); + } + ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31); ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ + /* -- Type conversions ---------------------------------------------------- */ + ++#if !LJ_SOFTFP + static void asm_tointg(ASMState *as, IRIns *ir, Reg left) + { + RegSet allow = RSET_FPR; +@@ -409,15 +440,27 @@ static void asm_tobit(ASMState *as, IRIns *ir) + emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); + emit_fab(as, PPCI_FADD, tmp, left, right); + } ++#endif + + static void asm_conv(ASMState *as, IRIns *ir) + { + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++#if !LJ_SOFTFP + int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++#endif + IRRef lref = ir->op1; +- lua_assert(irt_type(ir->t) != st); +- lua_assert(!(irt_isint64(ir->t) || +- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ ++ /* 64 bit integer conversions are handled by SPLIT. */ ++ lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), ++ "IR %04d has unsplit 64 bit type", ++ (int)(ir - as->ir) - REF_BIAS); ++#if LJ_SOFTFP ++ /* FP conversions are handled by SPLIT. */ ++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), ++ "IR %04d has FP type", ++ (int)(ir - as->ir) - REF_BIAS); ++ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ ++#else ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ +@@ -446,7 +489,8 @@ static void asm_conv(ASMState *as, IRIns *ir) + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ +- lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); +@@ -476,11 +520,13 @@ static void asm_conv(ASMState *as, IRIns *ir) + emit_fb(as, PPCI_FCTIWZ, tmp, left); + } + } +- } else { ++ } else ++#endif ++ { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + if ((ir->op2 & IRCONV_SEXT)) + emit_as(as, st == IRT_I8 ? 
PPCI_EXTSB : PPCI_EXTSH, dest, left); + else +@@ -496,42 +542,95 @@ static void asm_strto(ASMState *as, IRIns *ir) + { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; +- int32_t ofs; ++ int32_t ofs = SPOFS_TMP; ++#if LJ_SOFTFP ++ ra_evictset(as, RSET_SCRATCH); ++ if (ra_used(ir)) { ++ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && ++ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { ++ int i; ++ for (i = 0; i < 2; i++) { ++ Reg r = (ir+i)->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ ra_modified(as, r); ++ emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); ++ } ++ } ++ ofs = sps_scale(ir->s & ~1); ++ } else { ++ Reg rhi = ra_dest(as, ir+1, RSET_GPR); ++ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); ++ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); ++ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); ++ } ++ } ++#else + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ + ra_evictset(as, drop); ++ if (ir->s) ofs = sps_scale(ir->s); ++#endif + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ +- ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; + emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); + } + + /* -- Memory references --------------------------------------------------- */ + + /* Get pointer to TValue. */ +-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) + { +- IRIns *ir = IR(ref); +- if (irt_isnum(ir->t)) { +- if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ +- ra_allockreg(as, i32ptr(ir_knum(ir)), dest); +- else /* Otherwise force a spill and use the spill slot. */ +- emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); +- } else { +- /* Otherwise use g->tmptv to hold the TValue. */ +- RegSet allow = rset_exclude(RSET_GPR, dest); +- Reg type; +- emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768); +- if (!irt_ispri(ir->t)) { +- Reg src = ra_alloc1(as, ref, allow); +- emit_setgl(as, src, tmptv.gcr); ++ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if ((mode & IRTMPREF_OUT1)) { ++#if LJ_SOFTFP ++ lj_assertA(irref_isk(ref), "unsplit FP op"); ++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); ++ emit_setgl(as, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), ++ tmptv.u32.lo); ++ emit_setgl(as, ++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), ++ tmptv.u32.hi); ++#else ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); ++ emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs); ++#endif ++ } else if (irref_isk(ref)) { ++ /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, i32ptr(ir_knum(ir)), dest); ++ } else { ++#if LJ_SOFTFP ++ lj_assertA(0, "unsplit FP op"); ++#else ++ /* Otherwise force a spill and use the spill slot. */ ++ emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); ++#endif ++ } ++ } else { ++ /* Otherwise use g->tmptv to hold the TValue. 
*/ ++ Reg type; ++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); ++ if (!irt_ispri(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_GPR); ++ emit_setgl(as, src, tmptv.gcr); ++ } ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) ++ type = ra_alloc1(as, ref+1, RSET_GPR); ++ else ++ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); ++ emit_setgl(as, type, tmptv.it); + } +- type = ra_allock(as, irt_toitype(ir->t), allow); +- emit_setgl(as, type, tmptv.it); ++ } else { ++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); + } + } + +@@ -574,11 +673,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + Reg tisnum = RID_NONE, tmpnum = RID_NONE; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); + IRType1 kt = irkey->t; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + + rset_clear(allow, tab); ++#if LJ_SOFTFP ++ if (!isk) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ if (irkey[1].o == IR_HIOP) { ++ if (ra_hasreg((irkey+1)->r)) { ++ tmpnum = (irkey+1)->r; ++ ra_noweak(as, tmpnum); ++ } else { ++ tmpnum = ra_allocref(as, refkey+1, allow); ++ } ++ rset_clear(allow, tmpnum); ++ } ++ } ++#else + if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); +@@ -588,6 +703,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } ++#endif + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); + +@@ -610,7 +726,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + asm_guardcc(as, CC_EQ); + else + emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); +- if (irt_isnum(kt)) { ++ if (!LJ_SOFTFP && irt_isnum(kt)) { + emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); + emit_condbranch(as, PPCI_BC, CC_GE, l_next); + emit_ab(as, PPCI_CMPLW, tmp1, tisnum); +@@ -620,7 +736,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_ab(as, PPCI_CMPW, tmp2, key); + emit_condbranch(as, PPCI_BC, CC_NE, l_next); + } +- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); ++ if (LJ_SOFTFP && ra_hasreg(tmpnum)) ++ emit_ab(as, PPCI_CMPW, tmp1, tmpnum); ++ else ++ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); + if (!irt_ispri(kt)) + emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); + } +@@ -629,35 +748,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + (((char *)as->mcp-(char *)l_loop) & 0xffffu); + + /* Load main position relative to tab->node into dest. */ +- khash = irref_isk(refkey) ? ir_khash(irkey) : 1; ++ khash = isk ? ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { + Reg tmphash = tmp1; +- if (irref_isk(refkey)) ++ if (isk) + tmphash = ra_allock(as, khash, allow); + emit_tab(as, PPCI_ADD, dest, dest, tmp1); + emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); + emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); + emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); +- if (irref_isk(refkey)) { ++ if (isk) { + /* Nothing to do. */ + } else if (irt_isstr(kt)) { +- emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); ++ emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid)); + } else { /* Must match with hash*() in lj_tab.c. 
*/ + emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); + emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); + emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); + emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); + emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); +- if (irt_isnum(kt)) { ++ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { ++#if LJ_SOFTFP ++ emit_asb(as, PPCI_XOR, tmp2, key, tmp1); ++ emit_rotlwi(as, dest, tmp1, HASH_ROT1); ++ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); ++#else + int32_t ofs = ra_spill(as, irkey); + emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); + emit_rotlwi(as, dest, tmp1, HASH_ROT1); + emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); + emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); + emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); ++#endif + } else { + emit_asb(as, PPCI_XOR, tmp2, key, tmp1); + emit_rotlwi(as, dest, tmp1, HASH_ROT1); +@@ -678,7 +803,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + Reg key = RID_NONE, type = RID_TMP, idx = node; + RegSet allow = rset_exclude(RSET_GPR, node); +- lua_assert(ofs % sizeof(Node) == 0); ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (ofs > 32736) { + idx = dest; + rset_clear(allow, dest); +@@ -737,7 +862,7 @@ static void asm_uref(ASMState *as, IRIns *ir) + static void asm_fref(ASMState *as, IRIns *ir) + { + UNUSED(as); UNUSED(ir); +- lua_assert(!ra_used(ir)); ++ lj_assertA(!ra_used(ir), "unfused FREF"); + } + + static void asm_strref(ASMState *as, IRIns *ir) +@@ -777,26 +902,28 @@ static void asm_strref(ASMState *as, IRIns *ir) + + /* -- Loads and stores ---------------------------------------------------- */ + +-static PPCIns asm_fxloadins(IRIns *ir) ++static PPCIns asm_fxloadins(ASMState *as, IRIns *ir) + { ++ UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ + case IRT_U8: return PPCI_LBZ; + case IRT_I16: return PPCI_LHA; + case IRT_U16: return PPCI_LHZ; +- case IRT_NUM: return PPCI_LFD; +- case IRT_FLOAT: return PPCI_LFS; ++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; + default: return PPCI_LWZ; + } + } + +-static PPCIns asm_fxstoreins(IRIns *ir) ++static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir) + { ++ UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return PPCI_STB; + case IRT_I16: case IRT_U16: return PPCI_STH; +- case IRT_NUM: return PPCI_STFD; +- case IRT_FLOAT: return PPCI_STFS; ++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD; ++ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; + default: return PPCI_STW; + } + } +@@ -804,12 +931,12 @@ static PPCIns asm_fxstoreins(IRIns *ir) + static void asm_fload(ASMState *as, IRIns *ir) + { + Reg dest = ra_dest(as, ir, RSET_GPR); +- PPCIns pi = asm_fxloadins(ir); ++ PPCIns pi = asm_fxloadins(as, ir); + Reg idx; + int32_t ofs; +- if (ir->op1 == REF_NIL) { ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. 
*/ + idx = RID_JGL; +- ofs = (ir->op2 << 2) - 32768; ++ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { +@@ -821,7 +948,7 @@ static void asm_fload(ASMState *as, IRIns *ir) + } + ofs = field_ofs[ir->op2]; + } +- lua_assert(!irt_isi8(ir->t)); ++ lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8"); + emit_tai(as, pi, dest, idx, ofs); + } + +@@ -832,18 +959,19 @@ static void asm_fstore(ASMState *as, IRIns *ir) + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; +- PPCIns pi = asm_fxstoreins(ir); ++ PPCIns pi = asm_fxstoreins(as, ir); + emit_tai(as, pi, src, idx, ofs); + } + } + + static void asm_xload(ASMState *as, IRIns *ir) + { +- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); +- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); ++ Reg dest = ra_dest(as, ir, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + if (irt_isi8(ir->t)) + emit_as(as, PPCI_EXTSB, dest, dest); +- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); ++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); + } + + static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +@@ -857,8 +985,9 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) + Reg src = ra_alloc1(as, irb->op1, RSET_GPR); + asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); + } else { +- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); +- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, ++ Reg src = ra_alloc1(as, ir->op2, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } + } +@@ -871,24 +1000,39 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; + RegSet allow = RSET_GPR; + int32_t ofs = AHUREF_LSX; ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { ++ t.irt = IRT_NUM; ++ if (ra_used(ir+1)) { ++ type = ra_dest(as, ir+1, allow); ++ rset_clear(allow, type); ++ } ++ ofs = 0; ++ } + if (ra_used(ir)) { +- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); +- if (!irt_isnum(t)) ofs = 0; +- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); ++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad load type %d", irt_type(ir->t)); ++ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ if (ir->o == IR_VLOAD) { ++ ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 : ++ ir->op2 ? 8 * ir->op2 : AHUREF_LSX; ++ } + if (irt_isnum(t)) { + Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); + asm_guardcc(as, CC_GE); + emit_ab(as, PPCI_CMPLW, type, tisnum); + if (ra_hasreg(dest)) { +- if (ofs == AHUREF_LSX) { ++ if (!LJ_SOFTFP && ofs == AHUREF_LSX) { + tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, + (idx&255)), (idx>>8))); + emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); + } else { +- emit_fai(as, PPCI_LFD, dest, idx, ofs); ++ emit_fai(as, LJ_SOFTFP ? 
PPCI_LWZ : PPCI_LFD, dest, idx, ++ ofs+4*LJ_SOFTFP); + } + } + } else { +@@ -911,7 +1055,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) + int32_t ofs = AHUREF_LSX; + if (ir->r == RID_SINK) + return; +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + } else { + if (!irt_ispri(ir->t)) { +@@ -919,11 +1063,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) + rset_clear(allow, src); + ofs = 0; + } +- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); ++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) ++ type = ra_alloc1(as, (ir+1)->op2, allow); ++ else ++ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + if (ofs == AHUREF_LSX) { + emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); + emit_slwi(as, RID_TMP, (idx>>8), 3); +@@ -948,21 +1095,39 @@ static void asm_sload(ASMState *as, IRIns *ir) + IRType1 t = ir->t; + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; +- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ +- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); +- lua_assert(LJ_DUALNUM || +- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); ++ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); ++ if (hiop) ++ t.irt = IRT_NUM; ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); ++ lj_assertA(LJ_DUALNUM || ++ !irt_isint(t) || ++ (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), ++ "bad SLOAD type"); ++#if LJ_SOFTFP ++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), ++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ ++ if (hiop && ra_used(ir+1)) { ++ type = ra_dest(as, ir+1, allow); ++ rset_clear(allow, type); ++ } ++#else + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ +- } else if (ra_used(ir)) { +- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); +- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); ++ } else ++#endif ++ if (ra_used(ir)) { ++ lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), ++ "bad SLOAD type %d", irt_type(ir->t)); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +- if ((ir->op2 & IRSLOAD_CONVERT)) { ++ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { + if (irt_isint(t)) { + emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); + dest = ra_scratch(as, RSET_FPR); +@@ -994,10 +1159,13 @@ dotypecheck: + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); + asm_guardcc(as, CC_GE); +- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); ++#if !LJ_SOFTFP + type = RID_TMP; ++#endif ++ emit_ab(as, PPCI_CMPLW, type, tisnum); + } +- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); ++ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? 
PPCI_LWZ : PPCI_LFD, dest, ++ base, ofs-(LJ_SOFTFP?0:4)); + } else { + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + asm_guardcc(as, CC_NE); +@@ -1021,7 +1189,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet drop = RSET_SCRATCH; +- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) +@@ -1034,10 +1203,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) + if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + int32_t ofs = sizeof(GCcdata); +- lua_assert(sz == 4 || sz == 8); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + if (sz == 8) { + ofs += 4; +- lua_assert((ir+1)->o == IR_HIOP); ++ lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); + } + for (;;) { + Reg r = ra_alloc1(as, ir->op2, allow); +@@ -1068,8 +1237,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); + } +-#else +-#define asm_cnew(as, ir) ((void)0) + #endif + + /* -- Write barriers ------------------------------------------------------ */ +@@ -1083,7 +1250,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) + emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_setgl(as, tab, gc.grayagain); +- lua_assert(LJ_GC_BLACK == 0x04); ++ lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK"); + emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ + emit_getgl(as, link, gc.grayagain); + emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); +@@ -1098,7 +1265,7 @@ static void asm_obar(ASMState *as, IRIns *ir) + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). 
*/ +- lua_assert(IR(ir->op1)->o == IR_UREFC); ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ +@@ -1119,6 +1286,7 @@ static void asm_obar(ASMState *as, IRIns *ir) + + /* -- Arithmetic and logic operations ------------------------------------- */ + ++#if !LJ_SOFTFP + static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) + { + Reg dest = ra_dest(as, ir, RSET_FPR); +@@ -1139,20 +1307,22 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) + + static void asm_fpmath(ASMState *as, IRIns *ir) + { +- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) +- return; + if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) + asm_fpunary(as, ir, PPCI_FSQRT); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); + } ++#endif + + static void asm_add(ASMState *as, IRIns *ir) + { ++#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) + asm_fparith(as, ir, PPCI_FADD); +- } else { ++ } else ++#endif ++ { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + PPCIns pi; +@@ -1191,10 +1361,13 @@ static void asm_add(ASMState *as, IRIns *ir) + + static void asm_sub(ASMState *as, IRIns *ir) + { ++#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) + asm_fparith(as, ir, PPCI_FSUB); +- } else { ++ } else ++#endif ++ { + PPCIns pi = PPCI_SUBF; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left, right; +@@ -1220,9 +1393,12 @@ static void asm_sub(ASMState *as, IRIns *ir) + + static void asm_mul(ASMState *as, IRIns *ir) + { ++#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, PPCI_FMUL); +- } else { ++ } else ++#endif ++ { + PPCIns pi = PPCI_MULLW; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); +@@ -1244,15 +1420,16 @@ static void asm_mul(ASMState *as, IRIns *ir) + } + } + +-#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) +-#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) +-#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) ++#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV) + + static void asm_neg(ASMState *as, IRIns *ir) + { ++#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, PPCI_FNEG); +- } else { ++ } else ++#endif ++ { + Reg dest, left; + PPCIns pi = PPCI_NEG; + if (as->flagmcp == as->mcp) { +@@ -1267,8 +1444,6 @@ static void asm_neg(ASMState *as, IRIns *ir) + } + + #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) +-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + + static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) + { +@@ -1561,11 +1736,42 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) + #define asm_brol(as, ir) \ + asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ + PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) +-#define asm_bror(as, ir) lua_assert(0) ++#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR") ++ ++#if LJ_SOFTFP ++static void asm_sfpmin_max(ASMState *as, IRIns *ir) ++{ ++ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; ++ IRRef args[4]; ++ MCLabel l_right, l_end; ++ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); ++ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); ++ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); ++ PPCCC cond = (IROp)ir->o == IR_MIN ? 
CC_EQ : CC_NE; ++ righthi = (lefthi >> 8); lefthi &= 255; ++ rightlo = (leftlo >> 8); leftlo &= 255; ++ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; ++ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; ++ l_end = emit_label(as); ++ if (desthi != righthi) emit_mr(as, desthi, righthi); ++ if (destlo != rightlo) emit_mr(as, destlo, rightlo); ++ l_right = emit_label(as); ++ if (l_end != l_right) emit_jmp(as, l_end); ++ if (desthi != lefthi) emit_mr(as, desthi, lefthi); ++ if (destlo != leftlo) emit_mr(as, destlo, leftlo); ++ if (l_right == as->mcp+1) { ++ cond ^= 4; l_right = l_end; ++as->mcp; ++ } ++ emit_condbranch(as, PPCI_BC, cond, l_right); ++ ra_evictset(as, RSET_SCRATCH); ++ emit_cmpi(as, RID_RET, 1); ++ asm_gencall(as, &ci, args); ++} ++#endif + + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + { +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg tmp = dest; + Reg right, left = ra_alloc2(as, ir, RSET_FPR); +@@ -1573,9 +1779,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) + if (tmp == left || tmp == right) + tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, + dest), left), right)); +- emit_facb(as, PPCI_FSEL, dest, tmp, +- ismax ? left : right, ismax ? right : left); +- emit_fab(as, PPCI_FSUB, tmp, left, right); ++ emit_facb(as, PPCI_FSEL, dest, tmp, left, right); ++ emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg tmp1 = RID_TMP, tmp2 = dest; +@@ -1653,7 +1858,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) + static void asm_comp(ASMState *as, IRIns *ir) + { + PPCCC cc = asm_compmap[ir->o]; +- if (irt_isnum(ir->t)) { ++ if (!LJ_SOFTFP && irt_isnum(ir->t)) { + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + asm_guardcc(as, (cc >> 4)); +@@ -1674,6 +1879,44 @@ static void asm_comp(ASMState *as, IRIns *ir) + + #define asm_equal(as, ir) asm_comp(as, ir) + ++#if LJ_SOFTFP ++/* SFP comparisons. */ ++static void asm_sfpcomp(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; ++ RegSet drop = RSET_SCRATCH; ++ Reg r; ++ IRRef args[4]; ++ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; ++ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; ++ ++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { ++ if (!rset_test(as->freeset, r) && ++ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) ++ rset_clear(drop, r); ++ } ++ ra_evictset(as, drop); ++ asm_setupresult(as, ir, ci); ++ switch ((IROp)ir->o) { ++ case IR_ULT: ++ asm_guardcc(as, CC_EQ); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 0); ++ case IR_ULE: ++ asm_guardcc(as, CC_EQ); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 1); ++ break; ++ case IR_GE: case IR_GT: ++ asm_guardcc(as, CC_EQ); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 2); ++ default: ++ asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); ++ emit_ai(as, PPCI_CMPWI, RID_RET, 0); ++ break; ++ } ++ asm_gencall(as, ci, args); ++} ++#endif ++ + #if LJ_HASFFI + /* 64 bit integer comparisons. */ + static void asm_comp64(ASMState *as, IRIns *ir) +@@ -1698,24 +1941,41 @@ static void asm_comp64(ASMState *as, IRIns *ir) + } + #endif + +-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++/* -- Split register ops -------------------------------------------------- */ + +-/* Hiword op of a split 64 bit op. Previous op must be the loword op. 
*/ ++/* Hiword op of a split 32/32 bit op. Previous op is be the loword op. */ + static void asm_hiop(ASMState *as, IRIns *ir) + { +-#if LJ_HASFFI + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++#if LJ_HASFFI || LJ_SOFTFP + if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ + as->curins--; /* Always skip the CONV. */ ++#if LJ_HASFFI && !LJ_SOFTFP + if (usehi || uselo) + asm_conv64(as, ir); + return; ++#endif + } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ + as->curins--; /* Always skip the loword comparison. */ ++#if LJ_SOFTFP ++ if (!irt_isint(ir->t)) { ++ asm_sfpcomp(as, ir-1); ++ return; ++ } ++#endif ++#if LJ_HASFFI + asm_comp64(as, ir); ++#endif ++ return; ++#if LJ_SOFTFP ++ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { ++ as->curins--; /* Always skip the loword min/max. */ ++ if (uselo || usehi) ++ asm_sfpmin_max(as, ir-1); + return; ++#endif + } else if ((ir-1)->o == IR_XSTORE) { + as->curins--; /* Handle both stores here. */ + if ((ir-1)->r != RID_SINK) { +@@ -1724,24 +1984,33 @@ static void asm_hiop(ASMState *as, IRIns *ir) + } + return; + } ++#endif + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { ++#if LJ_HASFFI + case IR_ADD: as->curins--; asm_add64(as, ir); break; + case IR_SUB: as->curins--; asm_sub64(as, ir); break; + case IR_NEG: as->curins--; asm_neg64(as, ir); break; +- case IR_CALLN: +- case IR_CALLXS: ++ case IR_CNEWI: ++ /* Nothing to do here. Handled by lo op itself. */ ++ break; ++#endif ++#if LJ_SOFTFP ++ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: ++ case IR_STRTO: + if (!uselo) +- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ ++ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ + break; +- case IR_CNEWI: ++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: + /* Nothing to do here. Handled by lo op itself. */ + break; +- default: lua_assert(0); break; +- } +-#else +- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ + #endif ++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ ++ break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; ++ } + } + + /* -- Profiling ----------------------------------------------------------- */ +@@ -1797,12 +2066,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ Reg tmp; ++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); ++ /* LJ_SOFTFP: must be a number constant. 
*/ ++ lj_assertA(irref_isk(ref), "unsplit FP op"); ++ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); ++ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); ++ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); ++ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); ++ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); ++#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); ++#endif + } else { + Reg type; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); +- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "restore of IR type %d", irt_type(ir->t)); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); +@@ -1811,6 +2093,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + if ((sn & (SNAP_CONT|SNAP_FRAME))) { + if (s == 0) continue; /* Do not overwrite link to previous frame. */ + type = ra_allock(as, (int32_t)(*flinks--), allow); ++#if LJ_SOFTFP ++ } else if ((sn & SNAP_SOFTFPNUM)) { ++ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); ++#endif ++ } else if ((sn & SNAP_KEYINDEX)) { ++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); + } else { + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + } +@@ -1818,11 +2106,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } + checkmclim(as); + } +- lua_assert(map + nent == flinks); ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); + } + + /* -- GC handling --------------------------------------------------------- */ + ++/* Marker to prevent patching the GC check exit. */ ++#define PPC_NOPATCH_GC_CHECK PPCI_ORIS ++ + /* Check GC threshold and do one or more GC steps. */ + static void asm_gc_check(ASMState *as) + { +@@ -1834,6 +2125,7 @@ static void asm_gc_check(ASMState *as) + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ ++ *--as->mcp = PPC_NOPATCH_GC_CHECK; + emit_ai(as, PPCI_CMPWI, RID_RET, 0); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ +@@ -1865,6 +2157,12 @@ static void asm_loop_fixup(ASMState *as) + } + } + ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do. */ ++} ++ + /* -- Head of trace ------------------------------------------------------- */ + + /* Coalesce BASE register for a root trace. */ +@@ -1916,7 +2214,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) + as->mctop = p; + } else { + /* Patch stack adjustment. */ +- lua_assert(checki16(CFRAME_SIZE+spadj)); ++ lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); + p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); + p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; + } +@@ -1947,14 +2245,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) + int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) +- if (args[i] && irt_isfp(IR(args[i])->t)) { ++ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; + } else { + if (ngpr > 0) ngpr--; else nslots++; + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. 
*/ + as->evenspill = nslots; +- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); ++ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : ++ REGSP_HINT(RID_RET); + } + + static void asm_setup_target(ASMState *as) +@@ -1972,7 +2271,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + MCode *px = exitstub_trace_addr(T, exitno); + MCode *cstart = NULL; + MCode *mcarea = lj_mcode_patch(J, p, 0); +- int clearso = 0; ++ int clearso = 0, patchlong = 1; + for (; p < pe; p++) { + /* Look for exitstub branch, try to replace with branch to target. */ + uint32_t ins = *p; +@@ -1984,7 +2283,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + delta -= sizeof(MCode); + } + /* Many, but not all short-range branches can be patched directly. */ +- if (((delta + 0x8000) >> 16) == 0) { ++ if (p[-1] == PPC_NOPATCH_GC_CHECK) { ++ patchlong = 0; ++ } else if (((delta + 0x8000) >> 16) == 0) { + *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) | + ((delta & 0x8000) * (PPCF_Y/0x8000)); + if (!cstart) cstart = p; +@@ -1992,14 +2293,17 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + } else if ((ins & 0xfc000000u) == PPCI_B && + ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { + ptrdiff_t delta = (char *)target - (char *)p; +- lua_assert(((delta + 0x02000000) >> 26) == 0); ++ lj_assertJ(((delta + 0x02000000) >> 26) == 0, ++ "branch target out of range"); + *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); + if (!cstart) cstart = p; + } + } +- { /* Always patch long-range branch in exit stub itself. */ ++ /* Always patch long-range branch in exit stub itself. Except, if we can't. */ ++ if (patchlong) { + ptrdiff_t delta = (char *)target - (char *)px - clearso; +- lua_assert(((delta + 0x02000000) >> 26) == 0); ++ lj_assertJ(((delta + 0x02000000) >> 26) == 0, ++ "branch target out of range"); + *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); + } + if (!cstart) cstart = px; +diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h +index 3e189b1d..5eb18365 100644 +--- a/src/lj_asm_x86.h ++++ b/src/lj_asm_x86.h +@@ -1,6 +1,6 @@ + /* + ** x86/x64 IR assembler (SSA IR -> machine code). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + /* -- Guard handling ------------------------------------------------------ */ +@@ -31,7 +31,7 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) + #endif + /* Jump to exit handler which fills in the ExitState. */ + *mxp++ = XI_JMP; mxp += 4; +- *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); ++ *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); + /* Commit the code for this group (even if assembly fails later on). */ + lj_mcode_commitbot(as->J, mxp); + as->mcbot = mxp; +@@ -60,7 +60,7 @@ static void asm_guardcc(ASMState *as, int cc) + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; +- *(int32_t *)(p+1) = jmprel(p+5, target); ++ *(int32_t *)(p+1) = jmprel(as->J, p+5, target); + target = p; + cc ^= 1; + if (as->realign) { +@@ -131,7 +131,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) + as->mrm.ofs = 0; + if (irb->o == IR_FLOAD) { + IRIns *ira = IR(irb->op1); +- lua_assert(irb->op2 == IRFL_TAB_ARRAY); ++ lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); + /* We can avoid the FLOAD of t->array for colocated arrays. 
*/ + if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { +@@ -150,7 +150,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) + static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) + { + IRIns *irx; +- lua_assert(ir->o == IR_AREF); ++ lj_assertA(ir->o == IR_AREF, "expected AREF"); + as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); + irx = IR(ir->op2); + if (irref_isk(ir->op2)) { +@@ -216,9 +216,17 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) + #endif + } + break; ++ case IR_TMPREF: ++#if LJ_GC64 ++ as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv); ++ as->mrm.base = RID_DISPATCH; ++ as->mrm.idx = RID_NONE; ++#else ++ as->mrm.ofs = igcptr(&J2G(as->J)->tmptv); ++ as->mrm.base = as->mrm.idx = RID_NONE; ++#endif ++ return; + default: +- lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO || +- ir->o == IR_KKPTR); + break; + } + } +@@ -230,9 +238,10 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) + /* Fuse FLOAD/FREF reference into memory operand. */ + static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) + { +- lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); ++ lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF, ++ "bad IR op %d", ir->o); + as->mrm.idx = RID_NONE; +- if (ir->op1 == REF_NIL) { ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + #if LJ_GC64 + as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; +@@ -271,7 +280,7 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) + static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) + { + IRIns *irr; +- lua_assert(ir->o == IR_STRREF); ++ lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o); + as->mrm.base = as->mrm.idx = RID_NONE; + as->mrm.scale = XM_SCALE1; + as->mrm.ofs = sizeof(GCstr); +@@ -378,15 +387,17 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) + checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { + as->mrm.ofs = (int32_t)mcpofs(as, k); + as->mrm.base = RID_RIP; +- } else { ++ } else { /* Intern 64 bit constant at bottom of mcode. */ + if (ir->i) { +- lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); ++ lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), ++ "bad interned 64 bit constant"); + } else { + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t*)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; ++ lj_mcode_commitbot(as->J, as->mcbot); + } + as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); + as->mrm.base = RID_RIP; +@@ -419,12 +430,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) + } + if (ir->o == IR_KNUM) { + RegSet avail = as->freeset & ~as->modset & RSET_FPR; +- lua_assert(allow != RSET_EMPTY); ++ lj_assertA(allow != RSET_EMPTY, "no register allowed"); + if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ + return asm_fuseloadk64(as, ir); + } else if (ref == REF_BASE || ir->o == IR_KINT64) { + RegSet avail = as->freeset & ~as->modset & RSET_GPR; +- lua_assert(allow != RSET_EMPTY); ++ lj_assertA(allow != RSET_EMPTY, "no register allowed"); + if (!(avail & (avail-1))) { /* Fuse if less than two regs available. 
*/ + if (ref == REF_BASE) { + #if LJ_GC64 +@@ -476,6 +487,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) + } + } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { + asm_fuseahuref(as, ir->op1, xallow); ++ as->mrm.ofs += 8 * ir->op2; + return RID_MRM; + } + } +@@ -605,7 +617,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + #endif + emit_loadi(as, r, ir->i); + } else { +- lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ ++ /* Must have been evicted. */ ++ lj_assertA(rset_test(as->freeset, r), "reg %d not free", r); + if (ra_hasreg(ir->r)) { + ra_noweak(as, ir->r); + emit_movrr(as, ir, r, ir->r); +@@ -614,7 +627,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + } + } + } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ +- lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ ++ lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)), ++ "unexpected float constant"); + if (LJ_32 && (ofs & 4) && irref_isk(ref)) { + /* Split stores for unaligned FP consts. */ + emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); +@@ -645,7 +659,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) + static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + { + RegSet drop = RSET_SCRATCH; +- int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); ++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); + if ((ci->flags & CCI_NOFPRCLOBBER)) + drop &= ~RSET_FPR; + if (ra_hasreg(ir->r)) +@@ -685,12 +699,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) + irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); + } + #endif +-#if LJ_32 + } else if (hiop) { + ra_destpair(as, ir); +-#endif + } else { +- lua_assert(!irt_ispri(ir->t)); ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); + ra_destreg(as, ir, RID_RET); + } + } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { +@@ -775,6 +787,21 @@ static void asm_retf(ASMState *as, IRIns *ir) + #endif + } + ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */ ++ emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++ emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L); ++ emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ + /* -- Type conversions ---------------------------------------------------- */ + + static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +@@ -809,8 +836,10 @@ static void asm_conv(ASMState *as, IRIns *ir) + int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + IRRef lref = ir->op1; +- lua_assert(irt_type(ir->t) != st); +- lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); ++ lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)), ++ "IR %04d has unsplit 64 bit type", ++ (int)(ir - as->ir) - REF_BIAS); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ +@@ -846,7 +875,8 @@ static void asm_conv(ASMState *as, IRIns *ir) + } else if (stfp) { /* FP to integer conversion. 
*/ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ +- lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); +@@ -881,7 +911,7 @@ static void asm_conv(ASMState *as, IRIns *ir) + Reg left, dest = ra_dest(as, ir, RSET_GPR); + RegSet allow = RSET_GPR; + x86Op op; +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + if (st == IRT_I8) { + op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; + } else if (st == IRT_U8) { +@@ -915,7 +945,7 @@ static void asm_conv(ASMState *as, IRIns *ir) + } + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); +- if (st64) { ++ if (st64 && !(ir->op2 & IRCONV_NONE)) { + Reg left = asm_fuseload(as, lref, RSET_GPR); + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. +@@ -952,7 +982,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) + emit_sjcc(as, CC_NS, l_end); + emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ + } else { +- lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); ++ lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV"); + } + emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); + /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ +@@ -966,8 +996,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) + IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); + IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); + Reg lo, hi; +- lua_assert(st == IRT_NUM || st == IRT_FLOAT); +- lua_assert(dt == IRT_I64 || dt == IRT_U64); ++ lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); ++ lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); + hi = ra_dest(as, ir, RSET_GPR); + lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); + if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); +@@ -1041,47 +1071,48 @@ static void asm_strto(ASMState *as, IRIns *ir) + /* -- Memory references --------------------------------------------------- */ + + /* Get pointer to TValue. */ +-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) + { +- IRIns *ir = IR(ref); +- if (irt_isnum(ir->t)) { +- /* For numbers use the constant itself or a spill slot as a TValue. */ +- if (irref_isk(ref)) +- emit_loada(as, dest, ir_knum(ir)); +- else +- emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); +- } else { +- /* Otherwise use g->tmptv to hold the TValue. */ +-#if LJ_GC64 +- if (irref_isk(ref)) { +- TValue k; +- lj_ir_kvalue(as->J->L, &k, ir); +- emit_movmroi(as, dest, 4, k.u32.hi); +- emit_movmroi(as, dest, 0, k.u32.lo); ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { ++ /* Use the number constant itself as a TValue. */ ++ emit_loada(as, dest, ir_knum(ir)); ++ return; ++ } ++ emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0); + } else { +- /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. 
*/ +- Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); +- if (irt_is64(ir->t)) { +- emit_u32(as, irt_toitype(ir->t) << 15); +- emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); ++#if LJ_GC64 ++ if (irref_isk(ref)) { ++ TValue k; ++ lj_ir_kvalue(as->J->L, &k, ir); ++ emit_movmroi(as, dest, 4, k.u32.hi); ++ emit_movmroi(as, dest, 0, k.u32.lo); + } else { +- /* Currently, no caller passes integers that might end up here. */ +- emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); ++ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); ++ if (irt_is64(ir->t)) { ++ emit_u32(as, irt_toitype(ir->t) << 15); ++ emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); ++ } else { ++ emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); ++ } ++ emit_movtomro(as, REX_64IR(ir, src), dest, 0); + } +- emit_movtomro(as, REX_64IR(ir, src), dest, 0); +- } + #else +- if (!irref_isk(ref)) { +- Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); +- emit_movtomro(as, REX_64IR(ir, src), dest, 0); +- } else if (!irt_ispri(ir->t)) { +- emit_movmroi(as, dest, 0, ir->i); +- } +- if (!(LJ_64 && irt_islightud(ir->t))) +- emit_movmroi(as, dest, 4, irt_toitype(ir->t)); ++ if (!irref_isk(ref)) { ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); ++ emit_movtomro(as, REX_64IR(ir, src), dest, 0); ++ } else if (!irt_ispri(ir->t)) { ++ emit_movmroi(as, dest, 0, ir->i); ++ } ++ if (!(LJ_64 && irt_islightud(ir->t))) ++ emit_movmroi(as, dest, 4, irt_toitype(ir->t)); + #endif +- emit_loada(as, dest, &J2G(as->J)->tmptv); ++ } + } ++ emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */ + } + + static void asm_aref(ASMState *as, IRIns *ir) +@@ -1179,13 +1210,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); + } + } else { +- lua_assert(irt_ispri(kt) && !irt_isnil(kt)); ++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); + #else + } else { + if (!irt_ispri(kt)) { +- lua_assert(irt_isaddr(kt)); ++ lj_assertA(irt_isaddr(kt), "bad HREF key type"); + if (isk) + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), + ptr2addr(ir_kgc(irkey))); +@@ -1193,7 +1224,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); + emit_sjcc(as, CC_NE, l_next); + } +- lua_assert(!irt_isnil(kt)); ++ lj_assertA(!irt_isnil(kt), "bad HREF key type"); + emit_i8(as, irt_toitype(kt)); + emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); + #endif +@@ -1208,23 +1239,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) + #endif + + /* Load main position relative to tab->node into dest. */ +- khash = isk ? ir_khash(irkey) : 1; ++ khash = isk ? 
ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); + } else { + emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); +- if ((as->flags & JIT_F_PREFER_IMUL)) { +- emit_i8(as, sizeof(Node)); +- emit_rr(as, XO_IMULi8, dest, dest); +- } else { +- emit_shifti(as, XOg_SHL, dest, 3); +- emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); +- } ++ emit_shifti(as, XOg_SHL, dest, 3); ++ emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); + if (isk) { + emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); + emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); + } else if (irt_isstr(kt)) { +- emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); ++ emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid)); + emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); + } else { /* Must match with hashrot() in lj_tab.c. */ + emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); +@@ -1275,10 +1301,10 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + #if !LJ_64 + MCLabel l_exit; + #endif +- lua_assert(ofs % sizeof(Node) == 0); ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (ra_hasreg(dest)) { + if (ofs != 0) { +- if (dest == node && !(as->flags & JIT_F_LEA_AGU)) ++ if (dest == node) + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); + else + emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); +@@ -1292,7 +1318,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); + emit_rmro(as, XO_CMP, key|REX_64, node, + ofs + (int32_t)offsetof(Node, key.u64)); +- lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); ++ lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t), ++ "bad HREFK key type"); + /* Assumes -0.0 is already canonicalized to +0.0. */ + emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : + #if LJ_GC64 +@@ -1303,7 +1330,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); + #endif + } else { +- lua_assert(!irt_isnil(irkey->t)); ++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + #if LJ_GC64 + emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, node, +@@ -1327,13 +1354,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir) + (int32_t)ir_knum(irkey)->u32.hi); + } else { + if (!irt_ispri(irkey->t)) { +- lua_assert(irt_isgcv(irkey->t)); ++ lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type"); + emit_gmroi(as, XG_ARITHi(XOg_CMP), node, + ofs + (int32_t)offsetof(Node, key.gcr), + ptr2addr(ir_kgc(irkey))); + emit_sjcc(as, CC_NE, l_exit); + } +- lua_assert(!irt_isnil(irkey->t)); ++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + emit_i8(as, irt_toitype(irkey->t)); + emit_rmro(as, XO_ARITHi8, XOg_CMP, node, + ofs + (int32_t)offsetof(Node, key.it)); +@@ -1406,7 +1433,8 @@ static void asm_fxload(ASMState *as, IRIns *ir) + if (LJ_64 && irt_is64(ir->t)) + dest |= REX_64; + else +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), ++ "unsplit 64 bit load"); + xo = XO_MOV; + break; + } +@@ -1451,13 +1479,16 @@ static void asm_fxstore(ASMState *as, IRIns *ir) + case IRT_NUM: xo = XO_MOVSDto; break; + case IRT_FLOAT: xo = XO_MOVSSto; break; + #if LJ_64 && !LJ_GC64 +- case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ ++ case IRT_LIGHTUD: ++ /* NYI: mask 64 bit lightuserdata. 
*/ ++ lj_assertA(0, "store of lightuserdata"); + #endif + default: + if (LJ_64 && irt_is64(ir->t)) + src |= REX_64; + else +- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), ++ "unsplit 64 bit store"); + xo = XO_MOVto; + break; + } +@@ -1471,8 +1502,8 @@ static void asm_fxstore(ASMState *as, IRIns *ir) + emit_i8(as, k); + emit_mrm(as, XO_MOVmib, 0, RID_MRM); + } else { +- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || +- irt_isaddr(ir->t)); ++ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || ++ irt_isaddr(ir->t), "bad store type"); + emit_i32(as, k); + emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); + } +@@ -1507,13 +1538,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + #if LJ_GC64 + Reg tmp = RID_NONE; + #endif +- lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || +- (LJ_DUALNUM && irt_isint(ir->t))); ++ lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || ++ (LJ_DUALNUM && irt_isint(ir->t)), ++ "bad load type %d", irt_type(ir->t)); + #if LJ_64 && !LJ_GC64 + if (irt_islightud(ir->t)) { + Reg dest = asm_load_lightud64(as, ir, 1); + if (ra_hasreg(dest)) { + asm_fuseahuref(as, ir->op1, RSET_GPR); ++ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; + emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); + } + return; +@@ -1523,6 +1556,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + asm_fuseahuref(as, ir->op1, RSET_GPR); ++ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; + #if LJ_GC64 + if (irt_isaddr(ir->t)) { + emit_shifti(as, XOg_SHR|REX_64, dest, 17); +@@ -1550,12 +1584,14 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) + } + #endif + asm_fuseahuref(as, ir->op1, gpr); ++ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; + } + /* Always do the type check, even if the load result is unused. */ + as->mrm.ofs += 4; + asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); + if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { +- lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); ++ lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), ++ "bad load type %d", irt_type(ir->t)); + #if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); + #else +@@ -1637,13 +1673,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) + #endif + emit_mrm(as, XO_MOVto, src, RID_MRM); + } else if (!irt_ispri(irr->t)) { +- lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); ++ lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)), ++ "bad store type"); + emit_i32(as, irr->i); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } + as->mrm.ofs += 4; + #if LJ_GC64 +- lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); ++ lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type"); + emit_i32(as, LJ_TNUMX << 15); + #else + emit_i32(as, (int32_t)irt_toitype(ir->t)); +@@ -1658,10 +1695,14 @@ static void asm_sload(ASMState *as, IRIns *ir) + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + IRType1 t = ir->t; + Reg base; +- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ +- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); +- lua_assert(LJ_DUALNUM || +- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). 
*/ ++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); ++ lj_assertA(LJ_DUALNUM || ++ !irt_isint(t) || ++ (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), ++ "bad SLOAD type"); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + Reg left = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ +@@ -1681,7 +1722,8 @@ static void asm_sload(ASMState *as, IRIns *ir) + RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + base = ra_alloc1(as, REF_BASE, RSET_GPR); +- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); ++ lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), ++ "bad SLOAD type %d", irt_type(t)); + if ((ir->op2 & IRSLOAD_CONVERT)) { + t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ + emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); +@@ -1727,7 +1769,8 @@ static void asm_sload(ASMState *as, IRIns *ir) + /* Need type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); + if (LJ_64 && irt_type(t) >= IRT_NUM) { +- lua_assert(irt_isinteger(t) || irt_isnum(t)); ++ lj_assertA(irt_isinteger(t) || irt_isnum(t), ++ "bad SLOAD type %d", irt_type(t)); + #if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); + #else +@@ -1758,7 +1801,7 @@ static void asm_sload(ASMState *as, IRIns *ir) + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); +- emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); ++ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); + #else + } else { + emit_i8(as, irt_toitype(t)); +@@ -1779,7 +1822,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; +- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); + + as->gcsteps++; + asm_setupresult(as, ir, ci); /* GCcdata * */ +@@ -1809,7 +1853,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) + int32_t ofs = sizeof(GCcdata); + if (sz == 8) { + ofs += 4; ir++; +- lua_assert(ir->o == IR_HIOP); ++ lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP"); + } + do { + if (irref_isk(ir->op2)) { +@@ -1823,7 +1867,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) + ofs -= 4; ir--; + } while (1); + #endif +- lua_assert(sz == 4 || sz == 8); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ +@@ -1847,8 +1891,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); + } +-#else +-#define asm_cnew(as, ir) ((void)0) + #endif + + /* -- Write barriers ------------------------------------------------------ */ +@@ -1875,7 +1917,7 @@ static void asm_obar(ASMState *as, IRIns *ir) + MCLabel l_end; + Reg obj; + /* No need for other object barriers (yet). 
*/ +- lua_assert(IR(ir->op1)->o == IR_UREFC); ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ +@@ -1954,15 +1996,11 @@ static void asm_fpmath(ASMState *as, IRIns *ir) + fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); + ra_left(as, RID_XMM0, ir->op1); + } +- } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { +- /* Rejoined to pow(). */ + } else { + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); + } + } + +-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +- + static void asm_ldexp(ASMState *as, IRIns *ir) + { + int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ +@@ -1992,22 +2030,11 @@ static void asm_fppowi(ASMState *as, IRIns *ir) + ra_left(as, RID_EAX, ir->op2); + } + +-static void asm_pow(ASMState *as, IRIns *ir) +-{ +-#if LJ_64 && LJ_HASFFI +- if (!irt_isnum(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : +- IRCALL_lj_carith_powu64); +- else +-#endif +- asm_fppowi(as, ir); +-} +- + static int asm_swapops(ASMState *as, IRIns *ir) + { + IRIns *irl = IR(ir->op1); + IRIns *irr = IR(ir->op2); +- lua_assert(ra_noreg(irr->r)); ++ lj_assertA(ra_noreg(irr->r), "bad usage"); + if (!irm_iscomm(lj_ir_mode[ir->o])) + return 0; /* Can't swap non-commutative operations. */ + if (irref_isk(ir->op2)) +@@ -2060,8 +2087,9 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) + int32_t k = 0; + if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */ + MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2); +- if ((p[1] & 15) < 14) { +- if ((p[1] & 15) >= 12) p[1] -= 4; /* L <->S, NL <-> NS */ ++ MCode *q = p[0] == 0x0f ? p+1 : p; ++ if ((*q & 15) < 14) { ++ if ((*q & 15) >= 12) *q -= 4; /* L <->S, NL <-> NS */ + as->flagmcp = NULL; + as->mcp = p; + } /* else: cannot transform LE/NLE to cc without use of OF. */ +@@ -2178,8 +2206,7 @@ static void asm_add(ASMState *as, IRIns *ir) + { + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_ADDSD); +- else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || +- irt_is64(ir->t) || !asm_lea(as, ir)) ++ else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) + asm_intarith(as, ir, XOg_ADD); + } + +@@ -2199,27 +2226,7 @@ static void asm_mul(ASMState *as, IRIns *ir) + asm_intarith(as, ir, XOg_X_IMUL); + } + +-static void asm_div(ASMState *as, IRIns *ir) +-{ +-#if LJ_64 && LJ_HASFFI +- if (!irt_isnum(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : +- IRCALL_lj_carith_divu64); +- else +-#endif +- asm_fparith(as, ir, XO_DIVSD); +-} +- +-static void asm_mod(ASMState *as, IRIns *ir) +-{ +-#if LJ_64 && LJ_HASFFI +- if (!irt_isint(ir->t)) +- asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_modi64 : +- IRCALL_lj_carith_modu64); +- else +-#endif +- asm_callid(as, ir, IRCALL_lj_vm_modi); +-} ++#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD) + + static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) + { +@@ -2319,7 +2326,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv) + dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); + if (dest == RID_ECX) { + dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX)); +- emit_rr(as, XO_MOV, RID_ECX, dest); ++ emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest); + } + right = irr->r; + if (ra_noreg(right)) +@@ -2417,8 +2424,9 @@ static void asm_comp(ASMState *as, IRIns *ir) + IROp leftop = (IROp)(IR(lref)->o); + Reg r64 = REX_64IR(ir, 0); + int32_t imm = 0; +- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || +- irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); ++ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || ++ irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), ++ "bad comparison data type %d", irt_type(ir->t)); + /* Swap constants (only for ABC) and fusable loads to the right. */ + if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { + if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ +@@ -2500,7 +2508,7 @@ static void asm_comp(ASMState *as, IRIns *ir) + /* Use test r,r instead of cmp r,0. */ + x86Op xo = XO_TEST; + if (irt_isu8(ir->t)) { +- lua_assert(ir->o == IR_EQ || ir->o == IR_NE); ++ lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage"); + xo = XO_TESTb; + if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { + if (LJ_64) { +@@ -2602,15 +2610,15 @@ static void asm_comp_int64(ASMState *as, IRIns *ir) + } + #endif + +-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++/* -- Split register ops -------------------------------------------------- */ + +-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */ + static void asm_hiop(ASMState *as, IRIns *ir) + { +-#if LJ_32 && LJ_HASFFI + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++#if LJ_32 && LJ_HASFFI + if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ + as->curins--; /* Always skip the CONV. */ + if (usehi || uselo) +@@ -2624,8 +2632,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) + asm_fxstore(as, ir); + return; + } ++#endif + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { ++#if LJ_32 && LJ_HASFFI + case IR_ADD: + as->flagmcp = NULL; + as->curins--; +@@ -2648,19 +2658,16 @@ static void asm_hiop(ASMState *as, IRIns *ir) + asm_neg_not(as, ir-1, XOg_NEG); + break; + } +- case IR_CALLN: +- case IR_CALLXS: +- if (!uselo) +- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ +- break; + case IR_CNEWI: + /* Nothing to do here. Handled by CNEWI itself. */ + break; +- default: lua_assert(0); break; +- } +-#else +- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ + #endif ++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. 
*/ ++ break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; ++ } + } + + /* -- Profiling ----------------------------------------------------------- */ +@@ -2721,12 +2728,21 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; +- if (irt_isnum(ir->t)) { ++ if ((sn & SNAP_KEYINDEX)) { ++ emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX); ++ if (irref_isk(ref)) { ++ emit_movmroi(as, RID_BASE, ofs, ir->i); ++ } else { ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); ++ emit_movtomro(as, src, RID_BASE, ofs); ++ } ++ } else if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); + } else { +- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || +- (LJ_DUALNUM && irt_isinteger(ir->t))); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || ++ (LJ_DUALNUM && irt_isinteger(ir->t)), ++ "restore of IR type %d", irt_type(ir->t)); + if (!irref_isk(ref)) { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + #if LJ_GC64 +@@ -2771,7 +2787,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) + } + checkmclim(as); + } +- lua_assert(map + nent == flinks); ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); + } + + /* -- GC handling --------------------------------------------------------- */ +@@ -2815,16 +2831,16 @@ static void asm_loop_fixup(ASMState *as) + MCode *target = as->mcp; + if (as->realign) { /* Realigned loops use short jumps. */ + as->realign = NULL; /* Stop another retry. */ +- lua_assert(((intptr_t)target & 15) == 0); ++ lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed"); + if (as->loopinv) { /* Inverted loop branch? */ + p -= 5; + p[0] = XI_JMP; +- lua_assert(target - p >= -128); ++ lj_assertA(target - p >= -128, "loop realign failed"); + p[-1] = (MCode)(target - p); /* Patch sjcc. */ + if (as->loopinv == 2) + p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ + } else { +- lua_assert(target - p >= -128); ++ lj_assertA(target - p >= -128, "loop realign failed"); + p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ + p[-2] = XI_JMPs; + } +@@ -2853,6 +2869,12 @@ static void asm_loop_fixup(ASMState *as) + } + } + ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do. */ ++} ++ + /* -- Head of trace ------------------------------------------------------- */ + + /* Coalesce BASE register for a root trace. */ +@@ -2901,7 +2923,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) + MCode *target, *q; + int32_t spadj = as->T->spadjust; + if (spadj == 0) { +- p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); ++ p -= LJ_64 ? 7 : 6; + } else { + MCode *p1; + /* Patch stack adjustment. */ +@@ -2913,24 +2935,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) + p1 = p-9; + *(int32_t *)p1 = spadj; + } +- if ((as->flags & JIT_F_LEA_AGU)) { +-#if LJ_64 +- p1[-4] = 0x48; +-#endif +- p1[-3] = (MCode)XI_LEA; +- p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); +- p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); +- } else { + #if LJ_64 +- p1[-3] = 0x48; ++ p1[-3] = 0x48; + #endif +- p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); +- p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); +- } ++ p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); ++ p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); + } + /* Patch exit branch. */ + target = lnk ? 
traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; +- *(int32_t *)(p-4) = jmprel(p, target); ++ *(int32_t *)(p-4) = jmprel(as->J, p, target); + p[-5] = XI_JMP; + /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ + for (q = as->mctop-1; q >= p; q--) +@@ -2957,7 +2970,7 @@ static void asm_tail_prep(ASMState *as) + as->invmcp = as->mcp = p; + } else { + /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ +- as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); ++ as->mcp = p - (LJ_64 ? 7 : 6); + as->invmcp = NULL; + } + } +@@ -3097,23 +3110,30 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) + MSize len = T->szmcode; + MCode *px = exitstub_addr(J, exitno) - 6; + MCode *pe = p+len-6; ++ MCode *pgc = NULL; + #if LJ_GC64 + uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); + #else + uint32_t statei = u32ptr(&J2G(J)->vmstate); + #endif + if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) +- *(int32_t *)(p+len-4) = jmprel(p+len, target); ++ *(int32_t *)(p+len-4) = jmprel(J, p+len, target); + /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ + for (; p < pe; p += asm_x86_inslen(p)) { + intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; + if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) + break; + } +- lua_assert(p < pe); +- for (; p < pe; p += asm_x86_inslen(p)) +- if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) +- *(int32_t *)(p+2) = jmprel(p+6, target); ++ lj_assertJ(p < pe, "instruction length decoder failed"); ++ for (; p < pe; p += asm_x86_inslen(p)) { ++ if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px && ++ p != pgc) { ++ *(int32_t *)(p+2) = jmprel(J, p+6, target); ++ } else if (*p == XI_CALL && ++ (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { ++ pgc = p+7; /* Do not patch GC check exit. */ ++ } ++ } + lj_mcode_sync(T->mcode, T->mcode + T->szmcode); + lj_mcode_patch(J, mcarea, 1); + } +diff --git a/src/lj_assert.c b/src/lj_assert.c +new file mode 100644 +index 00000000..35a63ce3 +--- /dev/null ++++ b/src/lj_assert.c +@@ -0,0 +1,28 @@ ++/* ++** Internal assertions. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#define lj_assert_c ++#define LUA_CORE ++ ++#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) ++ ++#include <stdio.h> ++ ++#include "lj_obj.h" ++ ++void lj_assert_fail(global_State *g, const char *file, int line, ++ const char *func, const char *fmt, ...) ++{ ++ va_list argp; ++ va_start(argp, fmt); ++ fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func); ++ vfprintf(stderr, fmt, argp); ++ fputc('\n', stderr); ++ va_end(argp); ++ UNUSED(g); /* May be NULL. TODO: optionally dump state. */ ++ abort(); ++} ++ ++#endif +diff --git a/src/lj_bc.c b/src/lj_bc.c +index a597692c..16c22dc3 100644 +--- a/src/lj_bc.c ++++ b/src/lj_bc.c +@@ -1,6 +1,6 @@ + /* + ** Bytecode instruction modes. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_bc_c +diff --git a/src/lj_bc.h b/src/lj_bc.h +index 69a45f28..ad517b6b 100644 +--- a/src/lj_bc.h ++++ b/src/lj_bc.h +@@ -1,6 +1,6 @@ + /* + ** Bytecode instruction format. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_BC_H +diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h +index fdfc6ec0..d968d3f4 100644 +--- a/src/lj_bcdump.h ++++ b/src/lj_bcdump.h +@@ -1,6 +1,6 @@ + /* + ** Bytecode dump definitions. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_BCDUMP_H +diff --git a/src/lj_bcread.c b/src/lj_bcread.c +index 48c5e7c7..298e6c45 100644 +--- a/src/lj_bcread.c ++++ b/src/lj_bcread.c +@@ -1,6 +1,6 @@ + /* + ** Bytecode reader. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_bcread_c +@@ -47,17 +47,17 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) + /* Refill buffer. */ + static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) + { +- lua_assert(len != 0); ++ lj_assertLS(len != 0, "empty refill"); + if (len > LJ_MAX_BUF || ls->c < 0) + bcread_error(ls, LJ_ERR_BCBAD); + do { + const char *buf; + size_t sz; +- char *p = sbufB(&ls->sb); ++ char *p = ls->sb.b; + MSize n = (MSize)(ls->pe - ls->p); + if (n) { /* Copy remainder to buffer. */ + if (sbuflen(&ls->sb)) { /* Move down in buffer. */ +- lua_assert(ls->pe == sbufP(&ls->sb)); ++ lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer"); + if (ls->p != p) memmove(p, ls->p, n); + } else { /* Copy from buffer provided by reader. */ + p = lj_buf_need(&ls->sb, len); +@@ -66,38 +66,39 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) + ls->p = p; + ls->pe = p + n; + } +- setsbufP(&ls->sb, p + n); ++ ls->sb.w = p + n; + buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */ + if (buf == NULL || sz == 0) { /* EOF? */ + if (need) bcread_error(ls, LJ_ERR_BCBAD); + ls->c = -1; /* Only bad if we get called again. */ + break; + } ++ if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L); + if (n) { /* Append to buffer. */ + n += (MSize)sz; + p = lj_buf_need(&ls->sb, n < len ? len : n); +- memcpy(sbufP(&ls->sb), buf, sz); +- setsbufP(&ls->sb, p + n); ++ memcpy(ls->sb.w, buf, sz); ++ ls->sb.w = p + n; + ls->p = p; + ls->pe = p + n; + } else { /* Return buffer provided by reader. */ + ls->p = buf; + ls->pe = buf + sz; + } +- } while (ls->p + len > ls->pe); ++ } while ((MSize)(ls->pe - ls->p) < len); + } + + /* Need a certain number of bytes. */ + static LJ_AINLINE void bcread_need(LexState *ls, MSize len) + { +- if (LJ_UNLIKELY(ls->p + len > ls->pe)) ++ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) + bcread_fill(ls, len, 1); + } + + /* Want to read up to a certain number of bytes, but may need less. */ + static LJ_AINLINE void bcread_want(LexState *ls, MSize len) + { +- if (LJ_UNLIKELY(ls->p + len > ls->pe)) ++ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) + bcread_fill(ls, len, 0); + } + +@@ -106,7 +107,7 @@ static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len) + { + uint8_t *p = (uint8_t *)ls->p; + ls->p += len; +- lua_assert(ls->p <= ls->pe); ++ lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); + return p; + } + +@@ -119,7 +120,7 @@ static void bcread_block(LexState *ls, void *q, MSize len) + /* Read byte from buffer. 
*/ + static LJ_AINLINE uint32_t bcread_byte(LexState *ls) + { +- lua_assert(ls->p < ls->pe); ++ lj_assertLS(ls->p < ls->pe, "buffer read overflow"); + return (uint32_t)(uint8_t)*ls->p++; + } + +@@ -127,7 +128,7 @@ static LJ_AINLINE uint32_t bcread_byte(LexState *ls) + static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls) + { + uint32_t v = lj_buf_ruleb128(&ls->p); +- lua_assert(ls->p <= ls->pe); ++ lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); + return v; + } + +@@ -144,7 +145,7 @@ static uint32_t bcread_uleb128_33(LexState *ls) + } while (*p++ >= 0x80); + } + ls->p = (char *)p; +- lua_assert(ls->p <= ls->pe); ++ lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); + return v; + } + +@@ -191,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o) + o->u32.lo = bcread_uleb128(ls); + o->u32.hi = bcread_uleb128(ls); + } else { +- lua_assert(tp <= BCDUMP_KTAB_TRUE); ++ lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); + setpriV(o, ~tp); + } + } +@@ -213,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls) + for (i = 0; i < nhash; i++) { + TValue key; + bcread_ktabk(ls, &key); +- lua_assert(!tvisnil(&key)); ++ lj_assertLS(!tvisnil(&key), "nil key"); + bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); + } + } +@@ -250,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) + #endif + } else { + lua_State *L = ls->L; +- lua_assert(tp == BCDUMP_KGC_CHILD); ++ lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp); + if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ + bcread_error(ls, LJ_ERR_BCBAD); + L->top--; +@@ -398,11 +399,7 @@ static int bcread_header(LexState *ls) + if ((flags & BCDUMP_F_FFI)) { + #if LJ_HASFFI + lua_State *L = ls->L; +- if (!ctype_ctsG(G(L))) { +- ptrdiff_t oldtop = savestack(L, L->top); +- luaopen_ffi(L); /* Load FFI library on-demand. */ +- L->top = restorestack(L, oldtop); +- } ++ ctype_loadffi(L); + #else + return 0; + #endif +@@ -421,7 +418,7 @@ static int bcread_header(LexState *ls) + GCproto *lj_bcread(LexState *ls) + { + lua_State *L = ls->L; +- lua_assert(ls->c == BCDUMP_HEAD1); ++ lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header"); + bcread_savetop(L, ls, L->top); + lj_buf_reset(&ls->sb); + /* Check for a valid bytecode dump header. */ +@@ -447,8 +444,7 @@ GCproto *lj_bcread(LexState *ls) + setprotoV(L, L->top, pt); + incr_top(L); + } +- if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 || +- L->top-1 != bcread_oldtop(L, ls)) ++ if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) + bcread_error(ls, LJ_ERR_BCBAD); + /* Pop off last prototype. */ + L->top--; +diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c +index 5e05caea..c5c042e0 100644 +--- a/src/lj_bcwrite.c ++++ b/src/lj_bcwrite.c +@@ -1,6 +1,6 @@ + /* + ** Bytecode writer. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_bcwrite_c +@@ -29,8 +29,17 @@ typedef struct BCWriteCtx { + void *wdata; /* Writer callback data. */ + int strip; /* Strip debug info. */ + int status; /* Status from writer callback. */ ++#ifdef LUA_USE_ASSERT ++ global_State *g; ++#endif + } BCWriteCtx; + ++#ifdef LUA_USE_ASSERT ++#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__) ++#else ++#define lj_assertBCW(c, ...) ((void)ctx) ++#endif ++ + /* -- Bytecode writer ----------------------------------------------------- */ + + /* Write a single constant key/value of a template table. 
*/ +@@ -53,7 +62,7 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) + if (num == (lua_Number)k) { /* -0 is never a constant. */ + *p++ = BCDUMP_KTAB_INT; + p = lj_strfmt_wuleb128(p, k); +- setsbufP(&ctx->sb, p); ++ ctx->sb.w = p; + return; + } + } +@@ -61,10 +70,10 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) + p = lj_strfmt_wuleb128(p, o->u32.lo); + p = lj_strfmt_wuleb128(p, o->u32.hi); + } else { +- lua_assert(tvispri(o)); ++ lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); + *p++ = BCDUMP_KTAB_NIL+~itype(o); + } +- setsbufP(&ctx->sb, p); ++ ctx->sb.w = p; + } + + /* Write a template table. */ +@@ -88,7 +97,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) + /* Write number of array slots and hash slots. */ + p = lj_strfmt_wuleb128(p, narray); + p = lj_strfmt_wuleb128(p, nhash); +- setsbufP(&ctx->sb, p); ++ ctx->sb.w = p; + if (narray) { /* Write array entries (may contain nil). */ + MSize i; + TValue *o = tvref(t->array); +@@ -121,7 +130,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) + tp = BCDUMP_KGC_STR + gco2str(o)->len; + need = 5+gco2str(o)->len; + } else if (o->gch.gct == ~LJ_TPROTO) { +- lua_assert((pt->flags & PROTO_CHILD)); ++ lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child"); + tp = BCDUMP_KGC_CHILD; + #if LJ_HASFFI + } else if (o->gch.gct == ~LJ_TCDATA) { +@@ -132,12 +141,14 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) + } else if (id == CTID_UINT64) { + tp = BCDUMP_KGC_U64; + } else { +- lua_assert(id == CTID_COMPLEX_DOUBLE); ++ lj_assertBCW(id == CTID_COMPLEX_DOUBLE, ++ "bad cdata constant CTID %d", id); + tp = BCDUMP_KGC_COMPLEX; + } + #endif + } else { +- lua_assert(o->gch.gct == ~LJ_TTAB); ++ lj_assertBCW(o->gch.gct == ~LJ_TTAB, ++ "bad constant GC type %d", o->gch.gct); + tp = BCDUMP_KGC_TAB; + need = 1+2*5; + } +@@ -161,7 +172,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) + } + #endif + } +- setsbufP(&ctx->sb, p); ++ ctx->sb.w = p; + } + } + +@@ -195,7 +206,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) + p = lj_strfmt_wuleb128(p, o->u32.hi); + } + } +- setsbufP(&ctx->sb, p); ++ ctx->sb.w = p; + } + + /* Write bytecode instructions. */ +@@ -219,10 +230,7 @@ static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) + q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); + } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { + BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8); +- BCIns ins = traceref(J, rd)->startins; +- q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL); +- q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins); +- q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins); ++ memcpy(q, &traceref(J, rd)->startins, 4); + } + } + } +@@ -273,7 +281,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) + /* Write bytecode instructions and upvalue refs. */ + p = bcwrite_bytecode(ctx, p, pt); + p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2); +- setsbufP(&ctx->sb, p); ++ ctx->sb.w = p; + + /* Write constants. */ + bcwrite_kgc(ctx, pt); +@@ -283,16 +291,16 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) + if (sizedbg) { + p = lj_buf_more(&ctx->sb, sizedbg); + p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg); +- setsbufP(&ctx->sb, p); ++ ctx->sb.w = p; + } + + /* Pass buffer to writer function. 
*/ + if (ctx->status == 0) { + MSize n = sbuflen(&ctx->sb) - 5; + MSize nn = (lj_fls(n)+8)*9 >> 6; +- char *q = sbufB(&ctx->sb) + (5 - nn); ++ char *q = ctx->sb.b + (5 - nn); + p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */ +- lua_assert(p == sbufB(&ctx->sb) + 5); ++ lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write"); + ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata); + } + } +@@ -316,8 +324,8 @@ static void bcwrite_header(BCWriteCtx *ctx) + p = lj_strfmt_wuleb128(p, len); + p = lj_buf_wmem(p, name, len); + } +- ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb), +- (MSize)(p - sbufB(&ctx->sb)), ctx->wdata); ++ ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b, ++ (MSize)(p - ctx->sb.b), ctx->wdata); + } + + /* Write footer of bytecode dump. */ +@@ -352,6 +360,9 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, + ctx.wdata = data; + ctx.strip = strip; + ctx.status = 0; ++#ifdef LUA_USE_ASSERT ++ ctx.g = G(L); ++#endif + lj_buf_init(L, &ctx.sb); + status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); + if (status == 0) status = ctx.status; +diff --git a/src/lj_buf.c b/src/lj_buf.c +index 0dfe7f98..d31bd99e 100644 +--- a/src/lj_buf.c ++++ b/src/lj_buf.c +@@ -1,6 +1,6 @@ + /* + ** Buffer handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_buf_c +@@ -20,44 +20,85 @@ static void buf_grow(SBuf *sb, MSize sz) + { + MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz; + char *b; ++ GCSize flag; + if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF; + while (nsz < sz) nsz += nsz; +- b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz); +- setmref(sb->b, b); +- setmref(sb->p, b + len); +- setmref(sb->e, b + nsz); ++ flag = sbufflag(sb); ++ if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */ ++ lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW"); ++ b = (char *)lj_mem_new(sbufL(sb), nsz); ++ setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW); ++ setgcrefnull(sbufX(sb)->cowref); ++ memcpy(b, sb->b, osz); ++ } else { ++ b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz); ++ } ++ if ((flag & SBUF_FLAG_EXT)) { ++ sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */ ++ } ++ /* Adjust buffer pointers. */ ++ sb->b = b; ++ sb->w = b + len; ++ sb->e = b + nsz; ++ if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */ ++ SBuf *bsb = mref(sbufX(sb)->bsb, SBuf); ++ bsb->b = b; ++ bsb->w = b + len; ++ bsb->e = b + nsz; ++ } + } + + LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) + { +- lua_assert(sz > sbufsz(sb)); ++ lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow"); + if (LJ_UNLIKELY(sz > LJ_MAX_BUF)) + lj_err_mem(sbufL(sb)); + buf_grow(sb, sz); +- return sbufB(sb); ++ return sb->b; + } + + LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) + { +- MSize len = sbuflen(sb); +- lua_assert(sz > sbufleft(sb)); +- if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) +- lj_err_mem(sbufL(sb)); +- buf_grow(sb, len + sz); +- return sbufP(sb); ++ if (sbufisext(sb)) { ++ SBufExt *sbx = (SBufExt *)sb; ++ MSize len = sbufxlen(sbx); ++ if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) ++ lj_err_mem(sbufL(sbx)); ++ if (len + sz > sbufsz(sbx)) { /* Must grow. */ ++ buf_grow((SBuf *)sbx, len + sz); ++ } else if (sbufxslack(sbx) < (sbufsz(sbx) >> 3)) { ++ /* Also grow to avoid excessive compactions, if slack < size/8. 
*/ ++ buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */ ++ return sbx->w; ++ } ++ if (sbx->r != sbx->b) { /* Compact by moving down. */ ++ memmove(sbx->b, sbx->r, len); ++ sbx->r = sbx->b; ++ sbx->w = sbx->b + len; ++ lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact"); ++ } ++ } else { ++ MSize len = sbuflen(sb); ++ lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow"); ++ if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) ++ lj_err_mem(sbufL(sb)); ++ buf_grow(sb, len + sz); ++ } ++ return sb->w; + } + + void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb) + { +- char *b = sbufB(sb); +- MSize osz = (MSize)(sbufE(sb) - b); ++ char *b = sb->b; ++ MSize osz = (MSize)(sb->e - b); + if (osz > 2*LJ_MIN_SBUF) { +- MSize n = (MSize)(sbufP(sb) - b); ++ MSize n = (MSize)(sb->w - b); + b = lj_mem_realloc(L, b, osz, (osz >> 1)); +- setmref(sb->b, b); +- setmref(sb->p, b + n); +- setmref(sb->e, b + (osz >> 1)); ++ sb->b = b; ++ sb->w = b + n; ++ sb->e = b + (osz >> 1); + } ++ lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt"); + } + + char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) +@@ -67,30 +108,62 @@ char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) + return lj_buf_need(sb, sz); + } + ++#if LJ_HASBUFFER && LJ_HASJIT ++void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref) ++{ ++ lua_State *L = sbufL(sbx); ++ lj_bufx_free(L, sbx); ++ lj_bufx_set_cow(L, sbx, p, len); ++ setgcref(sbx->cowref, ref); ++ lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref); ++} ++ ++#if LJ_HASFFI ++MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz) ++{ ++ lj_buf_more((SBuf *)sbx, sz); ++ return sbufleft(sbx); ++} ++#endif ++#endif ++ + /* -- Low-level buffer put operations ------------------------------------- */ + + SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len) + { +- char *p = lj_buf_more(sb, len); +- p = lj_buf_wmem(p, q, len); +- setsbufP(sb, p); ++ char *w = lj_buf_more(sb, len); ++ w = lj_buf_wmem(w, q, len); ++ sb->w = w; + return sb; + } + +-SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) ++#if LJ_HASJIT || LJ_HASFFI ++static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c) + { +- char *p = lj_buf_more(sb, 1); +- *p++ = (char)c; +- setsbufP(sb, p); ++ char *w = lj_buf_more2(sb, 1); ++ *w++ = (char)c; ++ sb->w = w; + return sb; + } + ++SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) ++{ ++ char *w = sb->w; ++ if (LJ_LIKELY(w < sb->e)) { ++ *w++ = (char)c; ++ sb->w = w; ++ return sb; ++ } ++ return lj_buf_putchar2(sb, c); ++} ++#endif ++ + SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) + { + MSize len = s->len; +- char *p = lj_buf_more(sb, len); +- p = lj_buf_wmem(p, strdata(s), len); +- setsbufP(sb, p); ++ char *w = lj_buf_more(sb, len); ++ w = lj_buf_wmem(w, strdata(s), len); ++ sb->w = w; + return sb; + } + +@@ -99,47 +172,47 @@ SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) + SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s) + { + MSize len = s->len; +- char *p = lj_buf_more(sb, len), *e = p+len; ++ char *w = lj_buf_more(sb, len), *e = w+len; + const char *q = strdata(s)+len-1; +- while (p < e) +- *p++ = *q--; +- setsbufP(sb, p); ++ while (w < e) ++ *w++ = *q--; ++ sb->w = w; + return sb; + } + + SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s) + { + MSize len = s->len; +- char *p = lj_buf_more(sb, len), *e = p+len; ++ char *w = lj_buf_more(sb, len), *e = w+len; + const char *q = strdata(s); +- for (; p < e; p++, q++) { ++ for (; w < e; w++, q++) { + uint32_t c = 
*(unsigned char *)q; + #if LJ_TARGET_PPC +- *p = c + ((c >= 'A' && c <= 'Z') << 5); ++ *w = c + ((c >= 'A' && c <= 'Z') << 5); + #else + if (c >= 'A' && c <= 'Z') c += 0x20; +- *p = c; ++ *w = c; + #endif + } +- setsbufP(sb, p); ++ sb->w = w; + return sb; + } + + SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s) + { + MSize len = s->len; +- char *p = lj_buf_more(sb, len), *e = p+len; ++ char *w = lj_buf_more(sb, len), *e = w+len; + const char *q = strdata(s); +- for (; p < e; p++, q++) { ++ for (; w < e; w++, q++) { + uint32_t c = *(unsigned char *)q; + #if LJ_TARGET_PPC +- *p = c - ((c >= 'a' && c <= 'z') << 5); ++ *w = c - ((c >= 'a' && c <= 'z') << 5); + #else + if (c >= 'a' && c <= 'z') c -= 0x20; +- *p = c; ++ *w = c; + #endif + } +- setsbufP(sb, p); ++ sb->w = w; + return sb; + } + +@@ -148,21 +221,21 @@ SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep) + MSize len = s->len; + if (rep > 0 && len) { + uint64_t tlen = (uint64_t)rep * len; +- char *p; ++ char *w; + if (LJ_UNLIKELY(tlen > LJ_MAX_STR)) + lj_err_mem(sbufL(sb)); +- p = lj_buf_more(sb, (MSize)tlen); ++ w = lj_buf_more(sb, (MSize)tlen); + if (len == 1) { /* Optimize a common case. */ + uint32_t c = strdata(s)[0]; +- do { *p++ = c; } while (--rep > 0); ++ do { *w++ = c; } while (--rep > 0); + } else { + const char *e = strdata(s) + len; + do { + const char *q = strdata(s); +- do { *p++ = *q++; } while (q < e); ++ do { *w++ = *q++; } while (q < e); + } while (--rep > 0); + } +- setsbufP(sb, p); ++ sb->w = w; + } + return sb; + } +@@ -173,27 +246,27 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) + if (i <= e) { + for (;;) { + cTValue *o = lj_tab_getint(t, i); +- char *p; ++ char *w; + if (!o) { + badtype: /* Error: bad element type. */ +- setsbufP(sb, (void *)(intptr_t)i); /* Store failing index. */ ++ sb->w = (char *)(intptr_t)i; /* Store failing index. */ + return NULL; + } else if (tvisstr(o)) { + MSize len = strV(o)->len; +- p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); ++ w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); + } else if (tvisint(o)) { +- p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); ++ w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); + } else if (tvisnum(o)) { +- p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); ++ w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); + } else { + goto badtype; + } + if (i++ == e) { +- setsbufP(sb, p); ++ sb->w = w; + break; + } +- if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen); +- setsbufP(sb, p); ++ if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen); ++ sb->w = w; + } + } + return sb; +@@ -203,7 +276,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) + + GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb) + { +- return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb)); ++ return lj_str_new(sbufL(sb), sb->b, sbuflen(sb)); + } + + /* Concatenate two strings. */ +@@ -219,14 +292,14 @@ GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2) + /* Read ULEB128 from buffer. 
*/ + uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp) + { +- const uint8_t *p = (const uint8_t *)*pp; +- uint32_t v = *p++; ++ const uint8_t *w = (const uint8_t *)*pp; ++ uint32_t v = *w++; + if (LJ_UNLIKELY(v >= 0x80)) { + int sh = 0; + v &= 0x7f; +- do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80); ++ do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80); + } +- *pp = (const char *)p; ++ *pp = (const char *)w; + return v; + } + +diff --git a/src/lj_buf.h b/src/lj_buf.h +index a4051694..e2ac922e 100644 +--- a/src/lj_buf.h ++++ b/src/lj_buf.h +@@ -1,6 +1,6 @@ + /* + ** Buffer handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_BUF_H +@@ -10,16 +10,60 @@ + #include "lj_gc.h" + #include "lj_str.h" + +-/* Resizable string buffers. Struct definition in lj_obj.h. */ +-#define sbufB(sb) (mref((sb)->b, char)) +-#define sbufP(sb) (mref((sb)->p, char)) +-#define sbufE(sb) (mref((sb)->e, char)) +-#define sbufL(sb) (mref((sb)->L, lua_State)) +-#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb)))) +-#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb)))) +-#define sbufleft(sb) ((MSize)(sbufE((sb)) - sbufP((sb)))) +-#define setsbufP(sb, q) (setmref((sb)->p, (q))) +-#define setsbufL(sb, l) (setmref((sb)->L, (l))) ++/* Resizable string buffers. */ ++ ++/* The SBuf struct definition is in lj_obj.h: ++** char *w; Write pointer. ++** char *e; End pointer. ++** char *b; Base pointer. ++** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB. ++*/ ++ ++/* Extended string buffer. */ ++typedef struct SBufExt { ++ SBufHeader; ++ union { ++ GCRef cowref; /* Copy-on-write object reference. */ ++ MRef bsb; /* Borrowed string buffer. */ ++ }; ++ char *r; /* Read pointer. */ ++ GCRef dict_str; /* Serialization string dictionary table. */ ++ GCRef dict_mt; /* Serialization metatable dictionary table. */ ++ int depth; /* Remaining recursion depth. */ ++} SBufExt; ++ ++#define sbufsz(sb) ((MSize)((sb)->e - (sb)->b)) ++#define sbuflen(sb) ((MSize)((sb)->w - (sb)->b)) ++#define sbufleft(sb) ((MSize)((sb)->e - (sb)->w)) ++#define sbufxlen(sbx) ((MSize)((sbx)->w - (sbx)->r)) ++#define sbufxslack(sbx) ((MSize)((sbx)->r - (sbx)->b)) ++ ++#define SBUF_MASK_FLAG (7) ++#define SBUF_MASK_L (~(GCSize)SBUF_MASK_FLAG) ++#define SBUF_FLAG_EXT 1 /* Extended string buffer. */ ++#define SBUF_FLAG_COW 2 /* Copy-on-write buffer. */ ++#define SBUF_FLAG_BORROW 4 /* Borrowed string buffer. 
*/ ++ ++#define sbufL(sb) \ ++ ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L)) ++#define setsbufL(sb, l) (setmref((sb)->L, (l))) ++#define setsbufXL(sb, l, flag) \ ++ (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag))) ++#define setsbufXL_(sb, l) \ ++ (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG))) ++ ++#define sbufflag(sb) (mrefu((sb)->L)) ++#define sbufisext(sb) (sbufflag((sb)) & SBUF_FLAG_EXT) ++#define sbufiscow(sb) (sbufflag((sb)) & SBUF_FLAG_COW) ++#define sbufisborrow(sb) (sbufflag((sb)) & SBUF_FLAG_BORROW) ++#define sbufiscoworborrow(sb) (sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW)) ++#define sbufX(sb) \ ++ (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb)) ++#define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag))) ++ ++#define tvisbuf(o) \ ++ (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER) ++#define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o)))) + + /* Buffer management */ + LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); +@@ -30,12 +74,12 @@ LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz); + static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb) + { + setsbufL(sb, L); +- setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL); ++ sb->w = sb->e = sb->b = NULL; + } + + static LJ_AINLINE void lj_buf_reset(SBuf *sb) + { +- setmrefr(sb->p, sb->b); ++ sb->w = sb->b; + } + + static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L) +@@ -48,26 +92,77 @@ static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L) + + static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb) + { +- lj_mem_free(g, sbufB(sb), sbufsz(sb)); ++ lj_assertG(!sbufisext(sb), "bad free of SBufExt"); ++ lj_mem_free(g, sb->b, sbufsz(sb)); + } + + static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz) + { + if (LJ_UNLIKELY(sz > sbufsz(sb))) + return lj_buf_need2(sb, sz); +- return sbufB(sb); ++ return sb->b; + } + + static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz) + { + if (LJ_UNLIKELY(sz > sbufleft(sb))) + return lj_buf_more2(sb, sz); +- return sbufP(sb); ++ return sb->w; ++} ++ ++/* Extended buffer management */ ++static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx) ++{ ++ memset(sbx, 0, sizeof(SBufExt)); ++ setsbufXL(sbx, L, SBUF_FLAG_EXT); ++} ++ ++static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb) ++{ ++ setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW); ++ setmref(sbx->bsb, sb); ++ sbx->r = sbx->w = sbx->b = sb->b; ++ sbx->e = sb->e; ++} ++ ++static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx, ++ const char *p, MSize len) ++{ ++ setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW); ++ sbx->r = sbx->b = (char *)p; ++ sbx->w = sbx->e = (char *)p + len; ++} ++ ++static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx) ++{ ++ if (sbufiscow(sbx)) { ++ setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW)); ++ setgcrefnull(sbx->cowref); ++ sbx->b = sbx->e = NULL; ++ } ++ sbx->r = sbx->w = sbx->b; + } + ++static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx) ++{ ++ if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx)); ++ setsbufXL(sbx, L, SBUF_FLAG_EXT); ++ setgcrefnull(sbx->cowref); ++ sbx->r = sbx->w = sbx->b = sbx->e = NULL; ++} ++ ++#if LJ_HASBUFFER && LJ_HASJIT ++LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o); ++#if LJ_HASFFI ++LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz); ++#endif ++#endif ++ + /* Low-level buffer put operations 
*/ + LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len); ++#if LJ_HASJIT || LJ_HASFFI + LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c); ++#endif + LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s); + + static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) +@@ -77,9 +172,9 @@ static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) + + static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c) + { +- char *p = lj_buf_more(sb, 1); +- *p++ = (char)c; +- setsbufP(sb, p); ++ char *w = lj_buf_more(sb, 1); ++ *w++ = (char)c; ++ sb->w = w; + } + + /* High-level buffer put operations */ +@@ -97,7 +192,7 @@ LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp); + + static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb) + { +- return lj_str_new(L, sbufB(sb), sbuflen(sb)); ++ return lj_str_new(L, sb->b, sbuflen(sb)); + } + + #endif +diff --git a/src/lj_carith.c b/src/lj_carith.c +index 218abd26..dc745a37 100644 +--- a/src/lj_carith.c ++++ b/src/lj_carith.c +@@ -1,6 +1,6 @@ + /* + ** C data arithmetic. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -122,7 +122,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm) + setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); + return 1; + } else { +- lua_assert(mm == MM_le); ++ lj_assertL(mm == MM_le, "bad metamethod %d", mm); + setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); + return 1; + } +@@ -208,7 +208,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm) + *up = lj_carith_powu64(u0, u1); + break; + case MM_unm: *up = (uint64_t)-(int64_t)u0; break; +- default: lua_assert(0); break; ++ default: ++ lj_assertL(0, "bad metamethod %d", mm); ++ break; + } + lj_gc_check(L); + return 1; +@@ -265,7 +267,7 @@ int lj_carith_op(lua_State *L, MMS mm) + { + CTState *cts = ctype_cts(L); + CDArith ca; +- if (carith_checkarg(L, cts, &ca)) { ++ if (carith_checkarg(L, cts, &ca) && mm != MM_len && mm != MM_concat) { + if (carith_int64(L, cts, &ca, mm) || carith_ptr(L, cts, &ca, mm)) { + copyTV(L, &G(L)->tmptv2, L->top-1); /* Remember for trace recorder. */ + return 1; +@@ -301,7 +303,9 @@ uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op) + case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break; + case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break; + case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break; +- default: lua_assert(0); break; ++ default: ++ lj_assertX(0, "bad shift op %d", op); ++ break; + } + return x; + } +@@ -347,7 +351,6 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) + } + } + +- + /* -- 64 bit integer arithmetic helpers ----------------------------------- */ + + #if LJ_32 && LJ_HASJIT +diff --git a/src/lj_carith.h b/src/lj_carith.h +index 67d976bf..2fa5c657 100644 +--- a/src/lj_carith.h ++++ b/src/lj_carith.h +@@ -1,6 +1,6 @@ + /* + ** C data arithmetic. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CARITH_H +diff --git a/src/lj_ccall.c b/src/lj_ccall.c +index 5c252e5b..3c029823 100644 +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -1,6 +1,6 @@ + /* + ** FFI C call handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -334,20 +334,21 @@ + isfp = sz == 2*sizeof(float) ? 2 : 1; + + #define CCALL_HANDLE_REGARG \ +- if (LJ_TARGET_IOS && isva) { \ ++ if (LJ_TARGET_OSX && isva) { \ + /* IOS: All variadic arguments are on the stack. */ \ + } else if (isfp) { /* Try to pass argument in FPRs. */ \ +- int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \ ++ int n2 = ctype_isvector(d->info) ? 1 : \ ++ isfp == 1 ? n : (d->size >> (4-isfp)); \ + if (nfpr + n2 <= CCALL_NARG_FPR) { \ + dp = &cc->fpr[nfpr]; \ + nfpr += n2; \ + goto done; \ + } else { \ + nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ +- if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ ++ if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ + } \ + } else { /* Try to pass argument in GPRs. */ \ +- if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ ++ if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ + ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ + if (ngpr + n <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ +@@ -355,7 +356,7 @@ + goto done; \ + } else { \ + ngpr = maxgpr; /* Prevent reordering. */ \ +- if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ ++ if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ + } \ + } + +@@ -387,6 +388,25 @@ + #define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex by value in 2 or 4 GPRs. */ + ++#define CCALL_HANDLE_GPR \ ++ /* Try to pass argument in GPRs. */ \ ++ if (n > 1) { \ ++ /* int64_t or complex (float). */ \ ++ lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \ ++ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ ++ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ ++ else if (ngpr + n > maxgpr) \ ++ ngpr = maxgpr; /* Prevent reordering. */ \ ++ } \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ ++#if LJ_ABI_SOFTFP ++#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR ++#else + #define CCALL_HANDLE_REGARG \ + if (isfp) { /* Try to pass argument in FPRs. */ \ + if (nfpr + 1 <= CCALL_NARG_FPR) { \ +@@ -395,24 +415,16 @@ + d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ + goto done; \ + } \ +- } else { /* Try to pass argument in GPRs. */ \ +- if (n > 1) { \ +- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ +- if (ctype_isinteger(d->info)) \ +- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ +- else if (ngpr + n > maxgpr) \ +- ngpr = maxgpr; /* Prevent reordering. */ \ +- } \ +- if (ngpr + n <= maxgpr) { \ +- dp = &cc->gpr[ngpr]; \ +- ngpr += n; \ +- goto done; \ +- } \ ++ } else { \ ++ CCALL_HANDLE_GPR \ + } ++#endif + ++#if !LJ_ABI_SOFTFP + #define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ ++#endif + + #elif LJ_TARGET_MIPS32 + /* -- MIPS o32 calling conventions ---------------------------------------- */ +@@ -631,7 +643,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs) + ccall_classify_struct(cts, ct, rcl, ofs); + } else { + int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; +- lua_assert(ctype_hassize(ct->info)); ++ lj_assertCTS(ctype_hassize(ct->info), ++ "classify ctype %08x without size", ct->info); + if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ + rcl[(ofs >= 8)] |= cl; + } +@@ -656,12 +669,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs) + } + + /* Try to split up a small struct into registers. 
*/ +-static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl) ++static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl) + { + MSize ngpr = cc->ngpr, nfpr = cc->nfpr; + uint32_t i; ++ UNUSED(cts); + for (i = 0; i < 2; i++) { +- lua_assert(!(rcl[i] & CCALL_RCL_MEM)); ++ lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg"); + if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ + if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */ + cc->gpr[ngpr++] = dp[i]; +@@ -682,7 +696,8 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl, + dp[0] = dp[1] = 0; + /* Convert to temp. struct. */ + lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); +- if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */ ++ if (ccall_struct_reg(cc, cts, dp, rcl)) { ++ /* Register overflow? Pass on stack. */ + MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; + if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ + cc->nsp = nsp + n; +@@ -838,7 +853,8 @@ noth: /* Not a homogeneous float/double aggregate. */ + return 0; /* Struct is in GPRs. */ + } + +-void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) ++static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, ++ int ft) + { + if (LJ_ABI_SOFTFP ? ft : + ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { +@@ -978,7 +994,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + if (fid) { /* Get argument type from field. */ + CType *ctf = ctype_get(cts, fid); + fid = ctf->sib; +- lua_assert(ctype_isfield(ctf->info)); ++ lj_assertL(ctype_isfield(ctf->info), "field expected"); + did = ctype_cid(ctf->info); + } else { + if (!(ct->info & CTF_VARARG)) +@@ -1080,7 +1096,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + } + if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ + +-#if LJ_TARGET_X64 || LJ_TARGET_PPC ++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) + cc->nfpr = nfpr; /* Required for vararg functions. */ + #endif + cc->nsp = nsp; +@@ -1126,7 +1142,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, + CCALL_HANDLE_RET + #endif + /* No reference types end up here, so there's no need for the CTypeID. */ +- lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info))); ++ lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)), ++ "unexpected reference ctype"); + return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); + } + +@@ -1150,7 +1167,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) + lj_vm_ffi_call(&cc); + if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ + TValue tv; +- setlightudV(&tv, (void *)cc.func); ++ tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000); + setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); + } + ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ +diff --git a/src/lj_ccall.h b/src/lj_ccall.h +index 59f66481..aae5777b 100644 +--- a/src/lj_ccall.h ++++ b/src/lj_ccall.h +@@ -1,6 +1,6 @@ + /* + ** FFI C call handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CCALL_H +@@ -86,9 +86,9 @@ typedef union FPRArg { + #elif LJ_TARGET_PPC + + #define CCALL_NARG_GPR 8 +-#define CCALL_NARG_FPR 8 ++#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 
0 : 8) + #define CCALL_NRET_GPR 4 /* For complex double. */ +-#define CCALL_NRET_FPR 1 ++#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) + #define CCALL_SPS_EXTRA 4 + #define CCALL_SPS_FREE 0 + +diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c +index 846827b1..80d738c6 100644 +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -1,6 +1,6 @@ + /* + ** FFI C callback handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -107,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p) + /* Initialize machine code for callback function pointers. */ + #if LJ_OS_NOJIT + /* Disabled callback support. */ +-#define callback_mcode_init(g, p) UNUSED(p) ++#define callback_mcode_init(g, p) (p) + #elif LJ_TARGET_X86ORX64 +-static void callback_mcode_init(global_State *g, uint8_t *page) ++static void *callback_mcode_init(global_State *g, uint8_t *page) + { + uint8_t *p = page; + uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; +@@ -143,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page) + *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); + } + } +- lua_assert(p - page <= CALLBACK_MCODE_SIZE); ++ return p; + } + #elif LJ_TARGET_ARM +-static void callback_mcode_init(global_State *g, uint32_t *page) ++static void *callback_mcode_init(global_State *g, uint32_t *page) + { + uint32_t *p = page; + void *target = (void *)lj_vm_ffi_callback; +@@ -165,10 +165,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + *p = ARMI_B | ((page-p-2) & 0x00ffffffu); + p++; + } +- lua_assert(p - page <= CALLBACK_MCODE_SIZE); ++ return p; + } + #elif LJ_TARGET_ARM64 +-static void callback_mcode_init(global_State *g, uint32_t *page) ++static void *callback_mcode_init(global_State *g, uint32_t *page) + { + uint32_t *p = page; + void *target = (void *)lj_vm_ffi_callback; +@@ -185,10 +185,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); + p++; + } +- lua_assert(p - page <= CALLBACK_MCODE_SIZE); ++ return p; + } + #elif LJ_TARGET_PPC +-static void callback_mcode_init(global_State *g, uint32_t *page) ++static void *callback_mcode_init(global_State *g, uint32_t *page) + { + uint32_t *p = page; + void *target = (void *)lj_vm_ffi_callback; +@@ -204,10 +204,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); + p++; + } +- lua_assert(p - page <= CALLBACK_MCODE_SIZE); ++ return p; + } + #elif LJ_TARGET_MIPS +-static void callback_mcode_init(global_State *g, uint32_t *page) ++static void *callback_mcode_init(global_State *g, uint32_t *page) + { + uint32_t *p = page; + uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; +@@ -236,11 +236,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + p++; + *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; + } +- lua_assert(p - page <= CALLBACK_MCODE_SIZE); ++ return p; + } + #else + /* Missing support for this architecture. 
*/ +-#define callback_mcode_init(g, p) UNUSED(p) ++#define callback_mcode_init(g, p) (p) + #endif + + /* -- Machine code management --------------------------------------------- */ +@@ -256,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + #ifndef MAP_ANONYMOUS + #define MAP_ANONYMOUS MAP_ANON + #endif ++#ifdef PROT_MPROTECT ++#define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC)) ++#else ++#define CCPROT_CREATE 0 ++#endif + + #endif + +@@ -263,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + static void callback_mcode_new(CTState *cts) + { + size_t sz = (size_t)CALLBACK_MCODE_SIZE; +- void *p; ++ void *p, *pe; + if (CALLBACK_MAX_SLOT == 0) + lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); + #if LJ_TARGET_WINDOWS +- p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); ++ p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + if (!p) + lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); + #elif LJ_TARGET_POSIX +- p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, ++ p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (p == MAP_FAILED) + lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); +@@ -280,12 +285,15 @@ static void callback_mcode_new(CTState *cts) + p = lj_mem_new(cts->L, sz); + #endif + cts->cb.mcode = p; +- callback_mcode_init(cts->g, p); ++ pe = callback_mcode_init(cts->g, p); ++ UNUSED(pe); ++ lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz, ++ "miscalculated CALLBACK_MAX_SLOT"); + lj_mcode_sync(p, (char *)p + sz); + #if LJ_TARGET_WINDOWS + { + DWORD oprot; +- VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); ++ LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); + } + #elif LJ_TARGET_POSIX + mprotect(p, sz, (PROT_READ|PROT_EXEC)); +@@ -406,7 +414,7 @@ void lj_ccallback_mcode_free(CTState *cts) + nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ + } \ + } else { \ +- if (!LJ_TARGET_IOS && n > 1) \ ++ if (!LJ_TARGET_OSX && n > 1) \ + ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ + if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ +@@ -419,6 +427,24 @@ void lj_ccallback_mcode_free(CTState *cts) + + #elif LJ_TARGET_PPC + ++#define CALLBACK_HANDLE_GPR \ ++ if (n > 1) { \ ++ lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ ++ ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \ ++ "bad GPR type"); \ ++ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ ++ } \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } ++ ++#if LJ_ABI_SOFTFP ++#define CALLBACK_HANDLE_REGARG \ ++ CALLBACK_HANDLE_GPR \ ++ UNUSED(isfp); ++#else + #define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr + 1 <= CCALL_NARG_FPR) { \ +@@ -427,20 +453,15 @@ void lj_ccallback_mcode_free(CTState *cts) + goto done; \ + } \ + } else { /* Try to pass argument in GPRs. */ \ +- if (n > 1) { \ +- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ +- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ +- } \ +- if (ngpr + n <= maxgpr) { \ +- sp = &cts->cb.gpr[ngpr]; \ +- ngpr += n; \ +- goto done; \ +- } \ ++ CALLBACK_HANDLE_GPR \ + } ++#endif + ++#if !LJ_ABI_SOFTFP + #define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + *(double *)dp = *(float *)dp; /* FPRs always hold doubles. 
*/ ++#endif + + #elif LJ_TARGET_MIPS32 + +@@ -533,13 +554,13 @@ static void callback_conv_args(CTState *cts, lua_State *L) + if (LJ_FR2) { + (o++)->u64 = LJ_CONT_FFI_CALLBACK; + (o++)->u64 = rid; +- o++; + } else { + o->u32.lo = LJ_CONT_FFI_CALLBACK; + o->u32.hi = rid; + o++; + } + setframe_gc(o, obj2gco(fn), fntp); ++ if (LJ_FR2) o++; + setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT); + L->top = L->base = ++o; + if (!ct) +@@ -567,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) + CTSize sz; + int isfp; + MSize n; +- lua_assert(ctype_isfield(ctf->info)); ++ lj_assertCTS(ctype_isfield(ctf->info), "field expected"); + cta = ctype_rawchild(cts, ctf); + isfp = ctype_isfp(cta->info); + sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); +@@ -659,7 +680,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) + { + lua_State *L = cts->L; + global_State *g = cts->g; +- lua_assert(L != NULL); ++ lj_assertG(L != NULL, "uninitialized cts->L in callback"); + if (tvref(g->jit_base)) { + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); + if (g->panic) g->panic(L); +@@ -744,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct) + CType *ctf = ctype_get(cts, fid); + if (!ctype_isattrib(ctf->info)) { + CType *cta; +- lua_assert(ctype_isfield(ctf->info)); ++ lj_assertCTS(ctype_isfield(ctf->info), "field expected"); + cta = ctype_rawchild(cts, ctf); + if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || + (ctype_isnum(cta->info) && cta->size <= 8)) || +diff --git a/src/lj_ccallback.h b/src/lj_ccallback.h +index a8cdad38..9506ce42 100644 +--- a/src/lj_ccallback.h ++++ b/src/lj_ccallback.h +@@ -1,6 +1,6 @@ + /* + ** FFI C callback handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CCALLBACK_H +diff --git a/src/lj_cconv.c b/src/lj_cconv.c +index 13b8230d..613f66e2 100644 +--- a/src/lj_cconv.c ++++ b/src/lj_cconv.c +@@ -1,6 +1,6 @@ + /* + ** C type conversions. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -8,6 +8,7 @@ + #if LJ_HASFFI + + #include "lj_err.h" ++#include "lj_buf.h" + #include "lj_tab.h" + #include "lj_ctype.h" + #include "lj_cdata.h" +@@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, + CTInfo dinfo = d->info, sinfo = s->info; + void *tmpptr; + +- lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo)); +- lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo)); ++ lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo), ++ "unresolved enum"); ++ lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo), ++ "unstripped attribute"); + + if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) + goto err_conv; + + /* Some basic sanity checks. 
*/ +- lua_assert(!ctype_isnum(dinfo) || dsize > 0); +- lua_assert(!ctype_isnum(sinfo) || ssize > 0); +- lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4); +- lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4); +- lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize); +- lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize); ++ lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type"); ++ lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type"); ++ lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4, ++ "bad size for bool type"); ++ lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4, ++ "bad size for bool type"); ++ lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize, ++ "bad size for integer type"); ++ lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize, ++ "bad size for integer type"); + + switch (cconv_idx2(dinfo, sinfo)) { + /* Destination is a bool. */ +@@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, + if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s) + goto err_conv; /* Must be exact same type. */ + copyval: /* Copy value. */ +- lua_assert(dsize == ssize); ++ lj_assertCTS(dsize == ssize, "value copy with different sizes"); + memcpy(dp, sp, dsize); + break; + +@@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, + lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, + (uint8_t *)&o->n, sp, 0); + /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ +- lua_assert(tvisnum(o)); ++ lj_assertCTS(tvisnum(o), "non-canonical NaN passed"); + } + } else { + uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); +@@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, + CTSize sz; + copyval: /* Copy value. */ + sz = s->size; +- lua_assert(sz != CTSIZE_INVALID); ++ lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size"); + /* Attributes are stripped, qualifiers are kept (but mostly ignored). */ + cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); + setcdataV(cts->L, o, cd); +@@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) + CTInfo info = s->info; + CTSize pos, bsz; + uint32_t val; +- lua_assert(ctype_isbitfield(info)); ++ lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); + /* NYI: packed bitfields may cause misaligned reads. */ + switch (ctype_bitcsz(info)) { + case 4: val = *(uint32_t *)sp; break; + case 2: val = *(uint16_t *)sp; break; + case 1: val = *(uint8_t *)sp; break; +- default: lua_assert(0); val = 0; break; ++ default: ++ lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); ++ val = 0; ++ break; + } + /* Check if a packed bitfield crosses a container boundary. */ + pos = ctype_bitpos(info); + bsz = ctype_bitbsz(info); +- lua_assert(pos < 8*ctype_bitcsz(info)); +- lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); ++ lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); ++ lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); + if (pos + bsz > 8*ctype_bitcsz(info)) + lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); + if (!(info & CTF_BOOL)) { +@@ -449,7 +459,7 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) + } + } else { + uint32_t b = (val >> pos) & 1; +- lua_assert(bsz == 1); ++ lj_assertCTS(bsz == 1, "bad bool bitfield size"); + setboolV(o, b); + setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. 
*/ + } +@@ -553,13 +563,15 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, + sid = cdataV(o)->ctypeid; + s = ctype_get(cts, sid); + if (ctype_isref(s->info)) { /* Resolve reference for value. */ +- lua_assert(s->size == CTSIZE_PTR); ++ lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); + sp = *(void **)sp; + sid = ctype_cid(s->info); + } + s = ctype_raw(cts, sid); + if (ctype_isfunc(s->info)) { ++ CTypeID did = ctype_typeid(cts, d); + sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR); ++ d = ctype_get(cts, did); /* cts->tab may have been reallocated. */ + } else { + if (ctype_isenum(s->info)) s = ctype_child(cts, s); + goto doconv; +@@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, + CType *cct = lj_ctype_getfield(cts, d, str, &ofs); + if (!cct || !ctype_isconstval(cct->info)) + goto err_conv; +- lua_assert(d->size == 4); ++ lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */ + sp = (uint8_t *)&cct->size; + sid = ctype_cid(cct->info); + } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */ +@@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, + tmpptr = uddata(ud); + if (ud->udtype == UDTYPE_IO_FILE) + tmpptr = *(void **)tmpptr; ++ else if (ud->udtype == UDTYPE_BUFFER) ++ tmpptr = ((SBufExt *)tmpptr)->r; + } else if (tvislightud(o)) { +- tmpptr = lightudV(o); ++ tmpptr = lightudV(cts->g, o); + } else if (tvisfunc(o)) { + void *p = lj_ccallback_new(cts, d, funcV(o)); + if (p) { +@@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) + CTInfo info = d->info; + CTSize pos, bsz; + uint32_t val, mask; +- lua_assert(ctype_isbitfield(info)); ++ lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); + if ((info & CTF_BOOL)) { + uint8_t tmpbool; +- lua_assert(ctype_bitbsz(info) == 1); ++ lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size"); + lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); + val = tmpbool; + } else { +@@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) + } + pos = ctype_bitpos(info); + bsz = ctype_bitbsz(info); +- lua_assert(pos < 8*ctype_bitcsz(info)); +- lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); ++ lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); ++ lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); + /* Check if a packed bitfield crosses a container boundary. */ + if (pos + bsz > 8*ctype_bitcsz(info)) + lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); +@@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) + case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; + case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; + case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; +- default: lua_assert(0); break; ++ default: ++ lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); ++ break; + } + } + +diff --git a/src/lj_cconv.h b/src/lj_cconv.h +index 0a0b66c9..cd927328 100644 +--- a/src/lj_cconv.h ++++ b/src/lj_cconv.h +@@ -1,6 +1,6 @@ + /* + ** C type conversions. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CCONV_H +@@ -27,13 +27,14 @@ enum { + static LJ_AINLINE uint32_t cconv_idx(CTInfo info) + { + uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. 
*/ +- lua_assert(ctype_type(info) <= CT_MAYCONVERT); ++ lj_assertX(ctype_type(info) <= CT_MAYCONVERT, ++ "cannot convert ctype %08x", info); + #if LJ_64 + idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); + #else + idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); + #endif +- lua_assert(idx < 8); ++ lj_assertX(idx < 8, "cannot convert ctype %08x", info); + return idx; + } + +diff --git a/src/lj_cdata.c b/src/lj_cdata.c +index 68e16d76..ffc31078 100644 +--- a/src/lj_cdata.c ++++ b/src/lj_cdata.c +@@ -1,6 +1,6 @@ + /* + ** C data management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -35,7 +35,7 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) + uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); + uintptr_t almask = (1u << align) - 1u; + GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); +- lua_assert((char *)cd - p < 65536); ++ lj_assertL((char *)cd - p < 65536, "excessive cdata alignment"); + cdatav(cd)->offset = (uint16_t)((char *)cd - p); + cdatav(cd)->extra = extra; + cdatav(cd)->len = sz; +@@ -76,8 +76,8 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) + } else if (LJ_LIKELY(!cdataisv(cd))) { + CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); + CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; +- lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || +- ctype_isextern(ct->info)); ++ lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || ++ ctype_isextern(ct->info), "free of ctype without a size"); + lj_mem_free(g, cd, sizeof(GCcdata) + sz); + } else { + lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); +@@ -115,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, + + /* Resolve reference for cdata object. */ + if (ctype_isref(ct->info)) { +- lua_assert(ct->size == CTSIZE_PTR); ++ lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized"); + p = *(uint8_t **)p; + ct = ctype_child(cts, ct); + } +@@ -126,7 +126,8 @@ collect_attrib: + if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; + ct = ctype_child(cts, ct); + } +- lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ ++ /* Interning rejects refs to refs. */ ++ lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref"); + + if (tvisint(key)) { + idx = (ptrdiff_t)intV(key); +@@ -212,7 +213,8 @@ collect_attrib: + static void cdata_getconst(CTState *cts, TValue *o, CType *ct) + { + CType *ctt = ctype_child(cts, ct); +- lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); ++ lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, ++ "only 32 bit const supported"); /* NYI */ + /* Constants are already zero-extended/sign-extended to 32 bits. */ + if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) + setnumV(o, (lua_Number)(uint32_t)ct->size); +@@ -233,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp) + } + + /* Get child type of pointer/array/field. */ +- lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info)); ++ lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info), ++ "pointer or field expected"); + sid = ctype_cid(s->info); + s = ctype_get(cts, sid); + + /* Resolve reference for field. 
*/ + if (ctype_isref(s->info)) { +- lua_assert(s->size == CTSIZE_PTR); ++ lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); + sp = *(uint8_t **)sp; + sid = ctype_cid(s->info); + s = ctype_get(cts, sid); +@@ -266,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) + } + + /* Get child type of pointer/array/field. */ +- lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info)); ++ lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info), ++ "pointer or field expected"); + d = ctype_child(cts, d); + + /* Resolve reference for field. */ + if (ctype_isref(d->info)) { +- lua_assert(d->size == CTSIZE_PTR); ++ lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized"); + dp = *(uint8_t **)dp; + d = ctype_child(cts, d); + } +@@ -286,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) + d = ctype_child(cts, d); + } + +- lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info)); ++ lj_assertCTS(ctype_hassize(d->info), "store to ctype without size"); ++ lj_assertCTS(!ctype_isvoid(d->info), "store to void type"); + + if (((d->info|qual) & CTF_CONST)) { + err_const: +diff --git a/src/lj_cdata.h b/src/lj_cdata.h +index 5bb0f5dc..b93bec86 100644 +--- a/src/lj_cdata.h ++++ b/src/lj_cdata.h +@@ -1,6 +1,6 @@ + /* + ** C data management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CDATA_H +@@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) + if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ + return ((void *)(uintptr_t)*(uint32_t *)p); + } else { +- lua_assert(sz == CTSIZE_PTR); ++ lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); + return *(void **)p; + } + } +@@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v) + if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ + *(uint32_t *)p = (uint32_t)(uintptr_t)v; + } else { +- lua_assert(sz == CTSIZE_PTR); ++ lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); + *(void **)p = (void *)v; + } + } +@@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz) + GCcdata *cd; + #ifdef LUA_USE_ASSERT + CType *ct = ctype_raw(cts, id); +- lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz); ++ lj_assertCTS((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz, ++ "inconsistent size of fixed-size cdata alloc"); + #endif + cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); + cd->gct = ~LJ_TCDATA; +diff --git a/src/lj_clib.c b/src/lj_clib.c +index 61426590..d8636a48 100644 +--- a/src/lj_clib.c ++++ b/src/lj_clib.c +@@ -1,6 +1,6 @@ + /* + ** FFI C library loader. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
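
The lj_cdata_newv hunk above over-allocates, rounds the payload address up to the requested alignment, and records how far it moved in a 16 bit offset field; the assertion (now carrying the "excessive cdata alignment" message) checks that this distance fits that field. A minimal standalone sketch of the same over-allocate/align/record-offset pattern, using plain malloc instead of LuaJIT's GC allocator (aligned_alloc_ofs and aligned_free_ofs are invented names for illustration):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Over-allocate, align the payload, and stash the back-offset just
    ** below it so the original pointer can be recovered for free().
    ** align must be a power of two. */
    static void *aligned_alloc_ofs(size_t size, size_t align)
    {
      uint8_t *raw = malloc(size + align - 1 + sizeof(uint16_t));
      uintptr_t payload, almask = (uintptr_t)align - 1;
      if (!raw) return NULL;
      payload = ((uintptr_t)raw + sizeof(uint16_t) + almask) & ~almask;
      ((uint16_t *)payload)[-1] = (uint16_t)(payload - (uintptr_t)raw);
      return (void *)payload;
    }

    static void aligned_free_ofs(void *p)
    {
      free((uint8_t *)p - ((uint16_t *)p)[-1]);
    }

    int main(void)
    {
      void *p = aligned_alloc_ofs(100, 64);
      printf("%p aligned: %d\n", p, (int)(((uintptr_t)p & 63) == 0));
      aligned_free_ofs(p);
      return 0;
    }

Storing the offset just below the payload mirrors how lj_cdata_newv keeps it in cdatav(cd)->offset.
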
See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -119,12 +119,13 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) + RTLD_LAZY | (global?RTLD_GLOBAL:RTLD_LOCAL)); + if (!h) { + const char *e, *err = dlerror(); +- if (*err == '/' && (e = strchr(err, ':')) && ++ if (err && *err == '/' && (e = strchr(err, ':')) && + (name = clib_resolve_lds(L, strdata(lj_str_new(L, err, e-err))))) { + h = dlopen(name, RTLD_LAZY | (global?RTLD_GLOBAL:RTLD_LOCAL)); + if (h) return h; + err = dlerror(); + } ++ if (!err) err = "dlopen failed"; + lj_err_callermsg(L, err); + } + return h; +@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); + /* Default libraries. */ + enum { + CLIB_HANDLE_EXE, ++#if !LJ_TARGET_UWP + CLIB_HANDLE_DLL, + CLIB_HANDLE_CRT, + CLIB_HANDLE_KERNEL32, + CLIB_HANDLE_USER32, + CLIB_HANDLE_GDI32, ++#endif + CLIB_HANDLE_MAX + }; + +@@ -208,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name) + static void *clib_loadlib(lua_State *L, const char *name, int global) + { + DWORD oldwerr = GetLastError(); +- void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0); ++ void *h = LJ_WIN_LOADLIBA(clib_extname(L, name)); + if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); + SetLastError(oldwerr); + UNUSED(global); +@@ -218,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) + static void clib_unloadlib(CLibrary *cl) + { + if (cl->handle == CLIB_DEFHANDLE) { ++#if !LJ_TARGET_UWP + MSize i; + for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { + void *h = clib_def_handle[i]; +@@ -226,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl) + FreeLibrary((HINSTANCE)h); + } + } ++#endif + } else if (cl->handle) { + FreeLibrary((HINSTANCE)cl->handle); + } + } + ++#if LJ_TARGET_UWP ++EXTERN_C IMAGE_DOS_HEADER __ImageBase; ++#endif ++ + static void *clib_getsym(CLibrary *cl, const char *name) + { + void *p = NULL; +@@ -239,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) + for (i = 0; i < CLIB_HANDLE_MAX; i++) { + HINSTANCE h = (HINSTANCE)clib_def_handle[i]; + if (!(void *)h) { /* Resolve default library handles (once). 
*/ ++#if LJ_TARGET_UWP ++ h = (HINSTANCE)&__ImageBase; ++#else + switch (i) { + case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; + case CLIB_HANDLE_DLL: +@@ -249,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name) + GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (const char *)&_fmode, &h); + break; +- case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break; +- case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break; +- case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break; ++ case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break; ++ case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break; ++ case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break; + } + if (!h) continue; ++#endif + clib_def_handle[i] = (void *)h; + } + p = (void *)GetProcAddress(h, name); +@@ -337,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) + lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); + if (ctype_isconstval(ct->info)) { + CType *ctt = ctype_child(cts, ct); +- lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); ++ lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, ++ "only 32 bit const supported"); /* NYI */ + if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) + setnumV(tv, (lua_Number)(uint32_t)ct->size); + else +@@ -349,7 +363,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) + #endif + void *p = clib_getsym(cl, sym); + GCcdata *cd; +- lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); ++ lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info), ++ "unexpected ctype %08x in clib", ct->info); + #if LJ_TARGET_X86 && LJ_ABI_WIN + /* Retry with decorated name for fastcall/stdcall functions. */ + if (!p && ctype_isfunc(ct->info)) { +@@ -372,6 +387,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) + cd = lj_cdata_new(cts, id, CTSIZE_PTR); + *(void **)cdataptr(cd) = p; + setcdataV(L, tv, cd); ++ lj_gc_anybarriert(L, cl->cache); + } + } + return tv; +diff --git a/src/lj_clib.h b/src/lj_clib.h +index fcc9dac5..848543d5 100644 +--- a/src/lj_clib.h ++++ b/src/lj_clib.h +@@ -1,6 +1,6 @@ + /* + ** FFI C library loader. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CLIB_H +diff --git a/src/lj_cparse.c b/src/lj_cparse.c +index 83cfd112..efe80759 100644 +--- a/src/lj_cparse.c ++++ b/src/lj_cparse.c +@@ -1,6 +1,6 @@ + /* + ** C declaration parser. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -28,6 +28,30 @@ + ** If in doubt, please check the input against your favorite C compiler. + */ + ++#ifdef LUA_USE_ASSERT ++#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__)) ++#else ++#define lj_assertCP(c, ...) ((void)cp) ++#endif ++ ++/* -- Miscellaneous ------------------------------------------------------- */ ++ ++/* Match string against a C literal. */ ++#define cp_str_is(str, k) \ ++ ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1)) ++ ++/* Check string against a linear list of matches. 
*/ ++int lj_cparse_case(GCstr *str, const char *match) ++{ ++ MSize len; ++ int n; ++ for (n = 0; (len = (MSize)*match++); n++, match += len) { ++ if (str->len == len && !memcmp(match, strdata(str), len)) ++ return n; ++ } ++ return -1; ++} ++ + /* -- C lexer ------------------------------------------------------------- */ + + /* C lexer token names. */ +@@ -43,7 +67,7 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em); + + static const char *cp_tok2str(CPState *cp, CPToken tok) + { +- lua_assert(tok < CTOK_FIRSTDECL); ++ lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok); + if (tok > CTOK_OFS) + return ctoknames[tok-CTOK_OFS-1]; + else if (!lj_char_iscntrl(tok)) +@@ -109,9 +133,9 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...) + tokstr = NULL; + } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || + tok >= CTOK_FIRSTDECL) { +- if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$'); ++ if (cp->sb.w == cp->sb.b) cp_save(cp, '$'); + cp_save(cp, '\0'); +- tokstr = sbufB(&cp->sb); ++ tokstr = cp->sb.b; + } else { + tokstr = cp_tok2str(cp, tok); + } +@@ -151,7 +175,8 @@ static CPToken cp_number(CPState *cp) + TValue o; + do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); + cp_save(cp, '\0'); +- fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C); ++ fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1, ++ &o, STRSCAN_OPT_C); + if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; + else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; + else if (!(cp->mode & CPARSE_MODE_SKIP)) +@@ -254,7 +279,7 @@ static CPToken cp_string(CPState *cp) + return CTOK_STRING; + } else { + if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '''); +- cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb); ++ cp->val.i32 = (int32_t)(char)*cp->sb.b; + cp->val.id = CTID_INT32; + return CTOK_INTEGER; + } +@@ -373,7 +398,7 @@ static void cp_init(CPState *cp) + cp->curpack = 0; + cp->packstack[0] = 255; + lj_buf_init(cp->L, &cp->sb); +- lua_assert(cp->p != NULL); ++ lj_assertCP(cp->p != NULL, "uninitialized cp->p"); + cp_get(cp); /* Read-ahead first char. */ + cp->tok = 0; + cp->tmask = CPNS_DEFAULT; +@@ -576,28 +601,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + k->id = k2.id > k3.id ? 
k2.id : k3.id; + continue; + } ++ /* fallthrough */ + case 1: + if (cp_opt(cp, CTOK_OROR)) { + cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 2: + if (cp_opt(cp, CTOK_ANDAND)) { + cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 3: + if (cp_opt(cp, '|')) { + cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 4: + if (cp_opt(cp, '^')) { + cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 5: + if (cp_opt(cp, '&')) { + cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 6: + if (cp_opt(cp, CTOK_EQ)) { + cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32; +@@ -606,6 +637,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 7: + if (cp_opt(cp, '<')) { + cp_expr_sub(cp, &k2, 8); +@@ -640,6 +672,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + k->id = CTID_INT32; + continue; + } ++ /* fallthrough */ + case 8: + if (cp_opt(cp, CTOK_SHL)) { + cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32; +@@ -652,6 +685,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + k->u32 = k->u32 >> k2.u32; + continue; + } ++ /* fallthrough */ + case 9: + if (cp_opt(cp, '+')) { + cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32; +@@ -661,6 +695,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri) + } else if (cp_opt(cp, '-')) { + cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result; + } ++ /* fallthrough */ + case 10: + if (cp_opt(cp, '*')) { + cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result; +@@ -824,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) + /* The cid is already part of info for copies of pointers/functions. */ + idx = ct->next; + if (ctype_istypedef(info)) { +- lua_assert(id == 0); ++ lj_assertCP(id == 0, "typedef not at toplevel"); + id = ctype_cid(info); + /* Always refetch info/size, since struct/enum may have been completed. */ + cinfo = ctype_get(cp->cts, id)->info; + csize = ctype_get(cp->cts, id)->size; +- lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo)); ++ lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo), ++ "typedef of bad type"); + } else if (ctype_isfunc(info)) { /* Intern function. */ + CType *fct; + CTypeID fid; +@@ -862,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) + /* Inherit csize/cinfo from original type. */ + } else { + if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ +- lua_assert(id == 0); ++ lj_assertCP(id == 0, "number not at toplevel"); + if (!(info & CTF_BOOL)) { + CTSize msize = ctype_msizeP(decl->attr); + CTSize vsize = ctype_vsizeP(decl->attr); +@@ -917,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) + info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); + info |= (cinfo & CTF_QUAL); /* Inherit qual. 
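
The only change to cp_expr_infix above is the addition of /* fallthrough */ comments: each precedence level in the switch intentionally falls through into the next, and compilers that warn about implicit fallthrough (for example GCC with -Wimplicit-fallthrough) accept such an annotation as a statement of intent. The idiom in isolation (describe() is an invented example, not parser code):

    #include <stdio.h>

    /* Each case does its own work, then deliberately continues
    ** into every case below it. */
    static int describe(int level)
    {
      int n = 0;
      switch (level) {
      case 2:
        n += 100;
        /* fallthrough */
      case 1:
        n += 10;
        /* fallthrough */
      case 0:
        n += 1;
        break;
      default:
        n = -1;
        break;
      }
      return n;
    }

    int main(void)
    {
      printf("%d %d %d\n", describe(0), describe(1), describe(2));  /* 1 11 111 */
      return 0;
    }
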
*/ + } else { +- lua_assert(ctype_isvoid(info)); ++ lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info); + } + csize = size; + cinfo = info+id; +@@ -929,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) + + /* -- C declaration parser ------------------------------------------------ */ + +-#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) +- + /* Reset declaration state to declaration specifier. */ + static void cp_decl_reset(CPDecl *decl) + { +@@ -1059,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) + if (cp->tok == CTOK_IDENT) { + GCstr *attrstr = cp->str; + cp_next(cp); +- switch (attrstr->hash) { +- case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ ++ switch (lj_cparse_case(attrstr, ++ "\007aligned" "\013__aligned__" ++ "\006packed" "\012__packed__" ++ "\004mode" "\010__mode__" ++ "\013vector_size" "\017__vector_size__" ++#if LJ_TARGET_X86 ++ "\007regparm" "\013__regparm__" ++ "\005cdecl" "\011__cdecl__" ++ "\010thiscall" "\014__thiscall__" ++ "\010fastcall" "\014__fastcall__" ++ "\007stdcall" "\013__stdcall__" ++ "\012sseregparm" "\016__sseregparm__" ++#endif ++ )) { ++ case 0: case 1: /* aligned */ + cp_decl_align(cp, decl); + break; +- case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ ++ case 2: case 3: /* packed */ + decl->attr |= CTFP_PACKED; + break; +- case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ ++ case 4: case 5: /* mode */ + cp_decl_mode(cp, decl); + break; +- case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ ++ case 6: case 7: /* vector_size */ + { + CTSize vsize = cp_decl_sizeattr(cp); + if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); + } + break; + #if LJ_TARGET_X86 +- case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ ++ case 8: case 9: /* regparm */ + CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); + decl->fattr |= CTFP_CCONV; + break; +- case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ ++ case 10: case 11: /* cdecl */ + CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); + decl->fattr |= CTFP_CCONV; + break; +- case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ ++ case 12: case 13: /* thiscall */ + CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); + decl->fattr |= CTFP_CCONV; + break; +- case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ ++ case 14: case 15: /* fastcall */ + CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); + decl->fattr |= CTFP_CCONV; + break; +- case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ ++ case 16: case 17: /* stdcall */ + CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); + decl->fattr |= CTFP_CCONV; + break; +- case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ ++ case 18: case 19: /* sseregparm */ + decl->fattr |= CTF_SSEREGPARM; + decl->fattr |= CTFP_CCONV; + break; +@@ -1128,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl) + while (cp->tok == CTOK_IDENT) { + GCstr *attrstr = cp->str; + cp_next(cp); +- switch (attrstr->hash) { +- case H_(bc2395fa,98f267f8): /* align */ ++ if (cp_str_is(attrstr, "align")) { + cp_decl_align(cp, decl); +- break; +- default: /* Ignore all other attributes. */ ++ } else { /* Ignore all other attributes. 
*/ + if (cp_opt(cp, '(')) { + while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); + cp_check(cp, ')'); + } +- break; + } + } + cp_check(cp, ')'); +@@ -1548,7 +1592,7 @@ end_decl: + cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); + sz = sizeof(int); + } +- lua_assert(sz != 0); ++ lj_assertCP(sz != 0, "basic ctype with zero size"); + info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ + info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */ + cp_push(decl, info, sz); +@@ -1717,17 +1761,16 @@ static CTypeID cp_decl_abstract(CPState *cp) + static void cp_pragma(CPState *cp, BCLine pragmaline) + { + cp_next(cp); +- if (cp->tok == CTOK_IDENT && +- cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */ ++ if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) { + cp_next(cp); + cp_check(cp, '('); + if (cp->tok == CTOK_IDENT) { +- if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ ++ if (cp_str_is(cp->str, "push")) { + if (cp->curpack < CPARSE_MAX_PACKSTACK) { + cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; + cp->curpack++; + } +- } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ ++ } else if (cp_str_is(cp->str, "pop")) { + if (cp->curpack > 0) cp->curpack--; + } else { + cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); +@@ -1776,13 +1819,11 @@ static void cp_decl_multi(CPState *cp) + if (tok == CTOK_INTEGER) { + cp_line(cp, hashline); + continue; +- } else if (tok == CTOK_IDENT && +- cp->str->hash == H_(187aab88,fcb60b42)) { /* line */ ++ } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) { + if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok); + cp_line(cp, hashline); + continue; +- } else if (tok == CTOK_IDENT && +- cp->str->hash == H_(f5e6b4f8,1d509107)) { /* pragma */ ++ } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) { + cp_pragma(cp, hashline); + continue; + } else { +@@ -1811,7 +1852,7 @@ static void cp_decl_multi(CPState *cp) + /* Treat both static and extern function declarations as extern. */ + ct = ctype_get(cp->cts, ctypeid); + /* We always get new anonymous functions (typedefs are copied). */ +- lua_assert(gcref(ct->name) == NULL); ++ lj_assertCP(gcref(ct->name) == NULL, "unexpected named function"); + id = ctypeid; /* Just name it. */ + } else if ((scl & CDF_STATIC)) { /* Accept static constants. */ + id = cp_decl_constinit(cp, &ct, ctypeid); +@@ -1853,8 +1894,6 @@ static void cp_decl_single(CPState *cp) + if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); + } + +-#undef H_ +- + /* ------------------------------------------------------------------------ */ + + /* Protected callback for C parser. */ +@@ -1870,7 +1909,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud) + cp_decl_single(cp); + if (cp->param && cp->param != cp->L->top) + cp_err(cp, LJ_ERR_FFI_NUMPARAM); +- lua_assert(cp->depth == 0); ++ lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth"); + return NULL; + } + +diff --git a/src/lj_cparse.h b/src/lj_cparse.h +index bad1060b..fd88a9f4 100644 +--- a/src/lj_cparse.h ++++ b/src/lj_cparse.h +@@ -1,6 +1,6 @@ + /* + ** C declaration parser. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
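
The new lj_cparse_case helper above replaces the old hash-pair H_() switch with a linear scan over a counted-string list: each entry is one length byte (written as an octal escape, so "\007aligned" is the 7-character entry "aligned") followed by that many characters, and the function returns the index of the matching entry or -1; cp_decl_gccattribute and the cp_str_is-based #pragma handling are callers of this scheme. A standalone sketch of the same technique over plain C strings (match_case is an invented stand-in for lj_cparse_case, which takes a GCstr):

    #include <stdio.h>
    #include <string.h>

    /* Return the index of str in a counted-string list, or -1.
    ** The list is a sequence of <length byte><length chars> entries,
    ** terminated by a zero length byte (the literal's implicit '\0'). */
    static int match_case(const char *str, const char *match)
    {
      size_t slen = strlen(str), len;
      int n;
      for (n = 0; (len = (unsigned char)*match++) != 0; n++, match += len) {
        if (slen == len && !memcmp(match, str, len))
          return n;
      }
      return -1;
    }

    int main(void)
    {
      static const char attrs[] = "\007aligned" "\013__aligned__"
                                  "\006packed"  "\012__packed__";
      printf("%d\n", match_case("packed", attrs));       /* 2 */
      printf("%d\n", match_case("__aligned__", attrs));  /* 1 */
      printf("%d\n", match_case("noreturn", attrs));     /* -1 */
      return 0;
    }

Compared with the removed H_() constants, this trades precomputed hashes for a short linear scan and keeps the attribute names readable in the source, with no separate values needed for little- and big-endian builds.
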
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CPARSE_H +@@ -60,6 +60,8 @@ typedef struct CPState { + + LJ_FUNC int lj_cparse(CPState *cp); + ++LJ_FUNC int lj_cparse_case(GCstr *str, const char *match); ++ + #endif + + #endif +diff --git a/src/lj_crecord.c b/src/lj_crecord.c +index e32ae23e..165f95d9 100644 +--- a/src/lj_crecord.c ++++ b/src/lj_crecord.c +@@ -1,6 +1,6 @@ + /* + ** Trace recorder for C data operations. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_ffrecord_c +@@ -61,7 +61,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o) + static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) + { + CTypeID id; +- lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID); ++ lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID, ++ "expected CTypeID cdata"); + id = *(CTypeID *)cdataptr(cd); + tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); + emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); +@@ -77,7 +78,7 @@ static CTypeID argv2ctype(jit_State *J, TRef tr, cTValue *o) + /* Specialize to the string containing the C type declaration. */ + emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, s)); + cp.L = J->L; +- cp.cts = ctype_ctsG(J2G(J)); ++ cp.cts = ctype_cts(J->L); + oldtop = cp.cts->top; + cp.srcname = strdata(s); + cp.p = strdata(s); +@@ -212,7 +213,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, + ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); + ml[i].trofs = trofs; + i++; +- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; ++ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; + if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ + rwin = 0; + for ( ; j < i; j++) { +@@ -237,13 +238,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen, + if (len > CREC_COPY_MAXLEN) goto fallback; + if (ct) { + CTState *cts = ctype_ctsG(J2G(J)); +- lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info)); ++ lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info), ++ "copy of non-aggregate"); + if (ctype_isarray(ct->info)) { + CType *cct = ctype_rawchild(cts, ct); + tp = crec_ct2irt(cts, cct); + if (tp == IRT_CDATA) goto rawcopy; + step = lj_ir_type_size[tp]; +- lua_assert((len & (step-1)) == 0); ++ lj_assertJ((len & (step-1)) == 0, "copy of fractional size"); + } else if ((ct->info & CTF_UNION)) { + step = (1u << ctype_align(ct->info)); + goto rawcopy; +@@ -614,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) + sp = lj_ir_kptr(J, NULL); + } else if (tref_isudata(sp)) { + GCudata *ud = udataV(sval); +- if (ud->udtype == UDTYPE_IO_FILE) { ++ if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) { + TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); +- emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); +- sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE); ++ emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype)); ++ sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, ++ ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE : ++ IRFL_SBUF_R); + } else { + sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); + } +@@ -629,7 +633,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) + /* Specialize to the name of the enum constant. 
*/ + emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); + if (cct && ctype_isconstval(cct->info)) { +- lua_assert(ctype_child(cts, cct)->size == 4); ++ lj_assertJ(ctype_child(cts, cct)->size == 4, ++ "only 32 bit const supported"); /* NYI */ + svisnz = (void *)(intptr_t)(ofs != 0); + sp = lj_ir_kint(J, (int32_t)ofs); + sid = ctype_cid(cct->info); +@@ -643,8 +648,7 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) + } + } else if (tref_islightud(sp)) { + #if LJ_64 +- sp = emitir(IRT(IR_BAND, IRT_P64), sp, +- lj_ir_kint64(J, U64x(00007fff,ffffffff))); ++ lj_trace_err(J, LJ_TRERR_NYICONV); + #endif + } else { /* NYI: tref_istab(sp). */ + IRType t; +@@ -757,7 +761,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) + IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); + TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); + CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; +- lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */ ++ lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */ + if (rd->data == 0) { /* __index metamethod. */ + if ((info & CTF_BOOL)) { + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); +@@ -769,7 +773,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) + tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); + tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); + } else { +- lua_assert(bsz < 32); /* Full-size fields cannot end up here. */ ++ lj_assertJ(bsz < 32, "unexpected full bitfield index"); + tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); + /* We can omit the U32 to NUM conversion, since bsz < 32. */ +@@ -884,7 +888,7 @@ again: + crec_index_bf(J, rd, ptr, fct->info); + return; + } else { +- lua_assert(ctype_isfield(fct->info)); ++ lj_assertJ(ctype_isfield(fct->info), "field expected"); + sid = ctype_cid(fct->info); + } + } +@@ -1022,8 +1026,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) + crec_ct_tv(J, dc, dp, sp, sval); + } + } else if (ctype_isstruct(d->info)) { +- CTypeID fid = d->sib; ++ CTypeID fid; + MSize i = 1; ++ if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */ ++ fid = d->sib; ++ while (fid) { ++ CType *df = ctype_get(cts, fid); ++ fid = df->sib; ++ if (ctype_isfield(df->info)) { ++ CType *dc; ++ if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ ++ dc = ctype_rawchild(cts, df); /* Field type. */ ++ if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) || ++ ctype_isenum(dc->info))) ++ goto special; ++ } else if (!ctype_isconstval(df->info)) { ++ goto special; ++ } ++ } ++ } ++ fid = d->sib; + while (fid) { + CType *df = ctype_get(cts, fid); + fid = df->sib; +@@ -1048,6 +1070,11 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) + dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, + lj_ir_kintp(J, df->size + sizeof(GCcdata))); + crec_ct_tv(J, dc, dp, sp, sval); ++ if ((d->info & CTF_UNION)) { ++ if (d->size != dc->size) /* NYI: partial init of union. */ ++ lj_trace_err(J, LJ_TRERR_NYICONV); ++ break; ++ } + } else if (!ctype_isconstval(df->info)) { + /* NYI: init bitfields and sub-structures. */ + lj_trace_err(J, LJ_TRERR_NYICONV); +@@ -1111,7 +1138,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, + if (fid) { /* Get argument type from field. 
*/ + CType *ctf = ctype_get(cts, fid); + fid = ctf->sib; +- lua_assert(ctype_isfield(ctf->info)); ++ lj_assertJ(ctype_isfield(ctf->info), "field expected"); + did = ctype_cid(ctf->info); + } else { + if (!(ct->info & CTF_VARARG)) +@@ -1130,7 +1157,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, + else + tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); + } +- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { ++ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { + lj_needsplit(J); + } + #if LJ_TARGET_X86 +@@ -1209,8 +1236,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) + TRef tr; + TValue tv; + /* Check for blacklisted C functions that might call a callback. */ +- setlightudV(&tv, +- cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4)); ++ tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000); + if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) + lj_trace_err(J, LJ_TRERR_BLACKL); + if (ctype_isvoid(ctr->info)) { +@@ -1530,8 +1556,10 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) + } + { + TRef tr; +- if (!(tr = crec_arith_int64(J, sp, s, (MMS)rd->data)) && +- !(tr = crec_arith_ptr(J, sp, s, (MMS)rd->data)) && ++ MMS mm = (MMS)rd->data; ++ if ((mm == MM_len || mm == MM_concat || ++ (!(tr = crec_arith_int64(J, sp, s, mm)) && ++ !(tr = crec_arith_ptr(J, sp, s, mm)))) && + !(tr = crec_arith_meta(J, sp, s, cts, rd))) + return; + J->base[0] = tr; +@@ -1879,10 +1907,36 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) + d = ctype_get(cts, CTID_DOUBLE); + J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]); + } else { ++ /* Specialize to the ctype that couldn't be converted. */ ++ argv2cdata(J, J->base[0], &rd->argv[0]); + J->base[0] = TREF_NIL; + } + } + ++TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o) ++{ ++ CTypeID id = argv2cdata(J, tr, o)->ctypeid; ++ if (!(id == CTID_INT64 || id == CTID_UINT64)) ++ lj_trace_err(J, LJ_TRERR_BADTYPE); ++ lj_needsplit(J); ++ return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr, ++ IRFL_CDATA_INT64); ++} ++ ++#if LJ_HASBUFFER ++TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o) ++{ ++ CTState *cts = ctype_ctsG(J2G(J)); ++ if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE); ++ return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o); ++} ++ ++TRef lj_crecord_topuint8(jit_State *J, TRef tr) ++{ ++ return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr); ++} ++#endif ++ + #undef IR + #undef emitir + #undef emitconv +diff --git a/src/lj_crecord.h b/src/lj_crecord.h +index c165def4..e1a2d9c0 100644 +--- a/src/lj_crecord.h ++++ b/src/lj_crecord.h +@@ -1,6 +1,6 @@ + /* + ** Trace recorder for C data operations. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CRECORD_H +@@ -33,6 +33,11 @@ LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); + LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr); + + LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); ++LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o); ++#if LJ_HASBUFFER ++LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o); ++LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr); ++#endif + #endif + + #endif +diff --git a/src/lj_ctype.c b/src/lj_ctype.c +index 0ea89c74..6741437c 100644 +--- a/src/lj_ctype.c ++++ b/src/lj_ctype.c +@@ -1,6 +1,6 @@ + /* + ** C type management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include "lj_obj.h" +@@ -153,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp) + { + CTypeID id = cts->top; + CType *ct; +- lua_assert(cts->L); ++ lj_assertCTS(cts->L, "uninitialized cts->L"); + if (LJ_UNLIKELY(id >= cts->sizetab)) { + if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); + #ifdef LUAJIT_CTYPE_CHECK_ANCHOR +@@ -182,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size) + { + uint32_t h = ct_hashtype(info, size); + CTypeID id = cts->hash[h]; +- lua_assert(cts->L); ++ lj_assertCTS(cts->L, "uninitialized cts->L"); + while (id) { + CType *ct = ctype_get(cts, id); + if (ct->info == info && ct->size == size) +@@ -298,9 +298,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem) + } + ct = ctype_raw(cts, arrid); + } +- lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */ ++ lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected"); + ct = ctype_rawchild(cts, ct); /* Get array element. */ +- lua_assert(ctype_hassize(ct->info)); ++ lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size"); + /* Calculate actual size of VLA and check for overflow. */ + xsz += (uint64_t)ct->size * nelem; + return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID; +@@ -323,7 +323,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp) + } else { + if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); + qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); +- lua_assert(ctype_hassize(info) || ctype_isfunc(info)); ++ lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info), ++ "ctype without size"); + *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; + break; + } +@@ -528,7 +529,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) + ctype_appc(ctr, ')'); + break; + default: +- lua_assert(0); ++ lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info); + break; + } + ct = ctype_get(ctr->cts, ctype_cid(info)); +@@ -582,7 +583,7 @@ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) + lj_strfmt_putfnum(sb, STRFMT_G14, re.n); + if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+'); + lj_strfmt_putfnum(sb, STRFMT_G14, im.n); +- lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i'); ++ lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i'); + return lj_buf_str(L, sb); + } + +diff --git a/src/lj_ctype.h b/src/lj_ctype.h +index 0c220a88..700250df 100644 +--- a/src/lj_ctype.h ++++ b/src/lj_ctype.h +@@ -1,6 +1,6 @@ + /* + ** C type management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_CTYPE_H +@@ -260,6 +260,12 @@ typedef struct CTState { + + #define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ + ++#ifdef LUA_USE_ASSERT ++#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__)) ++#else ++#define lj_assertCTS(c, ...) ((void)cts) ++#endif ++ + /* -- Predefined types ---------------------------------------------------- */ + + /* Target-dependent types. */ +@@ -292,6 +298,7 @@ typedef struct CTState { + _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ + _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ + _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ ++ _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \ + _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ + _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ + CTTYDEFP(_) \ +@@ -383,6 +390,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) + return cts; + } + ++/* Load FFI library on-demand. */ ++#define ctype_loadffi(L) \ ++ do { \ ++ if (!ctype_ctsG(G(L))) { \ ++ ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \ ++ luaopen_ffi(L); \ ++ L->top = (TValue *)(mref(L->stack, char) + oldtop); \ ++ } \ ++ } while (0) ++ + /* Save and restore state of C type table. */ + #define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts) + #define LJ_CTYPE_RESTORE(cts) \ +@@ -392,7 +409,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) + /* Check C type ID for validity when assertions are enabled. */ + static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) + { +- lua_assert(id > 0 && id < cts->top); UNUSED(cts); ++ UNUSED(cts); ++ lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id); + return id; + } + +@@ -408,8 +426,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id) + /* Get child C type. */ + static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) + { +- lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || +- ctype_isbitfield(ct->info))); /* These don't have children. */ ++ lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || ++ ctype_isbitfield(ct->info)), ++ "ctype %08x has no children", ct->info); + return ctype_get(cts, ctype_cid(ct->info)); + } + +diff --git a/src/lj_debug.c b/src/lj_debug.c +index 959dc289..3dffad90 100644 +--- a/src/lj_debug.c ++++ b/src/lj_debug.c +@@ -1,6 +1,6 @@ + /* + ** Debugging and introspection. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_debug_c +@@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) + const BCIns *ins; + GCproto *pt; + BCPos pos; +- lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); ++ lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD, ++ "function or frame expected"); + if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ + return NO_BCPOS; + } else if (nextframe == NULL) { /* Lua function on top. */ +@@ -93,6 +94,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) + } + } + ins = cframe_pc(cf); ++ if (!ins) return NO_BCPOS; + } + } + pt = funcproto(fn); +@@ -100,7 +102,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) + #if LJ_HASJIT + if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. 
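
A pattern that recurs throughout this patch: each module defines its own assertion macro (lj_assertCP for the C parser earlier in the diff, lj_assertCTS here in lj_ctype.h) that forwards a printf-style message when LUA_USE_ASSERT is defined and otherwise expands to ((void)cts) or ((void)cp), so the context argument still counts as used and release builds compile without warnings. A tiny standalone analogue of that shape (my_assert, assert_fail and struct ctx are invented names; the real macros route through lj_assert_fail with a global_State, as the lj_def.h hunk further down shows):

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct ctx { const char *module; };

    static void assert_fail(struct ctx *cx, const char *file, int line,
                            const char *fmt, ...)
    {
      va_list ap;
      fprintf(stderr, "%s:%d: [%s] assertion failed: ", file, line, cx->module);
      va_start(ap, fmt);
      vfprintf(stderr, fmt, ap);
      va_end(ap);
      fputc('\n', stderr);
      abort();
    }

    #ifdef USE_ASSERT
    #define my_assert(ctx, c, ...) \
      ((c) ? (void)0 : assert_fail((ctx), __FILE__, __LINE__, __VA_ARGS__))
    #else
    /* Disabled: no check, but the context argument is still referenced,
    ** so it never triggers an unused-variable warning. */
    #define my_assert(ctx, c, ...) ((void)(ctx))
    #endif

    int main(void)
    {
      struct ctx cts = { "demo" };
      my_assert(&cts, 1 + 1 == 2, "arithmetic is broken: %d", 1 + 1);
      puts("ok");
      return 0;
    }
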
*/ + GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); +- lua_assert(bc_isret(bc_op(ins[-1]))); ++ lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected"); + pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); + } + #endif +@@ -133,7 +135,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) + BCPos pc = debug_framepc(L, fn, nextframe); + if (pc != NO_BCPOS) { + GCproto *pt = funcproto(fn); +- lua_assert(pc <= pt->sizebc); ++ lj_assertL(pc <= pt->sizebc, "PC out of range"); + return lj_debug_line(pt, pc); + } + return -1; +@@ -214,26 +216,29 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, + const char *lj_debug_uvname(GCproto *pt, uint32_t idx) + { + const uint8_t *p = proto_uvinfo(pt); +- lua_assert(idx < pt->sizeuv); ++ lj_assertX(idx < pt->sizeuv, "bad upvalue index"); + if (!p) return ""; + if (idx) while (*p++ || --idx) ; + return (const char *)p; + } + + /* Get name and value of upvalue. */ +-const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp) ++const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, GCobj **op) + { + if (tvisfunc(o)) { + GCfunc *fn = funcV(o); + if (isluafunc(fn)) { + GCproto *pt = funcproto(fn); + if (idx < pt->sizeuv) { +- *tvp = uvval(&gcref(fn->l.uvptr[idx])->uv); ++ GCobj *uvo = gcref(fn->l.uvptr[idx]); ++ *tvp = uvval(&uvo->uv); ++ *op = uvo; + return lj_debug_uvname(pt, idx); + } + } else { + if (idx < fn->c.nupvalues) { + *tvp = &fn->c.upvalue[idx]; ++ *op = obj2gco(fn); + return ""; + } + } +@@ -429,20 +434,21 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) + GCfunc *fn; + if (*what == '>') { + TValue *func = L->top - 1; +- api_check(L, tvisfunc(func)); ++ if (!tvisfunc(func)) return 0; + fn = funcV(func); + L->top--; + what++; + } else { + uint32_t offset = (uint32_t)ar->i_ci & 0xffff; + uint32_t size = (uint32_t)ar->i_ci >> 16; +- lua_assert(offset != 0); ++ lj_assertL(offset != 0, "bad frame offset"); + frame = tvref(L->stack) + offset; + if (size) nextframe = frame + size; +- lua_assert(frame <= tvref(L->maxstack) && +- (!nextframe || nextframe <= tvref(L->maxstack))); ++ lj_assertL(frame <= tvref(L->maxstack) && ++ (!nextframe || nextframe <= tvref(L->maxstack)), ++ "broken frame chain"); + fn = frame_func(frame); +- lua_assert(fn->c.gct == ~LJ_TFUNC); ++ lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function"); + } + for (; *what; what++) { + if (*what == 'S') { +@@ -642,7 +648,7 @@ void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth) + level += dir; + } + if (lastlen) +- setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */ ++ sb->w = sb->b + lastlen; /* Zap trailing separator. */ + } + #endif + +diff --git a/src/lj_debug.h b/src/lj_debug.h +index 5917c00b..a6e21701 100644 +--- a/src/lj_debug.h ++++ b/src/lj_debug.h +@@ -1,6 +1,6 @@ + /* + ** Debugging and introspection. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_DEBUG_H +@@ -29,7 +29,8 @@ typedef struct lj_Debug { + LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size); + LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc); + LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); +-LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); ++LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, ++ GCobj **op); + LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, + BCReg slot, const char **name); + LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, +diff --git a/src/lj_def.h b/src/lj_def.h +index 2d8fff66..3a28026c 100644 +--- a/src/lj_def.h ++++ b/src/lj_def.h +@@ -1,6 +1,6 @@ + /* + ** LuaJIT common internal definitions. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_DEF_H +@@ -8,8 +8,8 @@ + + #include "lua.h" + +-#if defined(_MSC_VER) +-/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */ ++#if defined(_MSC_VER) && (_MSC_VER < 1700) ++/* Old MSVC is stuck in the last century and doesn't have C99's stdint.h. */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; +@@ -80,7 +80,6 @@ typedef unsigned int uintptr_t; + #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ + #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ + #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ +-#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ + + /* JIT compiler limits. */ + #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ +@@ -105,9 +104,10 @@ typedef unsigned int uintptr_t; + #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) + #define checki32(x) ((x) == (int32_t)(x)) + #define checku32(x) ((x) == (uint32_t)(x)) ++#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0) + #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) + #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) +-#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) ++#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1) + + /* Every half-decent C compiler transforms this into a rotate instruction. 
*/ + #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) +@@ -120,7 +120,7 @@ typedef uintptr_t BloomFilter; + #define bloomset(b, x) ((b) |= bloombit((x))) + #define bloomtest(b, x) ((b) & bloombit((x))) + +-#if defined(__GNUC__) || defined(__psp2__) ++#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__) + + #define LJ_NORET __attribute__((noreturn)) + #define LJ_ALIGN(n) __attribute__((aligned(n))) +@@ -182,7 +182,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) + { + return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); + } +-#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) ++#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__ + static LJ_AINLINE uint32_t lj_bswap(uint32_t x) + { + return (uint32_t)__builtin_bswap32((int32_t)x); +@@ -263,19 +263,19 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) + return _CountLeadingZeros(x) ^ 31; + } + #else +-unsigned char _BitScanForward(uint32_t *, unsigned long); +-unsigned char _BitScanReverse(uint32_t *, unsigned long); ++unsigned char _BitScanForward(unsigned long *, unsigned long); ++unsigned char _BitScanReverse(unsigned long *, unsigned long); + #pragma intrinsic(_BitScanForward) + #pragma intrinsic(_BitScanReverse) + + static LJ_AINLINE uint32_t lj_ffs(uint32_t x) + { +- uint32_t r; _BitScanForward(&r, x); return r; ++ unsigned long r; _BitScanForward(&r, x); return (uint32_t)r; + } + + static LJ_AINLINE uint32_t lj_fls(uint32_t x) + { +- uint32_t r; _BitScanReverse(&r, x); return r; ++ unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; + } + #endif + +@@ -338,14 +338,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) + #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET + #define LJ_ASMF_NORET LJ_ASMF LJ_NORET + +-/* Runtime assertions. */ +-#ifdef lua_assert +-#define check_exp(c, e) (lua_assert(c), (e)) +-#define api_check(l, e) lua_assert(e) ++/* Internal assertions. */ ++#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) ++#define lj_assert_check(g, c, ...) \ ++ ((c) ? (void)0 : \ ++ (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0)) ++#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) + #else +-#define lua_assert(c) ((void)0) ++#define lj_checkapi(c, ...) ((void)L) ++#endif ++ ++#ifdef LUA_USE_ASSERT ++#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__) ++#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__) ++#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) ++#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__) ++#define check_exp(c, e) (lj_assertX((c), #c), (e)) ++#else ++#define lj_assertG_(g, c, ...) ((void)0) ++#define lj_assertG(c, ...) ((void)g) ++#define lj_assertL(c, ...) ((void)L) ++#define lj_assertX(c, ...) ((void)0) + #define check_exp(c, e) (e) +-#define api_check luai_apicheck + #endif + + /* Static assertions. */ +@@ -359,4 +373,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) + extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) + #endif + ++/* PRNG state. Need this here, details in lj_prng.h. */ ++typedef struct PRNGState { ++ uint64_t u[4]; ++} PRNGState; ++ + #endif +diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c +index 5d6795f8..7b73d3dd 100644 +--- a/src/lj_dispatch.c ++++ b/src/lj_dispatch.c +@@ -1,6 +1,6 @@ + /* + ** Instruction dispatch handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #define lj_dispatch_c +@@ -68,6 +68,8 @@ void lj_dispatch_init(GG_State *GG) + /* The JIT engine is off by default. luaopen_jit() turns it on. */ + disp[BC_FORL] = disp[BC_IFORL]; + disp[BC_ITERL] = disp[BC_IITERL]; ++ /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */ ++ disp[BC_ITERN] = &lj_vm_IITERN; + disp[BC_LOOP] = disp[BC_ILOOP]; + disp[BC_FUNCF] = disp[BC_IFUNCF]; + disp[BC_FUNCV] = disp[BC_IFUNCV]; +@@ -118,19 +120,21 @@ void lj_dispatch_update(global_State *g) + mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; + if (oldmode != mode) { /* Mode changed? */ + ASMFunction *disp = G2GG(g)->dispatch; +- ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv; ++ ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv; + g->dispatchmode = mode; + + /* Hotcount if JIT is on, but not while recording. */ + if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { + f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); + f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); ++ f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]); + f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); + f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); + f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); + } else { /* Otherwise use the non-hotcounting instructions. */ + f_forl = disp[GG_LEN_DDISP+BC_IFORL]; + f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; ++ f_itern = &lj_vm_IITERN; + f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; + f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); + f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); +@@ -138,6 +142,7 @@ void lj_dispatch_update(global_State *g) + /* Init static counting instruction dispatch first (may be copied below). */ + disp[GG_LEN_DDISP+BC_FORL] = f_forl; + disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; ++ disp[GG_LEN_DDISP+BC_ITERN] = f_itern; + disp[GG_LEN_DDISP+BC_LOOP] = f_loop; + + /* Set dynamic instruction dispatch. */ +@@ -165,6 +170,7 @@ void lj_dispatch_update(global_State *g) + /* Otherwise set dynamic counting ins. */ + disp[BC_FORL] = f_forl; + disp[BC_ITERL] = f_iterl; ++ disp[BC_ITERN] = f_itern; + disp[BC_LOOP] = f_loop; + /* Set dynamic return dispatch. */ + if ((mode & DISPMODE_RET)) { +@@ -252,15 +258,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) + } else { + if (!(mode & LUAJIT_MODE_ON)) + G2J(g)->flags &= ~(uint32_t)JIT_F_ON; +-#if LJ_TARGET_X86ORX64 +- else if ((G2J(g)->flags & JIT_F_SSE2)) +- G2J(g)->flags |= (uint32_t)JIT_F_ON; +- else +- return 0; /* Don't turn on JIT compiler without SSE2 support. */ +-#else + else + G2J(g)->flags |= (uint32_t)JIT_F_ON; +-#endif + lj_dispatch_update(g); + } + break; +@@ -302,7 +301,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) + if (idx != 0) { + cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx; + if (tvislightud(tv)) +- g->wrapf = (lua_CFunction)lightudV(tv); ++ g->wrapf = (lua_CFunction)lightudV(g, tv); + else + return 0; /* Failed. */ + } else { +@@ -374,7 +373,7 @@ static void callhook(lua_State *L, int event, BCLine line) + hook_enter(g); + #endif + hookf(L, &ar); +- lua_assert(hook_active(g)); ++ lj_assertG(hook_active(g), "active hook flag removed"); + setgcref(g->cur_L, obj2gco(L)); + #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF + lj_profile_hook_leave(g); +@@ -422,7 +421,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) + #endif + J->L = L; + lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. 
*/ +- lua_assert(L->top - L->base == delta); ++ lj_assertG(L->top - L->base == delta, ++ "unbalanced stack after tracing of instruction"); + } + } + #endif +@@ -482,7 +482,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) + #endif + pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); + lj_trace_hot(J, pc); +- lua_assert(L->top - L->base == delta); ++ lj_assertG(L->top - L->base == delta, ++ "unbalanced stack after hot call"); + goto out; + } else if (J->state != LJ_TRACE_IDLE && + !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { +@@ -491,7 +492,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) + #endif + /* Record the FUNC* bytecodes, too. */ + lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ +- lua_assert(L->top - L->base == delta); ++ lj_assertG(L->top - L->base == delta, ++ "unbalanced stack after hot instruction"); + } + #endif + if ((g->hookmask & LUA_MASKCALL)) { +diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h +index 5bda51a2..2331bd42 100644 +--- a/src/lj_dispatch.h ++++ b/src/lj_dispatch.h +@@ -1,6 +1,6 @@ + /* + ** Instruction dispatch handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_DISPATCH_H +@@ -31,7 +31,7 @@ extern double __divdf3(double a, double b); + #define SFGOTDEF(_) + #endif + #if LJ_HASJIT +-#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) ++#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) + #else + #define JITGOTDEF(_) + #endif +@@ -89,12 +89,20 @@ typedef uint16_t HotCount; + typedef struct GG_State { + lua_State L; /* Main thread. */ + global_State g; /* Global state. */ ++#if LJ_TARGET_ARM ++ /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ ++ uint8_t align1[(16-sizeof(global_State))&15]; ++#endif + #if LJ_TARGET_MIPS + ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ + #endif + #if LJ_HASJIT + jit_State J; /* JIT state. */ + HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ ++#if LJ_TARGET_ARM ++ /* Ditto for J. */ ++ uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; ++#endif + #endif + ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ + BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ +diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h +index dee8bdcc..615e4c3a 100644 +--- a/src/lj_emit_arm.h ++++ b/src/lj_emit_arm.h +@@ -1,6 +1,6 @@ + /* + ** ARM instruction emitter. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + /* -- Constant encoding --------------------------------------------------- */ +@@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm) + + static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) + { +- lua_assert(ofs >= -255 && ofs <= 255); ++ lj_assertA(ofs >= -255 && ofs <= 255, ++ "load/store offset %d out of range", ofs); + if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; + *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | + ((ofs & 0xf0) << 4) | (ofs & 0x0f); +@@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) + + static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) + { +- lua_assert(ofs >= -4095 && ofs <= 4095); ++ lj_assertA(ofs >= -4095 && ofs <= 4095, ++ "load/store offset %d out of range", ofs); + /* Combine LDR/STR pairs to LDRD/STRD. */ + if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && + (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && +@@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) + #if !LJ_SOFTFP + static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) + { +- lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); ++ lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0, ++ "load/store offset %d out of range", ofs); + if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; + *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); + } +@@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); +- lua_assert(r != d); ++ lj_assertA(r != d, "dest reg not free"); + if (emit_canremat(ref)) { + int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); + uint32_t k = emit_isk12(ARMI_ADD, delta); +@@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) + } + + /* Try to find a two step delta relative to another constant. */ +-static int emit_kdelta2(ASMState *as, Reg d, int32_t i) ++static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) + { + RegSet work = ~as->freeset & RSET_GPR; + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); +- lua_assert(r != d); ++ lj_assertA(r != rd, "dest reg %d not free", rd); + if (emit_canremat(ref)) { + int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; + if (other) { +@@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) + k2 = emit_isk12(0, delta & (255 << sh)); + k = emit_isk12(0, delta & ~(255 << sh)); + if (k) { +- emit_dn(as, ARMI_ADD^k2^inv, d, d); +- emit_dn(as, ARMI_ADD^k^inv, d, r); ++ emit_dn(as, ARMI_ADD^k2^inv, rd, rd); ++ emit_dn(as, ARMI_ADD^k^inv, rd, r); + return 1; + } + } +@@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) + } + + /* Load a 32 bit constant into a GPR. */ +-static void emit_loadi(ASMState *as, Reg r, int32_t i) ++static void emit_loadi(ASMState *as, Reg rd, int32_t i) + { + uint32_t k = emit_isk12(ARMI_MOV, i); +- lua_assert(rset_test(as->freeset, r) || r == RID_TMP); ++ lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, ++ "dest reg %d not free", rd); + if (k) { + /* Standard K12 constant. */ +- emit_d(as, ARMI_MOV^k, r); ++ emit_d(as, ARMI_MOV^k, rd); + } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { + /* 16 bit loword constant for ARMv6T2. 
*/ +- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); +- } else if (emit_kdelta1(as, r, i)) { ++ emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); ++ } else if (emit_kdelta1(as, rd, i)) { + /* One step delta relative to another constant. */ + } else if ((as->flags & JIT_F_ARMV6T2)) { + /* 32 bit hiword/loword constant for ARMv6T2. */ +- emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); +- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); +- } else if (emit_kdelta2(as, r, i)) { ++ emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); ++ emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); ++ } else if (emit_kdelta2(as, rd, i)) { + /* Two step delta relative to another constant. */ + } else { + /* Otherwise construct the constant with up to 4 instructions. */ +@@ -197,15 +201,15 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) + int32_t m = i & (255 << sh); + i &= ~(255 << sh); + if (i == 0) { +- emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); ++ emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); + break; + } +- emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); ++ emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); + } + } + } + +-#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) ++#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr))) + + static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); + +@@ -261,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target) + { + MCode *p = as->mcp; + ptrdiff_t delta = (target - p) - 1; +- lua_assert(((delta + 0x00800000) >> 24) == 0); ++ lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range"); + *--p = ai | ((uint32_t)delta & 0x00ffffffu); + as->mcp = p; + } +@@ -289,7 +293,7 @@ static void emit_call(ASMState *as, void *target) + static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) + { + #if LJ_SOFTFP +- lua_assert(!irt_isnum(ir->t)); UNUSED(ir); ++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); + #else + if (dst >= RID_MAX_GPR) { + emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, +@@ -313,7 +317,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) + static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) + { + #if LJ_SOFTFP +- lua_assert(!irt_isnum(ir->t)); UNUSED(ir); ++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); + #else + if (r >= RID_MAX_GPR) + emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); +@@ -326,7 +330,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) + static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) + { + #if LJ_SOFTFP +- lua_assert(!irt_isnum(ir->t)); UNUSED(ir); ++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); + #else + if (r >= RID_MAX_GPR) + emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); +diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h +index 6da4c7d4..00086e8a 100644 +--- a/src/lj_emit_arm64.h ++++ b/src/lj_emit_arm64.h +@@ -1,6 +1,6 @@ + /* + ** ARM64 instruction emitter. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. + ** Sponsored by Cisco Systems, Inc. 
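
emit_loadi above tries progressively more expensive ways to materialize a 32 bit constant on ARM: a single K12-encodable immediate, a 16 bit MOVW on ARMv6T2, a one-step delta from a register already holding a constant, MOVW+MOVT, a two-step delta, and finally an ORR chain. The K12 test asks whether the value is an 8 bit byte rotated right by an even amount; a standalone version of just that predicate (arm_k12_ok is an invented name, not the emitter's emit_isk12, which also folds the operand into the instruction word):

    #include <stdint.h>
    #include <stdio.h>

    /* Can x be encoded as an ARM data-processing immediate, i.e. an
    ** 8 bit value rotated right by an even amount in 0..30? */
    static int arm_k12_ok(uint32_t x)
    {
      uint32_t rot;
      for (rot = 0; rot < 32; rot += 2) {
        /* Rotating left by rot undoes a right rotation by rot. */
        uint32_t undone = (x << rot) | (x >> ((32 - rot) & 31));
        if ((undone & ~0xffu) == 0) return 1;
      }
      return 0;
    }

    int main(void)
    {
      printf("%d %d %d\n",
             arm_k12_ok(0xff000000u),   /* 1: 0xff ror 8 */
             arm_k12_ok(0x00ab0000u),   /* 1: 0xab ror 16 */
             arm_k12_ok(0x00012345u));  /* 0: spans more than 8 bits */
      return 0;
    }
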
+@@ -8,8 +8,9 @@ + + /* -- Constant encoding --------------------------------------------------- */ + +-static uint64_t get_k64val(IRIns *ir) ++static uint64_t get_k64val(ASMState *as, IRRef ref) + { ++ IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { +@@ -17,7 +18,8 @@ static uint64_t get_k64val(IRIns *ir) + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (uint64_t)ir_kptr(ir); + } else { +- lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); ++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, ++ "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. */ + } + } +@@ -122,7 +124,7 @@ static int emit_checkofs(A64Ins ai, int64_t ofs) + static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) + { + int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; +- lua_assert(ot); ++ lj_assertA(ot, "load/store offset %d out of range", ofs); + /* Combine LDR/STR pairs to LDP/STP. */ + if ((sc == 2 || sc == 3) && + (!(ai & 0x400000) || rd != rn) && +@@ -161,15 +163,15 @@ nopair: + /* Try to find an N-step delta relative to other consts with N < lim. */ + static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) + { +- RegSet work = ~as->freeset & RSET_GPR; ++ RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); + if (lim <= 1) return 0; /* Can't beat that. */ + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); +- lua_assert(r != rd); ++ lj_assertA(r != rd, "dest reg %d not free", rd); + if (ref < REF_TRUE) { + uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : +- get_k64val(IR(ref)); ++ get_k64val(as, ref); + int64_t delta = (int64_t)(k - kx); + if (delta == 0) { + emit_dm(as, A64I_MOVx, rd, r); +@@ -192,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) + + static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) + { +- uint32_t k13 = emit_isk13(u64, is64); +- if (k13) { /* Can the constant be represented as a bitmask immediate? */ +- emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); +- } else { +- int i, zeros = 0, ones = 0, neg; +- if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ +- /* Count homogeneous 16 bit fragments. */ +- for (i = 0; i < 4; i++) { +- uint64_t frag = (u64 >> i*16) & 0xffff; +- zeros += (frag == 0); +- ones += (frag == 0xffff); ++ int i, zeros = 0, ones = 0, neg; ++ if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ ++ /* Count homogeneous 16 bit fragments. */ ++ for (i = 0; i < 4; i++) { ++ uint64_t frag = (u64 >> i*16) & 0xffff; ++ zeros += (frag == 0); ++ ones += (frag == 0xffff); ++ } ++ neg = ones > zeros; /* Use MOVN if it pays off. */ ++ if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */ ++ uint32_t k13 = emit_isk13(u64, is64); ++ if (k13) { ++ emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); ++ return; + } +- neg = ones > zeros; /* Use MOVN if it pays off. */ +- if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { +- int shift = 0, lshift = 0; +- uint64_t n64 = neg ? ~u64 : u64; +- if (n64 != 0) { +- /* Find first/last fragment to be filled. */ +- shift = (63-emit_clz64(n64)) & ~15; +- lshift = emit_ctz64(n64) & ~15; +- } +- /* MOVK requires the original value (u64). */ +- while (shift > lshift) { +- uint32_t u16 = (u64 >> shift) & 0xffff; +- /* Skip fragments that are correctly filled by MOVN/MOVZ. */ +- if (u16 != (neg ? 
0xffff : 0)) +- emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); +- shift -= 16; +- } +- /* But MOVN needs an inverted value (n64). */ +- emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | +- A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); ++ } ++ if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { ++ int shift = 0, lshift = 0; ++ uint64_t n64 = neg ? ~u64 : u64; ++ if (n64 != 0) { ++ /* Find first/last fragment to be filled. */ ++ shift = (63-emit_clz64(n64)) & ~15; ++ lshift = emit_ctz64(n64) & ~15; ++ } ++ /* MOVK requires the original value (u64). */ ++ while (shift > lshift) { ++ uint32_t u16 = (u64 >> shift) & 0xffff; ++ /* Skip fragments that are correctly filled by MOVN/MOVZ. */ ++ if (u16 != (neg ? 0xffff : 0)) ++ emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); ++ shift -= 16; + } ++ /* But MOVN needs an inverted value (n64). */ ++ emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | ++ A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); + } + } + +@@ -241,7 +245,7 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) + #define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) + #define checkmcpofs(as, k) \ +- ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) ++ (A64F_S_OK(mcpofs(as, k)>>2, 19)) + + static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); + +@@ -312,7 +316,7 @@ static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) + { + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; +- lua_assert(((delta + 0x40000) >> 19) == 0); ++ lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); + *p = A64I_BCC | A64F_S19(delta) | cond; + } + +@@ -320,24 +324,25 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target) + { + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; +- lua_assert(((delta + 0x02000000) >> 26) == 0); +- *p = ai | ((uint32_t)delta & 0x03ffffffu); ++ lj_assertA(A64F_S_OK(delta, 26), "branch target out of range"); ++ *p = ai | A64F_S26(delta); + } + + static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) + { + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; +- lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); ++ lj_assertA(bit < 63, "bit number out of range"); ++ lj_assertA(A64F_S_OK(delta, 14), "branch target out of range"); + if (bit > 31) ai |= A64I_X; +- *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; ++ *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; + } + + static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) + { + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; +- lua_assert(((delta + 0x40000) >> 19) == 0); ++ lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); + *p = ai | A64F_S19(delta) | r; + } + +@@ -347,8 +352,8 @@ static void emit_call(ASMState *as, void *target) + { + MCode *p = --as->mcp; + ptrdiff_t delta = (char *)target - (char *)p; +- if ((((delta>>2) + 0x02000000) >> 26) == 0) { +- *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); ++ if (A64F_S_OK(delta>>2, 26)) { ++ *p = A64I_BL | A64F_S26(delta>>2); + } else { /* Target out of range: need indirect call. But don't use R0-R7. */ + Reg r = ra_allock(as, i64ptr(target), + RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); +diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h +index 8a9ee24d..c13615dd 100644 +--- a/src/lj_emit_mips.h ++++ b/src/lj_emit_mips.h +@@ -1,28 +1,32 @@ + /* + ** MIPS instruction emitter. +-** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #if LJ_64 +-static intptr_t get_k64val(IRIns *ir) ++static intptr_t get_k64val(ASMState *as, IRRef ref) + { ++ IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); ++ } else if (LJ_SOFTFP && ir->o == IR_KNUM) { ++ return (intptr_t)ir_knum(ir)->u64; + } else { +- lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); ++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, ++ "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. */ + } + } + #endif + + #if LJ_64 +-#define get_kval(ir) get_k64val(ir) ++#define get_kval(as, ref) get_k64val(as, ref) + #else +-#define get_kval(ir) ((ir)->i) ++#define get_kval(as, ref) (IR((ref))->i) + #endif + + /* -- Emit basic instructions --------------------------------------------- */ +@@ -66,7 +70,7 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) + } + } + +-#if LJ_64 ++#if LJ_64 || LJ_HASBUFFER + static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, + uint32_t lsb) + { +@@ -80,18 +84,18 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, + #define emit_canremat(ref) ((ref) <= REF_BASE) + + /* Try to find a one step delta relative to another constant. */ +-static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) ++static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i) + { + RegSet work = ~as->freeset & RSET_GPR; + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); +- lua_assert(r != t); ++ lj_assertA(r != rd, "dest reg %d not free", rd); + if (ref < ASMREF_L) { + intptr_t delta = (intptr_t)((uintptr_t)i - +- (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); ++ (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref))); + if (checki16(delta)) { +- emit_tsi(as, MIPSI_AADDIU, t, r, delta); ++ emit_tsi(as, MIPSI_AADDIU, rd, r, delta); + return 1; + } + } +@@ -136,6 +140,7 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) + } else if (emit_kdelta1(as, r, (intptr_t)u64)) { + return; + } else { ++ /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */ + if ((u64 & 0xffff)) { + emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); + } +@@ -220,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) + { + MCode *p = as->mcp; + ptrdiff_t delta = target - p; +- lua_assert(((delta + 0x8000) >> 16) == 0); ++ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); + *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); + as->mcp = p; + } +@@ -234,10 +239,22 @@ static void emit_jmp(ASMState *as, MCode *target) + static void emit_call(ASMState *as, void *target, int needcfa) + { + MCode *p = as->mcp; +- *--p = MIPSI_NOP; ++#if LJ_TARGET_MIPSR6 ++ ptrdiff_t delta = (char *)target - (char *)p; ++ if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */ ++ *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu); ++ as->mcp = p; ++ return; ++ } ++#endif ++ *--p = MIPSI_NOP; /* Delay slot. */ + if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { ++#if !LJ_TARGET_MIPSR6 + *--p = (((uintptr_t)target & 1) ? 
MIPSI_JALX : MIPSI_JAL) | + (((uintptr_t)target >>2) & 0x03ffffffu); ++#else ++ *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); ++#endif + } else { /* Target out of range: need indirect call. */ + *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); + needcfa = 1; +@@ -284,7 +301,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) + static void emit_addptr(ASMState *as, Reg r, int32_t ofs) + { + if (ofs) { +- lua_assert(checki16(ofs)); ++ lj_assertA(checki16(ofs), "offset %d out of range", ofs); + emit_tsi(as, MIPSI_AADDIU, r, r, ofs); + } + } +diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h +index 21c3c2ac..649a6d17 100644 +--- a/src/lj_emit_ppc.h ++++ b/src/lj_emit_ppc.h +@@ -1,6 +1,6 @@ + /* + ** PPC instruction emitter. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + /* -- Emit basic instructions --------------------------------------------- */ +@@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, + + static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) + { +- lua_assert(n >= 0 && n < 32); ++ lj_assertA(n >= 0 && n < 32, "shift out or range"); + emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); + } + + static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) + { +- lua_assert(n >= 0 && n < 32); ++ lj_assertA(n >= 0 && n < 32, "shift out or range"); + emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); + } + +@@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) + #define emit_canremat(ref) ((ref) <= REF_BASE) + + /* Try to find a one step delta relative to another constant. */ +-static int emit_kdelta1(ASMState *as, Reg t, int32_t i) ++static int emit_kdelta1(ASMState *as, Reg rd, int32_t i) + { + RegSet work = ~as->freeset & RSET_GPR; + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); +- lua_assert(r != t); ++ lj_assertA(r != rd, "dest reg %d not free", rd); + if (ref < ASMREF_L) { + int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); + if (checki16(delta)) { +- emit_tai(as, PPCI_ADDI, t, r, delta); ++ emit_tai(as, PPCI_ADDI, rd, r, delta); + return 1; + } + } +@@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) + { + MCode *p = --as->mcp; + ptrdiff_t delta = (char *)target - (char *)p; +- lua_assert(((delta + 0x8000) >> 16) == 0); ++ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); + pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); + *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); + } +diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h +index 5207f9da..85202768 100644 +--- a/src/lj_emit_x86.h ++++ b/src/lj_emit_x86.h +@@ -1,6 +1,6 @@ + /* + ** x86/x64 instruction emitter. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + /* -- Emit basic instructions --------------------------------------------- */ +@@ -45,7 +45,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, + *(uint32_t *)(p+delta-5) = (uint32_t)xo; + return p+delta-5; + } +-#if defined(__GNUC__) ++#if defined(__GNUC__) || defined(__clang__) + if (__builtin_constant_p(xo) && n == -2) + p[delta-2] = (MCode)(xo >> 24); + else if (__builtin_constant_p(xo) && n == -3) +@@ -92,7 +92,7 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) + /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */ + static int32_t ptr2addr(const void *p) + { +- lua_assert((uintptr_t)p < (uintptr_t)0x80000000); ++ lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range"); + return i32ptr(p); + } + #else +@@ -208,7 +208,7 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) + rb = RID_ESP; + #endif + } else if (LJ_GC64 && rb == RID_RIP) { +- lua_assert(as->mrm.idx == RID_NONE); ++ lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index"); + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = as->mrm.ofs; +@@ -274,10 +274,12 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) + /* mov r, i / xor r, r */ + static void emit_loadi(ASMState *as, Reg r, int32_t i) + { +- /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */ ++ /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. */ + if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP || + (as->curins+1 < as->T->nins && +- IR(as->curins+1)->o == IR_HIOP)))) { ++ IR(as->curins+1)->o == IR_HIOP))) && ++ !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) || ++ (*as->mcp & 0xf0) == XI_JCCs)) { + emit_rr(as, XO_ARITH(XOg_XOR), r, r); + } else { + MCode *p = as->mcp; +@@ -343,9 +345,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) + emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); + } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { + emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); +- } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { +- emit_rmro(as, xo, rr, rr, 0); +- emit_loadu64(as, rr, (uintptr_t)addr); ++ } else if (!checki32((intptr_t)addr)) { ++ Reg ra = (rr & 15); ++ if (xo != XO_MOV) { ++ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ ++ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; ++ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; ++ ra = RID_DISPATCH; ++ if (checku32(dispaddr)) { ++ emit_loadi(as, ra, (int32_t)dispaddr); ++ } else { /* Full-size 64 bit load. 
*/ ++ MCode *p = as->mcp; ++ *(uint64_t *)(p-8) = dispaddr; ++ p[-9] = (MCode)(XI_MOVri+(ra&7)); ++ p[-10] = 0x48 + ((ra>>3)&1); ++ p -= 10; ++ as->mcp = p; ++ } ++ if (xo == XO_GROUP3b) emit_i8(as, i8); ++ } ++ emit_rmro(as, xo, rr, ra, 0); ++ emit_loadu64(as, ra, (uintptr_t)addr); + } else + #endif + { +@@ -381,7 +401,8 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) + emit_rma(as, xo, r64, k); + } else { + if (ir->i) { +- lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); ++ lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), ++ "bad interned 64 bit constant"); + } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { + emit_loadu64(as, r, *k); + return; +@@ -413,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target) + { + MCode *p = as->mcp; + ptrdiff_t delta = target - p; +- lua_assert(delta == (int8_t)delta); ++ lj_assertA(delta == (int8_t)delta, "short jump target out of range"); + p[-1] = (MCode)(int8_t)delta; + p[-2] = XI_JMPs; + as->mcp = p - 2; +@@ -425,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target) + { + MCode *p = as->mcp; + ptrdiff_t delta = target - p; +- lua_assert(delta == (int8_t)delta); ++ lj_assertA(delta == (int8_t)delta, "short jump target out of range"); + p[-1] = (MCode)(int8_t)delta; + p[-2] = (MCode)(XI_JCCs+(cc&15)); + as->mcp = p - 2; +@@ -451,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source) + #define emit_label(as) ((as)->mcp) + + /* Compute relative 32 bit offset for jump and call instructions. */ +-static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) ++static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) + { + ptrdiff_t delta = target - p; +- lua_assert(delta == (int32_t)delta); ++ UNUSED(J); ++ lj_assertJ(delta == (int32_t)delta, "jump target out of range"); + return (int32_t)delta; + } + +@@ -462,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) + static void emit_jcc(ASMState *as, int cc, MCode *target) + { + MCode *p = as->mcp; +- *(int32_t *)(p-4) = jmprel(p, target); ++ *(int32_t *)(p-4) = jmprel(as->J, p, target); + p[-5] = (MCode)(XI_JCCn+(cc&15)); + p[-6] = 0x0f; + as->mcp = p - 6; +@@ -472,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target) + static void emit_jmp(ASMState *as, MCode *target) + { + MCode *p = as->mcp; +- *(int32_t *)(p-4) = jmprel(p, target); ++ *(int32_t *)(p-4) = jmprel(as->J, p, target); + p[-5] = XI_JMP; + as->mcp = p - 5; + } +@@ -489,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target) + return; + } + #endif +- *(int32_t *)(p-4) = jmprel(p, target); ++ *(int32_t *)(p-4) = jmprel(as->J, p, target); + p[-5] = XI_CALL; + as->mcp = p - 5; + } +@@ -539,10 +561,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) + static void emit_addptr(ASMState *as, Reg r, int32_t ofs) + { + if (ofs) { +- if ((as->flags & JIT_F_LEA_AGU)) +- emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); +- else +- emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); ++ emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); + } + } + +diff --git a/src/lj_err.c b/src/lj_err.c +index b6be357e..fda4a59c 100644 +--- a/src/lj_err.c ++++ b/src/lj_err.c +@@ -1,6 +1,6 @@ + /* + ** Error handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #define lj_err_c +@@ -29,12 +29,18 @@ + ** Pros and Cons: + ** + ** - EXT requires unwind tables for *all* functions on the C stack between +-** the pcall/catch and the error/throw. This is the default on x64, +-** but needs to be manually enabled on x86/PPC for non-C++ code. ++** the pcall/catch and the error/throw. C modules used by Lua code can ++** throw errors, so these need to have unwind tables, too. Transitively ++** this applies to all system libraries used by C modules -- at least ++** when they have callbacks which may throw an error. + ** +-** - INT is faster when actually throwing errors (but this happens rarely). ++** - INT is faster when actually throwing errors, but this happens rarely. + ** Setting up error handlers is zero-cost in any case. + ** ++** - INT needs to save *all* callee-saved registers when entering the ++** interpreter. EXT only needs to save those actually used inside the ++** interpreter. JIT-compiled code may need to save some more. ++** + ** - EXT provides full interoperability with C++ exceptions. You can throw + ** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. + ** C++ destructors are called as needed. C++ exceptions caught by pcall +@@ -46,27 +52,38 @@ + ** the wrapper function feature. Lua errors thrown through C++ frames + ** cannot be caught by C++ code and C++ destructors are not run. + ** +-** EXT is the default on x64 systems and on Windows, INT is the default on all +-** other systems. ++** - EXT can handle errors from internal helper functions that are called ++** from JIT-compiled code (except for Windows/x86 and 32 bit ARM). ++** INT has no choice but to call the panic handler, if this happens. ++** Note: this is mainly relevant for out-of-memory errors. ++** ++** EXT is the default on all systems where the toolchain produces unwind ++** tables by default (*). This is hard-coded and/or detected in src/Makefile. ++** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL ++** ++** INT is the default on all other systems. ++** ++** EXT can be manually enabled for toolchains that are able to produce ++** conforming unwind tables: ++** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL" ++** As explained above, *all* C code used directly or indirectly by LuaJIT ++** must be compiled with -funwind-tables (or -fexceptions). C++ code must ++** *not* be compiled with -fno-exceptions. ++** ++** If you're unsure whether error handling inside the VM works correctly, ++** try running this and check whether it prints "OK": + ** +-** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack +-** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled +-** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set +-** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules +-** and all C libraries that have callbacks which may be used to call back +-** into Lua. C++ code must *not* be compiled with -fno-exceptions. ++** luajit -e "print(select(2, load('OK')):match('OK'))" + ** +-** EXT is mandatory on WIN64 since the calling convention has an abundance +-** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). +-** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). ++** (*) Originally, toolchains only generated unwind tables for C++ code. For ++** interoperability reasons, this can be manually enabled for plain C code, ++** too (with -funwind-tables). 
With the introduction of the x64 architecture, ++** the corresponding POSIX and Windows ABIs mandated unwind tables for all ++** code. Over the following years most desktop and server platforms have ++** enabled unwind tables by default on all architectures. OTOH mobile and ++** embedded platforms do not consistently mandate unwind tables. + */ + +-#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND +-#define LJ_UNWIND_EXT 1 +-#elif LJ_TARGET_WINDOWS +-#define LJ_UNWIND_EXT 1 +-#endif +- + /* -- Error messages ------------------------------------------------------ */ + + /* Error message strings. */ +@@ -150,6 +167,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) + case FRAME_CONT: /* Continuation frame. */ + if (frame_iscont_fficb(frame)) + goto unwind_c; ++ /* fallthrough */ + case FRAME_VARG: /* Vararg frame. */ + frame = frame_prevd(frame); + break; +@@ -183,7 +201,172 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) + + /* -- External frame unwinding -------------------------------------------- */ + +-#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN ++#if LJ_ABI_WIN ++ ++/* ++** Someone in Redmond owes me several days of my life. A lot of this is ++** undocumented or just plain wrong on MSDN. Some of it can be gathered ++** from 3rd party docs or must be found by trial-and-error. They really ++** don't want you to write your own language-specific exception handler ++** or to interact gracefully with MSVC. :-( ++** ++** Apparently MSVC doesn't call C++ destructors for foreign exceptions ++** unless you compile your C++ code with /EHa. Unfortunately this means ++** catch (...) also catches things like access violations. The use of ++** _set_se_translator doesn't really help, because it requires /EHa, too. ++*/ ++ ++#define WIN32_LEAN_AND_MEAN ++#include <windows.h> ++ ++#if LJ_TARGET_X86 ++typedef void *UndocumentedDispatcherContext; /* Unused on x86. */ ++#else ++/* Taken from: http://www.nynaeve.net/?p=99 */ ++typedef struct UndocumentedDispatcherContext { ++ ULONG64 ControlPc; ++ ULONG64 ImageBase; ++ PRUNTIME_FUNCTION FunctionEntry; ++ ULONG64 EstablisherFrame; ++ ULONG64 TargetIp; ++ PCONTEXT ContextRecord; ++ void (*LanguageHandler)(void); ++ PVOID HandlerData; ++ PUNWIND_HISTORY_TABLE HistoryTable; ++ ULONG ScopeIndex; ++ ULONG Fill0; ++} UndocumentedDispatcherContext; ++#endif ++ ++/* Another wild guess. */ ++extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); ++ ++#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) ++/* Workaround for broken MinGW64 declaration. */ ++VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); ++#define RtlUnwindEx RtlUnwindEx_FIXED ++#endif ++ ++#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) ++#define LJ_GCC_EXCODE ((DWORD)0x20474343) ++ ++#define LJ_EXCODE ((DWORD)0xe24c4a00) ++#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) ++#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) ++#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) ++ ++/* Windows exception handler for interpreter frame. */ ++LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, ++ void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) ++{ ++#if LJ_TARGET_X86 ++ void *cf = (char *)f - CFRAME_OFS_SEH; ++#else ++ void *cf = f; ++#endif ++ lua_State *L = cframe_L(cf); ++ int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? 
++ LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; ++ if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ ++ /* Unwind internal frames. */ ++ err_unwind(L, cf, errcode); ++ } else { ++ void *cf2 = err_unwind(L, cf, 0); ++ if (cf2) { /* We catch it, so start unwinding the upper frames. */ ++ if (rec->ExceptionCode == LJ_MSVC_EXCODE || ++ rec->ExceptionCode == LJ_GCC_EXCODE) { ++#if !LJ_TARGET_CYGWIN ++ __DestructExceptionObject(rec, 1); ++#endif ++ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); ++ } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { ++ /* Don't catch access violations etc. */ ++ return 1; /* ExceptionContinueSearch */ ++ } ++#if LJ_TARGET_X86 ++ UNUSED(ctx); ++ UNUSED(dispatch); ++ /* Call all handlers for all lower C frames (including ourselves) again ++ ** with EH_UNWINDING set. Then call the specified function, passing cf ++ ** and errcode. ++ */ ++ lj_vm_rtlunwind(cf, (void *)rec, ++ (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? ++ (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); ++ /* lj_vm_rtlunwind does not return. */ ++#else ++ /* Unwind the stack and call all handlers for all lower C frames ++ ** (including ourselves) again with EH_UNWINDING set. Then set ++ ** stack pointer = cf, result = errcode and jump to the specified target. ++ */ ++ RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? ++ lj_vm_unwind_ff_eh : ++ lj_vm_unwind_c_eh), ++ rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); ++ /* RtlUnwindEx should never return. */ ++#endif ++ } ++ } ++ return 1; /* ExceptionContinueSearch */ ++} ++ ++#if LJ_UNWIND_JIT ++ ++#if LJ_TARGET_X64 ++#define CONTEXT_REG_PC Rip ++#elif LJ_TARGET_ARM64 ++#define CONTEXT_REG_PC Pc ++#else ++#error "NYI: Windows arch-specific unwinder for JIT-compiled code" ++#endif ++ ++/* Windows unwinder for JIT-compiled code. */ ++static void err_unwind_win_jit(global_State *g, int errcode) ++{ ++ CONTEXT ctx; ++ UNWIND_HISTORY_TABLE hist; ++ ++ memset(&hist, 0, sizeof(hist)); ++ RtlCaptureContext(&ctx); ++ while (1) { ++ uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC; ++ void *hdata; ++ PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist); ++ if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */ ++ ExitNo exitno; ++ uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno); ++ if (stub) { /* Jump to side exit to unwind the trace. */ ++ ctx.CONTEXT_REG_PC = stub; ++ G2J(g)->exitcode = errcode; ++ RtlRestoreContext(&ctx, NULL); /* Does not return. */ ++ } ++ break; ++ } ++ RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func, ++ &ctx, &hdata, &frame, NULL); ++ if (!addr) break; ++ } ++ /* Unwinding failed, if we end up here. */ ++} ++#endif ++ ++/* Raise Windows exception. */ ++static void err_raise_ext(global_State *g, int errcode) ++{ ++#if LJ_UNWIND_JIT ++ if (tvref(g->jit_base)) { ++ err_unwind_win_jit(g, errcode); ++ return; /* Unwinding failed. */ ++ } ++#elif LJ_HASJIT ++ /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */ ++ setmref(g->jit_base, NULL); ++#endif ++ UNUSED(g); ++ RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); ++} ++ ++#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__)) + + /* + ** We have to use our own definitions instead of the mandatory (!) 
unwind.h, +@@ -193,6 +376,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) + typedef struct _Unwind_Context _Unwind_Context; + + #define _URC_OK 0 ++#define _URC_FATAL_PHASE2_ERROR 2 + #define _URC_FATAL_PHASE1_ERROR 3 + #define _URC_HANDLER_FOUND 6 + #define _URC_INSTALL_CONTEXT 7 +@@ -212,9 +396,11 @@ typedef struct _Unwind_Exception + void (*excleanup)(int, struct _Unwind_Exception *); + uintptr_t p1, p2; + } __attribute__((__aligned__)) _Unwind_Exception; ++#define UNWIND_EXCEPTION_TYPE _Unwind_Exception + + extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); + extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); ++extern uintptr_t _Unwind_GetIP(_Unwind_Context *); + extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); + extern void _Unwind_DeleteException(_Unwind_Exception *); + extern int _Unwind_RaiseException(_Unwind_Exception *); +@@ -232,7 +418,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, + lua_State *L; + if (version != 1) + return _URC_FATAL_PHASE1_ERROR; +- UNUSED(uexclass); + cf = (void *)_Unwind_GetCFA(ctx); + L = cframe_L(cf); + if ((actions & _UA_SEARCH_PHASE)) { +@@ -280,25 +465,139 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, + ** it on non-x64 because the interpreter restores all callee-saved regs. + */ + lj_err_throw(L, errcode); ++#if LJ_TARGET_X64 ++#error "Broken build system -- only use the provided Makefiles!" ++#endif + #endif + } + return _URC_CONTINUE_UNWIND; + } + +-#if LJ_UNWIND_EXT +-#if LJ_TARGET_OSX || defined(__OpenBSD__) +-/* Sorry, no thread safety for OSX. Complain to Apple, not me. */ +-static _Unwind_Exception static_uex; ++#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) ++struct dwarf_eh_bases { void *tbase, *dbase, *func; }; ++extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases); ++ ++/* Verify that external error handling actually has a chance to work. */ ++void lj_err_verify(void) ++{ ++ struct dwarf_eh_bases ehb; ++ lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables"); ++ /* Check disabled, because of broken Fedora/ARM64. See #722. ++ lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables"); ++ */ ++} ++#endif ++ ++#if LJ_UNWIND_JIT ++/* DWARF2 personality handler for JIT-compiled code. */ ++static int err_unwind_jit(int version, int actions, ++ uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) ++{ ++ /* NYI: FFI C++ exception interoperability. */ ++ if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass)) ++ return _URC_FATAL_PHASE1_ERROR; ++ if ((actions & _UA_SEARCH_PHASE)) { ++ return _URC_HANDLER_FOUND; ++ } ++ if ((actions & _UA_CLEANUP_PHASE)) { ++ global_State *g = *(global_State **)(uex+1); ++ ExitNo exitno; ++ uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */ ++ uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno); ++ lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame"); ++ if (stub) { /* Jump to side exit to unwind the trace. 
*/ ++ G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass); ++#ifdef LJ_TARGET_MIPS ++ _Unwind_SetGR(ctx, 4, stub); ++ _Unwind_SetGR(ctx, 5, exitno); ++ _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub); + #else +-static __thread _Unwind_Exception static_uex; ++ _Unwind_SetIP(ctx, stub); + #endif ++ return _URC_INSTALL_CONTEXT; ++ } ++ return _URC_FATAL_PHASE2_ERROR; ++ } ++ return _URC_FATAL_PHASE1_ERROR; ++} + +-/* Raise DWARF2 exception. */ +-static void err_raise_ext(int errcode) ++/* DWARF2 template frame info for JIT-compiled code. ++** ++** After copying the template to the start of the mcode segment, ++** the frame handler function and the code size is patched. ++** The frame handler always installs a new context to jump to the exit, ++** so don't bother to add any unwind opcodes. ++*/ ++static const uint8_t err_frame_jit_template[] = { ++#if LJ_BE ++ 0,0,0, ++#endif ++ LJ_64 ? 0x1c : 0x14, /* CIE length. */ ++#if LJ_LE ++ 0,0,0, ++#endif ++ 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */ ++ 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */ ++#if LJ_64 ++ 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ ++ 0,0,0,0,0, /* Alignment. */ ++#else ++ 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ ++ 0, /* Alignment. */ ++#endif ++#if LJ_BE ++ 0,0,0, ++#endif ++ LJ_64 ? 0x14 : 0x10, /* FDE length. */ ++ 0,0,0, ++ LJ_64 ? 0x24 : 0x1c, /* CIE offset. */ ++ 0,0,0, ++ LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */ ++#if LJ_LE ++ 0,0,0, ++#endif ++ 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */ ++#if LJ_64 ++ 0,0,0,0, /* Alignment. */ ++#endif ++ 0,0,0,0 /* Final FDE. */ ++}; ++ ++#define ERR_FRAME_JIT_OFS_HANDLER 0x12 ++#define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18) ++#define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 
0x2c : 0x24) ++#if LJ_TARGET_OSX ++#define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE ++#else ++#define ERR_FRAME_JIT_OFS_REGISTER 0 ++#endif ++ ++extern void __register_frame(const void *); ++extern void __deregister_frame(const void *); ++ ++uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info) ++{ ++ void **handler; ++ memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template)); ++ handler = (void *)err_unwind_jit; ++ memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler)); ++ *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) = ++ (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base)); ++ __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER); ++#ifdef LUA_USE_ASSERT ++ { ++ struct dwarf_eh_bases ehb; ++ lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb), ++ "bad JIT unwind table registration"); ++ } ++#endif ++ return info + sizeof(err_frame_jit_template); ++} ++ ++void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info) + { +- static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); +- static_uex.excleanup = NULL; +- _Unwind_RaiseException(&static_uex); ++ UNUSED(base); UNUSED(sz); ++ __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER); + } + #endif + +@@ -310,6 +609,7 @@ static void err_raise_ext(int errcode) + #define _US_FORCE_UNWIND 8 + + typedef struct _Unwind_Control_Block _Unwind_Control_Block; ++#define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block + + struct _Unwind_Control_Block { + uint64_t exclass; +@@ -368,136 +668,63 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, + } + if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) + return _URC_FAILURE; ++#ifdef LUA_USE_ASSERT ++ /* We should never get here unless this is a forced unwind aka backtrace. */ ++ if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) { ++ _Unwind_SetGR(ctx, 0, 0xff33aa88); ++ } ++#endif + return _URC_CONTINUE_UNWIND; + } + +-#if LJ_UNWIND_EXT +-static __thread _Unwind_Control_Block static_uex; ++#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) ++typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *); ++extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +-static void err_raise_ext(int errcode) ++static int err_verify_bt(_Unwind_Context *ctx, int *got) + { +- memset(&static_uex, 0, sizeof(static_uex)); +- static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); +- _Unwind_RaiseException(&static_uex); ++ if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; } ++ else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); } ++ return _URC_OK; + } +-#endif + +-#endif /* LJ_TARGET_ARM */ +- +-#elif LJ_ABI_WIN ++/* Verify that external error handling actually has a chance to work. */ ++void lj_err_verify(void) ++{ ++ int got = 0; ++ _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got); ++ lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables"); ++} ++#endif + + /* +-** Someone in Redmond owes me several days of my life. A lot of this is +-** undocumented or just plain wrong on MSDN. Some of it can be gathered +-** from 3rd party docs or must be found by trial-and-error. They really +-** don't want you to write your own language-specific exception handler +-** or to interact gracefully with MSVC. :-( ++** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM. + ** +-** Apparently MSVC doesn't call C++ destructors for foreign exceptions +-** unless you compile your C++ code with /EHa. Unfortunately this means +-** catch (...) also catches things like access violations. 
The use of +-** _set_se_translator doesn't really help, because it requires /EHa, too. ++** The quirky ARM unwind API doesn't have __register_frame(). ++** A potential workaround might involve _Unwind_Backtrace. ++** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway, ++** since they are built without unwind tables by default. + */ + +-#define WIN32_LEAN_AND_MEAN +-#include <windows.h> +- +-#if LJ_TARGET_X64 +-/* Taken from: http://www.nynaeve.net/?p=99 */ +-typedef struct UndocumentedDispatcherContext { +- ULONG64 ControlPc; +- ULONG64 ImageBase; +- PRUNTIME_FUNCTION FunctionEntry; +- ULONG64 EstablisherFrame; +- ULONG64 TargetIp; +- PCONTEXT ContextRecord; +- void (*LanguageHandler)(void); +- PVOID HandlerData; +- PUNWIND_HISTORY_TABLE HistoryTable; +- ULONG ScopeIndex; +- ULONG Fill0; +-} UndocumentedDispatcherContext; +-#else +-typedef void *UndocumentedDispatcherContext; +-#endif +- +-/* Another wild guess. */ +-extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); +- +-#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) +-/* Workaround for broken MinGW64 declaration. */ +-VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); +-#define RtlUnwindEx RtlUnwindEx_FIXED +-#endif ++#endif /* LJ_TARGET_ARM */ + +-#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) +-#define LJ_GCC_EXCODE ((DWORD)0x20474343) + +-#define LJ_EXCODE ((DWORD)0xe24c4a00) +-#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) +-#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) +-#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) ++#if LJ_UNWIND_EXT ++static __thread struct { ++ UNWIND_EXCEPTION_TYPE ex; ++ global_State *g; ++} static_uex; + +-/* Windows exception handler for interpreter frame. */ +-LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, +- void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) ++/* Raise external exception. */ ++static void err_raise_ext(global_State *g, int errcode) + { +-#if LJ_TARGET_X64 +- void *cf = f; +-#else +- void *cf = (char *)f - CFRAME_OFS_SEH; +-#endif +- lua_State *L = cframe_L(cf); +- int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? +- LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; +- if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ +- /* Unwind internal frames. */ +- err_unwind(L, cf, errcode); +- } else { +- void *cf2 = err_unwind(L, cf, 0); +- if (cf2) { /* We catch it, so start unwinding the upper frames. */ +- if (rec->ExceptionCode == LJ_MSVC_EXCODE || +- rec->ExceptionCode == LJ_GCC_EXCODE) { +-#if LJ_TARGET_WINDOWS +- __DestructExceptionObject(rec, 1); +-#endif +- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); +- } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { +- /* Don't catch access violations etc. */ +- return 1; /* ExceptionContinueSearch */ +- } +-#if LJ_TARGET_X64 +- /* Unwind the stack and call all handlers for all lower C frames +- ** (including ourselves) again with EH_UNWINDING set. Then set +- ** rsp = cf, rax = errcode and jump to the specified target. +- */ +- RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? +- lj_vm_unwind_ff_eh : +- lj_vm_unwind_c_eh), +- rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); +- /* RtlUnwindEx should never return. */ +-#else +- UNUSED(ctx); +- UNUSED(dispatch); +- /* Call all handlers for all lower C frames (including ourselves) again +- ** with EH_UNWINDING set. Then call the specified function, passing cf +- ** and errcode. 
+- */ +- lj_vm_rtlunwind(cf, (void *)rec, +- (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? +- (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); +- /* lj_vm_rtlunwind does not return. */ +-#endif +- } +- } +- return 1; /* ExceptionContinueSearch */ ++ memset(&static_uex, 0, sizeof(static_uex)); ++ static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode); ++ static_uex.g = g; ++ _Unwind_RaiseException(&static_uex.ex); + } + +-/* Raise Windows exception. */ +-static void err_raise_ext(int errcode) +-{ +- RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); +-} ++#endif + + #endif + +@@ -508,22 +735,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) + { + global_State *g = G(L); + lj_trace_abort(g); +- setmref(g->jit_base, NULL); + L->status = LUA_OK; + #if LJ_UNWIND_EXT +- err_raise_ext(errcode); ++ err_raise_ext(g, errcode); + /* + ** A return from this function signals a corrupt C stack that cannot be + ** unwound. We have no choice but to call the panic function and exit. + ** + ** Usually this is caused by a C function without unwind information. +- ** This should never happen on x64, but may happen if you've manually +- ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every* +- ** non-C++ file with -funwind-tables. ++ ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL ++ ** and forgot to recompile *every* non-C++ file with -funwind-tables. + */ + if (G(L)->panic) + G(L)->panic(L); + #else ++#if LJ_HASJIT ++ setmref(g->jit_base, NULL); ++#endif + { + void *cf = err_unwind(L, NULL, errcode); + if (cframe_unwind_ff(cf)) +@@ -585,6 +813,7 @@ static ptrdiff_t finderrfunc(lua_State *L) + if (cframe_canyield(cf)) return 0; + if (cframe_errfunc(cf) >= 0) + return cframe_errfunc(cf); ++ cf = cframe_prev(cf); + frame = frame_prevd(frame); + break; + case FRAME_PCALL: +@@ -593,7 +822,7 @@ static ptrdiff_t finderrfunc(lua_State *L) + return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */ + return 0; + default: +- lua_assert(0); ++ lj_assertL(0, "bad frame type"); + return 0; + } + } +@@ -601,9 +830,9 @@ static ptrdiff_t finderrfunc(lua_State *L) + } + + /* Runtime error. */ +-LJ_NOINLINE void lj_err_run(lua_State *L) ++LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) + { +- ptrdiff_t ef = finderrfunc(L); ++ ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L); + if (ef) { + TValue *errfunc = restorestack(L, ef); + TValue *top = L->top; +@@ -622,6 +851,16 @@ LJ_NOINLINE void lj_err_run(lua_State *L) + lj_err_throw(L, LUA_ERRRUN); + } + ++#if LJ_HASJIT ++LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode) ++{ ++ if (errcode == LUA_ERRRUN) ++ lj_err_run(L); ++ else ++ lj_err_throw(L, errcode); ++} ++#endif ++ + /* Formatted runtime error message. */ + LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) + { +@@ -690,9 +929,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) + const BCIns *pc = cframe_Lpc(L); + if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { + const char *tname = lj_typename(o); ++ setframe_gc(o, obj2gco(L), LJ_TTHREAD); + if (LJ_FR2) o++; + setframe_pc(o, pc); +- setframe_gc(o, obj2gco(L), LJ_TTHREAD); + L->top = L->base = o+1; + err_msgv(L, LJ_ERR_BADCALL, tname); + } +@@ -702,25 +941,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) + /* Error in context of caller. 
*/ + LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) + { +- TValue *frame = L->base-1; +- TValue *pframe = NULL; +- if (frame_islua(frame)) { +- pframe = frame_prevl(frame); +- } else if (frame_iscont(frame)) { +- if (frame_iscont_fficb(frame)) { +- pframe = frame; +- frame = NULL; +- } else { +- pframe = frame_prevd(frame); ++ TValue *frame = NULL, *pframe = NULL; ++ if (!(LJ_HASJIT && tvref(G(L)->jit_base))) { ++ frame = L->base-1; ++ if (frame_islua(frame)) { ++ pframe = frame_prevl(frame); ++ } else if (frame_iscont(frame)) { ++ if (frame_iscont_fficb(frame)) { ++ pframe = frame; ++ frame = NULL; ++ } else { ++ pframe = frame_prevd(frame); + #if LJ_HASFFI +- /* Remove frame for FFI metamethods. */ +- if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && +- frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { +- L->base = pframe+1; +- L->top = frame; +- setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); +- } ++ /* Remove frame for FFI metamethods. */ ++ if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && ++ frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { ++ L->base = pframe+1; ++ L->top = frame; ++ setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); ++ } + #endif ++ } + } + } + lj_debug_addloc(L, msg, pframe, frame); +diff --git a/src/lj_err.h b/src/lj_err.h +index cba5fb71..2e8a251f 100644 +--- a/src/lj_err.h ++++ b/src/lj_err.h +@@ -1,6 +1,6 @@ + /* + ** Error handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_ERR_H +@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg; + LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); + LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); + LJ_FUNC_NORET void lj_err_mem(lua_State *L); +-LJ_FUNC_NORET void lj_err_run(lua_State *L); ++LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L); ++#if LJ_HASJIT ++LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode); ++#endif + LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); + LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, + BCLine line, ErrMsg em, va_list argp); +@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...); + LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); + LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); + ++#if LJ_UNWIND_JIT && !LJ_ABI_WIN ++LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info); ++LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info); ++#else ++#define lj_err_register_mcode(base, sz, info) (info) ++#define lj_err_deregister_mcode(base, sz, info) UNUSED(base) ++#endif ++ ++#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT) ++LJ_FUNC void lj_err_verify(void); ++#else ++#define lj_err_verify() ((void)0) ++#endif ++ + #endif +diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h +index 060a9f89..89e67496 100644 +--- a/src/lj_errmsg.h ++++ b/src/lj_errmsg.h +@@ -1,6 +1,6 @@ + /* + ** VM error messages. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + /* This file may be included multiple times with different ERRDEF macros. 
*/ +@@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable") + ERRDEF(UNPACK, "too many results to unpack") + ERRDEF(RDRSTR, "reader function must return a string") + ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) ++ERRDEF(NUMRNG, "number out of range") + ERRDEF(IDXRNG, "index out of range") + ERRDEF(BASERNG, "base out of range") + ERRDEF(LVLRNG, "level out of range") +@@ -101,11 +102,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)") + ERRDEF(BADMODN, "name conflict for module " LUA_QS) + #if LJ_HASJIT + ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") +-#if LJ_TARGET_X86ORX64 +-ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") +-#else + ERRDEF(NOJIT, "JIT compiler disabled") +-#endif + #elif defined(LJ_ARCH_NOJIT) + ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") + #else +@@ -183,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") + ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") + #endif + ++#if LJ_HASBUFFER ++/* String buffer errors. */ ++ERRDEF(BUFFER_SELF, "cannot put buffer into itself") ++ERRDEF(BUFFER_BADOPT, "bad options table") ++ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS) ++ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x") ++ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d") ++ERRDEF(BUFFER_DEPTH, "too deep to serialize") ++ERRDEF(BUFFER_DUPKEY, "duplicate table key") ++ERRDEF(BUFFER_EOB, "unexpected end of buffer") ++ERRDEF(BUFFER_LEFTOV, "left-over data in buffer") ++#endif ++ + #undef ERRDEF + + /* Detecting unused error messages: +diff --git a/src/lj_ff.h b/src/lj_ff.h +index 31d65a00..e355f44e 100644 +--- a/src/lj_ff.h ++++ b/src/lj_ff.h +@@ -1,6 +1,6 @@ + /* + ** Fast function IDs. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_FF_H +diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c +index dfdee2db..97b40f98 100644 +--- a/src/lj_ffrecord.c ++++ b/src/lj_ffrecord.c +@@ -1,6 +1,6 @@ + /* + ** Fast function call recorder. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_ffrecord_c +@@ -11,6 +11,7 @@ + #if LJ_HASJIT + + #include "lj_err.h" ++#include "lj_buf.h" + #include "lj_str.h" + #include "lj_tab.h" + #include "lj_frame.h" +@@ -28,6 +29,7 @@ + #include "lj_vm.h" + #include "lj_strscan.h" + #include "lj_strfmt.h" ++#include "lj_serialize.h" + + /* Some local macros to save typing. Undef'd at the end. */ + #define IR(ref) (&J->cur.ir[(ref)]) +@@ -107,6 +109,10 @@ static void recff_stitch(jit_State *J) + const BCIns *pc = frame_pc(base-1); + TValue *pframe = frame_prevl(base-1); + ++ /* Check for this now. Throwing in lj_record_stop messes up the stack. */ ++ if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap]) ++ lj_trace_err(J, LJ_TRERR_SNAPOV); ++ + /* Move func + args up in Lua stack and insert continuation. */ + memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); + setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); +@@ -182,6 +188,14 @@ static TRef recff_bufhdr(jit_State *J) + lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); + } + ++/* Emit TMPREF. 
*/ ++static TRef recff_tmpref(jit_State *J, TRef tr, int mode) ++{ ++ if (!LJ_DUALNUM && tref_isinteger(tr)) ++ tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); ++ return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode); ++} ++ + /* -- Base library fast functions ----------------------------------------- */ + + static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd) +@@ -281,7 +295,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd) + if (tref_isstr(tr)) + J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); + else if (tref_istab(tr)) +- J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); ++ J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL); + /* else: Interpreter will throw. */ + UNUSED(rd); + } +@@ -296,7 +310,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) + } else { + TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); + TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); +- emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); ++ emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#')); + } + return 0; + } else { /* select(n, ...) */ +@@ -317,9 +331,9 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) + ptrdiff_t n = (ptrdiff_t)J->maxslot; + if (start < 0) start += n; + else if (start > n) start = n; +- rd->nres = n - start; + if (start >= 1) { + ptrdiff_t i; ++ rd->nres = n - start; + for (i = 0; i < n - start; i++) + J->base[i] = J->base[start+i]; + } /* else: Interpreter will throw. */ +@@ -455,6 +469,7 @@ static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) + #endif + lj_record_call(J, 0, J->maxslot - 1); + rd->nres = -1; /* Pending call. */ ++ J->needsnap = 1; /* Start catching on-trace errors. */ + } /* else: Interpreter will throw. */ + } + +@@ -490,6 +505,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) + if (errcode) + lj_err_throw(J->L, errcode); /* Propagate errors. */ + rd->nres = -1; /* Pending call. */ ++ J->needsnap = 1; /* Start catching on-trace errors. */ + } /* else: Interpreter will throw. */ + } + +@@ -505,6 +521,40 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) + recff_nyiu(J, rd); + } + ++static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd) ++{ ++#if LJ_BE ++ /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, ++ ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. ++ */ ++ recff_nyi(J, rd); ++#else ++ TRef tab = J->base[0]; ++ if (tref_istab(tab)) { ++ RecordIndex ix; ++ cTValue *keyv; ++ ix.tab = tab; ++ if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */ ++ ix.key = lj_ir_kint(J, 0); ++ keyv = niltvg(J2G(J)); ++ } else { ++ TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); ++ ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp); ++ keyv = &rd->argv[1]; ++ } ++ copyTV(J->L, &ix.tabv, &rd->argv[0]); ++ ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv); ++ /* Omit the value, if not used by the caller. */ ++ ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) && ++ bc_b(frame_pc(J->L->base-1)[-1])-1 < 2); ++ ix.mobj = 0; /* We don't need the next index. */ ++ rd->nres = lj_record_next(J, &ix); ++ J->base[0] = ix.key; ++ J->base[1] = ix.val; ++ } /* else: Interpreter will throw. 
*/ ++#endif ++} ++ + /* -- Math library fast functions ----------------------------------------- */ + + static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) +@@ -563,7 +613,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) + { + TRef tr = lj_ir_tonum(J, J->base[0]); + TRef tr2 = lj_ir_tonum(J, J->base[1]); +- J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2); ++ J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2); + UNUSED(rd); + } + +@@ -580,43 +630,12 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd) + UNUSED(rd); + } + +-/* Record math.asin, math.acos, math.atan. */ +-static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd) +-{ +- TRef y = lj_ir_tonum(J, J->base[0]); +- TRef x = lj_ir_knum_one(J); +- uint32_t ffid = rd->data; +- if (ffid != FF_math_atan) { +- TRef tmp = emitir(IRTN(IR_MUL), y, y); +- tmp = emitir(IRTN(IR_SUB), x, tmp); +- tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); +- if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; } +- } +- J->base[0] = emitir(IRTN(IR_ATAN2), y, x); +-} +- +-static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd) ++static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd) + { + TRef tr = lj_ir_tonum(J, J->base[0]); + J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); + } + +-static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) +-{ +- TRef tr = J->base[0]; +- if (tref_isinteger(tr)) { +- J->base[0] = tr; +- J->base[1] = lj_ir_kint(J, 0); +- } else { +- TRef trt; +- tr = lj_ir_tonum(J, tr); +- trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC); +- J->base[0] = trt; +- J->base[1] = emitir(IRTN(IR_SUB), tr, trt); +- } +- rd->nres = 2; +-} +- + static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) + { + J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], +@@ -647,7 +666,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) + GCudata *ud = udataV(&J->fn->c.upvalue[0]); + TRef tr, one; + lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ +- tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); ++ tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud))); + one = lj_ir_knum_one(J); + tr = emitir(IRTN(IR_SUB), tr, one); + if (J->base[0]) { +@@ -738,7 +757,7 @@ static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) + #if LJ_HASFFI + TRef hdr = recff_bufhdr(J); + TRef tr = recff_bit64_tohex(J, rd, hdr); +- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); ++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + #else + recff_nyiu(J, rd); /* Don't bother working around this NYI. */ + #endif +@@ -864,8 +883,10 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) + if (i > 1) { /* Concatenate the strings, if there's more than one. */ + TRef hdr = recff_bufhdr(J), tr = hdr; + for (i = 0; J->base[i] != 0; i++) +- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]); +- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); ++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]); ++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); ++ } else if (i == 0) { ++ J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); + } + UNUSED(rd); + } +@@ -881,19 +902,19 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) + emitir(IRTGI(vrep > 1 ? 
IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); + if (vrep > 1) { + TRef hdr2 = recff_bufhdr(J); +- TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep); +- tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str); +- str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2); ++ TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep); ++ tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str); ++ str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2); + } + } + tr = hdr = recff_bufhdr(J); + if (str2) { +- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str); ++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str); + str = str2; + rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); + } + tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep); +- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); ++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } + + static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) +@@ -901,7 +922,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) + TRef str = lj_ir_tostr(J, J->base[0]); + TRef hdr = recff_bufhdr(J); + TRef tr = lj_ir_call(J, rd->data, hdr, str); +- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); ++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } + + static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) +@@ -949,7 +970,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) + str->len-(MSize)start, pat->len)) { + TRef pos; + emitir(IRTG(IR_NE, IRT_PGC), tr, trp0); +- pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0)); ++ /* Recompute offset. trsptr may not point into trstr after folding. */ ++ pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart); + J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); + J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); + rd->nres = 2; +@@ -963,34 +985,40 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) + } + } + +-static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) ++static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx) + { +- TRef trfmt = lj_ir_tostr(J, J->base[0]); +- GCstr *fmt = argv2str(J, &rd->argv[0]); +- int arg = 1; +- TRef hdr, tr; ++ ptrdiff_t arg = sbufx; ++ TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]); ++ GCstr *fmt = argv2str(J, &rd->argv[arg]); + FormatState fs; + SFormat sf; + /* Specialize to the format string. */ + emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt)); +- tr = hdr = recff_bufhdr(J); + lj_strfmt_init(&fs, strdata(fmt), fmt->len); + while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */ +- TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++]; ++ TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg]; + TRef trsf = lj_ir_kint(J, (int32_t)sf); + IRCallID id; + switch (STRFMT_TYPE(sf)) { + case STRFMT_LIT: +- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, ++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, + lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); + break; + case STRFMT_INT: + id = IRCALL_lj_strfmt_putfnum_int; + handle_int: +- if (!tref_isinteger(tra)) ++ if (!tref_isinteger(tra)) { ++#if LJ_HASFFI ++ if (tref_iscdata(tra)) { ++ tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]); ++ tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); ++ break; ++ } ++#endif + goto handle_num; ++ } + if (sf == STRFMT_INT) { /* Shortcut for plain %d. 
*/ +- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, ++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, + emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); + } else { + #if LJ_HASFFI +@@ -1012,15 +1040,16 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) + handle_num: + tra = lj_ir_tonum(J, tra); + tr = lj_ir_call(J, id, tr, trsf, tra); +- if (LJ_SOFTFP) lj_needsplit(J); ++ if (LJ_SOFTFP32) lj_needsplit(J); + break; + case STRFMT_STR: + if (!tref_isstr(tra)) { + recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */ ++ /* NYI: also buffers. */ + return; + } + if (sf == STRFMT_STR) /* Shortcut for plain %s. */ +- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra); ++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra); + else if ((sf & STRFMT_T_QUOTED)) + tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); + else +@@ -1029,7 +1058,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) + case STRFMT_CHAR: + tra = lj_opt_narrow_toint(J, tra); + if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */ +- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, ++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, + emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); + else + tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); +@@ -1041,9 +1070,326 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) + return; + } + } +- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); ++ if (sbufx) { ++ emitir(IRT(IR_USE, IRT_NIL), tr, 0); ++ } else { ++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); ++ } ++} ++ ++static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) ++{ ++ recff_format(J, rd, recff_bufhdr(J), 0); ++} ++ ++/* -- Buffer library fast functions --------------------------------------- */ ++ ++#if LJ_HASBUFFER ++ ++static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud) ++{ ++ return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L); ++} ++ ++static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val) ++{ ++ TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L); ++ emitir(IRT(IR_FSTORE, IRT_PGC), fref, val); ++} ++ ++static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl) ++{ ++ return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl); ++} ++ ++static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val) ++{ ++ TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl); ++ emitir(IRT(IR_FSTORE, IRT_PTR), fref, val); ++} ++ ++static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw) ++{ ++ TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr); ++ if (LJ_64) ++ len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); ++ return len; ++} ++ ++/* Emit typecheck for string buffer. */ ++static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, int arg) ++{ ++ TRef trtype, ud = J->base[arg]; ++ if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE); ++ trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); ++ emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER)); ++ J->needsnap = 1; ++ return ud; + } + ++/* Emit BUFHDR for write to extended string buffer. */ ++static TRef recff_sbufx_write(jit_State *J, TRef ud) ++{ ++ TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata))); ++ return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE); ++} ++ ++/* Check for integer in range for the buffer API. 
*/ ++static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, int arg) ++{ ++ TRef tr = J->base[arg]; ++ TRef trlim = lj_ir_kint(J, LJ_MAX_BUF); ++ if (tref_isinteger(tr)) { ++ emitir(IRTGI(IR_ULE), tr, trlim); ++ } else if (tref_isnum(tr)) { ++ tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); ++ emitir(IRTGI(IR_ULE), tr, trlim); ++#if LJ_HASFFI ++ } else if (tref_iscdata(tr)) { ++ tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]); ++ emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF)); ++ tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE); ++#else ++ UNUSED(rd); ++#endif ++ } else { ++ lj_trace_err(J, LJ_TRERR_BADTYPE); ++ } ++ return tr; ++} ++ ++static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ SBufExt *sbx = bufV(&rd->argv[0]); ++ int iscow = (int)sbufiscow(sbx); ++ TRef trl = recff_sbufx_get_L(J, ud); ++ TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW)); ++ TRef zero = lj_ir_kint(J, 0); ++ emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero); ++ if (iscow) { ++ trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, ++ LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) : ++ lj_ir_kint(J, SBUF_FLAG_COW)); ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero); ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero); ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero); ++ recff_sbufx_set_L(J, ud, trl); ++ emitir(IRT(IR_FSTORE, IRT_PGC), ++ emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero); ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero); ++ } else { ++ TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B); ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb); ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb); ++ } ++} ++ ++static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); ++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); ++ TRef len = recff_sbufx_len(J, trr, trw); ++ TRef trn = recff_sbufx_checkint(J, rd, 1); ++ len = emitir(IRTI(IR_MIN), len, trn); ++ trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len); ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); ++} ++ ++static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trbuf = recff_sbufx_write(J, ud); ++ TRef tr = J->base[1]; ++ if (tref_isstr(tr)) { ++ TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); ++ TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); ++ lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); ++#if LJ_HASFFI ++ } else if (tref_iscdata(tr)) { ++ TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]); ++ TRef len = recff_sbufx_checkint(J, rd, 2); ++ lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); ++#endif ++ } /* else: Interpreter will throw. */ ++} ++ ++static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trbuf = recff_sbufx_write(J, ud); ++ TRef tr; ++ ptrdiff_t arg; ++ if (!J->base[1]) return; ++ for (arg = 1; (tr = J->base[arg]); arg++) { ++ if (tref_isstr(tr)) { ++ trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr); ++ } else if (tref_isnumber(tr)) { ++ trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, ++ emitir(IRT(IR_TOSTR, IRT_STR), tr, ++ tref_isnum(tr) ? 
IRTOSTR_NUM : IRTOSTR_INT)); ++ } else if (tref_isudata(tr)) { ++ TRef ud2 = recff_sbufx_check(J, rd, arg); ++ TRef trr = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_R); ++ TRef trw = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_W); ++ TRef len = recff_sbufx_len(J, trr, trw); ++ emitir(IRTG(IR_NE, IRT_PGC), ud, ud2); ++ trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len); ++ } else { ++ recff_nyiu(J, rd); ++ } ++ } ++ emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); ++} ++ ++static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trbuf = recff_sbufx_write(J, ud); ++ recff_format(J, rd, trbuf, 1); ++} ++ ++static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); ++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); ++ TRef tr; ++ ptrdiff_t arg; ++ if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; } ++ for (arg = 0; (tr = J->base[arg+1]); arg++) { ++ TRef len = recff_sbufx_len(J, trr, trw); ++ if (tref_isnil(tr)) { ++ J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); ++ trr = trw; ++ } else { ++ TRef trn = recff_sbufx_checkint(J, rd, arg+1); ++ TRef tru; ++ len = emitir(IRTI(IR_MIN), len, trn); ++ tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len); ++ J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); ++ trr = tru; /* Doing the ADD before the SNEW generates better code. */ ++ } ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); ++ } ++ rd->nres = arg; ++} ++ ++static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); ++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); ++ J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw)); ++} ++ ++static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); ++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); ++ J->base[0] = recff_sbufx_len(J, trr, trw); ++} ++ ++#if LJ_HASFFI ++static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trbuf = recff_sbufx_write(J, ud); ++ TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]); ++ TRef len = recff_sbufx_checkint(J, rd, 2); ++ trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len); ++ emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); ++} ++ ++static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trbuf = recff_sbufx_write(J, ud); ++ TRef trsz = recff_sbufx_checkint(J, rd, 1); ++ J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz); ++ J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W)); ++ rd->nres = 2; ++} ++ ++static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef len = recff_sbufx_checkint(J, rd, 1); ++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); ++ TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E); ++ TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw); ++ if (LJ_64) ++ left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); ++ emitir(IRTGI(IR_ULE), len, left); ++ trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len); ++ 
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw); ++} ++ ++static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); ++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); ++ J->base[0] = lj_crecord_topuint8(J, trr); ++ J->base[1] = recff_sbufx_len(J, trr, trw); ++ rd->nres = 2; ++} ++#endif ++ ++static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trbuf = recff_sbufx_write(J, ud); ++ TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); ++ lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp); ++ /* No IR_USE needed, since the call is a store. */ ++} ++ ++static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd) ++{ ++ TRef ud = recff_sbufx_check(J, rd, 0); ++ TRef trbuf = recff_sbufx_write(J, ud); ++ TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); ++ TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp); ++ IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0])); ++ /* No IR_USE needed, since the call is a store. */ ++ J->base[0] = lj_record_vload(J, tmp, 0, t); ++ /* The sbx->r store must be after the VLOAD type check, in case it fails. */ ++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); ++} ++ ++static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd) ++{ ++ TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1); ++ J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp); ++ /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */ ++ emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0); ++ UNUSED(rd); ++} ++ ++static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd) ++{ ++ if (tvisstr(&rd->argv[0])) { ++ GCstr *str = strV(&rd->argv[0]); ++ SBufExt sbx; ++ IRType t; ++ TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); ++ TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]); ++ /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM. ++ ** That's why IRCALL_lj_serialize_decode needs a fake INT result. ++ */ ++ emitir(IRT(IR_USE, IRT_NIL), tr, 0); ++ memset(&sbx, 0, sizeof(SBufExt)); ++ lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len); ++ t = (IRType)lj_serialize_peektype(&sbx); ++ J->base[0] = lj_record_vload(J, tmp, 0, t); ++ } /* else: Interpreter will throw. */ ++} ++ ++#endif ++ + /* -- Table library fast functions ---------------------------------------- */ + + static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) +@@ -1054,7 +1400,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) + rd->nres = 0; + if (tref_istab(ix.tab) && ix.val) { + if (!J->base[2]) { /* Simple push: t[#t+1] = v */ +- TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); ++ TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL); + GCtab *t = tabV(&rd->argv[0]); + ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); + settabV(J->L, &ix.tabv, t); +@@ -1078,11 +1424,11 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) + lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); + TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? 
+ lj_opt_narrow_toint(J, J->base[3]) : +- lj_ir_call(J, IRCALL_lj_tab_len, tab); ++ emitir(IRTI(IR_ALEN), tab, TREF_NIL); + TRef hdr = recff_bufhdr(J); + TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); + emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); +- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); ++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } /* else: Interpreter will throw. */ + UNUSED(rd); + } +@@ -1114,13 +1460,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) + { + TRef tr, ud, fp; + if (id) { /* io.func() */ +-#if LJ_GC64 +- /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ + ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); +-#else +- tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); +- ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); +-#endif + } else { /* fp:method() */ + ud = J->base[0]; + if (!tref_isudata(ud)) +diff --git a/src/lj_ffrecord.h b/src/lj_ffrecord.h +index 3b407450..aca6ca1d 100644 +--- a/src/lj_ffrecord.h ++++ b/src/lj_ffrecord.h +@@ -1,6 +1,6 @@ + /* + ** Fast function call recorder. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_FFRECORD_H +diff --git a/src/lj_frame.h b/src/lj_frame.h +index 19c49a4a..b6146454 100644 +--- a/src/lj_frame.h ++++ b/src/lj_frame.h +@@ -1,6 +1,6 @@ + /* + ** Stack frames. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_FRAME_H +@@ -46,7 +46,7 @@ enum { + #define frame_gc(f) (gcval((f)-1)) + #define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz) + #define frame_pc(f) ((const BCIns *)frame_ftsz(f)) +-#define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp))) ++#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp))) + #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) + #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) + #else +@@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #endif + #define CFRAME_SHIFT_MULTRES 3 + #elif LJ_TARGET_ARM64 +-#define CFRAME_OFS_ERRF 196 +-#define CFRAME_OFS_NRES 200 +-#define CFRAME_OFS_PREV 160 +-#define CFRAME_OFS_L 176 +-#define CFRAME_OFS_PC 168 +-#define CFRAME_OFS_MULTRES 192 ++#define CFRAME_OFS_ERRF 36 ++#define CFRAME_OFS_NRES 40 ++#define CFRAME_OFS_PREV 0 ++#define CFRAME_OFS_L 16 ++#define CFRAME_OFS_PC 8 ++#define CFRAME_OFS_MULTRES 32 + #define CFRAME_SIZE 208 + #define CFRAME_SHIFT_MULTRES 3 + #elif LJ_TARGET_PPC +@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #define CFRAME_OFS_L 36 + #define CFRAME_OFS_PC 32 + #define CFRAME_OFS_MULTRES 28 +-#define CFRAME_SIZE 272 ++#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128) + #define CFRAME_SHIFT_MULTRES 3 + #endif + #elif LJ_TARGET_MIPS32 +diff --git a/src/lj_func.c b/src/lj_func.c +index 639dad87..cf8ca08f 100644 +--- a/src/lj_func.c ++++ b/src/lj_func.c +@@ -1,6 +1,6 @@ + /* + ** Function handling (prototypes, functions and upvalues). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +@@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) + + /* -- Upvalues ------------------------------------------------------------ */ + +-static void unlinkuv(GCupval *uv) ++static void unlinkuv(global_State *g, GCupval *uv) + { +- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); ++ UNUSED(g); ++ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, ++ "broken upvalue chain"); + setgcrefr(uvnext(uv)->prev, uv->prev); + setgcrefr(uvprev(uv)->next, uv->next); + } +@@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) + GCupval *uv; + /* Search the sorted list of open upvalues. */ + while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { +- lua_assert(!p->closed && uvval(p) != &p->tv); ++ lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain"); + if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ + if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ + flipwhite(obj2gco(p)); +@@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) + setgcrefr(uv->next, g->uvhead.next); + setgcref(uvnext(uv)->prev, obj2gco(uv)); + setgcref(g->uvhead.next, obj2gco(uv)); +- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); ++ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, ++ "broken upvalue chain"); + return uv; + } + +@@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) + while (gcref(L->openupval) != NULL && + uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { + GCobj *o = obj2gco(uv); +- lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv); ++ lj_assertG(!isblack(o), "bad black upvalue"); ++ lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain"); + setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ + if (isdead(g, o)) { + lj_func_freeuv(g, uv); + } else { +- unlinkuv(uv); ++ unlinkuv(g, uv); + lj_gc_closeuv(g, uv); + } + } +@@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) + void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) + { + if (!uv->closed) +- unlinkuv(uv); ++ unlinkuv(g, uv); + lj_mem_freet(g, uv); + } + +diff --git a/src/lj_func.h b/src/lj_func.h +index 901751b9..b864a5bf 100644 +--- a/src/lj_func.h ++++ b/src/lj_func.h +@@ -1,6 +1,6 @@ + /* + ** Function handling (prototypes, functions and upvalues). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_FUNC_H +diff --git a/src/lj_gc.c b/src/lj_gc.c +index 2aaf5b2c..5a238542 100644 +--- a/src/lj_gc.c ++++ b/src/lj_gc.c +@@ -1,6 +1,6 @@ + /* + ** Garbage collector. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -25,6 +25,7 @@ + #include "lj_cdata.h" + #endif + #include "lj_trace.h" ++#include "lj_dispatch.h" + #include "lj_vm.h" + + #define GCSTEPSIZE 1024u +@@ -41,7 +42,8 @@ + + /* Mark a TValue (if needed). 
*/ + #define gc_marktv(g, tv) \ +- { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ ++ { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \ ++ "TValue and GC type mismatch"); \ + if (tviswhite(tv)) gc_mark(g, gcV(tv)); } + + /* Mark a GCobj (if needed). */ +@@ -55,21 +57,32 @@ + static void gc_mark(global_State *g, GCobj *o) + { + int gct = o->gch.gct; +- lua_assert(iswhite(o) && !isdead(g, o)); ++ lj_assertG(iswhite(o), "mark of non-white object"); ++ lj_assertG(!isdead(g, o), "mark of dead object"); + white2gray(o); + if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { + GCtab *mt = tabref(gco2ud(o)->metatable); + gray2black(o); /* Userdata are never gray. */ + if (mt) gc_markobj(g, mt); + gc_markobj(g, tabref(gco2ud(o)->env)); ++ if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) { ++ SBufExt *sbx = (SBufExt *)uddata(gco2ud(o)); ++ if (sbufiscow(sbx) && gcref(sbx->cowref)) ++ gc_markobj(g, gcref(sbx->cowref)); ++ if (gcref(sbx->dict_str)) ++ gc_markobj(g, gcref(sbx->dict_str)); ++ if (gcref(sbx->dict_mt)) ++ gc_markobj(g, gcref(sbx->dict_mt)); ++ } + } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { + GCupval *uv = gco2uv(o); + gc_marktv(g, uvval(uv)); + if (uv->closed) + gray2black(o); /* Closed upvalues are never gray. */ + } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { +- lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || +- gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE); ++ lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || ++ gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE, ++ "bad GC type %d", gct); + setgcrefr(o->gch.gclist, g->gc.gray); + setgcref(g->gc.gray, o); + } +@@ -102,7 +115,8 @@ static void gc_mark_uv(global_State *g) + { + GCupval *uv; + for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { +- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); ++ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, ++ "broken upvalue chain"); + if (isgray(obj2gco(uv))) + gc_marktv(g, uvval(uv)); + } +@@ -197,7 +211,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t) + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ +- lua_assert(!tvisnil(&n->key)); ++ lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot"); + if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); + if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); + } +@@ -212,7 +226,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) + gc_markobj(g, tabref(fn->c.env)); + if (isluafunc(fn)) { + uint32_t i; +- lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv); ++ lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv, ++ "function upvalues out of range"); + gc_markobj(g, funcproto(fn)); + for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. 
*/ + gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); +@@ -228,7 +243,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) + static void gc_marktrace(global_State *g, TraceNo traceno) + { + GCobj *o = obj2gco(traceref(G2J(g), traceno)); +- lua_assert(traceno != G2J(g)->cur.traceno); ++ lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped"); + if (iswhite(o)) { + white2gray(o); + setgcrefr(o->gch.gclist, g->gc.gray); +@@ -309,7 +324,7 @@ static size_t propagatemark(global_State *g) + { + GCobj *o = gcref(g->gc.gray); + int gct = o->gch.gct; +- lua_assert(isgray(o)); ++ lj_assertG(isgray(o), "propagation of non-gray object"); + gray2black(o); + setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ + if (LJ_LIKELY(gct == ~LJ_TTAB)) { +@@ -341,7 +356,7 @@ static size_t propagatemark(global_State *g) + return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); + #else +- lua_assert(0); ++ lj_assertG(0, "bad GC type %d", gct); + return 0; + #endif + } +@@ -395,11 +410,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) + if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ + gc_fullsweep(g, &gco2th(o)->openupval); + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ +- lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); ++ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), ++ "sweep of undead object"); + makewhite(g, o); /* Value is alive, change to the current white. */ + p = &o->gch.nextgc; + } else { /* Otherwise value is dead, free it. */ +- lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); ++ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, ++ "sweep of unlive object"); + setgcrefr(*p, o->gch.nextgc); + if (o == gcref(g->gc.root)) + setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ +@@ -409,6 +426,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) + return p; + } + ++/* Sweep one string interning table chain. Preserves hashalg bit. */ ++static void gc_sweepstr(global_State *g, GCRef *chain) ++{ ++ /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ ++ int ow = otherwhite(g); ++ uintptr_t u = gcrefu(*chain); ++ GCRef q; ++ GCRef *p = &q; ++ GCobj *o; ++ setgcrefp(q, (u & ~(uintptr_t)1)); ++ while ((o = gcref(*p)) != NULL) { ++ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ ++ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), ++ "sweep of undead string"); ++ makewhite(g, o); /* String is alive, change to the current white. */ ++ p = &o->gch.nextgc; ++ } else { /* Otherwise string is dead, free it. */ ++ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, ++ "sweep of unlive string"); ++ setgcrefr(*p, o->gch.nextgc); ++ lj_str_free(g, gco2str(o)); ++ } ++ } ++ setgcrefp(*chain, (gcrefu(q) | (u & 1))); ++} ++ + /* Check whether we can clear a key or a value slot from a table. */ + static int gc_mayclear(cTValue *o, int val) + { +@@ -426,11 +469,12 @@ static int gc_mayclear(cTValue *o, int val) + } + + /* Clear collected entries from weak tables. 
*/ +-static void gc_clearweak(GCobj *o) ++static void gc_clearweak(global_State *g, GCobj *o) + { ++ UNUSED(g); + while (o) { + GCtab *t = gco2tab(o); +- lua_assert((t->marked & LJ_GC_WEAK)); ++ lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table"); + if ((t->marked & LJ_GC_WEAKVAL)) { + MSize i, asize = t->asize; + for (i = 0; i < asize; i++) { +@@ -466,6 +510,7 @@ static void gc_call_finalizer(global_State *g, lua_State *L, + TValue *top; + lj_trace_abort(g); + hook_entergc(g); /* Disable hooks and new traces during __gc. */ ++ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); + g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ + top = L->top; + copyTV(L, top++, mo); +@@ -474,6 +519,7 @@ static void gc_call_finalizer(global_State *g, lua_State *L, + L->top = top+1; + errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ + hook_restore(g, oldh); ++ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); + g->gc.threshold = oldt; /* Restore GC threshold. */ + if (errcode) + lj_err_throw(L, errcode); /* Propagate errors. */ +@@ -485,7 +531,7 @@ static void gc_finalize(lua_State *L) + global_State *g = G(L); + GCobj *o = gcnext(gcref(g->gc.mmudata)); + cTValue *mo; +- lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */ ++ lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace"); + /* Unchain from list of userdata to be finalized. */ + if (o == gcref(g->gc.mmudata)) + setgcrefnull(g->gc.mmudata); +@@ -560,9 +606,9 @@ void lj_gc_freeall(global_State *g) + /* Free everything, except super-fixed objects (the main thread). */ + g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; + gc_fullsweep(g, &g->gc.root); +- strmask = g->strmask; ++ strmask = g->str.mask; + for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ +- gc_fullsweep(g, &g->strhash[i]); ++ gc_sweepstr(g, &g->str.tab[i]); + } + + /* -- Collector ----------------------------------------------------------- */ +@@ -577,7 +623,7 @@ static void atomic(global_State *g, lua_State *L) + + setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ + setgcrefnull(g->gc.weak); +- lua_assert(!iswhite(obj2gco(mainthread(g)))); ++ lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white"); + gc_markobj(g, L); /* Mark running thread. */ + gc_traverse_curtrace(g); /* Traverse current trace. */ + gc_mark_gcroot(g); /* Mark GC roots (again). */ +@@ -592,7 +638,7 @@ static void atomic(global_State *g, lua_State *L) + udsize += gc_propagate_gray(g); /* And propagate the marks. */ + + /* All marking done, clear weak tables. */ +- gc_clearweak(gcref(g->gc.weak)); ++ gc_clearweak(g, gcref(g->gc.weak)); + + lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */ + +@@ -625,21 +671,21 @@ static size_t gc_onestep(lua_State *L) + return 0; + case GCSsweepstring: { + GCSize old = g->gc.total; +- gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ +- if (g->gc.sweepstr > g->strmask) ++ gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */ ++ if (g->gc.sweepstr > g->str.mask) + g->gc.state = GCSsweep; /* All string hash chains sweeped. 
*/ +- lua_assert(old >= g->gc.total); ++ lj_assertG(old >= g->gc.total, "sweep increased memory"); + g->gc.estimate -= old - g->gc.total; + return GCSWEEPCOST; + } + case GCSsweep: { + GCSize old = g->gc.total; + setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); +- lua_assert(old >= g->gc.total); ++ lj_assertG(old >= g->gc.total, "sweep increased memory"); + g->gc.estimate -= old - g->gc.total; + if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { +- if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) +- lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ ++ if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1) ++ lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */ + if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ + g->gc.state = GCSfinalize; + #if LJ_HASFFI +@@ -668,7 +714,7 @@ static size_t gc_onestep(lua_State *L) + g->gc.debt = 0; + return 0; + default: +- lua_assert(0); ++ lj_assertG(0, "bad GC state"); + return 0; + } + } +@@ -742,7 +788,8 @@ void lj_gc_fullgc(lua_State *L) + } + while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) + gc_onestep(L); /* Finish sweep. */ +- lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause); ++ lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause, ++ "bad GC state"); + /* Now perform a full GC. */ + g->gc.state = GCSpause; + do { gc_onestep(L); } while (g->gc.state != GCSpause); +@@ -755,9 +802,11 @@ void lj_gc_fullgc(lua_State *L) + /* Move the GC propagation frontier forward. */ + void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) + { +- lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); +- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); +- lua_assert(o->gch.gct != ~LJ_TTAB); ++ lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o), ++ "bad object states for forward barrier"); ++ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, ++ "bad GC state"); ++ lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table"); + /* Preserve invariant during propagation. Otherwise it doesn't matter. */ + if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) + gc_mark(g, v); /* Move frontier forward. */ +@@ -794,7 +843,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv) + lj_gc_barrierf(g, o, gcV(&uv->tv)); + } else { + makewhite(g, o); /* Make it white, i.e. sweep the upvalue. 
*/ +- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); ++ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, ++ "bad GC state"); + } + } + } +@@ -814,12 +864,13 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno) + void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz) + { + global_State *g = G(L); +- lua_assert((osz == 0) == (p == NULL)); ++ lj_assertG((osz == 0) == (p == NULL), "realloc API violation"); + p = g->allocf(g->allocd, p, osz, nsz); + if (p == NULL && nsz > 0) + lj_err_mem(L); +- lua_assert((nsz == 0) == (p == NULL)); +- lua_assert(checkptrGC(p)); ++ lj_assertG((nsz == 0) == (p == NULL), "allocf API violation"); ++ lj_assertG(checkptrGC(p), ++ "allocated memory address %p outside required range", p); + g->gc.total = (g->gc.total - osz) + nsz; + return p; + } +@@ -831,7 +882,8 @@ void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size) + GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); + if (o == NULL) + lj_err_mem(L); +- lua_assert(checkptrGC(o)); ++ lj_assertG(checkptrGC(o), ++ "allocated memory address %p outside required range", o); + g->gc.total += size; + setgcrefr(o->gch.nextgc, g->gc.root); + setgcref(g->gc.root, o); +diff --git a/src/lj_gc.h b/src/lj_gc.h +index 669bbe92..af8c476b 100644 +--- a/src/lj_gc.h ++++ b/src/lj_gc.h +@@ -1,6 +1,6 @@ + /* + ** Garbage collector. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_GC_H +@@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno); + static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) + { + GCobj *o = obj2gco(t); +- lua_assert(isblack(o) && !isdead(g, o)); +- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); ++ lj_assertG(isblack(o) && !isdead(g, o), ++ "bad object states for backward barrier"); ++ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, ++ "bad GC state"); + black2gray(o); + setgcrefr(t->gclist, g->gc.grayagain); + setgcref(g->gc.grayagain, o); +diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c +index c219ffac..0e2777b8 100644 +--- a/src/lj_gdbjit.c ++++ b/src/lj_gdbjit.c +@@ -1,6 +1,6 @@ + /* + ** Client for the GDB JIT API. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_gdbjit_c +@@ -363,7 +363,7 @@ static const ELFheader elfhdr_template = { + .eosabi = 12, + #elif defined(__DragonFly__) + .eosabi = 0, +-#elif (defined(__sun__) && defined(__svr4__)) ++#elif LJ_TARGET_SOLARIS + .eosabi = 6, + #else + .eosabi = 0, +@@ -724,7 +724,7 @@ static void gdbjit_buildobj(GDBJITctx *ctx) + SECTALIGN(ctx->p, sizeof(uintptr_t)); + gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); + ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); +- lua_assert(ctx->objsize < sizeof(GDBJITobj)); ++ lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow"); + } + + #undef SECTALIGN +@@ -782,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T) + ctx.spadjp = CFRAME_SIZE_JIT + + (MSize)(parent ? 
traceref(J, parent)->spadjust : 0); + ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; +- lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); ++ lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, ++ "start PC out of range"); + ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); + ctx.filename = proto_chunknamestr(pt); + if (*ctx.filename == '@' || *ctx.filename == '=') +diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h +index bbaa1568..53596c87 100644 +--- a/src/lj_gdbjit.h ++++ b/src/lj_gdbjit.h +@@ -1,6 +1,6 @@ + /* + ** Client for the GDB JIT API. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_GDBJIT_H +diff --git a/src/lj_ir.c b/src/lj_ir.c +index 5baece67..71bf8855 100644 +--- a/src/lj_ir.c ++++ b/src/lj_ir.c +@@ -1,6 +1,6 @@ + /* + ** SSA IR (Intermediate Representation) emitter. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_ir_c +@@ -30,15 +30,16 @@ + #endif + #include "lj_vm.h" + #include "lj_strscan.h" ++#include "lj_serialize.h" + #include "lj_strfmt.h" +-#include "lj_lib.h" ++#include "lj_prng.h" + + /* Some local macros to save typing. Undef'd at the end. */ + #define IR(ref) (&J->cur.ir[(ref)]) + #define fins (&J->fold.ins) + + /* Pass IR on to next optimization in chain (FOLD). */ +-#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) ++#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) + + /* -- IR tables ----------------------------------------------------------- */ + +@@ -90,8 +91,9 @@ static void lj_ir_growbot(jit_State *J) + { + IRIns *baseir = J->irbuf + J->irbotlim; + MSize szins = J->irtoplim - J->irbotlim; +- lua_assert(szins != 0); +- lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); ++ lj_assertJ(szins != 0, "zero IR size"); ++ lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim, ++ "unexpected IR growth"); + if (J->cur.nins + (szins >> 1) < J->irtoplim) { + /* More than half of the buffer is free on top: shift up by a quarter. */ + MSize ofs = szins >> 2; +@@ -146,11 +148,12 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) + } + + /* Load field of type t from GG_State + offset. Must be 32 bit aligned. */ +-LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) ++TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) + { +- lua_assert((ofs & 3) == 0); ++ lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset"); + ofs >>= 2; +- lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. 
*/ ++ lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff, ++ "GG_State field offset breaks 10 bit FOLD key limit"); + lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); + return lj_opt_fold(J); + } +@@ -181,7 +184,7 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) + static LJ_AINLINE IRRef ir_nextk64(jit_State *J) + { + IRRef ref = J->cur.nk - 2; +- lua_assert(J->state != LJ_TRACE_ASM); ++ lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state"); + if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); + J->cur.nk = ref; + return ref; +@@ -277,7 +280,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) + { + IRIns *ir, *cir = J->cur.ir; + IRRef ref; +- lua_assert(!isdead(J2G(J), o)); ++ lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object"); + for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) + if (ir_kgc(&cir[ref]) == o) + goto found; +@@ -299,7 +302,7 @@ TRef lj_ir_ktrace(jit_State *J) + { + IRRef ref = ir_nextkgc(J); + IRIns *ir = IR(ref); +- lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); ++ lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping"); + ir->t.irt = IRT_P64; + ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */ + ir->op12 = 0; +@@ -313,7 +316,7 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) + IRIns *ir, *cir = J->cur.ir; + IRRef ref; + #if LJ_64 && !LJ_GC64 +- lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); ++ lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer"); + #endif + for (ref = J->chain[op]; ref; ref = cir[ref].prev) + if (ir_kptr(&cir[ref]) == ptr) +@@ -360,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) + IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); + IRRef ref; + /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ +- lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); ++ lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot, ++ "out-of-range key/slot"); + for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) + if (cir[ref].op12 == op12) + goto found; +@@ -381,13 +385,15 @@ found: + void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) + { + UNUSED(L); +- lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ ++ lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. 
*/ + switch (ir->o) { + case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; + case IR_KINT: setintV(tv, ir->i); break; + case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; +- case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; +- case IR_KNULL: setlightudV(tv, NULL); break; ++ case IR_KPTR: case IR_KKPTR: ++ setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir)); ++ break; ++ case IR_KNULL: setintV(tv, 0); break; + case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; + #if LJ_HASFFI + case IR_KINT64: { +@@ -397,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) + break; + } + #endif +- default: lua_assert(0); break; ++ default: lj_assertL(0, "bad IR constant op %d", ir->o); break; + } + } + +@@ -457,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op) + case IR_UGE: return !(a < b); + case IR_ULE: return !(a > b); + case IR_UGT: return !(a <= b); +- default: lua_assert(0); return 0; ++ default: lj_assertX(0, "bad IR op %d", op); return 0; + } + } + +@@ -470,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op) + case IR_GE: return (res >= 0); + case IR_LE: return (res <= 0); + case IR_GT: return (res > 0); +- default: lua_assert(0); return 0; ++ default: lj_assertX(0, "bad IR op %d", op); return 0; + } + } + +diff --git a/src/lj_ir.h b/src/lj_ir.h +index 34c27853..2b127f6c 100644 +--- a/src/lj_ir.h ++++ b/src/lj_ir.h +@@ -1,6 +1,6 @@ + /* + ** SSA IR (Intermediate Representation) format. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_IR_H +@@ -75,7 +75,6 @@ + _(NEG, N , ref, ref) \ + \ + _(ABS, N , ref, ref) \ +- _(ATAN2, N , ref, ref) \ + _(LDEXP, N , ref, ref) \ + _(MIN, C , ref, ref) \ + _(MAX, C , ref, ref) \ +@@ -96,6 +95,7 @@ + _(UREFO, LW, ref, lit) \ + _(UREFC, LW, ref, lit) \ + _(FREF, R , ref, lit) \ ++ _(TMPREF, S , ref, lit) \ + _(STRREF, N , ref, ref) \ + _(LREF, L , ___, ___) \ + \ +@@ -106,7 +106,8 @@ + _(FLOAD, L , ref, lit) \ + _(XLOAD, L , ref, lit) \ + _(SLOAD, L , lit, lit) \ +- _(VLOAD, L , ref, ___) \ ++ _(VLOAD, L , ref, lit) \ ++ _(ALEN, L , ref, ref) \ + \ + _(ASTORE, S , ref, ref) \ + _(HSTORE, S , ref, ref) \ +@@ -124,8 +125,8 @@ + \ + /* Buffer operations. */ \ + _(BUFHDR, L , ref, lit) \ +- _(BUFPUT, L , ref, ref) \ +- _(BUFSTR, A , ref, ref) \ ++ _(BUFPUT, LW, ref, ref) \ ++ _(BUFSTR, AW, ref, ref) \ + \ + /* Barriers. */ \ + _(TBAR, S , ref, ___) \ +@@ -133,15 +134,15 @@ + _(XBAR, S , ___, ___) \ + \ + /* Type conversions. */ \ +- _(CONV, NW, ref, lit) \ ++ _(CONV, N , ref, lit) \ + _(TOBIT, N , ref, ref) \ + _(TOSTR, N , ref, lit) \ + _(STRTO, N , ref, ___) \ + \ + /* Calls. */ \ +- _(CALLN, N , ref, lit) \ +- _(CALLA, A , ref, lit) \ +- _(CALLL, L , ref, lit) \ ++ _(CALLN, NW, ref, lit) \ ++ _(CALLA, AW, ref, lit) \ ++ _(CALLL, LW, ref, lit) \ + _(CALLS, S , ref, lit) \ + _(CALLXS, S , ref, ref) \ + _(CARG, N , ref, ref) \ +@@ -178,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE); + /* FPMATH sub-functions. ORDER FPM. */ + #define IRFPMDEF(_) \ + _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. 
*/ \ +- _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ +- _(SIN) _(COS) _(TAN) \ ++ _(SQRT) _(LOG) _(LOG2) \ + _(OTHER) + + typedef enum { +@@ -205,9 +205,15 @@ IRFPMDEF(FPMENUM) + _(UDATA_META, offsetof(GCudata, metatable)) \ + _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ + _(UDATA_FILE, sizeof(GCudata)) \ ++ _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \ ++ _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \ ++ _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \ ++ _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \ ++ _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \ ++ _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \ + _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ + _(CDATA_PTR, sizeof(GCcdata)) \ +- _(CDATA_INT, sizeof(GCcdata)) \ ++ _(CDATA_INT, sizeof(GCcdata)) \ + _(CDATA_INT64, sizeof(GCcdata)) \ + _(CDATA_INT64_4, sizeof(GCcdata) + 4) + +@@ -218,6 +224,11 @@ IRFLDEF(FLENUM) + IRFL__MAX + } IRFieldID; + ++/* TMPREF mode bits, stored in op2. */ ++#define IRTMPREF_IN1 0x01 /* First input value. */ ++#define IRTMPREF_OUT1 0x02 /* First output value. */ ++#define IRTMPREF_OUT2 0x04 /* Second output value. */ ++ + /* SLOAD mode bits, stored in op2. */ + #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ + #define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ +@@ -225,15 +236,17 @@ IRFLDEF(FLENUM) + #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ + #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ + #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ ++#define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */ + +-/* XLOAD mode, stored in op2. */ +-#define IRXLOAD_READONLY 1 /* Load from read-only data. */ +-#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ +-#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ ++/* XLOAD mode bits, stored in op2. */ ++#define IRXLOAD_READONLY 0x01 /* Load from read-only data. */ ++#define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */ ++#define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. */ + + /* BUFHDR mode, stored in op2. */ + #define IRBUFHDR_RESET 0 /* Reset buffer. */ + #define IRBUFHDR_APPEND 1 /* Append to buffer. */ ++#define IRBUFHDR_WRITE 2 /* Write to string buffer. */ + + /* CONV mode, stored in op2. */ + #define IRCONV_SRCMASK 0x001f /* Source IRType. */ +@@ -250,6 +263,7 @@ IRFLDEF(FLENUM) + #define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ + #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ + #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ ++#define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */ + + /* TOSTR mode, stored in op2. */ + #define IRTOSTR_INT 0 /* Convert integer to string. */ +@@ -377,10 +391,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1; + #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) + + #if LJ_GC64 ++/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. 
*/ + #define IRT_IS64 \ + ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\ + (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\ +- (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)) ++ (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\ ++ (1u<<IRT_NIL)) + #elif LJ_64 + #define IRT_IS64 \ + ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) +@@ -412,11 +428,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) + + static LJ_AINLINE uint32_t irt_toitype_(IRType t) + { +- lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); ++ lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD, ++ "no plain type tag for lightuserdata"); + if (LJ_DUALNUM && t > IRT_NUM) { + return LJ_TISNUM; + } else { +- lua_assert(t <= IRT_NUM); ++ lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t); + return ~(uint32_t)t; + } + } +@@ -479,6 +496,7 @@ typedef uint32_t TRef; + #define TREF_REFMASK 0x0000ffff + #define TREF_FRAME 0x00010000 + #define TREF_CONT 0x00020000 ++#define TREF_KEYINDEX 0x00100000 + + #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) + +@@ -560,6 +578,11 @@ typedef union IRIns { + TValue tv; /* TValue constant (overlaps entire slot). */ + } IRIns; + ++#define ir_isk64(ir) \ ++ ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ ++ (LJ_GC64 && \ ++ ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR))) ++ + #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) + #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) + #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) +@@ -567,12 +590,7 @@ typedef union IRIns { + #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) + #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) + #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) +-#define ir_k64(ir) \ +- check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ +- (LJ_GC64 && \ +- ((ir)->o == IR_KGC || \ +- (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ +- &(ir)[1].tv) ++#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv) + #define ir_kptr(ir) \ + check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ + mref((ir)[LJ_GC64].ptr, void)) +@@ -585,4 +603,12 @@ static LJ_AINLINE int ir_sideeff(IRIns *ir) + + LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W); + ++/* Replace IR instruction with NOP. */ ++static LJ_AINLINE void lj_ir_nop(IRIns *ir) ++{ ++ ir->ot = IRT(IR_NOP, IRT_NIL); ++ ir->op1 = ir->op2 = 0; ++ ir->prev = 0; ++} ++ + #endif +diff --git a/src/lj_ircall.h b/src/lj_ircall.h +index 973c36e6..9e7013ba 100644 +--- a/src/lj_ircall.h ++++ b/src/lj_ircall.h +@@ -1,6 +1,6 @@ + /* + ** IR CALL* instruction definitions. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_IRCALL_H +@@ -21,6 +21,7 @@ typedef struct CCallInfo { + + #define CCI_OTSHIFT 16 + #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ ++#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE) + #define CCI_OPSHIFT 24 + #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ + +@@ -29,10 +30,12 @@ typedef struct CCallInfo { + #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) + #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) + #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) ++#define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL) + #define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL) + #define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL) + + /* C call info flags. 
*/ ++#define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */ + #define CCI_L 0x0100 /* Implicit L arg. */ + #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ + #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ +@@ -51,7 +54,7 @@ typedef struct CCallInfo { + #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) + #define CCI_XA (1u << CCI_XARGS_SHIFT) + +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) + #else + #define CCI_XNARGS(ci) CCI_NARGS((ci)) +@@ -78,13 +81,19 @@ typedef struct CCallInfo { + #define IRCALLCOND_SOFTFP_FFI(x) NULL + #endif + +-#if LJ_SOFTFP && LJ_TARGET_MIPS32 ++#if LJ_SOFTFP && LJ_TARGET_MIPS + #define IRCALLCOND_SOFTFP_MIPS(x) x + #else + #define IRCALLCOND_SOFTFP_MIPS(x) NULL + #endif + +-#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) ++#if LJ_SOFTFP && LJ_TARGET_MIPS64 ++#define IRCALLCOND_SOFTFP_MIPS64(x) x ++#else ++#define IRCALLCOND_SOFTFP_MIPS64(x) NULL ++#endif ++ ++#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) + + #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) + #define IRCALLCOND_FP64_FFI(x) x +@@ -104,6 +113,18 @@ typedef struct CCallInfo { + #define IRCALLCOND_FFI32(x) NULL + #endif + ++#if LJ_HASBUFFER ++#define IRCALLCOND_BUFFER(x) x ++#else ++#define IRCALLCOND_BUFFER(x) NULL ++#endif ++ ++#if LJ_HASBUFFER && LJ_HASFFI ++#define IRCALLCOND_BUFFFI(x) x ++#else ++#define IRCALLCOND_BUFFFI(x) NULL ++#endif ++ + #if LJ_SOFTFP + #define XA_FP CCI_XA + #define XA2_FP (CCI_XA+CCI_XA) +@@ -112,6 +133,14 @@ typedef struct CCallInfo { + #define XA2_FP 0 + #endif + ++#if LJ_SOFTFP32 ++#define XA_FP32 CCI_XA ++#define XA2_FP32 (CCI_XA+CCI_XA) ++#else ++#define XA_FP32 0 ++#define XA2_FP32 0 ++#endif ++ + #if LJ_32 + #define XA_64 CCI_XA + #define XA2_64 (CCI_XA+CCI_XA) +@@ -124,40 +153,57 @@ typedef struct CCallInfo { + #define IRCALLDEF(_) \ + _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ + _(ANY, lj_str_find, 4, N, PGC, 0) \ +- _(ANY, lj_str_new, 3, S, STR, CCI_L) \ ++ _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \ + _(ANY, lj_strscan_num, 2, FN, INT, 0) \ +- _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \ +- _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \ +- _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \ +- _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \ +- _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \ +- _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \ +- _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \ +- _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \ +- _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \ +- _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \ +- _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \ +- _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \ +- _(ANY, lj_buf_putmem, 3, S, PGC, 0) \ +- _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \ +- _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \ +- _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \ +- _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \ +- _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \ +- _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \ +- _(ANY, lj_buf_puttab, 5, L, PGC, 0) \ +- _(ANY, lj_buf_tostr, 1, FL, STR, 0) \ +- _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \ +- _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ +- _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ ++ _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \ ++ _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \ ++ _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \ ++ _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \ ++ _(ANY, lj_strfmt_putnum, 2, FL, PGC, 
CCI_T) \ ++ _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \ ++ _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \ ++ _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \ ++ _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \ ++ _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \ ++ _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \ ++ _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \ ++ _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \ ++ _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \ ++ _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \ ++ _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \ ++ _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \ ++ _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \ ++ _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \ ++ _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \ ++ _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \ ++ _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \ ++ _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \ ++ _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \ ++ _(BUFFER, lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \ ++ _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \ ++ _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \ ++ _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \ ++ _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \ ++ _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \ + _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ +- _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ ++ _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \ ++ _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \ ++ _(ANY, lj_vm_next, 2, FL, PTR, 0) \ + _(ANY, lj_tab_len, 1, FL, INT, 0) \ ++ _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ + _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ + _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ +- _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ +- _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ ++ _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \ ++ _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \ + _(ANY, lj_vm_modi, 2, FN, INT, 0) \ ++ _(ANY, log10, 1, N, NUM, XA_FP) \ ++ _(ANY, exp, 1, N, NUM, XA_FP) \ ++ _(ANY, sin, 1, N, NUM, XA_FP) \ ++ _(ANY, cos, 1, N, NUM, XA_FP) \ ++ _(ANY, tan, 1, N, NUM, XA_FP) \ ++ _(ANY, asin, 1, N, NUM, XA_FP) \ ++ _(ANY, acos, 1, N, NUM, XA_FP) \ ++ _(ANY, atan, 1, N, NUM, XA_FP) \ + _(ANY, sinh, 1, N, NUM, XA_FP) \ + _(ANY, cosh, 1, N, NUM, XA_FP) \ + _(ANY, tanh, 1, N, NUM, XA_FP) \ +@@ -169,32 +215,27 @@ typedef struct CCallInfo { + _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ + _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ + _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ +- _(ANY, exp, 1, N, NUM, XA_FP) \ +- _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \ + _(ANY, log, 1, N, NUM, XA_FP) \ + _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ +- _(ANY, log10, 1, N, NUM, XA_FP) \ +- _(ANY, sin, 1, N, NUM, XA_FP) \ +- _(ANY, cos, 1, N, NUM, XA_FP) \ +- _(ANY, tan, 1, N, NUM, XA_FP) \ + _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ + _(ANY, pow, 2, N, NUM, XA2_FP) \ + _(ANY, atan2, 2, N, NUM, XA2_FP) \ + _(ANY, ldexp, 2, N, NUM, XA_FP) \ +- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ +- _(SOFTFP, softfp_add, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_div, 4, N, NUM, 0) \ +- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ ++ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ ++ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ + _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ +- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ +- 
_(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \ +- _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \ ++ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ ++ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ ++ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ + _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ + _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ +- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ +- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ ++ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ ++ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ + _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ + _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ + _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ +@@ -272,7 +313,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; + #define fp64_f2l __aeabi_f2lz + #define fp64_f2ul __aeabi_f2ulz + #endif +-#elif LJ_TARGET_MIPS ++#elif LJ_TARGET_MIPS || LJ_TARGET_PPC + #define softfp_add __adddf3 + #define softfp_sub __subdf3 + #define softfp_mul __muldf3 +@@ -315,7 +356,7 @@ extern double lj_vm_sfmax(double a, double b); + #endif + + #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) +-#ifdef __GNUC__ ++#if defined(__GNUC__) || defined(__clang__) + #define fp64_l2d __floatdidf + #define fp64_ul2d __floatundidf + #define fp64_l2f __floatdisf +diff --git a/src/lj_iropt.h b/src/lj_iropt.h +index 73aef0ef..0541090d 100644 +--- a/src/lj_iropt.h ++++ b/src/lj_iropt.h +@@ -1,6 +1,6 @@ + /* + ** Common header for IR emitter and optimizations. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_IROPT_H +@@ -120,10 +120,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); + LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); + LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); + LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); +-LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); ++LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J); + LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); + LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); + LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); ++LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim); + LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); + + /* Dead-store elimination. */ +@@ -150,7 +151,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); + /* Optimization passes. */ + LJ_FUNC void lj_opt_dce(jit_State *J); + LJ_FUNC int lj_opt_loop(jit_State *J); +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + LJ_FUNC void lj_opt_split(jit_State *J); + #else + #define lj_opt_split(J) UNUSED(J) +diff --git a/src/lj_jit.h b/src/lj_jit.h +index 92054e3d..c9fe8319 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -1,6 +1,6 @@ + /* + ** Common definitions for the JIT compiler. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_JIT_H +@@ -9,73 +9,85 @@ + #include "lj_obj.h" + #include "lj_ir.h" + +-/* JIT engine flags. */ ++/* -- JIT engine flags ---------------------------------------------------- */ ++ ++/* General JIT engine flags. 4 bits. */ + #define JIT_F_ON 0x00000001 + +-/* CPU-specific JIT engine flags. */ ++/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. 
*/ ++#define JIT_F_CPU 0x00000010 ++ + #if LJ_TARGET_X86ORX64 +-#define JIT_F_SSE2 0x00000010 +-#define JIT_F_SSE3 0x00000020 +-#define JIT_F_SSE4_1 0x00000040 +-#define JIT_F_PREFER_IMUL 0x00000080 +-#define JIT_F_LEA_AGU 0x00000100 +-#define JIT_F_BMI2 0x00000200 +- +-/* Names for the CPU-specific flags. Must match the order above. */ +-#define JIT_F_CPU_FIRST JIT_F_SSE2 +-#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" ++ ++#define JIT_F_SSE3 (JIT_F_CPU << 0) ++#define JIT_F_SSE4_1 (JIT_F_CPU << 1) ++#define JIT_F_BMI2 (JIT_F_CPU << 2) ++ ++ ++#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" ++ + #elif LJ_TARGET_ARM +-#define JIT_F_ARMV6_ 0x00000010 +-#define JIT_F_ARMV6T2_ 0x00000020 +-#define JIT_F_ARMV7 0x00000040 +-#define JIT_F_VFPV2 0x00000080 +-#define JIT_F_VFPV3 0x00000100 +- +-#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) +-#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) ++ ++#define JIT_F_ARMV6_ (JIT_F_CPU << 0) ++#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) ++#define JIT_F_ARMV7 (JIT_F_CPU << 2) ++#define JIT_F_ARMV8 (JIT_F_CPU << 3) ++#define JIT_F_VFPV2 (JIT_F_CPU << 4) ++#define JIT_F_VFPV3 (JIT_F_CPU << 5) ++ ++#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) ++#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) + #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) + +-/* Names for the CPU-specific flags. Must match the order above. */ +-#define JIT_F_CPU_FIRST JIT_F_ARMV6_ +-#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" ++#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" ++ + #elif LJ_TARGET_PPC +-#define JIT_F_SQRT 0x00000010 +-#define JIT_F_ROUND 0x00000020 + +-/* Names for the CPU-specific flags. Must match the order above. */ +-#define JIT_F_CPU_FIRST JIT_F_SQRT ++#define JIT_F_SQRT (JIT_F_CPU << 0) ++#define JIT_F_ROUND (JIT_F_CPU << 1) ++ + #define JIT_F_CPUSTRING "\4SQRT\5ROUND" ++ + #elif LJ_TARGET_MIPS +-#define JIT_F_MIPSXXR2 0x00000010 + +-/* Names for the CPU-specific flags. Must match the order above. */ +-#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 ++#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) ++ + #if LJ_TARGET_MIPS32 ++#if LJ_TARGET_MIPSR6 ++#define JIT_F_CPUSTRING "\010MIPS32R6" ++#else + #define JIT_F_CPUSTRING "\010MIPS32R2" ++#endif ++#else ++#if LJ_TARGET_MIPSR6 ++#define JIT_F_CPUSTRING "\010MIPS64R6" + #else + #define JIT_F_CPUSTRING "\010MIPS64R2" + #endif ++#endif ++ + #else +-#define JIT_F_CPU_FIRST 0 ++ + #define JIT_F_CPUSTRING "" ++ + #endif + +-/* Optimization flags. */ ++/* Optimization flags. 12 bits. */ ++#define JIT_F_OPT 0x00010000 + #define JIT_F_OPT_MASK 0x0fff0000 + +-#define JIT_F_OPT_FOLD 0x00010000 +-#define JIT_F_OPT_CSE 0x00020000 +-#define JIT_F_OPT_DCE 0x00040000 +-#define JIT_F_OPT_FWD 0x00080000 +-#define JIT_F_OPT_DSE 0x00100000 +-#define JIT_F_OPT_NARROW 0x00200000 +-#define JIT_F_OPT_LOOP 0x00400000 +-#define JIT_F_OPT_ABC 0x00800000 +-#define JIT_F_OPT_SINK 0x01000000 +-#define JIT_F_OPT_FUSE 0x02000000 ++#define JIT_F_OPT_FOLD (JIT_F_OPT << 0) ++#define JIT_F_OPT_CSE (JIT_F_OPT << 1) ++#define JIT_F_OPT_DCE (JIT_F_OPT << 2) ++#define JIT_F_OPT_FWD (JIT_F_OPT << 3) ++#define JIT_F_OPT_DSE (JIT_F_OPT << 4) ++#define JIT_F_OPT_NARROW (JIT_F_OPT << 5) ++#define JIT_F_OPT_LOOP (JIT_F_OPT << 6) ++#define JIT_F_OPT_ABC (JIT_F_OPT << 7) ++#define JIT_F_OPT_SINK (JIT_F_OPT << 8) ++#define JIT_F_OPT_FUSE (JIT_F_OPT << 9) + + /* Optimizations names for -O. Must match the order above. 
*/ +-#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD + #define JIT_F_OPTSTRING \ + "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" + +@@ -87,6 +99,8 @@ + JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) + #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 + ++/* -- JIT engine parameters ----------------------------------------------- */ ++ + #if LJ_TARGET_WINDOWS || LJ_64 + /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ + #define JIT_P_sizemcode_DEFAULT 64 +@@ -129,11 +143,14 @@ JIT_PARAMDEF(JIT_PARAMENUM) + #define JIT_PARAMSTR(len, name, value) #len #name + #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) + ++/* -- JIT engine data structures ------------------------------------------ */ ++ + /* Trace compiler state. */ + typedef enum { + LJ_TRACE_IDLE, /* Trace compiler idle. */ + LJ_TRACE_ACTIVE = 0x10, + LJ_TRACE_RECORD, /* Bytecode recording active. */ ++ LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */ + LJ_TRACE_START, /* New trace started. */ + LJ_TRACE_END, /* End of trace. */ + LJ_TRACE_ASM, /* Assemble trace. */ +@@ -158,10 +175,17 @@ typedef uint8_t MCode; + typedef uint32_t MCode; + #endif + ++/* Linked list of MCode areas. */ ++typedef struct MCLink { ++ MCode *next; /* Next area. */ ++ size_t size; /* Size of current area. */ ++} MCLink; ++ + /* Stack snapshot header. */ + typedef struct SnapShot { +- uint16_t mapofs; /* Offset into snapshot map. */ ++ uint32_t mapofs; /* Offset into snapshot map. */ + IRRef1 ref; /* First IR ref for this snapshot. */ ++ uint16_t mcofs; /* Offset into machine code in MCode units. */ + uint8_t nslots; /* Number of valid slots. */ + uint8_t topslot; /* Maximum frame extent. */ + uint8_t nent; /* Number of compressed entries. */ +@@ -177,12 +201,15 @@ typedef uint32_t SnapEntry; + #define SNAP_CONT 0x020000 /* Continuation slot. */ + #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ + #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ ++#define SNAP_KEYINDEX 0x100000 /* Traversal key index. */ + LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); + LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); ++LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX); + + #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) + #define SNAP_TR(slot, tr) \ +- (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) ++ (((SnapEntry)(slot) << 24) + \ ++ ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK))) + #if !LJ_FR2 + #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) + #endif +@@ -227,8 +254,7 @@ typedef enum { + /* Trace object. */ + typedef struct GCtrace { + GCHeader; +- uint8_t topslot; /* Top stack slot already checked to be allocated. */ +- uint8_t linktype; /* Type of link. */ ++ uint16_t nsnap; /* Number of snapshots. */ + IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ + #if LJ_GC64 + uint32_t unused_gc64; +@@ -236,8 +262,7 @@ typedef struct GCtrace { + GCRef gclist; + IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ + IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ +- uint16_t nsnap; /* Number of snapshots. */ +- uint16_t nsnapmap; /* Number of snapshot map elements. */ ++ uint32_t nsnapmap; /* Number of snapshot map elements. */ + SnapShot *snap; /* Snapshot array. */ + SnapEntry *snapmap; /* Snapshot map. */ + GCRef startpt; /* Starting prototype. */ +@@ -254,6 +279,8 @@ typedef struct GCtrace { + TraceNo1 nextroot; /* Next root trace for same prototype. */ + TraceNo1 nextside; /* Next side trace of same root trace. 
*/ + uint8_t sinktags; /* Trace has SINK tags. */ ++ uint8_t topslot; /* Top stack slot already checked to be allocated. */ ++ uint8_t linktype; /* Type of link. */ + uint8_t unused1; + #ifdef LUAJIT_USE_GDBJIT + void *gdbjit_entry; /* GDB JIT entry. */ +@@ -368,7 +395,7 @@ enum { + ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) + + /* Set/reset flag to activate the SPLIT pass for the current trace. */ +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + #define lj_needsplit(J) (J->needsplit = 1) + #define lj_resetsplit(J) (J->needsplit = 0) + #else +@@ -416,9 +443,9 @@ typedef struct jit_State { + int32_t framedepth; /* Current frame depth. */ + int32_t retdepth; /* Return frame depth (count of RETF). */ + ++ uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */ + TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ +- TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ +- uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ ++ TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */ + + IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ + IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ +@@ -431,7 +458,7 @@ typedef struct jit_State { + MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ + + PostProc postproc; /* Required post-processing after execution. */ +-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) ++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + uint8_t needsplit; /* Need SPLIT pass. */ + #endif + uint8_t retryrec; /* Retry recording. */ +@@ -450,7 +477,6 @@ typedef struct jit_State { + + HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ + uint32_t penaltyslot; /* Round-robin index into penalty slots. */ +- uint32_t prngstate; /* PRNG state. */ + + #ifdef LUAJIT_ENABLE_TABLE_BUMP + RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ +@@ -464,6 +490,7 @@ typedef struct jit_State { + const BCIns *startpc; /* Bytecode PC of starting instruction. */ + TraceNo parent; /* Parent of current side trace (0 for root traces). */ + ExitNo exitno; /* Exit number in parent of current side trace. */ ++ int exitcode; /* Exit code from unwound trace. */ + + BCIns *patchpc; /* PC for pending re-patch. */ + BCIns patchins; /* Instruction for pending re-patch. */ +@@ -482,18 +509,12 @@ typedef struct jit_State { + BCLine prev_line; /* Previous line. */ + int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ + #endif +-} +-#if LJ_TARGET_ARM +-LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ +-#endif +-jit_State; ++} jit_State; + +-/* Trivial PRNG e.g. used for penalty randomization. */ +-static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) +-{ +- /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ +- J->prngstate = J->prngstate * 1103515245 + 12345; +- return J->prngstate >> (32-bits); +-} ++#ifdef LUA_USE_ASSERT ++#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__) ++#else ++#define lj_assertJ(c, ...) ((void)J) ++#endif + + #endif +diff --git a/src/lj_lex.c b/src/lj_lex.c +index 2d2f8194..cc6fa533 100644 +--- a/src/lj_lex.c ++++ b/src/lj_lex.c +@@ -1,6 +1,6 @@ + /* + ** Lexical analyzer. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +@@ -48,6 +48,12 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls) + size_t sz; + const char *p = ls->rfunc(ls->L, ls->rdata, &sz); + if (p == NULL || sz == 0) return LEX_EOF; ++ if (sz >= LJ_MAX_BUF) { ++ if (sz != ~(size_t)0) lj_err_mem(ls->L); ++ sz = ~(uintptr_t)0 - (uintptr_t)p; ++ if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1; ++ ls->endmark = 1; ++ } + ls->pe = p + sz; + ls->p = p + 1; + return (LexChar)(uint8_t)p[0]; +@@ -76,7 +82,7 @@ static LJ_AINLINE LexChar lex_savenext(LexState *ls) + static void lex_newline(LexState *ls) + { + LexChar old = ls->c; +- lua_assert(lex_iseol(ls)); ++ lj_assertLS(lex_iseol(ls), "bad usage"); + lex_next(ls); /* Skip "\n" or "\r". */ + if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */ + if (++ls->linenumber >= LJ_MAX_LINE) +@@ -90,7 +96,7 @@ static void lex_number(LexState *ls, TValue *tv) + { + StrScanFmt fmt; + LexChar c, xp = 'e'; +- lua_assert(lj_char_isdigit(ls->c)); ++ lj_assertLS(lj_char_isdigit(ls->c), "bad usage"); + if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x') + xp = 'p'; + while (lj_char_isident(ls->c) || ls->c == '.' || +@@ -99,7 +105,7 @@ static void lex_number(LexState *ls, TValue *tv) + lex_savenext(ls); + } + lex_save(ls, '\0'); +- fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv, ++ fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv, + (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | + (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); + if (LJ_DUALNUM && fmt == STRSCAN_INT) { +@@ -110,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv) + } else if (fmt != STRSCAN_ERROR) { + lua_State *L = ls->L; + GCcdata *cd; +- lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); +- if (!ctype_ctsG(G(L))) { +- ptrdiff_t oldtop = savestack(L, L->top); +- luaopen_ffi(L); /* Load FFI library on-demand. */ +- L->top = restorestack(L, oldtop); +- } ++ lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG, ++ "unexpected number format %d", fmt); ++ ctype_loadffi(L); + if (fmt == STRSCAN_IMAG) { + cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); + ((double *)cdataptr(cd))[0] = 0; +@@ -127,7 +130,8 @@ static void lex_number(LexState *ls, TValue *tv) + lj_parse_keepcdata(ls, tv, cd); + #endif + } else { +- lua_assert(fmt == STRSCAN_ERROR); ++ lj_assertLS(fmt == STRSCAN_ERROR, ++ "unexpected number format %d", fmt); + lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); + } + } +@@ -137,8 +141,8 @@ static int lex_skipeq(LexState *ls) + { + int count = 0; + LexChar s = ls->c; +- lua_assert(s == '[' || s == ']'); +- while (lex_savenext(ls) == '=') ++ lj_assertLS(s == '[' || s == ']', "bad usage"); ++ while (lex_savenext(ls) == '=' && count < 0x20000000) + count++; + return (ls->c == s) ? count : (-count) - 1; + } +@@ -172,7 +176,7 @@ static void lex_longstring(LexState *ls, TValue *tv, int sep) + } + } endloop: + if (tv) { +- GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep), ++ GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep), + sbuflen(&ls->sb) - 2*(2 + (MSize)sep)); + setstrV(ls->L, tv, str); + } +@@ -278,7 +282,7 @@ static void lex_string(LexState *ls, TValue *tv) + } + lex_savenext(ls); /* Skip trailing delimiter. 
*/ + setstrV(ls->L, tv, +- lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2)); ++ lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2)); + } + + /* -- Main lexical scanner ------------------------------------------------ */ +@@ -298,7 +302,7 @@ static LexToken lex_scan(LexState *ls, TValue *tv) + do { + lex_savenext(ls); + } while (lj_char_isident(ls->c)); +- s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb)); ++ s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb)); + setstrV(ls->L, tv, s); + if (s->reserved > 0) /* Reserved word? */ + return TK_OFS + s->reserved; +@@ -406,6 +410,7 @@ int lj_lex_setup(lua_State *L, LexState *ls) + ls->lookahead = TK_eof; /* No look-ahead token. */ + ls->linenumber = 1; + ls->lastline = 1; ++ ls->endmark = 0; + lex_next(ls); /* Read-ahead first char. */ + if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && + (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ +@@ -462,7 +467,7 @@ void lj_lex_next(LexState *ls) + /* Look ahead for the next token. */ + LexToken lj_lex_lookahead(LexState *ls) + { +- lua_assert(ls->lookahead == TK_eof); ++ lj_assertLS(ls->lookahead == TK_eof, "double lookahead"); + ls->lookahead = lex_scan(ls, &ls->lookaheadval); + return ls->lookahead; + } +@@ -487,7 +492,7 @@ void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...) + tokstr = NULL; + } else if (tok == TK_name || tok == TK_string || tok == TK_number) { + lex_save(ls, '\0'); +- tokstr = sbufB(&ls->sb); ++ tokstr = ls->sb.b; + } else { + tokstr = lj_lex_token2str(ls, tok); + } +diff --git a/src/lj_lex.h b/src/lj_lex.h +index 33fa8657..d2230b6a 100644 +--- a/src/lj_lex.h ++++ b/src/lj_lex.h +@@ -1,6 +1,6 @@ + /* + ** Lexical analyzer. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_LEX_H +@@ -73,6 +73,7 @@ typedef struct LexState { + BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */ + MSize sizebcstack; /* Size of bytecode stack. */ + uint32_t level; /* Syntactical nesting level. */ ++ int endmark; /* Trust bytecode end marker, even if not at EOF. */ + } LexState; + + LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); +@@ -83,4 +84,10 @@ LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok); + LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...); + LJ_FUNC void lj_lex_init(lua_State *L); + ++#ifdef LUA_USE_ASSERT ++#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__)) ++#else ++#define lj_assertLS(c, ...) ((void)ls) ++#endif ++ + #endif +diff --git a/src/lj_lib.c b/src/lj_lib.c +index b8638de6..438056d8 100644 +--- a/src/lj_lib.c ++++ b/src/lj_lib.c +@@ -1,6 +1,6 @@ + /* + ** Library function support. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_lib_c +@@ -16,6 +16,9 @@ + #include "lj_func.h" + #include "lj_bc.h" + #include "lj_dispatch.h" ++#if LJ_HASFFI ++#include "lj_ctype.h" ++#endif + #include "lj_vm.h" + #include "lj_strscan.h" + #include "lj_strfmt.h" +@@ -301,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) + return def; + } + ++/* -- Strict type checks -------------------------------------------------- */ ++ ++/* The following type checks do not coerce between strings and numbers. ++** And they handle plain int64_t/uint64_t FFI numbers, too. 
++*/ ++ ++#if LJ_HASBUFFER ++GCstr *lj_lib_checkstrx(lua_State *L, int narg) ++{ ++ TValue *o = L->base + narg-1; ++ if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING); ++ return strV(o); ++} ++ ++int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) ++{ ++ TValue *o = L->base + narg-1; ++ lj_assertL(b >= 0, "expected range must be non-negative"); ++ if (o < L->top) { ++ if (LJ_LIKELY(tvisint(o))) { ++ int32_t i = intV(o); ++ if (i >= a && i <= b) return i; ++ } else if (LJ_LIKELY(tvisnum(o))) { ++ /* For performance reasons, this doesn't check for integerness or ++ ** integer overflow. Overflow detection still works, since all FPUs ++ ** return either MININT or MAXINT, which is then out of range. ++ */ ++ int32_t i = (int32_t)numV(o); ++ if (i >= a && i <= b) return i; ++#if LJ_HASFFI ++ } else if (tviscdata(o)) { ++ GCcdata *cd = cdataV(o); ++ if (cd->ctypeid == CTID_INT64) { ++ int64_t i = *(int64_t *)cdataptr(cd); ++ if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i; ++ } else if (cd->ctypeid == CTID_UINT64) { ++ uint64_t i = *(uint64_t *)cdataptr(cd); ++ if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i; ++ } else { ++ goto badtype; ++ } ++#endif ++ } else { ++ goto badtype; ++ } ++ lj_err_arg(L, narg, LJ_ERR_NUMRNG); ++ } ++badtype: ++ lj_err_argt(L, narg, LUA_TNUMBER); ++ return 0; /* unreachable */ ++} ++#endif ++ +diff --git a/src/lj_lib.h b/src/lj_lib.h +index 37ec9d78..f59e9ea2 100644 +--- a/src/lj_lib.h ++++ b/src/lj_lib.h +@@ -1,6 +1,6 @@ + /* + ** Library function support. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_LIB_H +@@ -46,6 +46,12 @@ LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); + LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); + LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); + ++#if LJ_HASBUFFER ++LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg); ++LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg, ++ int32_t a, int32_t b); ++#endif ++ + /* Avoid including lj_frame.h. */ + #if LJ_GC64 + #define lj_lib_upvalue(L, n) \ +@@ -107,9 +113,4 @@ LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, + #define LIBINIT_FFID 0xfe + #define LIBINIT_END 0xff + +-/* Exported library functions. */ +- +-typedef struct RandomState RandomState; +-LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); +- + #endif +diff --git a/src/lj_load.c b/src/lj_load.c +index 9a31d9a1..af0c2b1f 100644 +--- a/src/lj_load.c ++++ b/src/lj_load.c +@@ -1,6 +1,6 @@ + /* + ** Load and dump code. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include <errno.h> +@@ -159,7 +159,7 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s) + LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) + { + cTValue *o = L->top-1; +- api_check(L, L->top > L->base); ++ lj_checkapi(L->top > L->base, "top slot empty"); + if (tvisfunc(o) && isluafunc(funcV(o))) + return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); + else +diff --git a/src/lj_mcode.c b/src/lj_mcode.c +index 77035bf7..b3efbc55 100644 +--- a/src/lj_mcode.c ++++ b/src/lj_mcode.c +@@ -1,6 +1,6 @@ + /* + ** Machine code management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #define lj_mcode_c +@@ -14,6 +14,7 @@ + #include "lj_mcode.h" + #include "lj_trace.h" + #include "lj_dispatch.h" ++#include "lj_prng.h" + #endif + #if LJ_HASJIT || LJ_HASFFI + #include "lj_vm.h" +@@ -44,7 +45,7 @@ void lj_mcode_sync(void *start, void *end) + sys_icache_invalidate(start, (char *)end-(char *)start); + #elif LJ_TARGET_PPC + lj_vm_cachesync(start, end); +-#elif defined(__GNUC__) ++#elif defined(__GNUC__) || defined(__clang__) + __clear_cache(start, end); + #else + #error "Missing builtin to flush instruction cache" +@@ -66,8 +67,8 @@ void lj_mcode_sync(void *start, void *end) + + static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) + { +- void *p = VirtualAlloc((void *)hint, sz, +- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); ++ void *p = LJ_WIN_VALLOC((void *)hint, sz, ++ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); + if (!p && !hint) + lj_trace_err(J, LJ_TRERR_MCODEAL); + return p; +@@ -82,7 +83,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) + static int mcode_setprot(void *p, size_t sz, DWORD prot) + { + DWORD oprot; +- return !VirtualProtect(p, sz, prot, &oprot); ++ return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); + } + + #elif LJ_TARGET_POSIX +@@ -96,10 +97,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) + #define MCPROT_RW (PROT_READ|PROT_WRITE) + #define MCPROT_RX (PROT_READ|PROT_EXEC) + #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) ++#ifdef PROT_MPROTECT ++#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX)) ++#else ++#define MCPROT_CREATE 0 ++#endif + + static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) + { +- void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); ++ void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) { + if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); + p = NULL; +@@ -118,52 +124,34 @@ static int mcode_setprot(void *p, size_t sz, int prot) + return mprotect(p, sz, prot); + } + +-#elif LJ_64 +- +-#error "Missing OS support for explicit placement of executable memory" +- + #else + +-/* Fallback allocator. This will fail if memory is not executable by default. */ +-#define LUAJIT_UNPROTECT_MCODE +-#define MCPROT_RW 0 +-#define MCPROT_RX 0 +-#define MCPROT_RWX 0 +- +-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) +-{ +- UNUSED(hint); UNUSED(prot); +- return lj_mem_new(J->L, sz); +-} +- +-static void mcode_free(jit_State *J, void *p, size_t sz) +-{ +- lj_mem_free(J2G(J), p, sz); +-} ++#error "Missing OS support for explicit placement of executable memory" + + #endif + + /* -- MCode area protection ----------------------------------------------- */ + +-/* Define this ONLY if page protection twiddling becomes a bottleneck. */ +-#ifdef LUAJIT_UNPROTECT_MCODE ++#if LUAJIT_SECURITY_MCODE == 0 + +-/* It's generally considered to be a potential security risk to have ++/* Define this ONLY if page protection twiddling becomes a bottleneck. ++** ++** It's generally considered to be a potential security risk to have + ** pages with simultaneous write *and* execute access in a process. + ** + ** Do not even think about using this mode for server processes or +-** apps handling untrusted external data (such as a browser). ++** apps handling untrusted external data. 
+ ** + ** The security risk is not in LuaJIT itself -- but if an adversary finds +-** any *other* flaw in your C application logic, then any RWX memory page +-** simplifies writing an exploit considerably. ++** any *other* flaw in your C application logic, then any RWX memory pages ++** simplify writing an exploit considerably. + */ + #define MCPROT_GEN MCPROT_RWX + #define MCPROT_RUN MCPROT_RWX + + static void mcode_protect(jit_State *J, int prot) + { +- UNUSED(J); UNUSED(prot); ++ UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); + } + + #else +@@ -242,7 +230,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) + } + /* Next try probing 64K-aligned pseudo-random addresses. */ + do { +- hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; ++ hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000); + } while (!(hint + sz < range+range)); + hint = target + hint - range; + } +@@ -255,7 +243,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) + /* All memory addresses are reachable by relative jumps. */ + static void *mcode_alloc(jit_State *J, size_t sz) + { +-#ifdef __OpenBSD__ ++#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP + /* Allow better executable memory allocation for OpenBSD W^X mode. */ + void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); + if (p && mcode_setprot(p, sz, MCPROT_GEN)) { +@@ -272,12 +260,6 @@ static void *mcode_alloc(jit_State *J, size_t sz) + + /* -- MCode area management ----------------------------------------------- */ + +-/* Linked list of MCode areas. */ +-typedef struct MCLink { +- MCode *next; /* Next area. */ +- size_t size; /* Size of current area. */ +-} MCLink; +- + /* Allocate a new MCode area. */ + static void mcode_allocarea(jit_State *J) + { +@@ -292,6 +274,7 @@ static void mcode_allocarea(jit_State *J) + ((MCLink *)J->mcarea)->next = oldarea; + ((MCLink *)J->mcarea)->size = sz; + J->szallmcarea += sz; ++ J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot); + } + + /* Free all MCode areas. */ +@@ -302,7 +285,9 @@ void lj_mcode_free(jit_State *J) + J->szallmcarea = 0; + while (mc) { + MCode *next = ((MCLink *)mc)->next; +- mcode_free(J, mc, ((MCLink *)mc)->size); ++ size_t sz = ((MCLink *)mc)->size; ++ lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); ++ mcode_free(J, mc, sz); + mc = next; + } + } +@@ -337,35 +322,36 @@ void lj_mcode_abort(jit_State *J) + /* Set/reset protection to allow patching of MCode areas. */ + MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) + { +-#ifdef LUAJIT_UNPROTECT_MCODE +- UNUSED(J); UNUSED(ptr); UNUSED(finish); +- return NULL; +-#else + if (finish) { ++#if LUAJIT_SECURITY_MCODE + if (J->mcarea == ptr) + mcode_protect(J, MCPROT_RUN); + else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) + mcode_protfail(J); ++#endif + return NULL; + } else { + MCode *mc = J->mcarea; + /* Try current area first to use the protection cache. */ + if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { ++#if LUAJIT_SECURITY_MCODE + mcode_protect(J, MCPROT_GEN); ++#endif + return mc; + } + /* Otherwise search through the list of MCode areas. 
*/ + for (;;) { + mc = ((MCLink *)mc)->next; +- lua_assert(mc != NULL); ++ lj_assertJ(mc != NULL, "broken MCode area chain"); + if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { ++#if LUAJIT_SECURITY_MCODE + if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) + mcode_protfail(J); ++#endif + return mc; + } + } + } +-#endif + } + + /* Limit of MCode reservation reached. */ +diff --git a/src/lj_mcode.h b/src/lj_mcode.h +index f0847e93..caaaec61 100644 +--- a/src/lj_mcode.h ++++ b/src/lj_mcode.h +@@ -1,6 +1,6 @@ + /* + ** Machine code management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_MCODE_H +diff --git a/src/lj_meta.c b/src/lj_meta.c +index 0bd4d842..660dfec0 100644 +--- a/src/lj_meta.c ++++ b/src/lj_meta.c +@@ -1,6 +1,6 @@ + /* + ** Metamethod handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -47,7 +47,7 @@ void lj_meta_init(lua_State *L) + cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) + { + cTValue *mo = lj_tab_getstr(mt, name); +- lua_assert(mm <= MM_FAST); ++ lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm); + if (!mo || tvisnil(mo)) { /* No metamethod? */ + mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */ + return NULL; +@@ -86,8 +86,8 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) + else + top->u32.lo = LJ_CONT_TAILCALL; + setframe_pc(top++, pc); +- if (LJ_FR2) top++; + setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */ ++ if (LJ_FR2) top++; + setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT); + L->base = L->top = top+1; + /* +@@ -240,8 +240,8 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) + int fromc = 0; + if (left < 0) { left = -left; fromc = 1; } + do { +- if (!(tvisstr(top) || tvisnumber(top)) || +- !(tvisstr(top-1) || tvisnumber(top-1))) { ++ if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) || ++ !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) { + cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); + if (tvisnil(mo)) { + mo = lj_meta_lookup(L, top, MM_concat); +@@ -277,10 +277,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) + ** next step: [...][CAT stack ............] + */ + TValue *e, *o = top; +- uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; ++ uint64_t tlen = tvisstr(o) ? strV(o)->len : ++ tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; + SBuf *sb; + do { +- o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; ++ o--; tlen += tvisstr(o) ? strV(o)->len : ++ tvisbuf(o) ? 
sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; + } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); + if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); + sb = lj_buf_tmp_(L); +@@ -290,6 +292,9 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) + GCstr *s = strV(o); + MSize len = s->len; + lj_buf_putmem(sb, strdata(s), len); ++ } else if (tvisbuf(o)) { ++ SBufExt *sbx = bufV(o); ++ lj_buf_putmem(sb, sbx->r, sbufxlen(sbx)); + } else if (tvisint(o)) { + lj_strfmt_putint(sb, intV(o)); + } else { +@@ -363,7 +368,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) + } else if (op == BC_ISEQN) { + o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; + } else { +- lua_assert(op == BC_ISEQP); ++ lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op); + setpriV(&tv, ~bc_d(ins)); + o2 = &tv; + } +@@ -426,7 +431,7 @@ void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp) + { + L->top = curr_topL(L); + ra++; tp--; +- lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */ ++ lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE"); + if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra); + else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); + else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra); +diff --git a/src/lj_meta.h b/src/lj_meta.h +index 73b45724..400a1d74 100644 +--- a/src/lj_meta.h ++++ b/src/lj_meta.h +@@ -1,6 +1,6 @@ + /* + ** Metamethod handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_META_H +diff --git a/src/lj_obj.c b/src/lj_obj.c +index ee33aeb3..a2c3dc5b 100644 +--- a/src/lj_obj.c ++++ b/src/lj_obj.c +@@ -1,6 +1,6 @@ + /* + ** Miscellaneous object handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_obj_c +@@ -34,12 +34,13 @@ int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2) + } + + /* Return pointer to object or its object data. */ +-const void * LJ_FASTCALL lj_obj_ptr(cTValue *o) ++const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o) + { ++ UNUSED(g); + if (tvisudata(o)) + return uddata(udataV(o)); + else if (tvislightud(o)) +- return lightudV(o); ++ return lightudV(g, o); + else if (LJ_HASFFI && tviscdata(o)) + return cdataptr(cdataV(o)); + else if (tvisgcv(o)) +diff --git a/src/lj_obj.h b/src/lj_obj.h +index 52372c3e..1a6445fc 100644 +--- a/src/lj_obj.h ++++ b/src/lj_obj.h +@@ -1,6 +1,6 @@ + /* + ** LuaJIT VM tags, values and objects. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -13,7 +13,7 @@ + #include "lj_def.h" + #include "lj_arch.h" + +-/* -- Memory references (32 bit address space) ---------------------------- */ ++/* -- Memory references --------------------------------------------------- */ + + /* Memory and GC object sizes. 
*/ + typedef uint32_t MSize; +@@ -34,17 +34,21 @@ typedef struct MRef { + + #if LJ_GC64 + #define mref(r, t) ((t *)(void *)(r).ptr64) ++#define mrefu(r) ((r).ptr64) + + #define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) ++#define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u)) + #define setmrefr(r, v) ((r).ptr64 = (v).ptr64) + #else + #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) ++#define mrefu(r) ((r).ptr32) + + #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) ++#define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u)) + #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) + #endif + +-/* -- GC object references (32 bit address space) ------------------------- */ ++/* -- GC object references ------------------------------------------------ */ + + /* GCobj reference */ + typedef struct GCRef { +@@ -153,11 +157,9 @@ typedef int32_t BCLine; /* Bytecode line number. */ + typedef void (*ASMFunction)(void); + + /* Resizable string buffer. Need this here, details in lj_buf.h. */ ++#define SBufHeader char *w, *e, *b; MRef L + typedef struct SBuf { +- MRef p; /* String buffer pointer. */ +- MRef e; /* String buffer end pointer. */ +- MRef b; /* String buffer base. */ +- MRef L; /* lua_State, used for buffer resizing. */ ++ SBufHeader; + } SBuf; + + /* -- Tags and values ----------------------------------------------------- */ +@@ -232,7 +234,7 @@ typedef const TValue cTValue; + ** ---MSW---.---LSW--- + ** primitive types | itype | | + ** lightuserdata | itype | void * | (32 bit platforms) +-** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers) ++** lightuserdata |ffff|seg| ofs | (64 bit platforms) + ** GC objects | itype | GCRef | + ** int (LJ_DUALNUM)| itype | int | + ** number -------double------ +@@ -245,7 +247,8 @@ typedef const TValue cTValue; + ** + ** ------MSW------.------LSW------ + ** primitive types |1..1|itype|1..................1| +-** GC objects/lightud |1..1|itype|-------GCRef--------| ++** GC objects |1..1|itype|-------GCRef--------| ++** lightuserdata |1..1|itype|seg|------ofs-------| + ** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------| + ** number ------------double------------- + ** +@@ -281,18 +284,31 @@ typedef const TValue cTValue; + #define LJ_TISGCV (LJ_TSTR+1) + #define LJ_TISTABUD LJ_TTAB + ++/* Type marker for slot holding a traversal index. Must be lightuserdata. */ ++#define LJ_KEYINDEX 0xfffe7fffu ++ + #if LJ_GC64 + #define LJ_GCVMASK (((uint64_t)1 << 47) - 1) + #endif + ++#if LJ_64 ++/* To stay within 47 bits, lightuserdata is segmented. */ ++#define LJ_LIGHTUD_BITS_SEG 8 ++#define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG) ++#endif ++ + /* -- String object ------------------------------------------------------- */ + ++typedef uint32_t StrHash; /* String hash value. */ ++typedef uint32_t StrID; /* String ID. */ ++ + /* String object header. String payload follows. */ + typedef struct GCstr { + GCHeader; + uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ +- uint8_t unused; +- MSize hash; /* Hash of string. */ ++ uint8_t hashalg; /* Hash algorithm. */ ++ StrID sid; /* Interned string ID. */ ++ StrHash hash; /* Hash of string. */ + MSize len; /* Size of string. 
*/ + } GCstr; + +@@ -300,7 +316,6 @@ typedef struct GCstr { + #define strdata(s) ((const char *)((s)+1)) + #define strdatawr(s) ((char *)((s)+1)) + #define strVdata(o) strdata(strV(o)) +-#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1) + + /* -- Userdata object ----------------------------------------------------- */ + +@@ -320,6 +335,7 @@ enum { + UDTYPE_USERDATA, /* Regular userdata. */ + UDTYPE_IO_FILE, /* I/O library FILE. */ + UDTYPE_FFI_CLIB, /* FFI C library namespace. */ ++ UDTYPE_BUFFER, /* String buffer. */ + UDTYPE__MAX + }; + +@@ -570,13 +586,18 @@ typedef enum { + #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) + #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) + ++/* Garbage collector state. */ + typedef struct GCState { + GCSize total; /* Memory currently allocated. */ + GCSize threshold; /* Memory threshold. */ + uint8_t currentwhite; /* Current white color. */ + uint8_t state; /* GC state. */ + uint8_t nocdatafin; /* No cdata finalizer called. */ +- uint8_t unused2; ++#if LJ_64 ++ uint8_t lightudnum; /* Number of lightuserdata segments - 1. */ ++#else ++ uint8_t unused1; ++#endif + MSize sweepstr; /* Sweep position in string table. */ + GCRef root; /* List of all collectable objects. */ + MRef sweep; /* Sweep position in root list. */ +@@ -588,27 +609,41 @@ typedef struct GCState { + GCSize estimate; /* Estimate of memory actually in use. */ + MSize stepmul; /* Incremental GC step granularity. */ + MSize pause; /* Pause between successive GC cycles. */ ++#if LJ_64 ++ MRef lightudseg; /* Upper bits of lightuserdata segments. */ ++#endif + } GCState; + ++/* String interning state. */ ++typedef struct StrInternState { ++ GCRef *tab; /* String hash table anchors. */ ++ MSize mask; /* String hash mask (size of hash table - 1). */ ++ MSize num; /* Number of strings in hash table. */ ++ StrID id; /* Next string ID. */ ++ uint8_t idreseed; /* String ID reseed counter. */ ++ uint8_t second; /* String interning table uses secondary hashing. */ ++ uint8_t unused1; ++ uint8_t unused2; ++ LJ_ALIGN(8) uint64_t seed; /* Random string seed. */ ++} StrInternState; ++ + /* Global state, shared by all threads of a Lua universe. */ + typedef struct global_State { +- GCRef *strhash; /* String hash table (hash chain anchors). */ +- MSize strmask; /* String hash mask (size of hash table - 1). */ +- MSize strnum; /* Number of strings in hash table. */ + lua_Alloc allocf; /* Memory allocator. */ + void *allocd; /* Memory allocator data. */ + GCState gc; /* Garbage collector. */ +- volatile int32_t vmstate; /* VM state or current JIT code trace number. */ +- SBuf tmpbuf; /* Temporary string buffer. */ + GCstr strempty; /* Empty string. */ + uint8_t stremptyz; /* Zero terminator of empty string. */ + uint8_t hookmask; /* Hook mask. */ + uint8_t dispatchmode; /* Dispatch mode. */ + uint8_t vmevmask; /* VM event mask. */ ++ StrInternState str; /* String interning. */ ++ volatile int32_t vmstate; /* VM state or current JIT code trace number. */ + GCRef mainthref; /* Link to main thread. */ +- TValue registrytv; /* Anchor for registry. */ ++ SBuf tmpbuf; /* Temporary string buffer. */ + TValue tmptv, tmptv2; /* Temporary TValues. */ + Node nilnode; /* Fallback 1-element hash part (nil key and value). */ ++ TValue registrytv; /* Anchor for registry. */ + GCupval uvhead; /* Head of double-linked list of all open upvalues. */ + int32_t hookcount; /* Instruction hook countdown. */ + int32_t hookcstart; /* Start count for instruction hook counter. 
*/ +@@ -620,6 +655,7 @@ typedef struct global_State { + GCRef cur_L; /* Currently executing lua_State. */ + MRef jit_base; /* Current JIT code L->base or NULL. */ + MRef ctype_state; /* Pointer to C type state. */ ++ PRNGState prng; /* Global PRNG state. */ + GCRef gcroot[GCROOT_MAX]; /* GC roots. */ + } global_State; + +@@ -638,7 +674,8 @@ typedef struct global_State { + #define HOOK_PROFILE 0x80 + #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) + #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) +-#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) ++#define hook_entergc(g) \ ++ ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE) + #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) + #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) + #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) +@@ -678,6 +715,11 @@ struct lua_State { + #define curr_topL(L) (L->base + curr_proto(L)->framesize) + #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) + ++#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) ++LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line, ++ const char *func, const char *fmt, ...); ++#endif ++ + /* -- GC object definition and conversions -------------------------------- */ + + /* GC header for generic access to common fields of GC objects. */ +@@ -731,10 +773,6 @@ typedef union GCobj { + + /* -- TValue getters/setters ---------------------------------------------- */ + +-#ifdef LUA_USE_ASSERT +-#include "lj_gc.h" +-#endif +- + /* Macros to test types. */ + #if LJ_GC64 + #define itype(o) ((uint32_t)((o)->it64 >> 47)) +@@ -795,10 +833,23 @@ typedef union GCobj { + #endif + #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o))) + #if LJ_64 +-#define lightudV(o) \ +- check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) ++#define lightudseg(u) \ ++ (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1)) ++#define lightudlo(u) \ ++ ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1)) ++#define lightudup(p) \ ++ ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32))) ++static LJ_AINLINE void *lightudV(global_State *g, cTValue *o) ++{ ++ uint64_t u = o->u64; ++ uint64_t seg = lightudseg(u); ++ uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); ++ lj_assertG(tvislightud(o), "lightuserdata expected"); ++ lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg); ++ return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u)); ++} + #else +-#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) ++#define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) + #endif + #define gcV(o) check_exp(tvisgcv(o), gcval(o)) + #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) +@@ -824,7 +875,7 @@ typedef union GCobj { + #define setpriV(o, i) (setitype((o), (i))) + #endif + +-static LJ_AINLINE void setlightudV(TValue *o, void *p) ++static LJ_AINLINE void setrawlightudV(TValue *o, void *p) + { + #if LJ_GC64 + o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47); +@@ -835,29 +886,29 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) + #endif + } + +-#if LJ_64 +-#define checklightudptr(L, p) \ +- (((uint64_t)(p) >> 47) ? 
(lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) +-#else +-#define checklightudptr(L, p) (p) +-#endif +- +-#if LJ_FR2 ++#if LJ_FR2 || LJ_32 + #define contptr(f) ((void *)(f)) + #define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) +-#elif LJ_64 ++#else + #define contptr(f) \ + ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) + #define setcont(o, f) \ + ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) +-#else +-#define contptr(f) ((void *)(f)) +-#define setcont(o, f) setlightudV((o), contptr(f)) + #endif + +-#define tvchecklive(L, o) \ +- UNUSED(L), lua_assert(!tvisgcv(o) || \ +- ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) ++static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg) ++{ ++ UNUSED(L); UNUSED(o); UNUSED(msg); ++#if LUA_USE_ASSERT ++ if (tvisgcv(o)) { ++ lj_assertL(~itype(o) == gcval(o)->gch.gct, ++ "mismatch of TValue type %d vs GC type %d", ++ ~itype(o), gcval(o)->gch.gct); ++ /* Copy of isdead check from lj_gc.h to avoid circular include. */ ++ lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg); ++ } ++#endif ++} + + static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) + { +@@ -870,11 +921,12 @@ static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) + + static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) + { +- setgcVraw(o, v, it); tvchecklive(L, o); ++ setgcVraw(o, v, it); ++ checklivetv(L, o, "store to dead GC object"); + } + + #define define_setV(name, type, tag) \ +-static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \ ++static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \ + { \ + setgcV(L, o, obj2gco(v), tag); \ + } +@@ -917,13 +969,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i) + /* Copy tagged values. */ + static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) + { +- *o1 = *o2; tvchecklive(L, o1); ++ *o1 = *o2; ++ checklivetv(L, o1, "copy of dead GC object"); + } + + /* -- Number to integer conversion ---------------------------------------- */ + + #if LJ_SOFTFP + LJ_ASMF int32_t lj_vm_tobit(double x); ++#if LJ_TARGET_MIPS64 ++LJ_ASMF int32_t lj_vm_tointg(double x); ++#endif + #endif + + static LJ_AINLINE int32_t lj_num2bit(lua_Number n) +@@ -939,14 +995,22 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) + + #define lj_num2int(n) ((int32_t)(n)) + ++/* ++** This must match the JIT backend behavior. In particular for archs ++** that don't have a common hardware instruction for this conversion. ++** Note that signed FP to unsigned int conversions have an undefined ++** result and should never be relied upon in portable FFI code. ++** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). ++*/ + static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) + { +-#ifdef _MSC_VER +- if (n >= 9223372036854775808.0) /* They think it's a feature. */ +- return (uint64_t)(int64_t)(n - 18446744073709551616.0); +- else ++#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS ++ int64_t i = (int64_t)n; ++ if (i < 0) i = (int64_t)(n - 18446744073709551616.0); ++ return (uint64_t)i; ++#else ++ return (uint64_t)n; + #endif +- return (uint64_t)n; + } + + static LJ_AINLINE int32_t numberVint(cTValue *o) +@@ -975,6 +1039,6 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; + + /* Compare two objects without calling metamethods. 
*/ + LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2); +-LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o); ++LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o); + + #endif +diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c +index 2417f324..d6b9e549 100644 +--- a/src/lj_opt_dce.c ++++ b/src/lj_opt_dce.c +@@ -1,6 +1,6 @@ + /* + ** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_opt_dce_c +@@ -47,10 +47,7 @@ static void dce_propagate(jit_State *J) + pchain[ir->o] = &ir->prev; + } else if (!ir_sideeff(ir)) { + *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */ +- ir->t.irt = IRT_NIL; +- ir->o = IR_NOP; /* Replace instruction with NOP. */ +- ir->op1 = ir->op2 = 0; +- ir->prev = 0; ++ lj_ir_nop(ir); + continue; + } + if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t); +diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c +index acbf36a5..2f903e27 100644 +--- a/src/lj_opt_fold.c ++++ b/src/lj_opt_fold.c +@@ -2,7 +2,7 @@ + ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. + ** ABCelim: Array Bounds Check Elimination. + ** CSE: Common-Subexpression Elimination. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_opt_fold_c +@@ -173,7 +173,6 @@ LJFOLD(ADD KNUM KNUM) + LJFOLD(SUB KNUM KNUM) + LJFOLD(MUL KNUM KNUM) + LJFOLD(DIV KNUM KNUM) +-LJFOLD(ATAN2 KNUM KNUM) + LJFOLD(LDEXP KNUM KNUM) + LJFOLD(MIN KNUM KNUM) + LJFOLD(MAX KNUM KNUM) +@@ -213,11 +212,36 @@ LJFOLDF(kfold_fpmath) + return lj_ir_knum(J, y); + } + ++LJFOLD(CALLN KNUM any) ++LJFOLDF(kfold_fpcall1) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; ++ if (CCI_TYPE(ci) == IRT_NUM) { ++ double y = ((double (*)(double))ci->func)(knumleft); ++ return lj_ir_knum(J, y); ++ } ++ return NEXTFOLD; ++} ++ ++LJFOLD(CALLN CARG IRCALL_atan2) ++LJFOLDF(kfold_fpcall2) ++{ ++ if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { ++ const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; ++ double a = ir_knum(IR(fleft->op1))->n; ++ double b = ir_knum(IR(fleft->op2))->n; ++ double y = ((double (*)(double, double))ci->func)(a, b); ++ return lj_ir_knum(J, y); ++ } ++ return NEXTFOLD; ++} ++ + LJFOLD(POW KNUM KINT) ++LJFOLD(POW KNUM KNUM) + LJFOLDF(kfold_numpow) + { + lua_Number a = knumleft; +- lua_Number b = (lua_Number)fright->i; ++ lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright; + lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); + return lj_ir_knum(J, y); + } +@@ -258,7 +282,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) + case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; + case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; + case IR_MAX: k1 = k1 > k2 ? 
k1 : k2; break; +- default: lua_assert(0); break; ++ default: lj_assertX(0, "bad IR op %d", op); break; + } + return k1; + } +@@ -330,7 +354,7 @@ LJFOLDF(kfold_intcomp) + case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); + case IR_ABC: + case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); +- default: lua_assert(0); return FAILFOLD; ++ default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; + } + } + +@@ -344,10 +368,12 @@ LJFOLDF(kfold_intcomp0) + + /* -- Constant folding for 64 bit integers -------------------------------- */ + +-static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) ++static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, ++ IROp op) + { +- switch (op) { ++ UNUSED(J); + #if LJ_HASFFI ++ switch (op) { + case IR_ADD: k1 += k2; break; + case IR_SUB: k1 -= k2; break; + case IR_MUL: k1 *= k2; break; +@@ -359,9 +385,12 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) + case IR_BSAR: k1 >>= (k2 & 63); break; + case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; + case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; +-#endif +- default: UNUSED(k2); lua_assert(0); break; ++ default: lj_assertJ(0, "bad IR op %d", op); break; + } ++#else ++ UNUSED(k2); UNUSED(op); ++ lj_assertJ(0, "FFI IR op without FFI"); ++#endif + return k1; + } + +@@ -373,7 +402,7 @@ LJFOLD(BOR KINT64 KINT64) + LJFOLD(BXOR KINT64 KINT64) + LJFOLDF(kfold_int64arith) + { +- return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, ++ return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64, + ir_k64(fright)->u64, (IROp)fins->o)); + } + +@@ -395,7 +424,7 @@ LJFOLDF(kfold_int64arith2) + } + return INT64FOLD(k1); + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -411,7 +440,7 @@ LJFOLDF(kfold_int64shift) + int32_t sh = (fright->i & 63); + return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -421,7 +450,7 @@ LJFOLDF(kfold_bnot64) + #if LJ_HASFFI + return INT64FOLD(~ir_k64(fleft)->u64); + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -431,7 +460,7 @@ LJFOLDF(kfold_bswap64) + #if LJ_HASFFI + return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -456,10 +485,10 @@ LJFOLDF(kfold_int64comp) + case IR_UGE: return CONDFOLD(a >= b); + case IR_ULE: return CONDFOLD(a <= b); + case IR_UGT: return CONDFOLD(a > b); +- default: lua_assert(0); return FAILFOLD; ++ default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; + } + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -471,7 +500,7 @@ LJFOLDF(kfold_int64comp0) + return DROPFOLD; + return NEXTFOLD; + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -485,6 +514,7 @@ LJFOLDF(kfold_snew_kptr) + } + + LJFOLD(SNEW any KINT) ++LJFOLD(XSNEW any KINT) + LJFOLDF(kfold_snew_empty) + { + if (fright->i == 0) +@@ -496,7 +526,7 @@ LJFOLD(STRREF KGC KINT) + LJFOLDF(kfold_strref) + { + GCstr *str = ir_kstr(fleft); +- 
lua_assert((MSize)fright->i <= str->len); ++ lj_assertJ((MSize)fright->i <= str->len, "bad string ref"); + return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); + } + +@@ -548,22 +578,49 @@ LJFOLDF(kfold_strcmp) + ** The compromise is to declare them as loads, emit them like stores and + ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain + ** fragments left over from CSE are eliminated by DCE. ++** ++** The string buffer methods emit a USE instead of a BUFSTR to keep the ++** chain alive. + */ + +-/* BUFHDR is emitted like a store, see below. */ ++LJFOLD(BUFHDR any any) ++LJFOLDF(bufhdr_merge) ++{ ++ return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD; ++} + +-LJFOLD(BUFPUT BUFHDR BUFSTR) +-LJFOLDF(bufput_append) ++LJFOLD(BUFPUT any BUFSTR) ++LJFOLDF(bufput_bufstr) + { +- /* New buffer, no other buffer op inbetween and same buffer? */ +- if ((J->flags & JIT_F_OPT_FWD) && +- !(fleft->op2 & IRBUFHDR_APPEND) && +- fleft->prev == fright->op2 && +- fleft->op1 == IR(fright->op2)->op1) { +- IRRef ref = fins->op1; +- IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */ +- IR(ref)->op1 = fright->op1; +- return ref; ++ if ((J->flags & JIT_F_OPT_FWD)) { ++ IRRef hdr = fright->op2; ++ /* New buffer, no other buffer op inbetween and same buffer? */ ++ if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET && ++ fleft->prev == hdr && ++ fleft->op1 == IR(hdr)->op1) { ++ IRRef ref = fins->op1; ++ IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */ ++ IR(ref)->op1 = fright->op1; ++ return ref; ++ } ++ /* Replay puts to global temporary buffer. */ ++ if (IR(hdr)->op2 == IRBUFHDR_RESET) { ++ IRIns *ir = IR(fright->op1); ++ /* For now only handle single string.reverse .lower .upper .rep. */ ++ if (ir->o == IR_CALLL && ++ ir->op2 >= IRCALL_lj_buf_putstr_reverse && ++ ir->op2 <= IRCALL_lj_buf_putstr_rep) { ++ IRIns *carg1 = IR(ir->op1); ++ if (ir->op2 == IRCALL_lj_buf_putstr_rep) { ++ IRIns *carg2 = IR(carg1->op1); ++ if (carg2->op1 == hdr) { ++ return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2); ++ } ++ } else if (carg1->op1 == hdr) { ++ return lj_ir_call(J, ir->op2, fins->op1, carg1->op2); ++ } ++ } ++ } + } + return EMITFOLD; /* Always emit, CSE later. */ + } +@@ -592,18 +649,19 @@ LJFOLDF(bufput_kgc) + LJFOLD(BUFSTR any any) + LJFOLDF(bufstr_kfold_cse) + { +- lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || +- fleft->o == IR_CALLL); ++ lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || ++ fleft->o == IR_CALLL, ++ "bad buffer constructor IR op %d", fleft->o); + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { + if (fleft->o == IR_BUFHDR) { /* No put operations? */ +- if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */ ++ if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */ + return lj_ir_kstr(J, &J2G(J)->strempty); + fins->op1 = fleft->op1; + fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */ + return CSEFOLD; + } else if (fleft->o == IR_BUFPUT) { + IRIns *irb = IR(fleft->op1); +- if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) ++ if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET) + return fleft->op2; /* Shortcut for a single put operation. 
*/ + } + } +@@ -613,9 +671,10 @@ LJFOLDF(bufstr_kfold_cse) + while (ref) { + IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); + while (ira->o == irb->o && ira->op2 == irb->op2) { +- lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || +- ira->o == IR_CALLL || ira->o == IR_CARG); +- if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) ++ lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || ++ ira->o == IR_CALLL || ira->o == IR_CARG, ++ "bad buffer constructor IR op %d", ira->o); ++ if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET) + return ref; /* CSE succeeded. */ + if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) + break; +@@ -673,7 +732,7 @@ LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) + LJFOLDF(bufput_kfold_fmt) + { + IRIns *irc = IR(fleft->op1); +- lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */ ++ lj_assertJ(irref_isk(irc->op2), "SFormat must be const"); + if (irref_isk(fleft->op2)) { + SFormat sf = (SFormat)IR(irc->op2)->i; + IRIns *ira = IR(fleft->op2); +@@ -1054,7 +1113,7 @@ LJFOLDF(simplify_nummuldiv_negneg) + } + + LJFOLD(POW any KINT) +-LJFOLDF(simplify_numpow_xk) ++LJFOLDF(simplify_numpow_xkint) + { + int32_t k = fright->i; + TRef ref = fins->op1; +@@ -1083,13 +1142,22 @@ LJFOLDF(simplify_numpow_xk) + return ref; + } + ++LJFOLD(POW any KNUM) ++LJFOLDF(simplify_numpow_xknum) ++{ ++ if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */ ++ return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT); ++ return NEXTFOLD; ++} ++ + LJFOLD(POW KNUM any) + LJFOLDF(simplify_numpow_kx) + { + lua_Number n = knumleft; +- if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ +- fins->o = IR_CONV; ++ if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */ + #if LJ_TARGET_X86ORX64 ++ /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */ ++ fins->o = IR_CONV; + fins->op1 = fins->op2; + fins->op2 = IRCONV_NUM_INT; + fins->op2 = (IRRef1)lj_opt_fold(J); +@@ -1183,10 +1251,10 @@ LJFOLDF(simplify_tobit_conv) + { + /* Fold even across PHI to avoid expensive num->int conversions in loop. */ + if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { +- lua_assert(irt_isnum(fleft->t)); ++ lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); + return fleft->op1; + } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { +- lua_assert(irt_isnum(fleft->t)); ++ lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); + fins->o = IR_CONV; + fins->op1 = fleft->op1; + fins->op2 = (IRT_INT<<5)|IRT_U32; +@@ -1226,8 +1294,8 @@ LJFOLDF(simplify_conv_sext) + /* Use scalar evolution analysis results to strength-reduce sign-extension. */ + if (ref == J->scev.idx) { + IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; +- lua_assert(irt_isint(J->scev.t)); +- if (lo && IR(lo)->i + ofs >= 0) { ++ lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported"); ++ if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { + ok_reduce: + #if LJ_TARGET_X64 + /* Eliminate widening. All 32 bit ops do an implicit zero-extension. 
*/ +@@ -1257,16 +1325,21 @@ LJFOLD(CONV SUB IRCONV_U32_U64) + LJFOLD(CONV MUL IRCONV_U32_U64) + LJFOLDF(simplify_conv_narrow) + { ++#if LJ_64 ++ UNUSED(J); ++ return NEXTFOLD; ++#else + IROp op = (IROp)fleft->o; + IRType t = irt_type(fins->t); + IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; + PHIBARRIER(fleft); +- op1 = emitir(IRTI(IR_CONV), op1, mode); +- op2 = emitir(IRTI(IR_CONV), op2, mode); ++ op1 = emitir(IRT(IR_CONV, t), op1, mode); ++ op2 = emitir(IRT(IR_CONV, t), op2, mode); + fins->ot = IRT(op, t); + fins->op1 = op1; + fins->op2 = op2; + return RETRYFOLD; ++#endif + } + + /* Special CSE rule for CONV. */ +@@ -1302,7 +1375,8 @@ LJFOLDF(narrow_convert) + /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ + if (J->chain[IR_LOOP]) + return NEXTFOLD; +- lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); ++ lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT, ++ "unexpected CONV TOBIT"); + return lj_opt_narrow_convert(J); + } + +@@ -1408,7 +1482,7 @@ LJFOLDF(simplify_intmul_k64) + return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); + return NEXTFOLD; + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -1416,7 +1490,7 @@ LJFOLD(MOD any KINT) + LJFOLDF(simplify_intmod_k) + { + int32_t k = fright->i; +- lua_assert(k != 0); ++ lj_assertJ(k != 0, "integer mod 0"); + if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ + fins->o = IR_BAND; + fins->op2 = lj_ir_kint(J, k-1); +@@ -1666,7 +1740,8 @@ LJFOLDF(simplify_shiftk_andk) + fins->ot = IRTI(IR_BAND); + return RETRYFOLD; + } else if (irk->o == IR_KINT64) { +- uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o); ++ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i, ++ (IROp)fins->o); + IROpT ot = fleft->ot; + fins->op1 = fleft->op1; + fins->op1 = (IRRef1)lj_opt_fold(J); +@@ -1714,8 +1789,8 @@ LJFOLDF(simplify_andor_k64) + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); + if (irk->o == IR_KINT64) { +- uint64_t k = kfold_int64arith(ir_k64(irk)->u64, +- ir_k64(fright)->u64, (IROp)fins->o); ++ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, ++ (IROp)fins->o); + /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ + /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ + if (k == (fins->o == IR_BAND ? 
(uint64_t)0 : ~(uint64_t)0)) { +@@ -1725,7 +1800,7 @@ LJFOLDF(simplify_andor_k64) + } + return NEXTFOLD; + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +@@ -1761,8 +1836,8 @@ LJFOLDF(reassoc_intarith_k64) + #if LJ_HASFFI + IRIns *irk = IR(fleft->op2); + if (irk->o == IR_KINT64) { +- uint64_t k = kfold_int64arith(ir_k64(irk)->u64, +- ir_k64(fright)->u64, (IROp)fins->o); ++ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, ++ (IROp)fins->o); + PHIBARRIER(fleft); + fins->op1 = fleft->op1; + fins->op2 = (IRRef1)lj_ir_kint64(J, k); +@@ -1770,12 +1845,10 @@ LJFOLDF(reassoc_intarith_k64) + } + return NEXTFOLD; + #else +- UNUSED(J); lua_assert(0); return FAILFOLD; ++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; + #endif + } + +-LJFOLD(MIN MIN any) +-LJFOLD(MAX MAX any) + LJFOLD(BAND BAND any) + LJFOLD(BOR BOR any) + LJFOLDF(reassoc_dup) +@@ -1785,6 +1858,15 @@ LJFOLDF(reassoc_dup) + return NEXTFOLD; + } + ++LJFOLD(MIN MIN any) ++LJFOLD(MAX MAX any) ++LJFOLDF(reassoc_dup_minmax) ++{ ++ if (fins->op2 == fleft->op2) ++ return LEFTFOLD; /* (a o b) o b ==> a o b */ ++ return NEXTFOLD; ++} ++ + LJFOLD(BXOR BXOR any) + LJFOLDF(reassoc_bxor) + { +@@ -1823,23 +1905,12 @@ LJFOLDF(reassoc_shift) + return NEXTFOLD; + } + +-LJFOLD(MIN MIN KNUM) +-LJFOLD(MAX MAX KNUM) + LJFOLD(MIN MIN KINT) + LJFOLD(MAX MAX KINT) + LJFOLDF(reassoc_minmax_k) + { + IRIns *irk = IR(fleft->op2); +- if (irk->o == IR_KNUM) { +- lua_Number a = ir_knum(irk)->n; +- lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); +- if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ +- return LEFTFOLD; +- PHIBARRIER(fleft); +- fins->op1 = fleft->op1; +- fins->op2 = (IRRef1)lj_ir_knum(J, y); +- return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ +- } else if (irk->o == IR_KINT) { ++ if (irk->o == IR_KINT) { + int32_t a = irk->i; + int32_t y = kfold_intop(a, fright->i, fins->o); + if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ +@@ -1852,24 +1923,6 @@ LJFOLDF(reassoc_minmax_k) + return NEXTFOLD; + } + +-LJFOLD(MIN MAX any) +-LJFOLD(MAX MIN any) +-LJFOLDF(reassoc_minmax_left) +-{ +- if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) +- return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ +- return NEXTFOLD; +-} +- +-LJFOLD(MIN any MAX) +-LJFOLD(MAX any MIN) +-LJFOLDF(reassoc_minmax_right) +-{ +- if (fins->op1 == fright->op1 || fins->op1 == fright->op2) +- return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ +- return NEXTFOLD; +-} +- + /* -- Array bounds check elimination -------------------------------------- */ + + /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. 
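
[Illustrative aside, not part of the upstream patch.] The lj_opt_fold.c hunks around here stop treating MIN/MAX as freely reorderable: the KNUM reassociation and the reassoc_minmax_left/right rules are removed, and separate reassoc_dup_minmax/comm_dup_minmax rules no longer swap or duplicate operands. A minimal sketch of why such reordering is unsafe, assuming the VM evaluates max(a, b) roughly as "a < b ? b : a" (the helper below is a stand-in, not LuaJIT code): once a NaN operand is involved, the result depends on operand order.

  #include <math.h>
  #include <stdio.h>

  /* Illustrative stand-in for a branchy max; not LuaJIT code. */
  static double vm_max(double a, double b) { return a < b ? b : a; }

  int main(void)
  {
    printf("max(NaN, 1) = %g\n", vm_max(NAN, 1.0)); /* NaN < 1 is false -> NaN */
    printf("max(1, NaN) = %g\n", vm_max(1.0, NAN)); /* 1 < NaN is false -> 1   */
    return 0;
  }

Since operand order decides which value survives a NaN, folds such as (b o1 a) o2 b ==> b or the operand swap in comm_dup can silently change results, which is presumably why the patch restricts these MIN/MAX folds to order-preserving cases.
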
+@@ -1995,8 +2048,6 @@ LJFOLDF(comm_comp) + + LJFOLD(BAND any any) + LJFOLD(BOR any any) +-LJFOLD(MIN any any) +-LJFOLD(MAX any any) + LJFOLDF(comm_dup) + { + if (fins->op1 == fins->op2) /* x o x ==> x */ +@@ -2004,6 +2055,15 @@ LJFOLDF(comm_dup) + return fold_comm_swap(J); + } + ++LJFOLD(MIN any any) ++LJFOLD(MAX any any) ++LJFOLDF(comm_dup_minmax) ++{ ++ if (fins->op1 == fins->op2) /* x o x ==> x */ ++ return LEFTFOLD; ++ return NEXTFOLD; ++} ++ + LJFOLD(BXOR any any) + LJFOLDF(comm_bxor) + { +@@ -2040,7 +2100,7 @@ LJFOLDF(merge_eqne_snew_kgc) + { + GCstr *kstr = ir_kstr(fright); + int32_t len = (int32_t)kstr->len; +- lua_assert(irt_isstr(fins->t)); ++ lj_assertJ(irt_isstr(fins->t), "bad equality IR type"); + + #if LJ_TARGET_UNALIGNED + #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ +@@ -2104,7 +2164,7 @@ LJFOLD(HLOAD KKPTR) + LJFOLDF(kfold_hload_kkptr) + { + UNUSED(J); +- lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); ++ lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv"); + return TREF_NIL; + } + +@@ -2114,8 +2174,8 @@ LJFOLDX(lj_opt_fwd_hload) + LJFOLD(ULOAD any) + LJFOLDX(lj_opt_fwd_uload) + +-LJFOLD(CALLL any IRCALL_lj_tab_len) +-LJFOLDX(lj_opt_fwd_tab_len) ++LJFOLD(ALEN any any) ++LJFOLDX(lj_opt_fwd_alen) + + /* Upvalue refs are really loads, but there are no corresponding stores. + ** So CSE is ok for them, except for UREFO across a GC step (see below). +@@ -2248,6 +2308,27 @@ LJFOLDF(fload_str_len_tostr) + return NEXTFOLD; + } + ++LJFOLD(FLOAD any IRFL_SBUF_W) ++LJFOLD(FLOAD any IRFL_SBUF_E) ++LJFOLD(FLOAD any IRFL_SBUF_B) ++LJFOLD(FLOAD any IRFL_SBUF_L) ++LJFOLD(FLOAD any IRFL_SBUF_REF) ++LJFOLD(FLOAD any IRFL_SBUF_R) ++LJFOLDF(fload_sbuf) ++{ ++ TRef tr = lj_opt_fwd_fload(J); ++ return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; ++} ++ ++/* The fast function ID of function objects is immutable. */ ++LJFOLD(FLOAD KGC IRFL_FUNC_FFID) ++LJFOLDF(fload_func_ffid_kgc) ++{ ++ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) ++ return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); ++ return NEXTFOLD; ++} ++ + /* The C type ID of cdata objects is immutable. */ + LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) + LJFOLDF(fload_cdata_typeid_kgc) +@@ -2315,7 +2396,7 @@ LJFOLDF(fwd_sload) + TRef tr = lj_opt_cse(J); + return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; + } else { +- lua_assert(J->slot[fins->op1] != 0); ++ lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed"); + return J->slot[fins->op1]; + } + } +@@ -2394,6 +2475,7 @@ LJFOLD(XSTORE any any) + LJFOLDX(lj_opt_dse_xstore) + + LJFOLD(NEWREF any any) /* Treated like a store. */ ++LJFOLD(TMPREF any any) + LJFOLD(CALLA any any) + LJFOLD(CALLL any any) /* Safeguard fallback. */ + LJFOLD(CALLS any any) +@@ -2404,7 +2486,6 @@ LJFOLD(TNEW any any) + LJFOLD(TDUP any) + LJFOLD(CNEW any any) + LJFOLD(XSNEW any any) +-LJFOLD(BUFHDR any any) + LJFOLDX(lj_ir_emit) + + /* ------------------------------------------------------------------------ */ +@@ -2430,8 +2511,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) + IRRef ref; + + if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { +- lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | +- JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); ++ lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | ++ JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT, ++ "bad JIT_F_OPT_DEFAULT"); + /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. 
*/ + if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) + return lj_opt_cse(J); +@@ -2493,7 +2575,7 @@ retry: + return lj_ir_kint(J, fins->i); + if (ref == FAILFOLD) + lj_trace_err(J, LJ_TRERR_GFAIL); +- lua_assert(ref == DROPFOLD); ++ lj_assertJ(ref == DROPFOLD, "bad fold result"); + return REF_DROP; + } + +diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c +index 04c6d06d..df5811a9 100644 +--- a/src/lj_opt_loop.c ++++ b/src/lj_opt_loop.c +@@ -1,6 +1,6 @@ + /* + ** LOOP: Loop Optimizations. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_opt_loop_c +@@ -223,8 +223,9 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, + } + J->guardemit.irt = 0; + /* Setup new snapshot. */ +- snap->mapofs = (uint16_t)nmapofs; ++ snap->mapofs = (uint32_t)nmapofs; + snap->ref = (IRRef1)J->cur.nins; ++ snap->mcofs = 0; + snap->nslots = nslots; + snap->topslot = osnap->topslot; + snap->count = 0; +@@ -251,7 +252,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, + nmap += nn; + while (omap < nextmap) /* Copy PC + frame links. */ + *nmap++ = *omap++; +- J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap); ++ J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); + } + + typedef struct LoopState { +@@ -299,7 +300,8 @@ static void loop_unroll(LoopState *lps) + loopmap = &J->cur.snapmap[loopsnap->mapofs]; + /* The PC of snapshot #0 and the loop snapshot must match. */ + psentinel = &loopmap[loopsnap->nent]; +- lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); ++ lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent], ++ "mismatched PC for loop snapshot"); + *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ + + /* Start substitution with snapshot #1 (#0 is empty for root traces). */ +@@ -352,10 +354,12 @@ static void loop_unroll(LoopState *lps) + irr = IR(ref); + goto phiconv; + } +- } else if (ref != REF_DROP && irr->o == IR_CONV && +- ref > invar && irr->op1 < invar) { +- /* May need an extra PHI for a CONV. */ +- ref = irr->op1; ++ } else if (ref != REF_DROP && ref > invar && ++ ((irr->o == IR_CONV && irr->op1 < invar) || ++ (irr->o == IR_ALEN && irr->op2 < invar && ++ irr->op2 != REF_NIL))) { ++ /* May need an extra PHI for a CONV or ALEN hint. */ ++ ref = irr->o == IR_CONV ? irr->op1 : irr->op2; + irr = IR(ref); + phiconv: + if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { +@@ -369,8 +373,8 @@ static void loop_unroll(LoopState *lps) + } + } + if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ +- J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; +- lua_assert(J->cur.nsnapmap <= J->sizesnapmap); ++ J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; ++ lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index"); + *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ + + loop_emit_phi(J, subst, phi, nphi, onsnap); +@@ -383,7 +387,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap) + SnapShot *snap = &J->cur.snap[nsnap-1]; + SnapEntry *map = J->cur.snapmap; + map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. 
*/ +- J->cur.nsnapmap = (uint16_t)nsnapmap; ++ J->cur.nsnapmap = (uint32_t)nsnapmap; + J->cur.nsnap = nsnap; + J->guardemit.irt = 0; + lj_ir_rollback(J, ins); +diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c +index cc177d39..d6a419e4 100644 +--- a/src/lj_opt_mem.c ++++ b/src/lj_opt_mem.c +@@ -3,7 +3,7 @@ + ** AA: Alias Analysis using high-level semantic disambiguation. + ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L). + ** DSE: Dead-Store Elimination. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_opt_mem_c +@@ -18,6 +18,7 @@ + #include "lj_jit.h" + #include "lj_iropt.h" + #include "lj_ircall.h" ++#include "lj_dispatch.h" + + /* Some local macros to save typing. Undef'd at the end. */ + #define IR(ref) (&J->cur.ir[(ref)]) +@@ -56,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) + { + IRIns *taba = IR(ta), *tabb = IR(tb); + int newa, newb; +- lua_assert(ta != tb); +- lua_assert(irt_istab(taba->t) && irt_istab(tabb->t)); ++ lj_assertJ(ta != tb, "bad usage"); ++ lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage"); + /* Disambiguate new allocations. */ + newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); + newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); +@@ -99,7 +100,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) + /* Disambiguate array references based on index arithmetic. */ + int32_t ofsa = 0, ofsb = 0; + IRRef basea = ka, baseb = kb; +- lua_assert(refb->o == IR_AREF); ++ lj_assertJ(refb->o == IR_AREF, "expected AREF"); + /* Gather base and offset from t[base] or t[base+-ofs]. */ + if (keya->o == IR_ADD && irref_isk(keya->op2)) { + basea = keya->op1; +@@ -117,8 +118,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) + return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ + } else { + /* Disambiguate hash references based on the type of their keys. */ +- lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && +- (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); ++ lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && ++ (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF), ++ "bad xREF IR op %d or %d", refa->o, refb->o); + if (!irt_sametype(keya->t, keyb->t)) + return ALIAS_NO; /* Different key types. */ + } +@@ -180,7 +182,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) + } + ref = store->prev; + } +- lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t)); ++ if (ir->o == IR_TNEW && !irt_isnil(fins->t)) ++ return 0; /* Type instability in loop-carried dependency. */ + if (irt_ispri(fins->t)) { + return TREF_PRI(irt_type(fins->t)); + } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) || +@@ -191,7 +194,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) + if (key->o == IR_KSLOT) key = IR(key->op1); + lj_ir_kvalue(J->L, &keyv, key); + tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); +- lua_assert(itype2irt(tv) == irt_type(fins->t)); ++ lj_assertJ(itype2irt(tv) == irt_type(fins->t), ++ "mismatched type in constant table"); + if (irt_isnum(fins->t)) + return lj_ir_knum_u64(J, tv->u64); + else if (LJ_DUALNUM && irt_isint(fins->t)) +@@ -360,16 +364,16 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) + /* Different value: try to eliminate the redundant store. */ + if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. 
*/ + IRIns *ir; +- /* Check for any intervening guards (includes conflicting loads). */ ++ /* Check for any intervening guards (includes conflicting loads). ++ ** Note that lj_tab_keyindex and lj_vm_next don't need guards, ++ ** since they are followed by at least one guarded VLOAD. ++ */ + for (ir = IR(J->cur.nins-1); ir > store; ir--) +- if (irt_isguard(ir->t) || ir->o == IR_CALLL) ++ if (irt_isguard(ir->t) || ir->o == IR_ALEN) + goto doemit; /* No elimination possible. */ + /* Remove redundant store from chain and replace with NOP. */ + *refp = store->prev; +- store->o = IR_NOP; +- store->t.irt = IRT_NIL; +- store->op1 = store->op2 = 0; +- store->prev = 0; ++ lj_ir_nop(store); + /* Now emit the new store instead. */ + } + goto doemit; +@@ -380,6 +384,67 @@ doemit: + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ + } + ++/* ALEN forwarding. */ ++TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J) ++{ ++ IRRef tab = fins->op1; /* Table reference. */ ++ IRRef lim = tab; /* Search limit. */ ++ IRRef ref; ++ ++ /* Search for conflicting HSTORE with numeric key. */ ++ ref = J->chain[IR_HSTORE]; ++ while (ref > lim) { ++ IRIns *store = IR(ref); ++ IRIns *href = IR(store->op1); ++ IRIns *key = IR(href->op2); ++ if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { ++ lim = ref; /* Conflicting store found, limits search for ALEN. */ ++ break; ++ } ++ ref = store->prev; ++ } ++ ++ /* Try to find a matching ALEN. */ ++ ref = J->chain[IR_ALEN]; ++ while (ref > lim) { ++ /* CSE for ALEN only depends on the table, not the hint. */ ++ if (IR(ref)->op1 == tab) { ++ IRRef sref; ++ ++ /* Search for aliasing table.clear. */ ++ if (!fwd_aa_tab_clear(J, ref, tab)) ++ break; ++ ++ /* Search for hint-forwarding or conflicting store. */ ++ sref = J->chain[IR_ASTORE]; ++ while (sref > ref) { ++ IRIns *store = IR(sref); ++ IRIns *aref = IR(store->op1); ++ IRIns *fref = IR(aref->op1); ++ if (tab == fref->op1) { /* ASTORE to the same table. */ ++ /* Detect t[#t+1] = x idiom for push. */ ++ IRIns *idx = IR(aref->op2); ++ if (!irt_isnil(store->t) && ++ idx->o == IR_ADD && idx->op1 == ref && ++ IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) { ++ /* Note: this requires an extra PHI check in loop unroll. */ ++ fins->op2 = aref->op2; /* Set ALEN hint. */ ++ } ++ goto doemit; /* Conflicting store, possibly giving a hint. */ ++ } else if (aa_table(J, tab, fref->op1) == ALIAS_NO) { ++ goto doemit; /* Conflicting store. */ ++ } ++ sref = store->prev; ++ } ++ ++ return ref; /* Plain ALEN forwarding. */ ++ } ++ ref = IR(ref)->prev; ++ } ++doemit: ++ return EMITFOLD; ++} ++ + /* -- ULOAD forwarding ---------------------------------------------------- */ + + /* The current alias analysis for upvalues is very simplistic. It only +@@ -429,7 +494,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) + + cselim: + /* Try to find a matching load. Below the conflicting store, if any. */ +- + ref = J->chain[IR_ULOAD]; + while (ref > lim) { + IRIns *ir = IR(ref); +@@ -470,10 +534,7 @@ TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J) + goto doemit; /* No elimination possible. */ + /* Remove redundant store from chain and replace with NOP. 
*/ + *refp = store->prev; +- store->o = IR_NOP; +- store->t.irt = IRT_NIL; +- store->op1 = store->op2 = 0; +- store->prev = 0; ++ lj_ir_nop(store); + if (ref+1 < J->cur.nins && + store[1].o == IR_OBAR && store[1].op1 == xref) { + IRRef1 *bp = &J->chain[IR_OBAR]; +@@ -482,10 +543,7 @@ TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J) + bp = &obar->prev; + /* Remove OBAR, too. */ + *bp = obar->prev; +- obar->o = IR_NOP; +- obar->t.irt = IRT_NIL; +- obar->op1 = obar->op2 = 0; +- obar->prev = 0; ++ lj_ir_nop(obar); + } + /* Now emit the new store instead. */ + } +@@ -565,8 +623,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) + goto doemit; + break; /* Otherwise continue searching. */ + case ALIAS_MUST: +- if (store->op2 == val) /* Same value: drop the new store. */ +- return DROPFOLD; ++ if (store->op2 == val && ++ !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R)) ++ return DROPFOLD; /* Same value: drop the new store. */ + /* Different value: try to eliminate the redundant store. */ + if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ + IRIns *ir; +@@ -576,10 +635,7 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) + goto doemit; /* No elimination possible. */ + /* Remove redundant store from chain and replace with NOP. */ + *refp = store->prev; +- store->o = IR_NOP; +- store->t.irt = IRT_NIL; +- store->op1 = store->op2 = 0; +- store->prev = 0; ++ lj_ir_nop(store); + /* Now emit the new store instead. */ + } + goto doemit; +@@ -590,6 +646,29 @@ doemit: + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ + } + ++/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */ ++int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim) ++{ ++ IRRef ref; ++ if (J->chain[IR_BUFPUT] > lim) ++ return 0; /* Conflict. */ ++ ref = J->chain[IR_CALLS]; ++ while (ref > lim) { ++ IRIns *ir = IR(ref); ++ if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) ++ return 0; /* Conflict. */ ++ ref = ir->prev; ++ } ++ ref = J->chain[IR_CALLL]; ++ while (ref > lim) { ++ IRIns *ir = IR(ref); ++ if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) ++ return 0; /* Conflict. */ ++ ref = ir->prev; ++ } ++ return 1; /* No conflict. Can safely FOLD/CSE. */ ++} ++ + /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */ + + /* Find cdata allocation for a reference (if any). */ +@@ -830,10 +909,7 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) + goto doemit; /* No elimination possible. */ + /* Remove redundant store from chain and replace with NOP. */ + *refp = store->prev; +- store->o = IR_NOP; +- store->t.irt = IRT_NIL; +- store->op1 = store->op2 = 0; +- store->prev = 0; ++ lj_ir_nop(store); + /* Now emit the new store instead. */ + } + goto doemit; +@@ -844,39 +920,6 @@ doemit: + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ + } + +-/* -- Forwarding of lj_tab_len -------------------------------------------- */ +- +-/* This is rather simplistic right now, but better than nothing. */ +-TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) +-{ +- IRRef tab = fins->op1; /* Table reference. */ +- IRRef lim = tab; /* Search limit. */ +- IRRef ref; +- +- /* Any ASTORE is a conflict and limits the search. */ +- if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; +- +- /* Search for conflicting HSTORE with numeric key. 
*/ +- ref = J->chain[IR_HSTORE]; +- while (ref > lim) { +- IRIns *store = IR(ref); +- IRIns *href = IR(store->op1); +- IRIns *key = IR(href->op2); +- if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { +- lim = ref; /* Conflicting store found, limits search for TLEN. */ +- break; +- } +- ref = store->prev; +- } +- +- /* Search for aliasing table.clear. */ +- if (!fwd_aa_tab_clear(J, lim, tab)) +- return lj_ir_emit(J); +- +- /* Try to find a matching load. Below the conflicting store, if any. */ +- return lj_opt_cselim(J, lim); +-} +- + /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ + + /* Check whether the previous value for a table store is non-nil. +diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c +index cd96ca4b..1a332bca 100644 +--- a/src/lj_opt_narrow.c ++++ b/src/lj_opt_narrow.c +@@ -1,7 +1,7 @@ + /* + ** NARROW: Narrowing of numbers to integers (double to int32_t). + ** STRIPOV: Stripping of overflow checks. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_opt_narrow_c +@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) + } else if (op == NARROW_CONV) { + *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ + } else if (op == NARROW_SEXT) { +- lua_assert(sp >= nc->stack+1); ++ lj_assertJ(sp >= nc->stack+1, "stack underflow"); + sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], + (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); + } else if (op == NARROW_INT) { +- lua_assert(next < last); ++ lj_assertJ(next < last, "missing arg to NARROW_INT"); + *sp++ = nc->t == IRT_I64 ? + lj_ir_kint64(J, (int64_t)(int32_t)*next++) : + lj_ir_kint(J, *next++); + } else { /* Regular IROpT. Pops two operands and pushes one result. */ + IRRef mode = nc->mode; +- lua_assert(sp >= nc->stack+2); ++ lj_assertJ(sp >= nc->stack+2, "stack underflow"); + sp--; + /* Omit some overflow checks for array indexing. See comments above. */ + if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { +@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) + narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); + } + } +- lua_assert(sp == nc->stack+1); ++ lj_assertJ(sp == nc->stack+1, "stack misalignment"); + return nc->stack[0]; + } + +@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) + TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) + { + IRIns *ir; +- lua_assert(tref_isnumber(tr)); ++ lj_assertJ(tref_isnumber(tr), "expected number type"); + if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ + return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); + /* Omit some overflow checks for array indexing. See comments above. */ +@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) + /* Narrow C array index (overflow undefined). */ + TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) + { +- lua_assert(tref_isnumber(tr)); ++ lj_assertJ(tref_isnumber(tr), "expected number type"); + if (tref_isnum(tr)) + return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); + /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. 
*/ +@@ -551,8 +551,13 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) + { + rc = conv_str_tonum(J, rc, vc); + if (tref_isinteger(rc)) { +- if ((uint32_t)numberVint(vc) != 0x80000000u) +- return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); ++ uint32_t k = (uint32_t)numberVint(vc); ++ if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { ++ TRef zero = lj_ir_kint(J, 0); ++ if (!LJ_DUALNUM) ++ emitir(IRTGI(IR_NE), rc, zero); ++ return emitir(IRTGI(IR_SUBOV), zero, rc); ++ } + rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); + } + return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); +@@ -588,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) + /* Narrowing must be unconditional to preserve (-x)^i semantics. */ + if (tvisint(vc) || numisint(numV(vc))) { + int checkrange = 0; +- /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ ++ /* pow() is faster for bigger exponents. But do this only for (+k)^i. */ + if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { + int32_t k = numberVint(vc); +- if (!(k >= -65536 && k <= 65536)) goto split_pow; ++ if (!(k >= -65536 && k <= 65536)) goto force_pow_num; + checkrange = 1; + } + if (!tref_isinteger(rc)) { +@@ -602,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) + TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); + emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); + } +- return emitir(IRTN(IR_POW), rb, rc); ++ } else { ++force_pow_num: ++ rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */ + } +-split_pow: +- /* FOLD covers most cases, but some are easier to do here. */ +- if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) +- return rb; /* 1 ^ x ==> 1 */ +- rc = lj_ir_tonum(J, rc); +- if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) +- return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ +- /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ +- rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); +- rc = emitir(IRTN(IR_MUL), rb, rc); +- return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); ++ return emitir(IRTN(IR_POW), rb, rc); + } + + /* -- Predictive narrowing of induction variables ------------------------- */ +@@ -630,9 +627,10 @@ static int narrow_forl(jit_State *J, cTValue *o) + /* Narrow the FORL index type by looking at the runtime values. */ + IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) + { +- lua_assert(tvisnumber(&tv[FORL_IDX]) && ++ lj_assertJ(tvisnumber(&tv[FORL_IDX]) && + tvisnumber(&tv[FORL_STOP]) && +- tvisnumber(&tv[FORL_STEP])); ++ tvisnumber(&tv[FORL_STEP]), ++ "expected number types"); + /* Narrow only if the runtime values of start/stop/step are all integers. */ + if (narrow_forl(J, &tv[FORL_IDX]) && + narrow_forl(J, &tv[FORL_STOP]) && +diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c +index 929ccb61..5306a7db 100644 +--- a/src/lj_opt_sink.c ++++ b/src/lj_opt_sink.c +@@ -1,6 +1,6 @@ + /* + ** SINK: Allocation Sinking and Store Sinking. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_opt_sink_c +@@ -78,8 +78,7 @@ static void sink_mark_ins(jit_State *J) + switch (ir->o) { + case IR_BASE: + return; /* Finished. 
*/ +- case IR_CALLL: /* IRCALL_lj_tab_len */ +- case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: ++ case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN: + irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ + break; + case IR_FLOAD: +@@ -100,8 +99,8 @@ static void sink_mark_ins(jit_State *J) + (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && + !sink_checkphi(J, ir, (ir+1)->op2)))) + irt_setmark(ir->t); /* Mark ineligible allocation. */ +- /* fallthrough */ + #endif ++ /* fallthrough */ + case IR_USTORE: + irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ + break; +@@ -219,6 +218,7 @@ static void sink_sweep_ins(jit_State *J) + for (ir = IR(J->cur.nk); ir < irbase; ir++) { + irt_clearmark(ir->t); + ir->prev = REGSP_INIT; ++ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ir++; + } +diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c +index fc935204..25c1c234 100644 +--- a/src/lj_opt_split.c ++++ b/src/lj_opt_split.c +@@ -1,6 +1,6 @@ + /* + ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_opt_split_c +@@ -8,7 +8,7 @@ + + #include "lj_obj.h" + +-#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) ++#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) + + #include "lj_err.h" + #include "lj_buf.h" +@@ -235,7 +235,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, + return split_emit(J, IRTI(IR_BOR), t1, t2); + } else { + IRRef t1 = ir->prev, t2; +- lua_assert(op == IR_BSHR || op == IR_BSAR); ++ lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); + nir->o = IR_BSHR; + t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); + ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); +@@ -250,7 +250,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, + ir->prev = lj_ir_kint(J, 0); + return lo; + } else { +- lua_assert(op == IR_BSHR || op == IR_BSAR); ++ lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); + if (k == 32) { + J->cur.nins--; + ir->prev = hi; +@@ -403,32 +403,8 @@ static void split_ir(jit_State *J) + hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); + break; + case IR_FPMATH: +- /* Try to rejoin pow from EXP2, MUL and LOG2. */ +- if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { +- IRIns *irp = IR(nir->op1); +- if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { +- IRIns *irm4 = IR(irp->op1); +- IRIns *irm3 = IR(irm4->op1); +- IRIns *irm12 = IR(irm3->op1); +- IRIns *irl1 = IR(irm12->op1); +- if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && +- irl1->op2 == IRCALL_lj_vm_log2) { +- IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. 
*/ +- IRRef arg3 = irm3->op2, arg4 = irm4->op2; +- J->cur.nins--; +- tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); +- tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); +- ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); +- hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); +- break; +- } +- } +- } + hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); + break; +- case IR_ATAN2: +- hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); +- break; + case IR_LDEXP: + hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); + break; +@@ -453,7 +429,7 @@ static void split_ir(jit_State *J) + hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); + break; + case IR_FLOAD: +- lua_assert(ir->op1 == REF_NIL); ++ lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State"); + hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); + nir->op2 += LJ_BE*4; + break; +@@ -489,8 +465,9 @@ static void split_ir(jit_State *J) + break; + } + #endif +- lua_assert(st == IRT_INT || +- (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); ++ lj_assertJ(st == IRT_INT || ++ (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)), ++ "bad source type for CONV"); + nir->o = IR_CALLN; + #if LJ_32 && LJ_HASFFI + nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : +@@ -520,7 +497,8 @@ static void split_ir(jit_State *J) + hi = nir->op2; + break; + default: +- lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); ++ lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX, ++ "bad IR op %d", ir->o); + hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), + hisubst[ir->op1], hisubst[ir->op2]); + break; +@@ -577,7 +555,7 @@ static void split_ir(jit_State *J) + hi = split_bitshift(J, hisubst, oir, nir, ir); + break; + case IR_FLOAD: +- lua_assert(ir->op2 == IRFL_CDATA_INT64); ++ lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported"); + hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); + #if LJ_BE + ir->prev = hi; hi = nref; +@@ -643,7 +621,7 @@ static void split_ir(jit_State *J) + hi = nir->op2; + break; + default: +- lua_assert(ir->o <= IR_NE); /* Comparisons. */ ++ lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */ + split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); + break; + } +@@ -667,7 +645,7 @@ static void split_ir(jit_State *J) + tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); + #endif + ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); +- } else if (ir->o == IR_TOSTR) { ++ } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) { + if (hisubst[ir->op1]) { + if (irref_isk(ir->op1)) + nir->op1 = ir->op1; +@@ -721,7 +699,7 @@ static void split_ir(jit_State *J) + #if LJ_SOFTFP + if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { + if (irt_isguard(ir->t)) { +- lua_assert(st == IRT_NUM && irt_isint(ir->t)); ++ lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types"); + J->cur.nins--; + ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); + } else { +@@ -852,7 +830,7 @@ void lj_opt_split(jit_State *J) + if (!J->needsplit) + J->needsplit = split_needsplit(J); + #else +- lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. 
*/ ++ lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state"); + #endif + if (J->needsplit) { + int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); +diff --git a/src/lj_parse.c b/src/lj_parse.c +index 08f7cfa6..ea64677f 100644 +--- a/src/lj_parse.c ++++ b/src/lj_parse.c +@@ -1,6 +1,6 @@ + /* + ** Lua parser (source code -> bytecode). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -163,6 +163,12 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD); + LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); + LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); + ++#ifdef LUA_USE_ASSERT ++#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__)) ++#else ++#define lj_assertFS(c, ...) ((void)fs) ++#endif ++ + /* -- Error handling ------------------------------------------------------ */ + + LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) +@@ -200,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e) + { + lua_State *L = fs->L; + TValue *o; +- lua_assert(expr_isnumk(e)); ++ lj_assertFS(expr_isnumk(e), "bad usage"); + o = lj_tab_set(L, fs->kt, &e->u.nval); + if (tvhaskslot(o)) + return tvkslot(o); +@@ -225,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype) + /* Add a string constant. */ + static BCReg const_str(FuncState *fs, ExpDesc *e) + { +- lua_assert(expr_isstrk(e) || e->k == VGLOBAL); ++ lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage"); + return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); + } + +@@ -313,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest) + { + BCIns *jmp = &fs->bcbase[pc].ins; + BCPos offset = dest-(pc+1)+BCBIAS_J; +- lua_assert(dest != NO_JMP); ++ lj_assertFS(dest != NO_JMP, "uninitialized jump target"); + if (offset > BCMAX_D) + err_syntax(fs->ls, LJ_ERR_XJUMP); + setbc_d(jmp, offset); +@@ -362,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target) + if (target == fs->pc) { + jmp_tohere(fs, list); + } else { +- lua_assert(target < fs->pc); ++ lj_assertFS(target < fs->pc, "bad jump target"); + jmp_patchval(fs, list, target, NO_REG, target); + } + } +@@ -392,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg) + { + if (reg >= fs->nactvar) { + fs->freereg--; +- lua_assert(reg == fs->freereg); ++ lj_assertFS(reg == fs->freereg, "bad regfree"); + } + } + +@@ -542,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) + } else if (e->k <= VKTRUE) { + ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); + } else { +- lua_assert(e->k == VVOID || e->k == VJMP); ++ lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k); + return; + } + bcemit_INS(fs, ins); +@@ -637,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) + ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); + } else { + BCReg ra, rc; +- lua_assert(var->k == VINDEXED); ++ lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k); + ra = expr_toanyreg(fs, e); + rc = var->u.s.aux; + if ((int32_t)rc < 0) { +@@ -645,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) + } else if (rc > BCMAX_C) { + ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); + } else { ++#ifdef 
LUA_USE_ASSERT + /* Free late alloced key reg to avoid assert on free of value reg. */ + /* This can only happen when called from expr_table(). */ +- lua_assert(e->k != VNONRELOC || ra < fs->nactvar || +- rc < ra || (bcreg_free(fs, rc),1)); ++ if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra) ++ bcreg_free(fs, rc); ++#endif + ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); + } + } +@@ -663,7 +671,7 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) + expr_free(fs, e); + func = fs->freereg; + bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */ +- lua_assert(expr_isstrk(key)); ++ lj_assertFS(expr_isstrk(key), "bad usage"); + idx = const_str(fs, key); + if (idx <= BCMAX_C) { + bcreg_reserve(fs, 2+LJ_FR2); +@@ -803,7 +811,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) + else + rc = expr_toanyreg(fs, e2); + /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ +- lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC); ++ lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC, ++ "bad expr type %d", e1->k); + expr_toval(fs, e1); + /* Avoid two consts to satisfy bytecode constraints. */ + if (expr_isnumk(e1) && !expr_isnumk(e2) && +@@ -853,9 +862,12 @@ static void bcemit_comp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) + e1 = e2; e2 = eret; /* Swap operands. */ + op = ((op-BC_ISLT)^3)+BC_ISLT; + expr_toval(fs, e1); ++ ra = expr_toanyreg(fs, e1); ++ rd = expr_toanyreg(fs, e2); ++ } else { ++ rd = expr_toanyreg(fs, e2); ++ ra = expr_toanyreg(fs, e1); + } +- rd = expr_toanyreg(fs, e2); +- ra = expr_toanyreg(fs, e1); + ins = BCINS_AD(op, ra, rd); + } + /* Using expr_free might cause asserts if the order is wrong. */ +@@ -888,19 +900,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) + if (op <= OPR_POW) { + bcemit_arith(fs, op, e1, e2); + } else if (op == OPR_AND) { +- lua_assert(e1->t == NO_JMP); /* List must be closed. */ ++ lj_assertFS(e1->t == NO_JMP, "jump list not closed"); + expr_discharge(fs, e2); + jmp_append(fs, &e2->f, e1->f); + *e1 = *e2; + } else if (op == OPR_OR) { +- lua_assert(e1->f == NO_JMP); /* List must be closed. */ ++ lj_assertFS(e1->f == NO_JMP, "jump list not closed"); + expr_discharge(fs, e2); + jmp_append(fs, &e2->t, e1->t); + *e1 = *e2; + } else if (op == OPR_CONCAT) { + expr_toval(fs, e2); + if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { +- lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); ++ lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1, ++ "bad CAT stack layout"); + expr_free(fs, e1); + setbc_b(bcptr(fs, e2), e1->u.s.info); + e1->u.s.info = e2->u.s.info; +@@ -912,8 +925,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) + } + e1->k = VRELOCABLE; + } else { +- lua_assert(op == OPR_NE || op == OPR_EQ || +- op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT); ++ lj_assertFS(op == OPR_NE || op == OPR_EQ || ++ op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT, ++ "bad binop %d", op); + bcemit_comp(fs, op, e1, e2); + } + } +@@ -942,10 +956,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) + e->u.s.info = fs->freereg-1; + e->k = VNONRELOC; + } else { +- lua_assert(e->k == VNONRELOC); ++ lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); + } + } else { +- lua_assert(op == BC_UNM || op == BC_LEN); ++ lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op); + if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. 
*/ + #if LJ_HASFFI + if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ +@@ -1040,8 +1054,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name) + lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); + lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); + } +- lua_assert((uintptr_t)name < VARNAME__MAX || +- lj_tab_getstr(fs->kt, name) != NULL); ++ lj_assertFS((uintptr_t)name < VARNAME__MAX || ++ lj_tab_getstr(fs->kt, name) != NULL, ++ "unanchored variable name"); + /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ + setgcref(ls->vstack[vtop].name, obj2gco(name)); + fs->varmap[fs->nactvar+n] = (uint16_t)vtop; +@@ -1096,7 +1111,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e) + return i; /* Already exists. */ + /* Otherwise create a new one. */ + checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); +- lua_assert(e->k == VLOCAL || e->k == VUPVAL); ++ lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k); + fs->uvmap[n] = (uint16_t)vidx; + fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info); + fs->nuv = n+1; +@@ -1147,7 +1162,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc) + lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); + lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); + } +- lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL); ++ lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL, ++ "unanchored label name"); + /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ + setgcref(ls->vstack[vtop].name, obj2gco(name)); + ls->vstack[vtop].startpc = pc; +@@ -1177,8 +1193,9 @@ static void gola_close(LexState *ls, VarInfo *vg) + FuncState *fs = ls->fs; + BCPos pc = vg->startpc; + BCIns *ip = &fs->bcbase[pc].ins; +- lua_assert(gola_isgoto(vg)); +- lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO); ++ lj_assertFS(gola_isgoto(vg), "expected goto"); ++ lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO, ++ "bad bytecode op %d", bc_op(*ip)); + setbc_a(ip, vg->slot); + if (bc_op(*ip) == BC_JMP) { + BCPos next = jmp_next(fs, pc); +@@ -1197,9 +1214,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx) + if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { + if (vg->slot < vl->slot) { + GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); +- lua_assert((uintptr_t)name >= VARNAME__MAX); ++ lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name"); + ls->linenumber = ls->fs->bcbase[vg->startpc].line; +- lua_assert(strref(vg->name) != NAME_BREAK); ++ lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break"); + lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, + strdata(strref(vg->name)), strdata(name)); + } +@@ -1263,7 +1280,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags) + bl->vstart = fs->ls->vtop; + bl->prev = fs->bl; + fs->bl = bl; +- lua_assert(fs->freereg == fs->nactvar); ++ lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc"); + } + + /* End a scope. 
*/ +@@ -1274,7 +1291,7 @@ static void fscope_end(FuncState *fs) + fs->bl = bl->prev; + var_remove(ls, bl->nactvar); + fs->freereg = fs->nactvar; +- lua_assert(bl->nactvar == fs->nactvar); ++ lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc"); + if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) + bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); + if ((bl->flags & FSCOPE_BREAK)) { +@@ -1361,13 +1378,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) + Node *n = &node[i]; + if (tvhaskslot(&n->val)) { + ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); +- lua_assert(!tvisint(&n->key)); ++ lj_assertFS(!tvisint(&n->key), "unexpected integer key"); + if (tvisnum(&n->key)) { + TValue *tv = &((TValue *)kptr)[kidx]; + if (LJ_DUALNUM) { + lua_Number nn = numV(&n->key); + int32_t k = lj_num2int(nn); +- lua_assert(!tvismzero(&n->key)); ++ lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); + if ((lua_Number)k == nn) + setintV(tv, k); + else +@@ -1415,21 +1432,21 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt, + uint8_t *li = (uint8_t *)lineinfo; + do { + BCLine delta = base[i].line - first; +- lua_assert(delta >= 0 && delta < 256); ++ lj_assertFS(delta >= 0 && delta < 256, "bad line delta"); + li[i] = (uint8_t)delta; + } while (++i < n); + } else if (LJ_LIKELY(numline < 65536)) { + uint16_t *li = (uint16_t *)lineinfo; + do { + BCLine delta = base[i].line - first; +- lua_assert(delta >= 0 && delta < 65536); ++ lj_assertFS(delta >= 0 && delta < 65536, "bad line delta"); + li[i] = (uint16_t)delta; + } while (++i < n); + } else { + uint32_t *li = (uint32_t *)lineinfo; + do { + BCLine delta = base[i].line - first; +- lua_assert(delta >= 0); ++ lj_assertFS(delta >= 0, "bad line delta"); + li[i] = (uint32_t)delta; + } while (++i < n); + } +@@ -1448,7 +1465,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) + MSize len = s->len+1; + char *p = lj_buf_more(&ls->sb, len); + p = lj_buf_wmem(p, strdata(s), len); +- setsbufP(&ls->sb, p); ++ ls->sb.w = p; + } + *ofsvar = sbuflen(&ls->sb); + lastpc = 0; +@@ -1469,7 +1486,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) + startpc = vs->startpc; + p = lj_strfmt_wuleb128(p, startpc-lastpc); + p = lj_strfmt_wuleb128(p, vs->endpc-startpc); +- setsbufP(&ls->sb, p); ++ ls->sb.w = p; + lastpc = startpc; + } + } +@@ -1482,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) + { + setmref(pt->uvinfo, p); + setmref(pt->varinfo, (char *)p + ofsvar); +- memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */ ++ memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */ + } + #else + +@@ -1519,7 +1536,7 @@ static void fs_fixup_ret(FuncState *fs) + } + fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ + fscope_end(fs); +- lua_assert(fs->bl == NULL); ++ lj_assertFS(fs->bl == NULL, "bad scope nesting"); + /* May need to fixup returns encoded before first function was created. */ + if (fs->flags & PROTO_FIXUP_RETURN) { + BCPos pc; +@@ -1591,7 +1608,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) + L->top--; /* Pop table of constants. */ + ls->vtop = fs->vbase; /* Reset variable stack. */ + ls->fs = fs->prev; +- lua_assert(ls->fs != NULL || ls->tok == TK_eof); ++ lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state"); + return pt; + } + +@@ -1685,14 +1702,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v) + } + + /* Get value of constant expression. 
*/ +-static void expr_kvalue(TValue *v, ExpDesc *e) ++static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e) + { ++ UNUSED(fs); + if (e->k <= VKTRUE) { + setpriV(v, ~(uint32_t)e->k); + } else if (e->k == VKSTR) { + setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR); + } else { +- lua_assert(tvisnumber(expr_numtv(e))); ++ lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant"); + *v = *expr_numtv(e); + } + } +@@ -1742,11 +1760,11 @@ static void expr_table(LexState *ls, ExpDesc *e) + fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); + } + vcall = 0; +- expr_kvalue(&k, &key); ++ expr_kvalue(fs, &k, &key); + v = lj_tab_set(fs->L, t, &k); + lj_gc_anybarriert(fs->L, t); + if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ +- expr_kvalue(v, &val); ++ expr_kvalue(fs, v, &val); + } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ + settabV(fs->L, v, t); /* Preserve key with table itself as value. */ + fixt = 1; /* Fix this later, after all resizes. */ +@@ -1765,8 +1783,9 @@ static void expr_table(LexState *ls, ExpDesc *e) + if (vcall) { + BCInsLine *ilp = &fs->bcbase[fs->pc-1]; + ExpDesc en; +- lua_assert(bc_a(ilp->ins) == freg && +- bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB)); ++ lj_assertFS(bc_a(ilp->ins) == freg && ++ bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB), ++ "bad CALL code generation"); + expr_init(&en, VKNUM, 0); + en.u.nval.u32.lo = narr-1; + en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */ +@@ -1796,7 +1815,7 @@ static void expr_table(LexState *ls, ExpDesc *e) + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + if (tvistab(&n->val)) { +- lua_assert(tabV(&n->val) == t); ++ lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); + setnilV(&n->val); /* Turn value into nil. */ + } + } +@@ -1827,7 +1846,7 @@ static BCReg parse_params(LexState *ls, int needself) + } while (lex_opt(ls, ',')); + } + var_add(ls, nparams); +- lua_assert(fs->nactvar == nparams); ++ lj_assertFS(fs->nactvar == nparams, "bad regalloc"); + bcreg_reserve(fs, nparams); + lex_check(ls, ')'); + return nparams; +@@ -1914,7 +1933,7 @@ static void parse_args(LexState *ls, ExpDesc *e) + err_syntax(ls, LJ_ERR_XFUNARG); + return; /* Silence compiler. */ + } +- lua_assert(e->k == VNONRELOC); ++ lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); + base = e->u.s.info; /* Base register for call. */ + if (args.k == VCALL) { + ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2); +@@ -2667,7 +2686,8 @@ static int parse_stmt(LexState *ls) + lj_lex_next(ls); + parse_goto(ls); + break; +- } /* else: fallthrough */ ++ } ++ /* fallthrough */ + default: + parse_call_assign(ls); + break; +@@ -2683,8 +2703,9 @@ static void parse_chunk(LexState *ls) + while (!islast && !parse_isend(ls->tok)) { + islast = parse_stmt(ls); + lex_opt(ls, ';'); +- lua_assert(ls->fs->framesize >= ls->fs->freereg && +- ls->fs->freereg >= ls->fs->nactvar); ++ lj_assertLS(ls->fs->framesize >= ls->fs->freereg && ++ ls->fs->freereg >= ls->fs->nactvar, ++ "bad regalloc"); + ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ + } + synlevel_end(ls); +@@ -2719,9 +2740,8 @@ GCproto *lj_parse(LexState *ls) + err_token(ls, TK_eof); + pt = fs_finish(ls, ls->linenumber); + L->top--; /* Drop chunkname. 
*/ +- lua_assert(fs.prev == NULL); +- lua_assert(ls->fs == NULL); +- lua_assert(pt->sizeuv == 0); ++ lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting"); ++ lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues"); + return pt; + } + +diff --git a/src/lj_parse.h b/src/lj_parse.h +index ceeab699..5207023f 100644 +--- a/src/lj_parse.h ++++ b/src/lj_parse.h +@@ -1,6 +1,6 @@ + /* + ** Lua parser (source code -> bytecode). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_PARSE_H +diff --git a/src/lj_prng.c b/src/lj_prng.c +new file mode 100644 +index 00000000..bb32da8b +--- /dev/null ++++ b/src/lj_prng.c +@@ -0,0 +1,250 @@ ++/* ++** Pseudo-random number generation. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#define lj_prng_c ++#define LUA_CORE ++ ++/* To get the syscall prototype. */ ++#if defined(__linux__) && !defined(_GNU_SOURCE) ++#define _GNU_SOURCE ++#endif ++ ++#include "lj_def.h" ++#include "lj_arch.h" ++#include "lj_prng.h" ++ ++/* -- PRNG step function -------------------------------------------------- */ ++ ++/* This implements a Tausworthe PRNG with period 2^223. Based on: ++** Tables of maximally-equidistributed combined LFSR generators, ++** Pierre L'Ecuyer, 1991, table 3, 1st entry. ++** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. ++** ++** Important note: This PRNG is NOT suitable for cryptographic use! ++** ++** But it works fine for math.random(), which has an API that's not ++** suitable for cryptography, anyway. ++** ++** When used as a securely seeded global PRNG, it substantially raises ++** the difficulty for various attacks on the VM. ++*/ ++ ++/* Update generator i and compute a running xor of all states. */ ++#define TW223_GEN(rs, z, r, i, k, q, s) \ ++ z = rs->u[i]; \ ++ z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ ++ r ^= z; rs->u[i] = z; ++ ++#define TW223_STEP(rs, z, r) \ ++ TW223_GEN(rs, z, r, 0, 63, 31, 18) \ ++ TW223_GEN(rs, z, r, 1, 58, 19, 28) \ ++ TW223_GEN(rs, z, r, 2, 55, 24, 7) \ ++ TW223_GEN(rs, z, r, 3, 47, 21, 8) ++ ++/* PRNG step function with uint64_t result. */ ++LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs) ++{ ++ uint64_t z, r = 0; ++ TW223_STEP(rs, z, r) ++ return r; ++} ++ ++/* PRNG step function with double in uint64_t result. */ ++LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs) ++{ ++ uint64_t z, r = 0; ++ TW223_STEP(rs, z, r) ++ /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */ ++ return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); ++} ++ ++/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */ ++static LJ_AINLINE void lj_prng_condition(PRNGState *rs) ++{ ++ if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1); ++ if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6); ++ if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9); ++ if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17); ++} ++ ++/* -- PRNG seeding from OS ------------------------------------------------ */ ++ ++#if LUAJIT_SECURITY_PRNG == 0 ++ ++/* Nothing to define. 
*/ ++ ++#elif LJ_TARGET_XBOX360 ++ ++extern int XNetRandom(void *buf, unsigned int len); ++ ++#elif LJ_TARGET_PS3 ++ ++extern int sys_get_random_number(void *buf, uint64_t len); ++ ++#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA ++ ++extern int sceRandomGetRandomNumber(void *buf, size_t len); ++ ++#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE ++ ++#define WIN32_LEAN_AND_MEAN ++#include <windows.h> ++ ++#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE ++/* Must use BCryptGenRandom. */ ++#include <bcrypt.h> ++#pragma comment(lib, "bcrypt.lib") ++#else ++/* If you wonder about this mess, then search online for RtlGenRandom. */ ++typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len); ++static PRGR libfunc_rgr; ++#endif ++ ++#elif LJ_TARGET_POSIX ++ ++#if LJ_TARGET_LINUX ++/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */ ++#include <sys/syscall.h> ++#else ++ ++#if LJ_TARGET_OSX && !LJ_TARGET_IOS ++/* ++** In their infinite wisdom Apple decided to disallow getentropy() in the ++** iOS App Store. Even though the call is common to all BSD-ish OS, it's ++** recommended by Apple in their own security-related docs, and, to top ++** off the foolery, /dev/urandom is handled by the same kernel code, ++** yet accessing it is actually permitted (but less efficient). ++*/ ++#include <Availability.h> ++#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 ++#define LJ_TARGET_HAS_GETENTROPY 1 ++#endif ++#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN ++#define LJ_TARGET_HAS_GETENTROPY 1 ++#endif ++ ++#if LJ_TARGET_HAS_GETENTROPY ++extern int getentropy(void *buf, size_t len); ++#ifdef __ELF__ ++ __attribute__((weak)) ++#endif ++; ++#endif ++ ++#endif ++ ++/* For the /dev/urandom fallback. */ ++#include <fcntl.h> ++#include <unistd.h> ++ ++#endif ++ ++#if LUAJIT_SECURITY_PRNG == 0 ++ ++/* If you really don't care about security, then define ++** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed ++** and provides NO SECURITY against various attacks on the VM. ++** ++** BTW: This is NOT the way to get predictable table iteration, ++** predictable trace generation, predictable bytecode generation, etc. ++*/ ++int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) ++{ ++ lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */ ++ return 1; ++} ++ ++#else ++ ++/* Securely seed PRNG from system entropy. Returns 0 on failure. */ ++int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) ++{ ++#if LJ_TARGET_XBOX360 ++ ++ if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0) ++ goto ok; ++ ++#elif LJ_TARGET_PS3 ++ ++ if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0) ++ goto ok; ++ ++#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA ++ ++ if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0) ++ goto ok; ++ ++#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE ++ ++ if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u), ++ BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0) ++ goto ok; ++ ++#elif LJ_TARGET_WINDOWS ++ ++ /* Keep the library loaded in case multiple VMs are started. 
*/ ++ if (!libfunc_rgr) { ++ HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll"); ++ if (!lib) return 0; ++ libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036"); ++ if (!libfunc_rgr) return 0; ++ } ++ if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u))) ++ goto ok; ++ ++#elif LJ_TARGET_POSIX ++ ++#if LJ_TARGET_LINUX && defined(SYS_getrandom) ++ ++ if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u)) ++ goto ok; ++ ++#elif LJ_TARGET_HAS_GETENTROPY ++ ++#ifdef __ELF__ ++ if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0) ++ goto ok; ++#else ++ if (getentropy(rs->u, sizeof(rs->u)) == 0) ++ goto ok; ++#endif ++ ++#endif ++ ++ /* Fallback to /dev/urandom. This may fail if the device is not ++ ** existent or accessible in a chroot or container, or if the process ++ ** or the OS ran out of file descriptors. ++ */ ++ { ++ int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC); ++ if (fd != -1) { ++ ssize_t n = read(fd, rs->u, sizeof(rs->u)); ++ (void)close(fd); ++ if (n == (ssize_t)sizeof(rs->u)) ++ goto ok; ++ } ++ } ++ ++#else ++ ++ /* Add an elif above for your OS with a secure PRNG seed. ++ ** Note that fiddling around with rand(), getpid(), time() or coercing ++ ** ASLR to yield a few bits of randomness is not helpful. ++ ** If you don't want any security, then don't pretend you have any ++ ** and simply define LUAJIT_SECURITY_PRNG=0 for the build. ++ */ ++#error "Missing secure PRNG seed for this OS" ++ ++#endif ++ return 0; /* Fail. */ ++ ++ok: ++ lj_prng_condition(rs); ++ (void)lj_prng_u64(rs); ++ return 1; /* Success. */ ++} ++ ++#endif ++ +diff --git a/src/lj_prng.h b/src/lj_prng.h +new file mode 100644 +index 00000000..216729be +--- /dev/null ++++ b/src/lj_prng.h +@@ -0,0 +1,24 @@ ++/* ++** Pseudo-random number generation. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#ifndef _LJ_PRNG_H ++#define _LJ_PRNG_H ++ ++#include "lj_def.h" ++ ++LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs); ++LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs); ++LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs); ++ ++/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */ ++static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs) ++{ ++ rs->u[0] = U64x(a0d27757,0a345b8c); ++ rs->u[1] = U64x(764a296c,5d4aa64f); ++ rs->u[2] = U64x(51220704,070adeaa); ++ rs->u[3] = U64x(2a2717b5,a7b7b927); ++} ++ ++#endif +diff --git a/src/lj_profile.c b/src/lj_profile.c +index 116998e1..fbcb9878 100644 +--- a/src/lj_profile.c ++++ b/src/lj_profile.c +@@ -1,6 +1,6 @@ + /* + ** Low-overhead profiling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_profile_c +@@ -153,7 +153,7 @@ static void profile_trigger(ProfileState *ps) + profile_lock(ps); + ps->samples++; /* Always increment number of samples. */ + mask = g->hookmask; +- if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */ ++ if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */ + int st = g->vmstate; + ps->vmstate = st >= 0 ? 'N' : + st == ~LJ_VMST_INTERP ? 
'I' : +@@ -247,7 +247,7 @@ static DWORD WINAPI profile_thread(void *psx) + { + ProfileState *ps = (ProfileState *)psx; + int interval = ps->interval; +-#if LJ_TARGET_WINDOWS ++#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP + ps->wmm_tbp(interval); + #endif + while (1) { +@@ -255,7 +255,7 @@ static DWORD WINAPI profile_thread(void *psx) + if (ps->abort) break; + profile_trigger(ps); + } +-#if LJ_TARGET_WINDOWS ++#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP + ps->wmm_tep(interval); + #endif + return 0; +@@ -264,9 +264,9 @@ static DWORD WINAPI profile_thread(void *psx) + /* Start profiling timer thread. */ + static void profile_timer_start(ProfileState *ps) + { +-#if LJ_TARGET_WINDOWS ++#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP + if (!ps->wmm) { /* Load WinMM library on-demand. */ +- ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0); ++ ps->wmm = LJ_WIN_LOADLIBA("winmm.dll"); + if (ps->wmm) { + ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); + ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); +@@ -346,8 +346,7 @@ LUA_API void luaJIT_profile_stop(lua_State *L) + lj_trace_flushall(L); + #endif + lj_buf_free(g, &ps->sb); +- setmref(ps->sb.b, NULL); +- setmref(ps->sb.e, NULL); ++ ps->sb.w = ps->sb.e = NULL; + ps->g = NULL; + } + } +@@ -362,7 +361,7 @@ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, + lj_buf_reset(sb); + lj_debug_dumpstack(L, sb, fmt, depth); + *len = (size_t)sbuflen(sb); +- return sbufB(sb); ++ return sb->b; + } + + #endif +diff --git a/src/lj_profile.h b/src/lj_profile.h +index 0cccfd78..96706ee3 100644 +--- a/src/lj_profile.h ++++ b/src/lj_profile.h +@@ -1,6 +1,6 @@ + /* + ** Low-overhead profiling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_PROFILE_H +diff --git a/src/lj_record.c b/src/lj_record.c +index 9d0469c4..30722814 100644 +--- a/src/lj_record.c ++++ b/src/lj_record.c +@@ -1,6 +1,6 @@ + /* + ** Trace recorder (bytecode -> SSA IR). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_record_c +@@ -33,6 +33,7 @@ + #include "lj_snap.h" + #include "lj_dispatch.h" + #include "lj_vm.h" ++#include "lj_prng.h" + + /* Some local macros to save typing. Undef'd at the end. */ + #define IR(ref) (&J->cur.ir[(ref)]) +@@ -50,34 +51,52 @@ + static void rec_check_ir(jit_State *J) + { + IRRef i, nins = J->cur.nins, nk = J->cur.nk; +- lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); ++ lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536, ++ "inconsistent IR layout"); + for (i = nk; i < nins; i++) { + IRIns *ir = IR(i); + uint32_t mode = lj_ir_mode[ir->o]; + IRRef op1 = ir->op1; + IRRef op2 = ir->op2; ++ const char *err = NULL; + switch (irm_op1(mode)) { +- case IRMnone: lua_assert(op1 == 0); break; +- case IRMref: lua_assert(op1 >= nk); +- lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; ++ case IRMnone: ++ if (op1 != 0) err = "IRMnone op1 used"; ++ break; ++ case IRMref: ++ if (op1 < nk || (i >= REF_BIAS ? 
op1 >= i : op1 <= i)) ++ err = "IRMref op1 out of range"; ++ break; + case IRMlit: break; +- case IRMcst: lua_assert(i < REF_BIAS); ++ case IRMcst: ++ if (i >= REF_BIAS) { err = "constant in IR range"; break; } + if (irt_is64(ir->t) && ir->o != IR_KNULL) + i++; + continue; + } + switch (irm_op2(mode)) { +- case IRMnone: lua_assert(op2 == 0); break; +- case IRMref: lua_assert(op2 >= nk); +- lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; ++ case IRMnone: ++ if (op2) err = "IRMnone op2 used"; ++ break; ++ case IRMref: ++ if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i)) ++ err = "IRMref op2 out of range"; ++ break; + case IRMlit: break; +- case IRMcst: lua_assert(0); break; ++ case IRMcst: err = "IRMcst op2"; break; + } +- if (ir->prev) { +- lua_assert(ir->prev >= nk); +- lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); +- lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); ++ if (!err && ir->prev) { ++ if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i)) ++ err = "chain out of range"; ++ else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o) ++ err = "chain to different op"; + } ++ lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s", ++ i-REF_BIAS, ++ ir->o, ++ irm_op1(mode) == IRMref ? op1-REF_BIAS : op1, ++ irm_op2(mode) == IRMref ? op2-REF_BIAS : op2, ++ err); + } + } + +@@ -87,9 +106,10 @@ static void rec_check_slots(jit_State *J) + BCReg s, nslots = J->baseslot + J->maxslot; + int32_t depth = 0; + cTValue *base = J->L->base - J->baseslot; +- lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); +- lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); +- lua_assert(nslots < LJ_MAX_JSLOTS); ++ lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot"); ++ lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME), ++ "baseslot does not point to frame"); ++ lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow"); + for (s = 0; s < nslots; s++) { + TRef tr = J->slot[s]; + if (tr) { +@@ -97,56 +117,68 @@ static void rec_check_slots(jit_State *J) + IRRef ref = tref_ref(tr); + IRIns *ir = NULL; /* Silence compiler. */ + if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { +- lua_assert(ref >= J->cur.nk && ref < J->cur.nins); ++ lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins, ++ "slot %d ref %04d out of range", s, ref - REF_BIAS); + ir = IR(ref); +- lua_assert(irt_t(ir->t) == tref_t(tr)); ++ lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s); + } + if (s == 0) { +- lua_assert(tref_isfunc(tr)); ++ lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function"); + #if LJ_FR2 + } else if (s == 1) { +- lua_assert((tr & ~TREF_FRAME) == 0); ++ lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1"); + #endif + } else if ((tr & TREF_FRAME)) { + GCfunc *fn = gco2func(frame_gc(tv)); + BCReg delta = (BCReg)(tv - frame_prev(tv)); + #if LJ_FR2 +- if (ref) +- lua_assert(ir_knum(ir)->u64 == tv->u64); ++ lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, ++ "frame slot %d PC mismatch", s); + tr = J->slot[s-1]; + ir = IR(tref_ref(tr)); + #endif +- lua_assert(tref_isfunc(tr)); +- if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); +- lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) +- : (s == delta + LJ_FR2)); ++ lj_assertJ(tref_isfunc(tr), ++ "frame slot %d is not a function", s-LJ_FR2); ++ lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir), ++ "frame slot %d function mismatch", s-LJ_FR2); ++ lj_assertJ(s > delta + LJ_FR2 ? 
(J->slot[s-delta] & TREF_FRAME) ++ : (s == delta + LJ_FR2), ++ "frame slot %d broken chain", s-LJ_FR2); + depth++; + } else if ((tr & TREF_CONT)) { + #if LJ_FR2 +- if (ref) +- lua_assert(ir_knum(ir)->u64 == tv->u64); ++ lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, ++ "cont slot %d continuation mismatch", s); + #else +- lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); ++ lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void), ++ "cont slot %d continuation mismatch", s); + #endif +- lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); ++ lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), ++ "cont slot %d not followed by frame", s); + depth++; ++ } else if ((tr & TREF_KEYINDEX)) { ++ lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d", ++ s, tref_type(tr)); + } else { +- if (tvisnumber(tv)) +- lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ +- else +- lua_assert(itype2irt(tv) == tref_type(tr)); ++ /* Number repr. may differ, but other types must be the same. */ ++ lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : ++ itype2irt(tv) == tref_type(tr), ++ "slot %d type mismatch: stack type %d vs IR type %d", ++ s, itypemap(tv), tref_type(tr)); + if (tref_isk(tr)) { /* Compare constants. */ + TValue tvk; + lj_ir_kvalue(J->L, &tvk, ir); +- if (!(tvisnum(&tvk) && tvisnan(&tvk))) +- lua_assert(lj_obj_equal(tv, &tvk)); +- else +- lua_assert(tvisnum(tv) && tvisnan(tv)); ++ lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ? ++ (tvisnum(tv) && tvisnan(tv)) : ++ lj_obj_equal(tv, &tvk), ++ "slot %d const mismatch: stack %016llx vs IR %016llx", ++ s, tv->u64, tvk.u64); + } + } + } + } +- lua_assert(J->framedepth == depth); ++ lj_assertJ(J->framedepth == depth, ++ "frame depth mismatch %d vs %d", J->framedepth, depth); + } + #endif + +@@ -182,7 +214,8 @@ static TRef getcurrf(jit_State *J) + { + if (J->base[-1-LJ_FR2]) + return J->base[-1-LJ_FR2]; +- lua_assert(J->baseslot == 1+LJ_FR2); ++ /* Non-base frame functions ought to be loaded already. */ ++ lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot"); + return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); + } + +@@ -229,6 +262,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o) + return 0; /* Can't represent lightuserdata (pointless). */ + } + ++/* Emit a VLOAD with the correct type. */ ++TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t) ++{ ++ TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx); ++ if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ ++ return tr; ++} ++ + /* -- Record loop ops ----------------------------------------------------- */ + + /* Loop event. 
*/ +@@ -245,9 +286,9 @@ static void canonicalize_slots(jit_State *J) + if (LJ_DUALNUM) return; + for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { + TRef tr = J->slot[s]; +- if (tref_isinteger(tr)) { ++ if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) { + IRIns *ir = IR(tref_ref(tr)); +- if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) ++ if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY)))) + J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); + } + } +@@ -427,7 +468,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, + TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); + TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); + int tc, dir = rec_for_direction(&tv[FORL_STEP]); +- lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); ++ lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI, ++ "bad bytecode %d instead of FORI/JFORI", bc_op(*fori)); + scev->t.irt = t; + scev->dir = dir; + scev->stop = tref_ref(stop); +@@ -483,7 +525,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) + IRT_NUM; + for (i = FORL_IDX; i <= FORL_STEP; i++) { + if (!tr[i]) sload(J, ra+i); +- lua_assert(tref_isnumber_str(tr[i])); ++ lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type"); + if (tref_isstr(tr[i])) + tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); + if (t == IRT_INT) { +@@ -540,10 +582,10 @@ static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) + } + + /* Record LOOP/JLOOP. Now, that was easy. */ +-static LoopEvent rec_loop(jit_State *J, BCReg ra) ++static LoopEvent rec_loop(jit_State *J, BCReg ra, int skip) + { + if (ra < J->maxslot) J->maxslot = ra; +- J->pc++; ++ J->pc += skip; + return LOOPEV_ENTER; + } + +@@ -567,6 +609,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) + { + if (J->parent == 0 && J->exitno == 0) { + if (pc == J->startpc && J->framedepth + J->retdepth == 0) { ++ if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */ + /* Same loop? */ + if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ + lj_trace_err(J, LJ_TRERR_LLEAVE); +@@ -607,6 +650,70 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) + } /* Side trace continues across a loop that's left or not entered. */ + } + ++/* Record ITERN. */ ++static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) ++{ ++#if LJ_BE ++ /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, ++ ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. ++ */ ++ UNUSED(ra); UNUSED(rb); ++ setintV(&J->errinfo, (int32_t)BC_ITERN); ++ lj_trace_err_info(J, LJ_TRERR_NYIBC); ++#else ++ RecordIndex ix; ++ /* Since ITERN is recorded at the start, we need our own loop detection. */ ++ if (J->pc == J->startpc && ++ (J->cur.nins > REF_FIRST+1 || ++ (J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) && ++ J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { ++ lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ ++ return LOOPEV_ENTER; ++ } ++ J->maxslot = ra; ++ lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ ++ ix.tab = getslot(J, ra-2); ++ ix.key = J->base[ra-1] ? J->base[ra-1] : ++ sloadt(J, (int32_t)(ra-1), IRT_INT, IRSLOAD_KEYINDEX); ++ copyTV(J->L, &ix.tabv, &J->L->base[ra-2]); ++ copyTV(J->L, &ix.keyv, &J->L->base[ra-1]); ++ ix.idxchain = (rb < 3); /* Omit value type check, if unused. */ ++ ix.mobj = 1; /* We need the next index, too. 
*/ ++ J->maxslot = ra + lj_record_next(J, &ix); ++ J->needsnap = 1; ++ if (!tref_isnil(ix.key)) { /* Looping back? */ ++ J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */ ++ J->base[ra] = ix.key; ++ J->base[ra+1] = ix.val; ++ J->pc += bc_j(J->pc[1])+2; ++ return LOOPEV_ENTER; ++ } else { ++ J->maxslot = ra-3; ++ J->pc += 2; ++ return LOOPEV_LEAVE; ++ } ++#endif ++} ++ ++/* Record ISNEXT. */ ++static void rec_isnext(jit_State *J, BCReg ra) ++{ ++ cTValue *b = &J->L->base[ra-3]; ++ if (tvisfunc(b) && funcV(b)->c.ffid == FF_next && ++ tvistab(b+1) && tvisnil(b+2)) { ++ /* These checks are folded away for a compiled pairs(). */ ++ TRef func = getslot(J, ra-3); ++ TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID); ++ emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next)); ++ (void)getslot(J, ra-2); /* Type check for table. */ ++ (void)getslot(J, ra-1); /* Type check for nil key. */ ++ J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX; ++ J->maxslot = ra; ++ } else { /* Abort trace. Interpreter will despecialize bytecode. */ ++ lj_trace_err(J, LJ_TRERR_RECERR); ++ } ++} ++ + /* -- Record profiler hook checks ----------------------------------------- */ + + #if LJ_HASPROFILE +@@ -615,7 +722,8 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) + static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) + { + GCproto *ppt; +- lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l'); ++ lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l', ++ "bad profiler mode %c", J->prof_mode); + if (!pt) + return 0; + ppt = J->prev_pt; +@@ -676,7 +784,7 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) + /* NYI: io_file_iter doesn't have an ffid, yet. */ + { /* Specialize to the ffid. */ + TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); +- emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid)); ++ emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid)); + } + return tr; + default: +@@ -731,6 +839,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) + J->framedepth++; + J->base += func+1+LJ_FR2; + J->baseslot += func+1+LJ_FR2; ++ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) ++ lj_trace_err(J, LJ_TRERR_STACKOV); + } + + /* Record tail call. */ +@@ -791,13 +901,14 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + BCReg cbase = (BCReg)frame_delta(frame); + if (--J->framedepth <= 0) + lj_trace_err(J, LJ_TRERR_NYIRETL); +- lua_assert(J->baseslot > 1+LJ_FR2); ++ lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); + gotresults++; + rbase += cbase; + J->baseslot -= (BCReg)cbase; + J->base -= cbase; + J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ + frame = frame_prevd(frame); ++ J->needsnap = 1; /* Stop catching on-trace errors. */ + } + /* Return to lower frame via interpreter for unhandled cases. */ + if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && +@@ -815,7 +926,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + BCReg cbase = (BCReg)frame_delta(frame); + if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. 
*/ + lj_trace_err(J, LJ_TRERR_NYIRETL); +- lua_assert(J->baseslot > 1+LJ_FR2); ++ lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); + rbase += cbase; + J->baseslot -= (BCReg)cbase; + J->base -= cbase; +@@ -842,7 +953,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + J->maxslot = cbase+(BCReg)nresults; + if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ + J->framedepth--; +- lua_assert(J->baseslot > cbase+1+LJ_FR2); ++ lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return"); + J->baseslot -= cbase+1+LJ_FR2; + J->base -= cbase+1+LJ_FR2; + } else if (J->parent == 0 && J->exitno == 0 && +@@ -857,7 +968,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); + J->retdepth++; + J->needsnap = 1; +- lua_assert(J->baseslot == 1+LJ_FR2); ++ lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return"); + /* Shift result slots up and clear the slots of the new frame below. */ + memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); + memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); +@@ -884,6 +995,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; + if (bslot != J->maxslot) { /* Concatenate the remainder. */ + TValue *b = J->L->base, save; /* Simulate lower frame and result. */ ++ /* Can't handle MM_concat + CALLT + fast func side-effects. */ ++ if (J->postproc != LJ_POST_NONE) ++ lj_trace_err(J, LJ_TRERR_NYIRETL); + J->base[J->maxslot] = tr; + copyTV(J->L, &save, b-(2<<LJ_FR2)); + if (gotresults) +@@ -905,12 +1019,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + } /* Otherwise continue with another __concat call. */ + } else { + /* Result type already specialized. */ +- lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); ++ lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt, ++ "bad continuation type"); + } + } else { + lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ + } +- lua_assert(J->baseslot >= 1+LJ_FR2); ++ lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return"); + } + + /* -- Metamethod handling ------------------------------------------------- */ +@@ -976,13 +1091,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) + } + /* The cdata metatable is treated as immutable. */ + if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; +-#if LJ_GC64 +- /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ + ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, + GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); +-#else +- ix->mt = mix.tab = lj_ir_ktab(J, mt); +-#endif + goto nocheck; + } + ix->mt = mt ? mix.tab : TREF_NIL; +@@ -1056,7 +1166,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) + lj_record_call(J, func, 2); + } else { + if (LJ_52 && tref_istab(tr)) +- return lj_ir_call(J, IRCALL_lj_tab_len, tr); ++ return emitir(IRTI(IR_ALEN), tr, TREF_NIL); + lj_trace_err(J, LJ_TRERR_NOMM); + } + return 0; /* No result yet. 
*/ +@@ -1165,7 +1275,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) + ix->tab = ix->val; + copyTV(J->L, &ix->tabv, &ix->valv); + } else { +- lua_assert(tref_iscdata(ix->key)); ++ lj_assertJ(tref_iscdata(ix->key), "cdata expected"); + ix->tab = ix->key; + copyTV(J->L, &ix->tabv, &ix->keyv); + } +@@ -1262,7 +1372,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) + /* Got scalar evolution analysis results for this reference? */ + if (ref == J->scev.idx) { + int32_t stop; +- lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); ++ lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD, ++ "only int SCEV supported"); + stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); + /* Runtime value for stop of loop is within bounds? */ + if ((uint64_t)stop + ofs < (uint64_t)asize) { +@@ -1380,7 +1491,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) + + while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ + /* Never call raw lj_record_idx() on non-table. */ +- lua_assert(ix->idxchain != 0); ++ lj_assertJ(ix->idxchain != 0, "bad usage"); + if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) + lj_trace_err(J, LJ_TRERR_NOMM); + handlemm: +@@ -1402,6 +1513,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) + return 0; /* No result yet. */ + } + } ++#if LJ_HASBUFFER ++ /* The index table of buffer objects is treated as immutable. */ ++ if (ix->mt == TREF_NIL && !ix->val && ++ tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER && ++ tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) { ++ cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv)); ++ TRef tr = lj_record_constify(J, val); ++ if (tr) return tr; /* Specialize to the value, i.e. a method. */ ++ } ++#endif + /* Otherwise retry lookup with metaobject. */ + ix->tab = ix->mobj; + copyTV(J->L, &ix->tabv, &ix->mobjv); +@@ -1464,10 +1585,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) + emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), + xref, lj_ir_kkptr(J, niltvg(J2G(J)))); + if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { +- lua_assert(hasmm); ++ lj_assertJ(hasmm, "inconsistent metamethod handling"); + goto handlemm; + } +- lua_assert(!hasmm); ++ lj_assertJ(!hasmm, "inconsistent metamethod handling"); + if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ + TRef key = ix->key; + if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ +@@ -1512,6 +1633,47 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) + } + } + ++/* Determine result type of table traversal. */ ++static IRType rec_next_types(GCtab *t, uint32_t idx) ++{ ++ for (; idx < t->asize; idx++) { ++ cTValue *a = arrayslot(t, idx); ++ if (LJ_LIKELY(!tvisnil(a))) ++ return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8); ++ } ++ idx -= t->asize; ++ for (; idx <= t->hmask; idx++) { ++ Node *n = &noderef(t->node)[idx]; ++ if (!tvisnil(&n->val)) ++ return itype2irt(&n->key) + (itype2irt(&n->val) << 8); ++ } ++ return IRT_NIL + (IRT_NIL << 8); ++} ++ ++/* Record a table traversal step aka next(). 
*/ ++int lj_record_next(jit_State *J, RecordIndex *ix) ++{ ++ IRType t, tkey, tval; ++ TRef trvk; ++ t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo); ++ tkey = (t & 0xff); tval = (t >> 8); ++ trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key); ++ if (ix->mobj || tkey == IRT_NIL) { ++ TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk); ++ /* Always check for invalid key from next() for nil result. */ ++ if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1)); ++ ix->mobj = idx; ++ } ++ ix->key = lj_record_vload(J, trvk, 1, tkey); ++ if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */ ++ ix->val = TREF_NIL; ++ return 1; ++ } else { /* Need value. */ ++ ix->val = lj_record_vload(J, trvk, 0, tval); ++ return 2; ++ } ++} ++ + static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) + { + RecordIndex ix; +@@ -1573,7 +1735,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) + int needbarrier = 0; + if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ + TRef tr, kfunc; +- lua_assert(val == 0); ++ lj_assertJ(val == 0, "bad usage"); + if (!tref_isk(fn)) { /* Late specialization of current function. */ + if (J->pt->flags >= PROTO_CLC_POLY) + goto noconstify; +@@ -1667,7 +1829,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) + if (lnk) { /* Possible tail- or up-recursion. */ + lj_trace_flush(J, lnk); /* Flush trace that only returns. */ + /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ +- hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); ++ hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); + } + lj_trace_err(J, LJ_TRERR_CUNROLL); + } +@@ -1695,7 +1857,7 @@ static void rec_func_vararg(jit_State *J) + { + GCproto *pt = J->pt; + BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; +- lua_assert((pt->flags & PROTO_VARARG)); ++ lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function"); + if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); + J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ +@@ -1764,7 +1926,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) + { + int32_t numparams = J->pt->numparams; + ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; +- lua_assert(frame_isvarg(J->L->base-1)); ++ lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame"); + if (LJ_FR2 && dst > J->maxslot) + J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ + if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ +@@ -1795,11 +1957,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); + for (i = 0; i < nload; i++) { + IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); +- TRef aref = emitir(IRT(IR_AREF, IRT_PGC), +- vbase, lj_ir_kint(J, (int32_t)i)); +- TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); +- if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ +- J->base[dst+i] = tr; ++ J->base[dst+i] = lj_record_vload(J, vbase, i, t); + } + } else { + emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); +@@ -1846,8 +2004,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) + lj_ir_kint(J, frofs-(8<<LJ_FR2))); + t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); + aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); +- tr = emitir(IRTG(IR_VLOAD, t), aref, 0); +- if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. 
*/ ++ tr = lj_record_vload(J, aref, 0, t); + } + J->base[dst-2-LJ_FR2] = tr; + J->maxslot = dst-1-LJ_FR2; +@@ -1858,6 +2015,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) + lj_trace_err_info(J, LJ_TRERR_NYIBC); + } + } ++ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) ++ lj_trace_err(J, LJ_TRERR_STACKOV); + } + + /* -- Record allocations -------------------------------------------------- */ +@@ -1885,7 +2044,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) + TValue savetv[5]; + BCReg s; + RecordIndex ix; +- lua_assert(baseslot < topslot); ++ lj_assertJ(baseslot < topslot, "bad CAT arg"); + for (s = baseslot; s <= topslot; s++) + (void)getslot(J, s); /* Ensure all arguments have a reference. */ + if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) { +@@ -1902,9 +2061,9 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) + tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), + lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); + do { +- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++); ++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++); + } while (trp <= top); +- tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); ++ tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + J->maxslot = (BCReg)(xbase - J->base); + if (xbase == base) return tr; /* Return simple concatenation result. */ + /* Pass partial result. */ +@@ -2009,7 +2168,7 @@ void lj_record_ins(jit_State *J) + if (bc_op(*J->pc) >= BC__MAX) + return; + break; +- default: lua_assert(0); break; ++ default: lj_assertJ(0, "bad post-processing mode"); break; + } + J->postproc = LJ_POST_NONE; + } +@@ -2017,7 +2176,7 @@ void lj_record_ins(jit_State *J) + /* Need snapshot before recording next bytecode (e.g. after a store). */ + if (J->needsnap) { + J->needsnap = 0; +- lj_snap_purge(J); ++ if (J->pt) lj_snap_purge(J); + lj_snap_add(J); + J->mergesnap = 1; + } +@@ -2187,7 +2346,7 @@ void lj_record_ins(jit_State *J) + if (tref_isstr(rc)) + rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); + else if (!LJ_52 && tref_istab(rc)) +- rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); ++ rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL); + else + rc = rec_mm_len(J, rc, rcv); + break; +@@ -2377,7 +2536,8 @@ void lj_record_ins(jit_State *J) + J->loopref = J->cur.nins; + break; + case BC_JFORI: +- lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); ++ lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL, ++ "JFORI does not point to JFORL"); + if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ + lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); + /* Continue tracing if the loop is not entered. */ +@@ -2389,8 +2549,11 @@ void lj_record_ins(jit_State *J) + case BC_ITERL: + rec_loop_interp(J, pc, rec_iterl(J, *pc)); + break; ++ case BC_ITERN: ++ rec_loop_interp(J, pc, rec_itern(J, ra, rb)); ++ break; + case BC_LOOP: +- rec_loop_interp(J, pc, rec_loop(J, ra)); ++ rec_loop_interp(J, pc, rec_loop(J, ra, 1)); + break; + + case BC_JFORL: +@@ -2400,7 +2563,8 @@ void lj_record_ins(jit_State *J) + rec_loop_jit(J, rc, rec_iterl(J, traceref(J, rc)->startins)); + break; + case BC_JLOOP: +- rec_loop_jit(J, rc, rec_loop(J, ra)); ++ rec_loop_jit(J, rc, rec_loop(J, ra, ++ !bc_isret(bc_op(traceref(J, rc)->startins)))); + break; + + case BC_IFORL: +@@ -2416,6 +2580,10 @@ void lj_record_ins(jit_State *J) + J->maxslot = ra; /* Shrink used slots. 
*/ + break; + ++ case BC_ISNEXT: ++ rec_isnext(J, ra); ++ break; ++ + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: +@@ -2430,7 +2598,8 @@ void lj_record_ins(jit_State *J) + rec_func_lua(J); + break; + case BC_JFUNCV: +- lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */ ++ /* Cannot happen. No hotcall counting for varag funcs. */ ++ lj_assertJ(0, "unsupported vararg hotcall"); + break; + + case BC_FUNCC: +@@ -2444,8 +2613,6 @@ void lj_record_ins(jit_State *J) + break; + } + /* fallthrough */ +- case BC_ITERN: +- case BC_ISNEXT: + case BC_UCLO: + case BC_FNEW: + setintV(&J->errinfo, (int32_t)op); +@@ -2468,8 +2635,9 @@ void lj_record_ins(jit_State *J) + #undef rbv + #undef rcv + +- /* Limit the number of recorded IR instructions. */ +- if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) ++ /* Limit the number of recorded IR instructions and constants. */ ++ if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] || ++ J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst]) + lj_trace_err(J, LJ_TRERR_TRACEOV); + } + +@@ -2489,13 +2657,20 @@ static const BCIns *rec_setup_root(jit_State *J) + J->bc_min = pc; + break; + case BC_ITERL: +- lua_assert(bc_op(pc[-1]) == BC_ITERC); ++ lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL"); + J->maxslot = ra + bc_b(pc[-1]) - 1; + J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); + pc += 1+bc_j(ins); +- lua_assert(bc_op(pc[-1]) == BC_JMP); ++ lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); + J->bc_min = pc; + break; ++ case BC_ITERN: ++ lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN"); ++ J->maxslot = ra; ++ J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns); ++ J->bc_min = pc+2 + bc_j(pc[1]); ++ J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */ ++ break; + case BC_LOOP: + /* Only check BC range for real loops, but not for "repeat until true". */ + pcj = pc + bc_j(ins); +@@ -2525,7 +2700,7 @@ static const BCIns *rec_setup_root(jit_State *J) + pc++; + break; + default: +- lua_assert(0); ++ lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins)); + break; + } + return pc; +@@ -2592,9 +2767,14 @@ void lj_record_setup(jit_State *J) + } + lj_snap_replay(J, T); + sidecheck: +- if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || +- T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + +- J->param[JIT_P_tryside]) { ++ if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || ++ T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + ++ J->param[JIT_P_tryside])) { ++ if (bc_op(*J->pc) == BC_JLOOP) { ++ BCIns startins = traceref(J, bc_d(*J->pc))->startins; ++ if (bc_op(startins) == BC_ITERN) ++ rec_itern(J, bc_a(startins), bc_b(startins)); ++ } + lj_record_stop(J, LJ_TRLINK_INTERP, 0); + } + } else { /* Root trace. */ +@@ -2603,6 +2783,7 @@ void lj_record_setup(jit_State *J) + J->pc = rec_setup_root(J); + /* Note: the loop instruction itself is recorded at the end and not + ** at the start! So snapshot #0 needs to point to the *next* instruction. ++ ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST. + */ + lj_snap_add(J); + if (bc_op(J->cur.startins) == BC_FORL) +diff --git a/src/lj_record.h b/src/lj_record.h +index 93d374d2..01cc6041 100644 +--- a/src/lj_record.h ++++ b/src/lj_record.h +@@ -1,6 +1,6 @@ + /* + ** Trace recorder (bytecode -> SSA IR). +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_RECORD_H +@@ -30,6 +30,7 @@ LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, + cTValue *av, cTValue *bv); + LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk); + LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); ++LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t); + + LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); + LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs); +@@ -37,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults); + + LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); + LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); ++LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix); + + LJ_FUNC void lj_record_ins(jit_State *J); + LJ_FUNC void lj_record_setup(jit_State *J); +diff --git a/src/lj_serialize.c b/src/lj_serialize.c +new file mode 100644 +index 00000000..d6551b11 +--- /dev/null ++++ b/src/lj_serialize.c +@@ -0,0 +1,538 @@ ++/* ++** Object de/serialization. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#define lj_serialize_c ++#define LUA_CORE ++ ++#include "lj_obj.h" ++ ++#if LJ_HASBUFFER ++#include "lj_err.h" ++#include "lj_buf.h" ++#include "lj_str.h" ++#include "lj_tab.h" ++#include "lj_udata.h" ++#if LJ_HASFFI ++#include "lj_ctype.h" ++#include "lj_cdata.h" ++#endif ++#if LJ_HASJIT ++#include "lj_ir.h" ++#endif ++#include "lj_serialize.h" ++ ++/* Tags for internal serialization format. */ ++enum { ++ SER_TAG_NIL, /* 0x00 */ ++ SER_TAG_FALSE, ++ SER_TAG_TRUE, ++ SER_TAG_NULL, ++ SER_TAG_LIGHTUD32, ++ SER_TAG_LIGHTUD64, ++ SER_TAG_INT, ++ SER_TAG_NUM, ++ SER_TAG_TAB, /* 0x08 */ ++ SER_TAG_DICT_MT = SER_TAG_TAB+6, ++ SER_TAG_DICT_STR, ++ SER_TAG_INT64, /* 0x10 */ ++ SER_TAG_UINT64, ++ SER_TAG_COMPLEX, ++ SER_TAG_0x13, ++ SER_TAG_0x14, ++ SER_TAG_0x15, ++ SER_TAG_0x16, ++ SER_TAG_0x17, ++ SER_TAG_0x18, /* 0x18 */ ++ SER_TAG_0x19, ++ SER_TAG_0x1a, ++ SER_TAG_0x1b, ++ SER_TAG_0x1c, ++ SER_TAG_0x1d, ++ SER_TAG_0x1e, ++ SER_TAG_0x1f, ++ SER_TAG_STR, /* 0x20 + str->len */ ++}; ++LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0); ++ ++/* -- Helper functions ---------------------------------------------------- */ ++ ++static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz) ++{ ++ if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) { ++ sbx->w = w; ++ w = lj_buf_more2((SBuf *)sbx, sz); ++ } ++ return w; ++} ++ ++/* Write U124 to buffer. 
*/ ++static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v) ++{ ++ if (v < 0x1fe0) { ++ v -= 0xe0; ++ *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v; ++ } else { ++ *w++ = (char)0xff; ++#if LJ_BE ++ v = lj_bswap(v); ++#endif ++ memcpy(w, &v, 4); w += 4; ++ } ++ return w; ++} ++ ++static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v) ++{ ++ if (LJ_LIKELY(v < 0xe0)) { ++ *w++ = (char)v; ++ return w; ++ } else { ++ return serialize_wu124_(w, v); ++ } ++} ++ ++static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv) ++{ ++ uint32_t v = *pv; ++ if (v != 0xff) { ++ if (r >= w) return NULL; ++ v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++; ++ } else { ++ if (r + 4 > w) return NULL; ++ v = lj_getu32(r); r += 4; ++#if LJ_BE ++ v = lj_bswap(v); ++#endif ++ } ++ *pv = v; ++ return r; ++} ++ ++static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv) ++{ ++ if (LJ_LIKELY(r < w)) { ++ uint32_t v = *(uint8_t *)r; r++; ++ *pv = v; ++ if (LJ_UNLIKELY(v >= 0xe0)) { ++ r = serialize_ru124_(r, w, pv); ++ } ++ return r; ++ } ++ return NULL; ++} ++ ++/* Prepare string dictionary for use (once). */ ++void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict) ++{ ++ if (!dict->hmask) { /* No hash part means not prepared, yet. */ ++ MSize i, len = lj_tab_len(dict); ++ if (!len) return; ++ lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); ++ for (i = 1; i <= len && i < dict->asize; i++) { ++ cTValue *o = arrayslot(dict, i); ++ if (tvisstr(o)) { ++ if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */ ++ lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); ++ } ++ } else if (!tvisfalse(o)) { ++ lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); ++ } ++ } ++ } ++} ++ ++/* Prepare metatable dictionary for use (once). */ ++void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict) ++{ ++ if (!dict->hmask) { /* No hash part means not prepared, yet. */ ++ MSize i, len = lj_tab_len(dict); ++ if (!len) return; ++ lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); ++ for (i = 1; i <= len && i < dict->asize; i++) { ++ cTValue *o = arrayslot(dict, i); ++ if (tvistab(o)) { ++ if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */ ++ lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); ++ } ++ } else if (!tvisfalse(o)) { ++ lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); ++ } ++ } ++ } ++} ++ ++/* -- Internal serializer ------------------------------------------------- */ ++ ++/* Put serialized object into buffer. */ ++static char *serialize_put(char *w, SBufExt *sbx, cTValue *o) ++{ ++ if (LJ_LIKELY(tvisstr(o))) { ++ const GCstr *str = strV(o); ++ MSize len = str->len; ++ w = serialize_more(w, sbx, 5+len); ++ w = serialize_wu124(w, SER_TAG_STR + len); ++ w = lj_buf_wmem(w, strdata(str), len); ++ } else if (tvisint(o)) { ++ uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o); ++ w = serialize_more(w, sbx, 1+4); ++ *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4; ++ } else if (tvisnum(o)) { ++ uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64; ++ w = serialize_more(w, sbx, 1+sizeof(lua_Number)); ++ *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8; ++ } else if (tvispri(o)) { ++ w = serialize_more(w, sbx, 1); ++ *w++ = (char)(SER_TAG_NIL + ~itype(o)); ++ } else if (tvistab(o)) { ++ const GCtab *t = tabV(o); ++ uint32_t narray = 0, nhash = 0, one = 2; ++ if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); ++ sbx->depth--; ++ if (t->asize > 0) { /* Determine max. length of array part. 
*/ ++ ptrdiff_t i; ++ TValue *array = tvref(t->array); ++ for (i = (ptrdiff_t)t->asize-1; i >= 0; i--) ++ if (!tvisnil(&array[i])) ++ break; ++ narray = (uint32_t)(i+1); ++ if (narray && tvisnil(&array[0])) one = 4; ++ } ++ if (t->hmask > 0) { /* Count number of used hash slots. */ ++ uint32_t i, hmask = t->hmask; ++ Node *node = noderef(t->node); ++ for (i = 0; i <= hmask; i++) ++ nhash += !tvisnil(&node[i].val); ++ } ++ /* Write metatable index. */ ++ if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) { ++ TValue mto; ++ Node *n; ++ settabV(sbufL(sbx), &mto, tabref(t->metatable)); ++ n = hashgcref(tabref(sbx->dict_mt), mto.gcr); ++ do { ++ if (n->key.u64 == mto.u64) { ++ uint32_t idx = n->val.u32.lo; ++ w = serialize_more(w, sbx, 1+5); ++ *w++ = SER_TAG_DICT_MT; ++ w = serialize_wu124(w, idx); ++ break; ++ } ++ } while ((n = nextnode(n))); ++ } ++ /* Write number of array slots and hash slots. */ ++ w = serialize_more(w, sbx, 1+2*5); ++ *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0)); ++ if (narray) w = serialize_wu124(w, narray); ++ if (nhash) w = serialize_wu124(w, nhash); ++ if (narray) { /* Write array entries. */ ++ cTValue *oa = tvref(t->array) + (one >> 2); ++ cTValue *oe = tvref(t->array) + narray; ++ while (oa < oe) w = serialize_put(w, sbx, oa++); ++ } ++ if (nhash) { /* Write hash entries. */ ++ const Node *node = noderef(t->node) + t->hmask; ++ GCtab *dict_str = tabref(sbx->dict_str); ++ if (LJ_UNLIKELY(dict_str)) { ++ for (;; node--) ++ if (!tvisnil(&node->val)) { ++ if (LJ_LIKELY(tvisstr(&node->key))) { ++ /* Inlined lj_tab_getstr is 30% faster. */ ++ const GCstr *str = strV(&node->key); ++ Node *n = hashstr(dict_str, str); ++ do { ++ if (tvisstr(&n->key) && strV(&n->key) == str) { ++ uint32_t idx = n->val.u32.lo; ++ w = serialize_more(w, sbx, 1+5); ++ *w++ = SER_TAG_DICT_STR; ++ w = serialize_wu124(w, idx); ++ break; ++ } ++ n = nextnode(n); ++ if (!n) { ++ MSize len = str->len; ++ w = serialize_more(w, sbx, 5+len); ++ w = serialize_wu124(w, SER_TAG_STR + len); ++ w = lj_buf_wmem(w, strdata(str), len); ++ break; ++ } ++ } while (1); ++ } else { ++ w = serialize_put(w, sbx, &node->key); ++ } ++ w = serialize_put(w, sbx, &node->val); ++ if (--nhash == 0) break; ++ } ++ } else { ++ for (;; node--) ++ if (!tvisnil(&node->val)) { ++ w = serialize_put(w, sbx, &node->key); ++ w = serialize_put(w, sbx, &node->val); ++ if (--nhash == 0) break; ++ } ++ } ++ } ++ sbx->depth++; ++#if LJ_HASFFI ++ } else if (tviscdata(o)) { ++ CTState *cts = ctype_cts(sbufL(sbx)); ++ CType *s = ctype_raw(cts, cdataV(o)->ctypeid); ++ uint8_t *sp = cdataptr(cdataV(o)); ++ if (ctype_isinteger(s->info) && s->size == 8) { ++ w = serialize_more(w, sbx, 1+8); ++ *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64; ++#if LJ_BE ++ { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); } ++#else ++ memcpy(w, sp, 8); ++#endif ++ w += 8; ++ } else if (ctype_iscomplex(s->info) && s->size == 16) { ++ w = serialize_more(w, sbx, 1+16); ++ *w++ = SER_TAG_COMPLEX; ++#if LJ_BE ++ { /* Only swap the doubles. The re/im order stays the same. 
*/ ++ uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8); ++ u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8); ++ } ++#else ++ memcpy(w, sp, 16); ++#endif ++ w += 16; ++ } else { ++ goto badenc; /* NYI other cdata */ ++ } ++#endif ++ } else if (tvislightud(o)) { ++ uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o); ++ w = serialize_more(w, sbx, 1+sizeof(ud)); ++ if (ud == 0) { ++ *w++ = SER_TAG_NULL; ++ } else if (LJ_32 || checku32(ud)) { ++#if LJ_BE && LJ_64 ++ ud = lj_bswap64(ud); ++#elif LJ_BE ++ ud = lj_bswap(ud); ++#endif ++ *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4; ++#if LJ_64 ++ } else { ++#if LJ_BE ++ ud = lj_bswap64(ud); ++#endif ++ *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8; ++#endif ++ } ++ } else { ++ /* NYI userdata */ ++#if LJ_HASFFI ++ badenc: ++#endif ++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o)); ++ } ++ return w; ++} ++ ++/* Get serialized object from buffer. */ ++static char *serialize_get(char *r, SBufExt *sbx, TValue *o) ++{ ++ char *w = sbx->w; ++ uint32_t tp; ++ r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; ++ if (LJ_LIKELY(tp >= SER_TAG_STR)) { ++ uint32_t len = tp - SER_TAG_STR; ++ if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob; ++ setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len)); ++ r += len; ++ } else if (tp == SER_TAG_INT) { ++ if (LJ_UNLIKELY(r + 4 > w)) goto eob; ++ setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r))); ++ r += 4; ++ } else if (tp == SER_TAG_NUM) { ++ if (LJ_UNLIKELY(r + 8 > w)) goto eob; ++ memcpy(o, r, 8); r += 8; ++#if LJ_BE ++ o->u64 = lj_bswap64(o->u64); ++#endif ++ if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. */ ++ } else if (tp <= SER_TAG_TRUE) { ++ setpriV(o, ~tp); ++ } else if (tp == SER_TAG_DICT_STR) { ++ GCtab *dict_str; ++ uint32_t idx; ++ r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; ++ idx++; ++ dict_str = tabref(sbx->dict_str); ++ if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx))) ++ copyTV(sbufL(sbx), o, arrayslot(dict_str, idx)); ++ else ++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); ++ } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) { ++ uint32_t narray = 0, nhash = 0; ++ GCtab *t, *mt = NULL; ++ if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); ++ sbx->depth--; ++ if (tp == SER_TAG_DICT_MT) { ++ GCtab *dict_mt; ++ uint32_t idx; ++ r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; ++ idx++; ++ dict_mt = tabref(sbx->dict_mt); ++ if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx))) ++ mt = tabV(arrayslot(dict_mt, idx)); ++ else ++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); ++ r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; ++ if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag; ++ } ++ if (tp >= SER_TAG_TAB+2) { ++ r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob; ++ } ++ if ((tp & 1)) { ++ r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob; ++ } ++ t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash)); ++ /* NOBARRIER: The table is new (marked white). 
*/ ++ setgcref(t->metatable, obj2gco(mt)); ++ settabV(sbufL(sbx), o, t); ++ if (narray) { ++ TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4); ++ TValue *oe = tvref(t->array) + narray; ++ while (oa < oe) r = serialize_get(r, sbx, oa++); ++ } ++ if (nhash) { ++ do { ++ TValue k, *v; ++ r = serialize_get(r, sbx, &k); ++ v = lj_tab_set(sbufL(sbx), t, &k); ++ if (LJ_UNLIKELY(!tvisnil(v))) ++ lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY); ++ r = serialize_get(r, sbx, v); ++ } while (--nhash); ++ } ++ sbx->depth++; ++#if LJ_HASFFI ++ } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) { ++ uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8; ++ GCcdata *cd; ++ if (LJ_UNLIKELY(r + sz > w)) goto eob; ++ cd = lj_cdata_new_(sbufL(sbx), ++ tp == SER_TAG_INT64 ? CTID_INT64 : ++ tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE, ++ sz); ++ memcpy(cdataptr(cd), r, sz); r += sz; ++#if LJ_BE ++ *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd)); ++ if (sz == 16) ++ ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]); ++#endif ++ if (sz == 16) { /* Fix non-canonical NaNs. */ ++ TValue *cdo = (TValue *)cdataptr(cd); ++ if (!tvisnum(&cdo[0])) setnanV(&cdo[0]); ++ if (!tvisnum(&cdo[1])) setnanV(&cdo[1]); ++ } ++ setcdataV(sbufL(sbx), o, cd); ++#endif ++ } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) { ++ uintptr_t ud = 0; ++ if (tp == SER_TAG_LIGHTUD32) { ++ if (LJ_UNLIKELY(r + 4 > w)) goto eob; ++ ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)); ++ r += 4; ++ } ++#if LJ_64 ++ else if (tp == SER_TAG_LIGHTUD64) { ++ if (LJ_UNLIKELY(r + 8 > w)) goto eob; ++ memcpy(&ud, r, 8); r += 8; ++#if LJ_BE ++ ud = lj_bswap64(ud); ++#endif ++ } ++ setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud)); ++#else ++ setrawlightudV(o, (void *)ud); ++#endif ++ } else { ++badtag: ++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp); ++ } ++ return r; ++eob: ++ lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB); ++ return NULL; ++} ++ ++/* -- External serialization API ------------------------------------------ */ ++ ++/* Encode to buffer. */ ++SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o) ++{ ++ sbx->depth = LJ_SERIALIZE_DEPTH; ++ sbx->w = serialize_put(sbx->w, sbx, o); ++ return sbx; ++} ++ ++/* Decode from buffer. */ ++char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o) ++{ ++ sbx->depth = LJ_SERIALIZE_DEPTH; ++ return serialize_get(sbx->r, sbx, o); ++} ++ ++/* Stand-alone encoding, borrowing from global temporary buffer. */ ++GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o) ++{ ++ SBufExt sbx; ++ char *w; ++ memset(&sbx, 0, sizeof(SBufExt)); ++ lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf); ++ sbx.depth = LJ_SERIALIZE_DEPTH; ++ w = serialize_put(sbx.w, &sbx, o); ++ return lj_str_new(L, sbx.b, (size_t)(w - sbx.b)); ++} ++ ++/* Stand-alone decoding, copy-on-write from string. */ ++void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str) ++{ ++ SBufExt sbx; ++ char *r; ++ memset(&sbx, 0, sizeof(SBufExt)); ++ lj_bufx_set_cow(L, &sbx, strdata(str), str->len); ++ /* No need to set sbx.cowref here. */ ++ sbx.depth = LJ_SERIALIZE_DEPTH; ++ r = serialize_get(sbx.r, &sbx, o); ++ if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV); ++} ++ ++#if LJ_HASJIT ++/* Peek into buffer to find the result IRType for specialization purposes. 
*/ ++LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx) ++{ ++ uint32_t tp; ++ if (serialize_ru124(sbx->r, sbx->w, &tp)) { ++ /* This must match the handling of all tags in the decoder above. */ ++ switch (tp) { ++ case SER_TAG_NIL: return IRT_NIL; ++ case SER_TAG_FALSE: return IRT_FALSE; ++ case SER_TAG_TRUE: return IRT_TRUE; ++ case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64: ++ return IRT_LIGHTUD; ++ case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM; ++ case SER_TAG_NUM: return IRT_NUM; ++ case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2: ++ case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5: ++ case SER_TAG_DICT_MT: ++ return IRT_TAB; ++ case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX: ++ return IRT_CDATA; ++ case SER_TAG_DICT_STR: ++ default: ++ return IRT_STR; ++ } ++ } ++ return IRT_NIL; /* Will fail on actual decode. */ ++} ++#endif ++ ++#endif +diff --git a/src/lj_serialize.h b/src/lj_serialize.h +new file mode 100644 +index 00000000..1fda23eb +--- /dev/null ++++ b/src/lj_serialize.h +@@ -0,0 +1,28 @@ ++/* ++** Object de/serialization. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#ifndef _LJ_SERIALIZE_H ++#define _LJ_SERIALIZE_H ++ ++#include "lj_obj.h" ++#include "lj_buf.h" ++ ++#if LJ_HASBUFFER ++ ++#define LJ_SERIALIZE_DEPTH 100 /* Default depth. */ ++ ++LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict); ++LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict); ++LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o); ++LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o); ++LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o); ++LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str); ++#if LJ_HASJIT ++LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx); ++#endif ++ ++#endif ++ ++#endif +diff --git a/src/lj_snap.c b/src/lj_snap.c +index bb063c2b..97097a5b 100644 +--- a/src/lj_snap.c ++++ b/src/lj_snap.c +@@ -1,6 +1,6 @@ + /* + ** Snapshot handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_snap_c +@@ -85,15 +85,20 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) + IRIns *ir = &J->cur.ir[ref]; + if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && + ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { +- /* No need to snapshot unmodified non-inherited slots. */ +- if (!(ir->op2 & IRSLOAD_INHERIT)) ++ /* ++ ** No need to snapshot unmodified non-inherited slots. ++ ** But always snapshot the function below a frame in LJ_FR2 mode. ++ */ ++ if (!(ir->op2 & IRSLOAD_INHERIT) && ++ (!LJ_FR2 || s == 0 || s+1 == nslots || ++ !(J->slot[s+1] & (TREF_CONT|TREF_FRAME)))) + continue; + /* No need to restore readonly slots and unmodified non-parent slots. */ + if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && + (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) + sn |= SNAP_NORESTORE; + } +- if (LJ_SOFTFP && irt_isnum(ir->t)) ++ if (LJ_SOFTFP32 && irt_isnum(ir->t)) + sn |= SNAP_SOFTFPNUM; + map[n++] = sn; + } +@@ -110,12 +115,15 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) + cTValue *ftop = isluafunc(fn) ? 
(frame+funcproto(fn)->framesize) : J->L->top; + #if LJ_FR2 + uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); +- lua_assert(2 <= J->baseslot && J->baseslot <= 257); ++ lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot"); + memcpy(map, &pcbase, sizeof(uint64_t)); + #else + MSize f = 0; + map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ + #endif ++ lj_assertJ(!J->pt || ++ (J->pc >= proto_bc(J->pt) && ++ J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC"); + while (frame > lim) { /* Backwards traversal of all frames above base. */ + if (frame_islua(frame)) { + #if !LJ_FR2 +@@ -129,7 +137,7 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) + #endif + frame = frame_prevd(frame); + } else { +- lua_assert(!frame_isc(frame)); ++ lj_assertJ(!frame_isc(frame), "broken frame chain"); + #if !LJ_FR2 + map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); + #endif +@@ -141,10 +149,10 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) + } + *topslot = (uint8_t)(ftop - lim); + #if LJ_FR2 +- lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); ++ lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def"); + return 2; + #else +- lua_assert(f == (MSize)(1 + J->framedepth)); ++ lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size"); + return f; + #endif + } +@@ -161,11 +169,12 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) + nent = snapshot_slots(J, p, nslots); + snap->nent = (uint8_t)nent; + nent += snapshot_framelinks(J, p + nent, &snap->topslot); +- snap->mapofs = (uint16_t)nsnapmap; ++ snap->mapofs = (uint32_t)nsnapmap; + snap->ref = (IRRef1)J->cur.nins; ++ snap->mcofs = 0; + snap->nslots = (uint8_t)nslots; + snap->count = 0; +- J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); ++ J->cur.nsnapmap = (uint32_t)(nsnapmap + nent); + } + + /* Add or merge a snapshot. */ +@@ -222,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, + #define DEF_SLOT(s) udf[(s)] *= 3 + + /* Scan through following bytecode and check for uses/defs. */ +- lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); ++ lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, ++ "snapshot PC out of range"); + for (;;) { + BCIns ins = *pc++; + BCOp op = bc_op(ins); +@@ -233,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, + switch (bcmode_c(op)) { + case BCMvar: USE_SLOT(bc_c(ins)); break; + case BCMrbase: +- lua_assert(op == BC_CAT); ++ lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op); + for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); + for (; s < maxslot; s++) DEF_SLOT(s); + break; +@@ -242,7 +252,12 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, + BCReg minslot = bc_a(ins); + if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT; + else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1; +- else if (op == BC_UCLO) { pc += bc_j(ins); break; } ++ else if (op == BC_UCLO) { ++ ptrdiff_t delta = bc_j(ins); ++ if (delta < 0) return maxslot; /* Prevent loop. */ ++ pc += delta; ++ break; ++ } + for (s = minslot; s < maxslot; s++) DEF_SLOT(s); + return minslot < maxslot ? 
minslot : maxslot; + } +@@ -266,7 +281,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, + if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins)); + break; + case BCMbase: +- if (op >= BC_CALLM && op <= BC_VARG) { ++ if (op >= BC_CALLM && op <= BC_ITERN) { + BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? + maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2); + if (LJ_FR2) DEF_SLOT(bc_a(ins)+1); +@@ -277,6 +292,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, + for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); + return 0; + } ++ } else if (op == BC_VARG) { ++ return maxslot; /* NYI: punt. */ + } else if (op == BC_KNIL) { + for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s); + } else if (op == BC_TSETM) { +@@ -285,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, + break; + default: break; + } +- lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); ++ lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, ++ "use/def analysis PC out of range"); + } + + #undef USE_SLOT +@@ -294,15 +312,45 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, + return 0; /* unreachable */ + } + ++/* Mark slots used by upvalues of child prototypes as used. */ ++void snap_useuv(GCproto *pt, uint8_t *udf) ++{ ++ /* This is a coarse check, because it's difficult to correlate the lifetime ++ ** of slots and closures. But the number of false positives is quite low. ++ ** A false positive may cause a slot not to be purged, which is just ++ ** a missed optimization. ++ */ ++ if ((pt->flags & PROTO_CHILD)) { ++ ptrdiff_t i, j, n = pt->sizekgc; ++ GCRef *kr = mref(pt->k, GCRef) - 1; ++ for (i = 0; i < n; i++, kr--) { ++ GCobj *o = gcref(*kr); ++ if (o->gch.gct == ~LJ_TPROTO) { ++ for (j = 0; j < gco2pt(o)->sizeuv; j++) { ++ uint32_t v = proto_uv(gco2pt(o))[j]; ++ if ((v & PROTO_UV_LOCAL)) { ++ udf[(v & 0xff)] = 0; ++ } ++ } ++ } ++ } ++ } ++} ++ + /* Purge dead slots before the next snapshot. */ + void lj_snap_purge(jit_State *J) + { + uint8_t udf[SNAP_USEDEF_SLOTS]; +- BCReg maxslot = J->maxslot; +- BCReg s = snap_usedef(J, udf, J->pc, maxslot); +- for (; s < maxslot; s++) +- if (udf[s] != 0) +- J->base[s] = 0; /* Purge dead slots. */ ++ BCReg s, maxslot = J->maxslot; ++ if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams) ++ maxslot = J->pt->numparams; ++ s = snap_usedef(J, udf, J->pc, maxslot); ++ if (s < maxslot) { ++ snap_useuv(J->pt, udf); ++ for (; s < maxslot; s++) ++ if (udf[s] != 0) ++ J->base[s] = 0; /* Purge dead slots. */ ++ } + } + + /* Shrink last snapshot. */ +@@ -315,6 +363,7 @@ void lj_snap_shrink(jit_State *J) + BCReg maxslot = J->maxslot; + BCReg baseslot = J->baseslot; + BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); ++ if (minslot < maxslot) snap_useuv(J->pt, udf); + maxslot += baseslot; + minslot += baseslot; + snap->nslots = (uint8_t)maxslot; +@@ -326,7 +375,7 @@ void lj_snap_shrink(jit_State *J) + snap->nent = (uint8_t)m; + nlim = J->cur.nsnapmap - snap->mapofs - 1; + while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */ +- J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */ ++ J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */ + } + + /* -- Snapshot access ----------------------------------------------------- */ +@@ -356,25 +405,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) + } + + /* Copy RegSP from parent snapshot to the parent links of the IR. 
*/ +-IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) ++IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir) + { + SnapShot *snap = &T->snap[snapno]; + SnapEntry *map = &T->snapmap[snap->mapofs]; + BloomFilter rfilt = snap_renamefilter(T, snapno); + MSize n = 0; + IRRef ref = 0; ++ UNUSED(J); + for ( ; ; ir++) { + uint32_t rs; + if (ir->o == IR_SLOAD) { + if (!(ir->op2 & IRSLOAD_PARENT)) break; + for ( ; ; n++) { +- lua_assert(n < snap->nent); ++ lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1); + if (snap_slot(map[n]) == ir->op1) { + ref = snap_ref(map[n++]); + break; + } + } +- } else if (LJ_SOFTFP && ir->o == IR_HIOP) { ++ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { + ref++; + } else if (ir->o == IR_PVAL) { + ref = ir->op1 + REF_BIAS; +@@ -385,7 +435,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) + if (bloomtest(rfilt, ref)) + rs = snap_renameref(T, snapno, ref, rs); + ir->prev = (uint16_t)rs; +- lua_assert(regsp_used(rs)); ++ lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS); + } + return ir; + } +@@ -403,7 +453,7 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) + case IR_KNUM: case IR_KINT64: + return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); + case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ +- default: lua_assert(0); return TREF_NIL; break; ++ default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL; + } + } + +@@ -413,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) + MSize j; + for (j = 0; j < nmax; j++) + if (snap_ref(map[j]) == ref) +- return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); ++ return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME); + return 0; + } + +@@ -481,17 +531,19 @@ void lj_snap_replay(jit_State *J, GCtrace *T) + tr = snap_replay_const(J, ir); + } else if (!regsp_used(ir->prev)) { + pass23 = 1; +- lua_assert(s != 0); ++ lj_assertJ(s != 0, "unused slot 0 in snapshot"); + tr = s; + } else { + IRType t = irt_type(ir->t); + uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; +- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; ++ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; + if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); ++ if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX; + tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); + } + setslot: +- J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ ++ /* Same as TREF_* flags. 
*/ ++ J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME)); + J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); + if ((sn & SNAP_FRAME)) + J->baseslot = s+1; +@@ -507,8 +559,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T) + if (regsp_reg(ir->r) == RID_SUNK) { + if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; + pass23 = 1; +- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || +- ir->o == IR_CNEW || ir->o == IR_CNEWI); ++ lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || ++ ir->o == IR_CNEW || ir->o == IR_CNEWI, ++ "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); + if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); + if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); + if (LJ_HASFFI && ir->o == IR_CNEWI) { +@@ -520,13 +573,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T) + if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { + if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) + snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); +- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && ++ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && + irs+1 < irlast && (irs+1)->o == IR_HIOP) + snap_pref(J, T, map, nent, seen, (irs+1)->op2); + } + } + } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { +- lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); ++ lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, ++ "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); + J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); + } + } +@@ -576,13 +630,15 @@ void lj_snap_replay(jit_State *J, GCtrace *T) + val = snap_pref(J, T, map, nent, seen, irs->op2); + if (val == 0) { + IRIns *irc = &T->ir[irs->op2]; +- lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); ++ lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT, ++ "sunk store for parent IR %04d with bad op %d", ++ refp - REF_BIAS, irc->o); + val = snap_pref(J, T, map, nent, seen, irc->op1); + val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); +- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && ++ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && + irs+1 < irlast && (irs+1)->o == IR_HIOP) { + IRType t = IRT_I64; +- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) ++ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) + t = IRT_NUM; + lj_needsplit(J); + if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { +@@ -626,7 +682,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, + IRType1 t = ir->t; + RegSP rs = ir->prev; + if (irref_isk(ref)) { /* Restore constant slot. */ +- lj_ir_kvalue(J->L, o, ir); ++ if (ir->o == IR_KPTR) { ++ o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir); ++ } else { ++ lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL), ++ "restore of const from IR %04d with bad op %d", ++ ref - REF_BIAS, ir->o); ++ lj_ir_kvalue(J->L, o, ir); ++ } + return; + } + if (LJ_UNLIKELY(bloomtest(rfilt, ref))) +@@ -635,7 +698,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, + int32_t *sps = &ex->spill[regsp_spill(rs)]; + if (irt_isinteger(t)) { + setintV(o, *sps); +-#if !LJ_SOFTFP ++#if !LJ_SOFTFP32 + } else if (irt_isnum(t)) { + o->u64 = *(uint64_t *)sps; + #endif +@@ -645,13 +708,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, + o->u64 = *(uint64_t *)sps; + #endif + } else { +- lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. 
*/ ++ lj_assertJ(!irt_ispri(t), "PRI ref with spill slot"); + setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); + } + } else { /* Restore from register. */ + Reg r = regsp_reg(rs); + if (ra_noreg(r)) { +- lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); ++ lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, ++ "restore from IR %04d has no reg", ref - REF_BIAS); + snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); + if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); + return; +@@ -660,6 +724,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, + #if !LJ_SOFTFP + } else if (irt_isnum(t)) { + setnumV(o, ex->fpr[r-RID_MIN_FPR]); ++#elif LJ_64 /* && LJ_SOFTFP */ ++ } else if (irt_isnum(t)) { ++ o->u64 = ex->gpr[r-RID_MIN_GPR]; + #endif + #if LJ_64 && !LJ_GC64 + } else if (irt_is64(t)) { +@@ -676,7 +743,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, + + #if LJ_HASFFI + /* Restore raw data from the trace exit state. */ +-static void snap_restoredata(GCtrace *T, ExitState *ex, ++static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex, + SnapNo snapno, BloomFilter rfilt, + IRRef ref, void *dst, CTSize sz) + { +@@ -684,8 +751,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, + RegSP rs = ir->prev; + int32_t *src; + uint64_t tmp; ++ UNUSED(J); + if (irref_isk(ref)) { +- if (ir->o == IR_KNUM || ir->o == IR_KINT64) { ++ if (ir_isk64(ir)) { + src = (int32_t *)&ir[1]; + } else if (sz == 8) { + tmp = (uint64_t)(uint32_t)ir->i; +@@ -706,8 +774,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, + Reg r = regsp_reg(rs); + if (ra_noreg(r)) { + /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ +- lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); +- snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); ++ lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, ++ "restore from IR %04d has no reg", ref - REF_BIAS); ++ snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4); + *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; + return; + } +@@ -728,7 +797,8 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, + if (LJ_64 && LJ_BE && sz == 4) src++; + } + } +- lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); ++ lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8, ++ "restore from IR %04d with bad size %d", ref - REF_BIAS, sz); + if (sz == 4) *(int32_t *)dst = *src; + else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; + else if (sz == 1) *(int8_t *)dst = (int8_t)*src; +@@ -741,8 +811,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, + SnapNo snapno, BloomFilter rfilt, + IRIns *ir, TValue *o) + { +- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || +- ir->o == IR_CNEW || ir->o == IR_CNEWI); ++ lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || ++ ir->o == IR_CNEW || ir->o == IR_CNEWI, ++ "sunk allocation with bad op %d", ir->o); + #if LJ_HASFFI + if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { + CTState *cts = ctype_cts(J->L); +@@ -753,13 +824,14 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, + setcdataV(J->L, o, cd); + if (ir->o == IR_CNEWI) { + uint8_t *p = (uint8_t *)cdataptr(cd); +- lua_assert(sz == 4 || sz == 8); ++ lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz); + if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { +- snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); ++ snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2, 
++ LJ_LE ? p+4 : p, 4); + if (LJ_BE) p += 4; + sz = 4; + } +- snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); ++ snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz); + } else { + IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; + for (irs = ir+1; irs < irlast; irs++) +@@ -767,8 +839,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, + IRIns *iro = &T->ir[T->ir[irs->op1].op2]; + uint8_t *p = (uint8_t *)cd; + CTSize szs; +- lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); +- lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); ++ lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o); ++ lj_assertJ(T->ir[irs->op1].o == IR_ADD, ++ "sunk store with bad add op %d", T->ir[irs->op1].o); ++ lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64, ++ "sunk store with bad const offset op %d", iro->o); + if (irt_is64(irs->t)) szs = 8; + else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; + else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; +@@ -777,14 +852,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, + p += (int64_t)ir_k64(iro)->u64; + else + p += iro->i; +- lua_assert(p >= (uint8_t *)cdataptr(cd) && +- p + szs <= (uint8_t *)cdataptr(cd) + sz); ++ lj_assertJ(p >= (uint8_t *)cdataptr(cd) && ++ p + szs <= (uint8_t *)cdataptr(cd) + sz, ++ "sunk store with offset out of range"); + if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { +- lua_assert(szs == 4); +- snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); ++ lj_assertJ(szs == 4, "sunk store with bad size %d", szs); ++ snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2, ++ LJ_LE ? p+4 : p, 4); + if (LJ_BE) p += 4; + } +- snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); ++ snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs); + } + } + } else +@@ -799,10 +876,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, + if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { + IRIns *irk = &T->ir[irs->op1]; + TValue tmp, *val; +- lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || +- irs->o == IR_FSTORE); ++ lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE || ++ irs->o == IR_FSTORE, ++ "sunk store with bad op %d", irs->o); + if (irk->o == IR_FREF) { +- lua_assert(irk->op2 == IRFL_TAB_META); ++ lj_assertJ(irk->op2 == IRFL_TAB_META, ++ "sunk store with bad field %d", irk->op2); + snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); + /* NOBARRIER: The table is new (marked white). */ + setgcref(t->metatable, obj2gco(tabV(&tmp))); +@@ -813,7 +892,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, + val = lj_tab_set(J->L, t, &tmp); + /* NOBARRIER: The table is new (marked white). */ + snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); +- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { ++ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { + snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); + val->u32.hi = tmp.u32.lo; + } +@@ -874,7 +953,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) + continue; + } + snap_restoreval(J, T, ex, snapno, rfilt, ref, o); +- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { ++ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { + TValue tmp; + snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); + o->u32.hi = tmp.u32.lo; +@@ -884,13 +963,17 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) + setframe_ftsz(o, snap_slot(sn) != 0 ? 
(int32_t)*flinks-- : ftsz0); + L->base = o+1; + #endif ++ } else if ((sn & SNAP_KEYINDEX)) { ++ /* A IRT_INT key index slot is restored as a number. Undo this. */ ++ o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o))); ++ o->u32.hi = LJ_KEYINDEX; + } + } + } + #if LJ_FR2 + L->base += (map[nent+LJ_BE] & 0xff); + #endif +- lua_assert(map + nent == flinks); ++ lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot"); + + /* Compute current stack top. */ + switch (bc_op(*pc)) { +diff --git a/src/lj_snap.h b/src/lj_snap.h +index 2c9ae3d6..c73f75b3 100644 +--- a/src/lj_snap.h ++++ b/src/lj_snap.h +@@ -1,6 +1,6 @@ + /* + ** Snapshot handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_SNAP_H +@@ -13,7 +13,8 @@ + LJ_FUNC void lj_snap_add(jit_State *J); + LJ_FUNC void lj_snap_purge(jit_State *J); + LJ_FUNC void lj_snap_shrink(jit_State *J); +-LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); ++LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, ++ IRIns *ir); + LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); + LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); + LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); +diff --git a/src/lj_state.c b/src/lj_state.c +index 632dd07e..e87b945a 100644 +--- a/src/lj_state.c ++++ b/src/lj_state.c +@@ -1,6 +1,6 @@ + /* + ** State and stack handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -25,6 +25,7 @@ + #include "lj_trace.h" + #include "lj_dispatch.h" + #include "lj_vm.h" ++#include "lj_prng.h" + #include "lj_lex.h" + #include "lj_alloc.h" + #include "luajit.h" +@@ -60,7 +61,8 @@ static void resizestack(lua_State *L, MSize n) + MSize oldsize = L->stacksize; + MSize realsize = n + 1 + LJ_STACK_EXTRA; + GCobj *up; +- lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); ++ lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1, ++ "inconsistent stack size"); + st = (TValue *)lj_mem_realloc(L, tvref(L->stack), + (MSize)(oldsize*sizeof(TValue)), + (MSize)(realsize*sizeof(TValue))); +@@ -148,12 +150,13 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) + /* NOBARRIER: State initialization, all objects are white. */ + setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); + settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); +- lj_str_resize(L, LJ_MIN_STRTAB-1); ++ lj_str_init(L); + lj_meta_init(L); + lj_lex_init(L); + fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. 
*/ + g->gc.threshold = 4*g->gc.total; + lj_trace_initstate(g); ++ lj_err_verify(); + return NULL; + } + +@@ -162,16 +165,25 @@ static void close_state(lua_State *L) + global_State *g = G(L); + lj_func_closeuv(L, tvref(L->stack)); + lj_gc_freeall(g); +- lua_assert(gcref(g->gc.root) == obj2gco(L)); +- lua_assert(g->strnum == 0); ++ lj_assertG(gcref(g->gc.root) == obj2gco(L), ++ "main thread is not first GC object"); ++ lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num); + lj_trace_freestate(g); + #if LJ_HASFFI + lj_ctype_freestate(g); + #endif +- lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); ++ lj_str_freetab(g); + lj_buf_free(g, &g->tmpbuf); + lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); +- lua_assert(g->gc.total == sizeof(GG_State)); ++#if LJ_64 ++ if (mref(g->gc.lightudseg, uint32_t)) { ++ MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2; ++ lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t); ++ } ++#endif ++ lj_assertG(g->gc.total == sizeof(GG_State), ++ "memory leak of %lld bytes", ++ (long long)(g->gc.total - sizeof(GG_State))); + #ifndef LUAJIT_USE_SYSMALLOC + if (g->allocf == lj_alloc_f) + lj_alloc_destroy(g->allocd); +@@ -181,16 +193,33 @@ static void close_state(lua_State *L) + } + + #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) +-lua_State *lj_state_newstate(lua_Alloc f, void *ud) ++lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd) + #else +-LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) ++LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) + #endif + { +- GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); +- lua_State *L = &GG->L; +- global_State *g = &GG->g; ++ PRNGState prng; ++ GG_State *GG; ++ lua_State *L; ++ global_State *g; ++ /* We need the PRNG for the memory allocator, so initialize this first. */ ++ if (!lj_prng_seed_secure(&prng)) { ++ lj_assertX(0, "secure PRNG seeding failed"); ++ /* Can only return NULL here, so this errors with "not enough memory". */ ++ return NULL; ++ } ++#ifndef LUAJIT_USE_SYSMALLOC ++ if (allocf == LJ_ALLOCF_INTERNAL) { ++ allocd = lj_alloc_create(&prng); ++ if (!allocd) return NULL; ++ allocf = lj_alloc_f; ++ } ++#endif ++ GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); + if (GG == NULL || !checkptrGC(GG)) return NULL; + memset(GG, 0, sizeof(GG_State)); ++ L = &GG->L; ++ g = &GG->g; + L->gct = ~LJ_TTHREAD; + L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. 
*/ + L->dummy_ffid = FF_C; +@@ -198,12 +227,18 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) + g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; + g->strempty.marked = LJ_GC_WHITE0; + g->strempty.gct = ~LJ_TSTR; +- g->allocf = f; +- g->allocd = ud; ++ g->allocf = allocf; ++ g->allocd = allocd; ++ g->prng = prng; ++#ifndef LUAJIT_USE_SYSMALLOC ++ if (allocf == lj_alloc_f) { ++ lj_alloc_setprng(allocd, &g->prng); ++ } ++#endif + setgcref(g->mainthref, obj2gco(L)); + setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); + setgcref(g->uvhead.next, obj2gco(&g->uvhead)); +- g->strmask = ~(MSize)0; ++ g->str.mask = ~(MSize)0; + setnilV(registry(L)); + setnilV(&g->nilnode.val); + setnilV(&g->nilnode.key); +@@ -283,17 +318,17 @@ lua_State *lj_state_new(lua_State *L) + setmrefr(L1->glref, L->glref); + setgcrefr(L1->env, L->env); + stack_init(L1, L); /* init stack */ +- lua_assert(iswhite(obj2gco(L1))); ++ lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white"); + return L1; + } + + void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) + { +- lua_assert(L != mainthread(g)); ++ lj_assertG(L != mainthread(g), "free of main thread"); + if (obj2gco(L) == gcref(g->cur_L)) + setgcrefnull(g->cur_L); + lj_func_closeuv(L, tvref(L->stack)); +- lua_assert(gcref(L->openupval) == NULL); ++ lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); + lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); + lj_mem_freet(g, L); + } +diff --git a/src/lj_state.h b/src/lj_state.h +index 02a0eafa..273b6b12 100644 +--- a/src/lj_state.h ++++ b/src/lj_state.h +@@ -1,6 +1,6 @@ + /* + ** State and stack handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_STATE_H +@@ -32,4 +32,6 @@ LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); + LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); + #endif + ++#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4)) ++ + #endif +diff --git a/src/lj_str.c b/src/lj_str.c +index 264dedc1..c6f2ceec 100644 +--- a/src/lj_str.c ++++ b/src/lj_str.c +@@ -1,6 +1,6 @@ + /* + ** String handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_str_c +@@ -11,6 +11,7 @@ + #include "lj_err.h" + #include "lj_str.h" + #include "lj_char.h" ++#include "lj_prng.h" + + /* -- String helpers ------------------------------------------------------ */ + +@@ -37,27 +38,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) + return (int32_t)(a->len - b->len); + } + +-/* Fast string data comparison. Caveat: unaligned access to 1st string! */ +-static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) +-{ +- MSize i = 0; +- lua_assert(len > 0); +- lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); +- do { /* Note: innocuous access up to end of string + 3. */ +- uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); +- if (v) { +- i -= len; +-#if LJ_LE +- return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; +-#else +- return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; +-#endif +- } +- i += 4; +- } while (i < len); +- return 0; +-} +- + /* Find fixed string p inside string s. */ + const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) + { +@@ -90,108 +70,301 @@ int lj_str_haspattern(GCstr *s) + return 0; /* No pattern matching chars found. 
*/ + } + +-/* -- String interning ---------------------------------------------------- */ +- +-/* Resize the string hash table (grow and shrink). */ +-void lj_str_resize(lua_State *L, MSize newmask) +-{ +- global_State *g = G(L); +- GCRef *newhash; +- MSize i; +- if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) +- return; /* No resizing during GC traversal or if already too big. */ +- newhash = lj_mem_newvec(L, newmask+1, GCRef); +- memset(newhash, 0, (newmask+1)*sizeof(GCRef)); +- for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ +- GCobj *p = gcref(g->strhash[i]); +- while (p) { /* Follow each hash chain and reinsert all strings. */ +- MSize h = gco2str(p)->hash & newmask; +- GCobj *next = gcnext(p); +- /* NOBARRIER: The string table is a GC root. */ +- setgcrefr(p->gch.nextgc, newhash[h]); +- setgcref(newhash[h], p); +- p = next; +- } +- } +- lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); +- g->strmask = newmask; +- g->strhash = newhash; +-} ++/* -- String hashing ------------------------------------------------------ */ + +-/* Intern a string and return string object. */ +-GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) ++/* Keyed sparse ARX string hash. Constant time. */ ++static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) + { +- global_State *g; +- GCstr *s; +- GCobj *o; +- MSize len = (MSize)lenx; +- MSize a, b, h = len; +- if (lenx >= LJ_MAX_STR) +- lj_err_msg(L, LJ_ERR_STROV); +- g = G(L); +- /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ ++ /* Constants taken from lookup3 hash by Bob Jenkins. */ ++ StrHash a, b, h = len ^ (StrHash)seed; + if (len >= 4) { /* Caveat: unaligned access! */ + a = lj_getu32(str); + h ^= lj_getu32(str+len-4); + b = lj_getu32(str+(len>>1)-2); + h ^= b; h -= lj_rol(b, 14); + b += lj_getu32(str+(len>>2)-1); +- } else if (len > 0) { ++ } else { + a = *(const uint8_t *)str; + h ^= *(const uint8_t *)(str+len-1); + b = *(const uint8_t *)(str+(len>>1)); + h ^= b; h -= lj_rol(b, 14); +- } else { +- return &g->strempty; + } + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + h ^= b; h -= lj_rol(b, 16); +- /* Check if the string has already been interned. */ +- o = gcref(g->strhash[h & g->strmask]); +- if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { +- while (o != NULL) { +- GCstr *sx = gco2str(o); +- if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { +- /* Resurrect if dead. Can only happen with fixstring() (keywords). */ +- if (isdead(g, o)) flipwhite(o); +- return sx; /* Return existing string. */ ++ return h; ++} ++ ++#if LUAJIT_SECURITY_STRHASH ++/* Keyed dense ARX string hash. Linear time. */ ++static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, ++ const char *str, MSize len) ++{ ++ StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); ++ if (len > 12) { ++ StrHash a = (StrHash)seed; ++ const char *pe = str+len-12, *p = pe, *q = str; ++ do { ++ a += lj_getu32(p); ++ b += lj_getu32(p+4); ++ h += lj_getu32(p+8); ++ p = q; q += 12; ++ h ^= b; h -= lj_rol(b, 14); ++ a ^= h; a -= lj_rol(h, 11); ++ b ^= a; b -= lj_rol(a, 25); ++ } while (p < pe); ++ h ^= b; h -= lj_rol(b, 16); ++ a ^= h; a -= lj_rol(h, 4); ++ b ^= a; b -= lj_rol(a, 14); ++ } ++ return b; ++} ++#endif ++ ++/* -- String interning ---------------------------------------------------- */ ++ ++#define LJ_STR_MAXCOLL 32 ++ ++/* Resize the string interning hash table (grow and shrink). 
*/ ++void lj_str_resize(lua_State *L, MSize newmask) ++{ ++ global_State *g = G(L); ++ GCRef *newtab, *oldtab = g->str.tab; ++ MSize i; ++ ++ /* No resizing during GC traversal or if already too big. */ ++ if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) ++ return; ++ ++ newtab = lj_mem_newvec(L, newmask+1, GCRef); ++ memset(newtab, 0, (newmask+1)*sizeof(GCRef)); ++ ++#if LUAJIT_SECURITY_STRHASH ++ /* Check which chains need secondary hashes. */ ++ if (g->str.second) { ++ int newsecond = 0; ++ /* Compute primary chain lengths. */ ++ for (i = g->str.mask; i != ~(MSize)0; i--) { ++ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); ++ while (o) { ++ GCstr *s = gco2str(o); ++ MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) : ++ s->hash; ++ hash &= newmask; ++ setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); ++ o = gcnext(o); + } +- o = gcnext(o); + } +- } else { /* Slow path: end of string is too close to a page boundary. */ +- while (o != NULL) { +- GCstr *sx = gco2str(o); +- if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { +- /* Resurrect if dead. Can only happen with fixstring() (keywords). */ +- if (isdead(g, o)) flipwhite(o); +- return sx; /* Return existing string. */ ++ /* Mark secondary chains. */ ++ for (i = newmask; i != ~(MSize)0; i--) { ++ int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL; ++ newsecond |= secondary; ++ setgcrefp(newtab[i], secondary); ++ } ++ g->str.second = newsecond; ++ } ++#endif ++ ++ /* Reinsert all strings from the old table into the new table. */ ++ for (i = g->str.mask; i != ~(MSize)0; i--) { ++ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); ++ while (o) { ++ GCobj *next = gcnext(o); ++ GCstr *s = gco2str(o); ++ MSize hash = s->hash; ++#if LUAJIT_SECURITY_STRHASH ++ uintptr_t u; ++ if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */ ++ hash &= newmask; ++ u = gcrefu(newtab[hash]); ++ if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */ ++ s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len); ++ s->hashalg = 1; ++ hash &= newmask; ++ u = gcrefu(newtab[hash]); ++ } ++ } else { /* String hashed with secondary hash. */ ++ MSize shash = hash_sparse(g->str.seed, strdata(s), s->len); ++ u = gcrefu(newtab[shash & newmask]); ++ if (u & 1) { ++ hash &= newmask; ++ u = gcrefu(newtab[hash]); ++ } else { /* Revert string back to primary hash. */ ++ s->hash = shash; ++ s->hashalg = 0; ++ hash = (shash & newmask); ++ } ++ } ++ /* NOBARRIER: The string table is a GC root. */ ++ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); ++ setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1))); ++#else ++ hash &= newmask; ++ /* NOBARRIER: The string table is a GC root. */ ++ setgcrefr(o->gch.nextgc, newtab[hash]); ++ setgcref(newtab[hash], o); ++#endif ++ o = next; ++ } ++ } ++ ++ /* Free old table and replace with new table. */ ++ lj_str_freetab(g); ++ g->str.tab = newtab; ++ g->str.mask = newmask; ++} ++ ++#if LUAJIT_SECURITY_STRHASH ++/* Rehash and rechain all strings in a chain. */ ++static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc, ++ const char *str, MSize len) ++{ ++ global_State *g = G(L); ++ int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? 
*/ ++ GCRef *strtab = g->str.tab; ++ MSize strmask = g->str.mask; ++ GCobj *o = gcref(strtab[hashc & strmask]); ++ setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); ++ g->str.second = 1; ++ while (o) { ++ uintptr_t u; ++ GCobj *next = gcnext(o); ++ GCstr *s = gco2str(o); ++ StrHash hash; ++ if (ow) { /* Must sweep while rechaining. */ ++ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */ ++ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), ++ "sweep of undead string"); ++ makewhite(g, o); ++ } else { /* Free dead string. */ ++ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, ++ "sweep of unlive string"); ++ lj_str_free(g, s); ++ o = next; ++ continue; + } +- o = gcnext(o); + } ++ hash = s->hash; ++ if (!s->hashalg) { /* Rehash with secondary hash. */ ++ hash = hash_dense(g->str.seed, hash, strdata(s), s->len); ++ s->hash = hash; ++ s->hashalg = 1; ++ } ++ /* Rechain. */ ++ hash &= strmask; ++ u = gcrefu(strtab[hash]); ++ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); ++ setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1))); ++ o = next; + } +- /* Nope, create a new string. */ +- s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); ++ /* Try to insert the pending string again. */ ++ return lj_str_new(L, str, len); ++} ++#endif ++ ++/* Reseed String ID from PRNG after random interval < 2^bits. */ ++#if LUAJIT_SECURITY_STRID == 1 ++#define STRID_RESEED_INTERVAL 8 ++#elif LUAJIT_SECURITY_STRID == 2 ++#define STRID_RESEED_INTERVAL 4 ++#elif LUAJIT_SECURITY_STRID >= 3 ++#define STRID_RESEED_INTERVAL 0 ++#endif ++ ++/* Allocate a new string and add to string interning table. */ ++static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, ++ StrHash hash, int hashalg) ++{ ++ GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr); ++ global_State *g = G(L); ++ uintptr_t u; + newwhite(g, s); + s->gct = ~LJ_TSTR; + s->len = len; +- s->hash = h; ++ s->hash = hash; ++#ifndef STRID_RESEED_INTERVAL ++ s->sid = g->str.id++; ++#elif STRID_RESEED_INTERVAL ++ if (!g->str.idreseed--) { ++ uint64_t r = lj_prng_u64(&g->prng); ++ g->str.id = (StrID)r; ++ g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); ++ } ++ s->sid = g->str.id++; ++#else ++ s->sid = (StrID)lj_prng_u64(&g->prng); ++#endif + s->reserved = 0; ++ s->hashalg = (uint8_t)hashalg; ++ /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */ ++ *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; + memcpy(strdatawr(s), str, len); +- strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ +- /* Add it to string hash table. */ +- h &= g->strmask; +- s->nextgc = g->strhash[h]; ++ /* Add to string hash table. */ ++ hash &= g->str.mask; ++ u = gcrefu(g->str.tab[hash]); ++ setgcrefp(s->nextgc, (u & ~(uintptr_t)1)); + /* NOBARRIER: The string table is a GC root. */ +- setgcref(g->strhash[h], obj2gco(s)); +- if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ +- lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ ++ setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); ++ if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */ ++ lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */ + return s; /* Return newly interned string. */ + } + ++/* Intern a string and return string object. 
*/ ++GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) ++{ ++ global_State *g = G(L); ++ if (lenx-1 < LJ_MAX_STR-1) { ++ MSize len = (MSize)lenx; ++ StrHash hash = hash_sparse(g->str.seed, str, len); ++ MSize coll = 0; ++ int hashalg = 0; ++ /* Check if the string has already been interned. */ ++ GCobj *o = gcref(g->str.tab[hash & g->str.mask]); ++#if LUAJIT_SECURITY_STRHASH ++ if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */ ++ hashalg = 1; ++ hash = hash_dense(g->str.seed, hash, str, len); ++ o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1); ++ } ++#endif ++ while (o != NULL) { ++ GCstr *sx = gco2str(o); ++ if (sx->hash == hash && sx->len == len) { ++ if (memcmp(str, strdata(sx), len) == 0) { ++ if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ ++ return sx; /* Return existing string. */ ++ } ++ coll++; ++ } ++ coll++; ++ o = gcnext(o); ++ } ++#if LUAJIT_SECURITY_STRHASH ++ /* Rehash chain if there are too many collisions. */ ++ if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { ++ return lj_str_rehash_chain(L, hash, str, len); ++ } ++#endif ++ /* Otherwise allocate a new string. */ ++ return lj_str_alloc(L, str, len, hash, hashalg); ++ } else { ++ if (lenx) ++ lj_err_msg(L, LJ_ERR_STROV); ++ return &g->strempty; ++ } ++} ++ + void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) + { +- g->strnum--; +- lj_mem_free(g, s, sizestring(s)); ++ g->str.num--; ++ lj_mem_free(g, s, lj_str_size(s->len)); ++} ++ ++void LJ_FASTCALL lj_str_init(lua_State *L) ++{ ++ global_State *g = G(L); ++ g->str.seed = lj_prng_u64(&g->prng); ++ lj_str_resize(L, LJ_MIN_STRTAB-1); + } + +diff --git a/src/lj_str.h b/src/lj_str.h +index 85c1e405..39fa4f06 100644 +--- a/src/lj_str.h ++++ b/src/lj_str.h +@@ -1,6 +1,6 @@ + /* + ** String handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_STR_H +@@ -20,8 +20,12 @@ LJ_FUNC int lj_str_haspattern(GCstr *s); + LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); + LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); + LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); ++LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L); ++#define lj_str_freetab(g) \ ++ (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef)) + + #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) + #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) ++#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3)) + + #endif +diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c +index d7893ce9..945954aa 100644 +--- a/src/lj_strfmt.c ++++ b/src/lj_strfmt.c +@@ -1,6 +1,6 @@ + /* + ** String formatting. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #include <stdio.h> +@@ -9,11 +9,17 @@ + #define LUA_CORE + + #include "lj_obj.h" ++#include "lj_err.h" + #include "lj_buf.h" + #include "lj_str.h" ++#include "lj_meta.h" + #include "lj_state.h" + #include "lj_char.h" + #include "lj_strfmt.h" ++#if LJ_HASFFI ++#include "lj_ctype.h" ++#endif ++#include "lj_lib.h" + + /* -- Format parser ------------------------------------------------------- */ + +@@ -161,6 +167,10 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) + if (tvisstr(o)) { + *lenp = strV(o)->len; + return strVdata(o); ++ } else if (tvisbuf(o)) { ++ SBufExt *sbx = bufV(o); ++ *lenp = sbufxlen(sbx); ++ return sbx->r; + } else if (tvisint(o)) { + sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); + } else if (tvisnum(o)) { +@@ -169,7 +179,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) + return NULL; + } + *lenp = sbuflen(sb); +- return sbufB(sb); ++ return sb->b; + } + + /* -- Unformatted conversions to buffer ----------------------------------- */ +@@ -177,7 +187,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) + /* Add integer to buffer. */ + SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) + { +- setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k)); ++ sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k); + return sb; + } + +@@ -191,73 +201,86 @@ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) + + SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) + { +- setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v)); ++ sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v); + return sb; + } + + /* Add quoted string to buffer. */ +-SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) ++static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len) + { +- const char *s = strdata(str); +- MSize len = str->len; + lj_buf_putb(sb, '"'); + while (len--) { + uint32_t c = (uint32_t)(uint8_t)*s++; +- char *p = lj_buf_more(sb, 4); ++ char *w = lj_buf_more(sb, 4); + if (c == '"' || c == '\\' || c == '\n') { +- *p++ = '\\'; ++ *w++ = '\\'; + } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ + uint32_t d; +- *p++ = '\\'; ++ *w++ = '\\'; + if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { +- *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; ++ *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; + goto tens; + } else if (c >= 10) { + tens: +- d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); ++ d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d); + } + c += '0'; + } +- *p++ = (char)c; +- setsbufP(sb, p); ++ *w++ = (char)c; ++ sb->w = w; + } + lj_buf_putb(sb, '"'); + return sb; + } + ++#if LJ_HASJIT ++SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) ++{ ++ return strfmt_putquotedlen(sb, strdata(str), str->len); ++} ++#endif ++ + /* -- Formatted conversions to buffer ------------------------------------- */ + + /* Add formatted char to buffer. */ + SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) + { + MSize width = STRFMT_WIDTH(sf); +- char *p = lj_buf_more(sb, width > 1 ? width : 1); +- if ((sf & STRFMT_F_LEFT)) *p++ = (char)c; +- while (width-- > 1) *p++ = ' '; +- if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c; +- setsbufP(sb, p); ++ char *w = lj_buf_more(sb, width > 1 ? 
width : 1); ++ if ((sf & STRFMT_F_LEFT)) *w++ = (char)c; ++ while (width-- > 1) *w++ = ' '; ++ if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c; ++ sb->w = w; + return sb; + } + + /* Add formatted string to buffer. */ +-SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) ++static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len) + { +- MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf); + MSize width = STRFMT_WIDTH(sf); +- char *p = lj_buf_more(sb, width > len ? width : len); +- if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); +- while (width-- > len) *p++ = ' '; +- if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); +- setsbufP(sb, p); ++ char *w; ++ if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf); ++ w = lj_buf_more(sb, width > len ? width : len); ++ if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); ++ while (width-- > len) *w++ = ' '; ++ if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); ++ sb->w = w; + return sb; + } + ++#if LJ_HASJIT ++SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) ++{ ++ return strfmt_putfstrlen(sb, sf, strdata(str), str->len); ++} ++#endif ++ + /* Add formatted signed/unsigned integer to buffer. */ + SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) + { +- char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p; ++ char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w; + #ifdef LUA_USE_ASSERT +- char *ps; ++ char *ws; + #endif + MSize prefix = 0, len, prec, pprec, width, need; + +@@ -301,27 +324,27 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) + width = STRFMT_WIDTH(sf); + pprec = prec + (prefix >> 8); + need = width > pprec ? width : pprec; +- p = lj_buf_more(sb, need); ++ w = lj_buf_more(sb, need); + #ifdef LUA_USE_ASSERT +- ps = p; ++ ws = w; + #endif + + /* Format number with leading/trailing whitespace and zeros. */ + if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) +- while (width-- > pprec) *p++ = ' '; ++ while (width-- > pprec) *w++ = ' '; + if (prefix) { +- if ((char)prefix >= 'X') *p++ = '0'; +- *p++ = (char)prefix; ++ if ((char)prefix >= 'X') *w++ = '0'; ++ *w++ = (char)prefix; + } + if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) +- while (width-- > pprec) *p++ = '0'; +- while (prec-- > len) *p++ = '0'; +- while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */ ++ while (width-- > pprec) *w++ = '0'; ++ while (prec-- > len) *w++ = '0'; ++ while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */ + if ((sf & STRFMT_F_LEFT)) +- while (width-- > pprec) *p++ = ' '; ++ while (width-- > pprec) *w++ = ' '; + +- lua_assert(need == (MSize)(p - ps)); +- setsbufP(sb, p); ++ lj_assertX(need == (MSize)(w - ws), "miscalculated format size"); ++ sb->w = w; + return sb; + } + +@@ -346,6 +369,117 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) + return lj_strfmt_putfxint(sb, sf, (uint64_t)k); + } + ++/* Format stack arguments to buffer. 
*/ ++int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry) ++{ ++ int narg = (int)(L->top - L->base); ++ GCstr *fmt = lj_lib_checkstr(L, arg); ++ FormatState fs; ++ SFormat sf; ++ lj_strfmt_init(&fs, strdata(fmt), fmt->len); ++ while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { ++ if (sf == STRFMT_LIT) { ++ lj_buf_putmem(sb, fs.str, fs.len); ++ } else if (sf == STRFMT_ERR) { ++ lj_err_callerv(L, LJ_ERR_STRFMT, ++ strdata(lj_str_new(L, fs.str, fs.len))); ++ } else { ++ TValue *o = &L->base[arg++]; ++ if (arg > narg) ++ lj_err_arg(L, arg, LJ_ERR_NOVAL); ++ switch (STRFMT_TYPE(sf)) { ++ case STRFMT_INT: ++ if (tvisint(o)) { ++ int32_t k = intV(o); ++ if (sf == STRFMT_INT) ++ lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */ ++ else ++ lj_strfmt_putfxint(sb, sf, k); ++ break; ++ } ++#if LJ_HASFFI ++ if (tviscdata(o)) { ++ GCcdata *cd = cdataV(o); ++ if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { ++ lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); ++ break; ++ } ++ } ++#endif ++ lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); ++ break; ++ case STRFMT_UINT: ++ if (tvisint(o)) { ++ lj_strfmt_putfxint(sb, sf, intV(o)); ++ break; ++ } ++#if LJ_HASFFI ++ if (tviscdata(o)) { ++ GCcdata *cd = cdataV(o); ++ if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { ++ lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); ++ break; ++ } ++ } ++#endif ++ lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); ++ break; ++ case STRFMT_NUM: ++ lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); ++ break; ++ case STRFMT_STR: { ++ MSize len; ++ const char *s; ++ cTValue *mo; ++ if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 && ++ !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { ++ /* Call __tostring metamethod once. */ ++ copyTV(L, L->top++, mo); ++ copyTV(L, L->top++, o); ++ lua_call(L, 1, 1); ++ o = &L->base[arg-1]; /* Stack may have been reallocated. */ ++ copyTV(L, o, --L->top); /* Replace inline for retry. */ ++ if (retry < 2) { /* Global buffer may have been overwritten. */ ++ retry = 1; ++ break; ++ } ++ } ++ if (LJ_LIKELY(tvisstr(o))) { ++ len = strV(o)->len; ++ s = strVdata(o); ++#if LJ_HASBUFFER ++ } else if (tvisbuf(o)) { ++ SBufExt *sbx = bufV(o); ++ if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF); ++ len = sbufxlen(sbx); ++ s = sbx->r; ++#endif ++ } else { ++ GCstr *str = lj_strfmt_obj(L, o); ++ len = str->len; ++ s = strdata(str); ++ } ++ if ((sf & STRFMT_T_QUOTED)) ++ strfmt_putquotedlen(sb, s, len); /* No formatting. */ ++ else ++ strfmt_putfstrlen(sb, sf, s, len); ++ break; ++ } ++ case STRFMT_CHAR: ++ lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); ++ break; ++ case STRFMT_PTR: /* No formatting. */ ++ lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o)); ++ break; ++ default: ++ lj_assertL(0, "bad string format type"); ++ break; ++ } ++ } ++ } ++ return retry; ++} ++ + /* -- Conversions to strings ---------------------------------------------- */ + + /* Convert integer to string. 
*/ +@@ -393,7 +527,7 @@ GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) + p = lj_buf_wmem(p, "builtin#", 8); + p = lj_strfmt_wint(p, funcV(o)->c.ffid); + } else { +- p = lj_strfmt_wptr(p, lj_obj_ptr(o)); ++ p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o)); + } + return lj_str_new(L, buf, (size_t)(p - buf)); + } +@@ -449,7 +583,7 @@ const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) + case STRFMT_ERR: + default: + lj_buf_putb(sb, '?'); +- lua_assert(0); ++ lj_assertL(0, "bad string format near offset %d", fs.len); + break; + } + } +diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h +index 6e1d9017..cb2c7360 100644 +--- a/src/lj_strfmt.h ++++ b/src/lj_strfmt.h +@@ -1,6 +1,6 @@ + /* + ** String formatting. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_STRFMT_H +@@ -79,7 +79,8 @@ static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) + { + fs->p = (const uint8_t *)p; + fs->e = (const uint8_t *)p + len; +- lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */ ++ /* Must be NUL-terminated. May have NULs inside, too. */ ++ lj_assertX(*fs->e == 0, "format not NUL-terminated"); + } + + /* Raw conversions. */ +@@ -94,7 +95,9 @@ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); + LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); + #endif + LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); ++#if LJ_HASJIT + LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); ++#endif + + /* Formatted conversions to buffer. */ + LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k); +@@ -102,7 +105,10 @@ LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n); + LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n); + LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n); + LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c); ++#if LJ_HASJIT + LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); ++#endif ++LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry); + + /* Conversions to strings. */ + LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k); +@@ -117,7 +123,7 @@ LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); + LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, + va_list argp); + LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) +-#ifdef __GNUC__ ++#if defined(__GNUC__) || defined(__clang__) + __attribute__ ((format (printf, 2, 3))) + #endif + ; +diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c +index 9271f68a..dfd56bd4 100644 +--- a/src/lj_strfmt_num.c ++++ b/src/lj_strfmt_num.c +@@ -1,6 +1,6 @@ + /* + ** String formatting for floating-point numbers. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** Contributed by Peter Cawley. 
+ */ + +@@ -257,7 +257,7 @@ static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, + } else { + prec -= hilen - 9; + } +- lua_assert(prec < 9); ++ lj_assertX(prec < 9, "bad precision %d", prec); + lj_strfmt_wuint9(nd9, nd[ndhi]); + lj_strfmt_wuint9(ref9, *ref); + return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); +@@ -414,14 +414,14 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) + ** Rescaling was performed, but this introduced some error, and might + ** have pushed us across a rounding boundary. We check whether this + ** error affected the result by introducing even more error (2ulp in +- ** either direction), and seeing whether a roundary boundary was ++ ** either direction), and seeing whether a rounding boundary was + ** crossed. Having already converted the -2ulp case, we save off its + ** most significant digits, convert the +2ulp case, and compare them. + */ + int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) + + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); + const int8_t *m_e = four_ulp_m_e + eidx * 2; +- lua_assert(0 <= eidx && eidx < 128); ++ lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx); + nd[33] = nd[ndhi]; + nd[32] = nd[(ndhi - 1) & 0x3f]; + nd[31] = nd[(ndhi - 2) & 0x3f]; +@@ -576,7 +576,7 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) + /* Add formatted floating-point number to buffer. */ + SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) + { +- setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL)); ++ sb->w = lj_strfmt_wfnum(sb, sf, n, NULL); + return sb; + } + +diff --git a/src/lj_strscan.c b/src/lj_strscan.c +index f5f35c96..f681fbb0 100644 +--- a/src/lj_strscan.c ++++ b/src/lj_strscan.c +@@ -1,6 +1,6 @@ + /* + ** String scanning. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include <math.h> +@@ -79,7 +79,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) + /* Avoid double rounding for denormals. */ + if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { + /* NYI: all of this generates way too much code on 32 bit CPUs. */ +-#if defined(__GNUC__) && LJ_64 ++#if (defined(__GNUC__) || defined(__clang__)) && LJ_64 + int32_t b = (int32_t)(__builtin_clzll(x)^63); + #else + int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : +@@ -93,7 +93,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) + } + + /* Convert to double using a signed int64_t conversion, then rescale. */ +- lua_assert((int64_t)x >= 0); ++ lj_assertX((int64_t)x >= 0, "bad double conversion"); + n = (double)(int64_t)x; + if (neg) n = -n; + if (ex2) n = ldexp(n, ex2); +@@ -262,7 +262,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, + uint32_t hi = 0, lo = (uint32_t)(xip-xi); + int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); + +- lua_assert(lo > 0 && (ex10 & 1) == 0); ++ lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10); + + /* Handle simple overflow/underflow. */ + if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } +@@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o, + } + + /* Scan string containing a number. Returns format. Returns value in o. 
*/ +-StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) ++StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, ++ uint32_t opt) + { + int32_t neg = 0; ++ const uint8_t *pe = p + len; + + /* Remove leading space, parse sign and non-numbers. */ + if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { +@@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) + p += 3; + } + while (lj_char_isspace(*p)) p++; +- if (*p) return STRSCAN_ERROR; ++ if (*p || p < pe) return STRSCAN_ERROR; + o->u64 = tmp.u64; + return STRSCAN_NUM; + } +@@ -441,6 +443,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) + + /* Handle decimal point. */ + if (dp) { ++ if (base == 2) return STRSCAN_ERROR; + fmt = STRSCAN_NUM; + if (dig) { + ex = (int32_t)(dp-(p-1)); dp = p-1; +@@ -488,16 +491,16 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) + while (lj_char_isspace(*p)) p++; + if (*p) return STRSCAN_ERROR; + } ++ if (p < pe) return STRSCAN_ERROR; + + /* Fast path for decimal 32 bit integers. */ + if (fmt == STRSCAN_INT && base == 10 && + (dig < 10 || (dig == 10 && *sp <= '2' && x < 0x80000000u+neg))) { +- int32_t y = neg ? -(int32_t)x : (int32_t)x; + if ((opt & STRSCAN_OPT_TONUM)) { +- o->n = (double)y; ++ o->n = neg ? -(double)x : (double)x; + return STRSCAN_NUM; + } else { +- o->i = y; ++ o->i = neg ? -(int32_t)x : (int32_t)x; + return STRSCAN_INT; + } + } +@@ -524,18 +527,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) + + int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) + { +- StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, ++ StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, + STRSCAN_OPT_TONUM); +- lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); ++ lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format"); + return (fmt != STRSCAN_ERROR); + } + + #if LJ_DUALNUM + int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) + { +- StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, ++ StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, + STRSCAN_OPT_TOINT); +- lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); ++ lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT, ++ "bad scan format"); + if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); + return (fmt != STRSCAN_ERROR); + } +diff --git a/src/lj_strscan.h b/src/lj_strscan.h +index 6fb0dda0..7b7d6fed 100644 +--- a/src/lj_strscan.h ++++ b/src/lj_strscan.h +@@ -1,6 +1,6 @@ + /* + ** String scanning. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_STRSCAN_H +@@ -22,7 +22,8 @@ typedef enum { + STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, + } StrScanFmt; + +-LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); ++LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, ++ uint32_t opt); + LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); + #if LJ_DUALNUM + LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); +diff --git a/src/lj_tab.c b/src/lj_tab.c +index 47c0cfd3..4113839f 100644 +--- a/src/lj_tab.c ++++ b/src/lj_tab.c +@@ -1,6 +1,6 @@ + /* + ** Table handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -16,29 +16,10 @@ + + /* -- Object hashing ------------------------------------------------------ */ + +-/* Hash values are masked with the table hash mask and used as an index. */ +-static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) +-{ +- Node *n = noderef(t->node); +- return &n[hash & t->hmask]; +-} +- +-/* String hashes are precomputed when they are interned. */ +-#define hashstr(t, s) hashmask(t, (s)->hash) +- +-#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) +-#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) +-#if LJ_GC64 +-#define hashgcref(t, r) \ +- hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) +-#else +-#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) +-#endif +- + /* Hash an arbitrary key and return its anchor position in the hash table. */ + static Node *hashkey(const GCtab *t, cTValue *key) + { +- lua_assert(!tvisint(key)); ++ lj_assertX(!tvisint(key), "attempt to hash integer"); + if (tvisstr(key)) + return hashstr(t, strV(key)); + else if (tvisnum(key)) +@@ -57,7 +38,7 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) + { + uint32_t hsize; + Node *node; +- lua_assert(hbits != 0); ++ lj_assertL(hbits != 0, "zero hash size"); + if (hbits > LJ_MAX_HBITS) + lj_err_msg(L, LJ_ERR_TABOV); + hsize = 1u << hbits; +@@ -78,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t) + { + uint32_t i, hmask = t->hmask; + Node *node = noderef(t->node); +- lua_assert(t->hmask != 0); ++ lj_assertX(t->hmask != 0, "empty hash part"); + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + setmref(n->next, NULL); +@@ -103,7 +84,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) + /* First try to colocate the array part. */ + if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { + Node *nilnode; +- lua_assert((sizeof(GCtab) & 7) == 0); ++ lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size"); + t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); + t->gct = ~LJ_TTAB; + t->nomm = (uint8_t)~0; +@@ -185,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) + GCtab *t; + uint32_t asize, hmask; + t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); +- lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); ++ lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask, ++ "mismatched size of table and template"); + t->nomm = 0; /* Keys with metamethod names may be present. 
*/ + asize = kt->asize; + if (asize > 0) { +@@ -310,7 +292,7 @@ void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) + + static uint32_t countint(cTValue *key, uint32_t *bins) + { +- lua_assert(!tvisint(key)); ++ lj_assertX(!tvisint(key), "bad integer key"); + if (tvisnum(key)) { + lua_Number nk = numV(key); + int32_t k = lj_num2int(nk); +@@ -412,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key) + return NULL; + } + +-cTValue *lj_tab_getstr(GCtab *t, GCstr *key) ++cTValue *lj_tab_getstr(GCtab *t, const GCstr *key) + { + Node *n = hashstr(t, key); + do { +@@ -463,7 +445,8 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) + if (!tvisnil(&n->val) || t->hmask == 0) { + Node *nodebase = noderef(t->node); + Node *collide, *freenode = getfreetop(t, nodebase); +- lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); ++ lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1, ++ "bad freenode"); + do { + if (freenode == nodebase) { /* No free node found? */ + rehashtab(L, t, key); /* Rehash table. */ +@@ -471,7 +454,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) + } + } while (!tvisnil(&(--freenode)->key)); + setfreetop(t, nodebase, freenode); +- lua_assert(freenode != &G(L)->nilnode); ++ lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash"); + collide = hashkey(t, &n->key); + if (collide != n) { /* Colliding node not the main node? */ + while (noderef(collide->next) != n) /* Find predecessor. */ +@@ -486,11 +469,33 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) + /* Rechain pseudo-resurrected string keys with colliding hashes. */ + while (nextnode(freenode)) { + Node *nn = nextnode(freenode); +- if (tvisstr(&nn->key) && !tvisnil(&nn->val) && +- hashstr(t, strV(&nn->key)) == n) { ++ if (!tvisnil(&nn->val) && hashkey(t, &nn->key) == n) { + freenode->next = nn->next; + nn->next = n->next; + setmref(n->next, nn); ++ /* ++ ** Rechaining a resurrected string key creates a new dilemma: ++ ** Another string key may have originally been resurrected via ++ ** _any_ of the previous nodes as a chain anchor. Including ++ ** a node that had to be moved, which makes them unreachable. ++ ** It's not feasible to check for all previous nodes, so rechain ++ ** any string key that's currently in a non-main positions. ++ */ ++ while ((nn = nextnode(freenode))) { ++ if (!tvisnil(&nn->val)) { ++ Node *mn = hashkey(t, &nn->key); ++ if (mn != freenode && mn != nn) { ++ freenode->next = nn->next; ++ nn->next = mn->next; ++ setmref(mn->next, nn); ++ } else { ++ freenode = nn; ++ } ++ } else { ++ freenode = nn; ++ } ++ } ++ break; + } else { + freenode = nn; + } +@@ -505,7 +510,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) + if (LJ_UNLIKELY(tvismzero(&n->key))) + n->key.u64 = 0; + lj_gc_anybarriert(L, t); +- lua_assert(tvisnil(&n->val)); ++ lj_assertL(tvisnil(&n->val), "new hash slot is not empty"); + return &n->val; + } + +@@ -522,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key) + return lj_tab_newkey(L, t, &k); + } + +-TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) ++TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key) + { + TValue k; + Node *n = hashstr(t, key); +@@ -563,103 +568,126 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) + + /* -- Table traversal ----------------------------------------------------- */ + +-/* Get the traversal index of a key. 
*/ +-static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) ++/* Table traversal indexes: ++** ++** Array key index: [0 .. t->asize-1] ++** Hash key index: [t->asize .. t->asize+t->hmask] ++** Invalid key: ~0 ++*/ ++ ++/* Get the successor traversal index of a key. */ ++uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key) + { + TValue tmp; + if (tvisint(key)) { + int32_t k = intV(key); + if ((uint32_t)k < t->asize) +- return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ ++ return (uint32_t)k + 1; + setnumV(&tmp, (lua_Number)k); + key = &tmp; + } else if (tvisnum(key)) { + lua_Number nk = numV(key); + int32_t k = lj_num2int(nk); + if ((uint32_t)k < t->asize && nk == (lua_Number)k) +- return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ ++ return (uint32_t)k + 1; + } + if (!tvisnil(key)) { + Node *n = hashkey(t, key); + do { + if (lj_obj_equal(&n->key, key)) +- return t->asize + (uint32_t)(n - noderef(t->node)); +- /* Hash key indexes: [t->asize..t->asize+t->nmask] */ ++ return t->asize + (uint32_t)((n+1) - noderef(t->node)); + } while ((n = nextnode(n))); +- if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */ +- return key->u32.lo - 1; +- lj_err_msg(L, LJ_ERR_NEXTIDX); +- return 0; /* unreachable */ ++ if (key->u32.hi == LJ_KEYINDEX) /* Despecialized ITERN while running. */ ++ return key->u32.lo; ++ return ~0u; /* Invalid key to next. */ + } +- return ~0u; /* A nil key starts the traversal. */ ++ return 0; /* A nil key starts the traversal. */ + } + +-/* Advance to the next step in a table traversal. */ +-int lj_tab_next(lua_State *L, GCtab *t, TValue *key) ++/* Get the next key/value pair of a table traversal. */ ++int lj_tab_next(GCtab *t, cTValue *key, TValue *o) + { +- uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */ +- for (i++; i < t->asize; i++) /* First traverse the array keys. */ +- if (!tvisnil(arrayslot(t, i))) { +- setintV(key, i); +- copyTV(L, key+1, arrayslot(t, i)); ++ uint32_t idx = lj_tab_keyindex(t, key); /* Find successor index of key. */ ++ /* First traverse the array part. */ ++ for (; idx < t->asize; idx++) { ++ cTValue *a = arrayslot(t, idx); ++ if (LJ_LIKELY(!tvisnil(a))) { ++ setintV(o, idx); ++ o[1] = *a; + return 1; + } +- for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */ +- Node *n = &noderef(t->node)[i]; ++ } ++ idx -= t->asize; ++ /* Then traverse the hash part. */ ++ for (; idx <= t->hmask; idx++) { ++ Node *n = &noderef(t->node)[idx]; + if (!tvisnil(&n->val)) { +- copyTV(L, key, &n->key); +- copyTV(L, key+1, &n->val); ++ o[0] = n->key; ++ o[1] = n->val; + return 1; + } + } +- return 0; /* End of traversal. */ ++ return (int32_t)idx < 0 ? -1 : 0; /* Invalid key or end of traversal. */ + } + + /* -- Table length calculation -------------------------------------------- */ + +-static MSize unbound_search(GCtab *t, MSize j) ++/* Compute table length. Slow path with mixed array/hash lookups. */ ++LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi) + { + cTValue *tv; +- MSize i = j; /* i is zero or a present index */ +- j++; +- /* find `i' and `j' such that i is present and j is not */ +- while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { +- i = j; +- j *= 2; +- if (j > (MSize)(INT_MAX-2)) { /* overflow? */ +- /* table was built with bad purposes: resort to linear search */ +- i = 1; +- while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; +- return i - 1; ++ size_t lo = hi; ++ hi++; ++ /* Widening search for an upper bound. 
*/ ++ while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) { ++ lo = hi; ++ hi += hi; ++ if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */ ++ lo = 1; ++ while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++; ++ return (MSize)(lo - 1); + } + } +- /* now do a binary search between them */ +- while (j - i > 1) { +- MSize m = (i+j)/2; +- cTValue *tvb = lj_tab_getint(t, (int32_t)m); +- if (tvb && !tvisnil(tvb)) i = m; else j = m; ++ /* Binary search to find a non-nil to nil transition. */ ++ while (hi - lo > 1) { ++ size_t mid = (lo+hi) >> 1; ++ cTValue *tvb = lj_tab_getint(t, (int32_t)mid); ++ if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid; + } +- return i; ++ return (MSize)lo; + } + +-/* +-** Try to find a boundary in table `t'. A `boundary' is an integer index +-** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). +-*/ ++/* Compute table length. Fast path. */ + MSize LJ_FASTCALL lj_tab_len(GCtab *t) + { +- MSize j = (MSize)t->asize; +- if (j > 1 && tvisnil(arrayslot(t, j-1))) { +- MSize i = 1; +- while (j - i > 1) { +- MSize m = (i+j)/2; +- if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; ++ size_t hi = (size_t)t->asize; ++ if (hi) hi--; ++ /* In a growing array the last array element is very likely nil. */ ++ if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) { ++ /* Binary search to find a non-nil to nil transition in the array. */ ++ size_t lo = 0; ++ while (hi - lo > 1) { ++ size_t mid = (lo+hi) >> 1; ++ if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid; + } +- return i-1; ++ return (MSize)lo; + } +- if (j) j--; +- if (t->hmask <= 0) +- return j; +- return unbound_search(t, j); ++ /* Without a hash part, there's an implicit nil after the last element. */ ++ return t->hmask ? tab_len_slow(t, hi) : (MSize)hi; + } + ++#if LJ_HASJIT ++/* Verify hinted table length or compute it. */ ++MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint) ++{ ++ size_t asize = (size_t)t->asize; ++ cTValue *tv = arrayslot(t, hint); ++ if (LJ_LIKELY(hint+1 < asize)) { ++ if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint; ++ } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) { ++ return (MSize)hint; ++ } ++ return lj_tab_len(t); ++} ++#endif ++ +diff --git a/src/lj_tab.h b/src/lj_tab.h +index 71e34945..e0e81ff7 100644 +--- a/src/lj_tab.h ++++ b/src/lj_tab.h +@@ -1,6 +1,6 @@ + /* + ** Table handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TAB_H +@@ -31,6 +31,25 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) + return hi; + } + ++/* Hash values are masked with the table hash mask and used as an index. */ ++static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) ++{ ++ Node *n = noderef(t->node); ++ return &n[hash & t->hmask]; ++} ++ ++/* String IDs are generated when a string is interned. */ ++#define hashstr(t, s) hashmask(t, (s)->sid) ++ ++#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) ++#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) ++#if LJ_GC64 ++#define hashgcref(t, r) \ ++ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) ++#else ++#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) ++#endif ++ + #define hsize2hbits(s) ((s) ? ((s)==1 ? 
1 : 1+lj_fls((uint32_t)((s)-1))) : 0) + + LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); +@@ -50,14 +69,14 @@ LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); + /* Caveat: all getters except lj_tab_get() can return NULL! */ + + LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); +-LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); ++LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key); + LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); + + /* Caveat: all setters require a write barrier for the stored value. */ + + LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); + LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); +-LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); ++LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key); + LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); + + #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) +@@ -67,7 +86,11 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); + #define lj_tab_setint(L, t, key) \ + (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) + +-LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); ++LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key); ++LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o); + LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); ++#if LJ_HASJIT ++LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint); ++#endif + + #endif +diff --git a/src/lj_target.h b/src/lj_target.h +index 8dcae957..2d186b14 100644 +--- a/src/lj_target.h ++++ b/src/lj_target.h +@@ -1,6 +1,6 @@ + /* + ** Definitions for target CPU. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TARGET_H +@@ -152,7 +152,8 @@ typedef uint32_t RegCost; + /* Return the address of an exit stub. */ + static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) + { +- lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); ++ lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL, ++ "exit stub group for exit %d uninitialized", exitno); + return (char *)group[exitno / EXITSTUBS_PER_GROUP] + + EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); + } +diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h +index 5551b1f1..72516bc2 100644 +--- a/src/lj_target_arm.h ++++ b/src/lj_target_arm.h +@@ -1,6 +1,6 @@ + /* + ** Definitions for ARM CPUs. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TARGET_ARM_H +@@ -211,6 +211,7 @@ typedef enum ARMIns { + /* ARMv6T2 */ + ARMI_MOVW = 0xe3000000, + ARMI_MOVT = 0xe3400000, ++ ARMI_BFI = 0xe7c00010, + + /* VFP */ + ARMI_VMOV_D = 0xeeb00b40, +diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h +index 520023ae..6d39ffb8 100644 +--- a/src/lj_target_arm64.h ++++ b/src/lj_target_arm64.h +@@ -1,6 +1,6 @@ + /* + ** Definitions for ARM64 CPUs. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TARGET_ARM64_H +@@ -31,6 +31,8 @@ enum { + + /* Calling conventions. 
*/ + RID_RET = RID_X0, ++ RID_RETLO = RID_X0, ++ RID_RETHI = RID_X1, + RID_FPRET = RID_D0, + + /* These definitions must match with the *.dasc file(s): */ +@@ -132,9 +134,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) + #define A64F_IMMR(x) ((x) << 16) + #define A64F_U16(x) ((x) << 5) + #define A64F_U12(x) ((x) << 10) +-#define A64F_S26(x) (x) ++#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu)) + #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) +-#define A64F_S14(x) ((x) << 5) ++#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5) + #define A64F_S9(x) ((x) << 12) + #define A64F_BIT(x) ((x) << 19) + #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) +@@ -145,6 +147,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) + #define A64F_LSL16(x) (((x) / 16) << 21) + #define A64F_BSH(sh) ((sh) << 10) + ++/* Check for valid field range. */ ++#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) ++ + typedef enum A64Ins { + A64I_S = 0x20000000, + A64I_X = 0x80000000, +@@ -207,6 +212,8 @@ typedef enum A64Ins { + + A64I_EXTRw = 0x13800000, + A64I_EXTRx = 0x93c00000, ++ A64I_BFMw = 0x33000000, ++ A64I_BFMx = 0xb3400000, + A64I_SBFMw = 0x13000000, + A64I_SBFMx = 0x93400000, + A64I_SXTBw = 0x13001c00, +diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h +index 740687b3..5da94605 100644 +--- a/src/lj_target_mips.h ++++ b/src/lj_target_mips.h +@@ -1,6 +1,6 @@ + /* + ** Definitions for MIPS CPUs. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TARGET_MIPS_H +@@ -223,6 +223,8 @@ typedef enum MIPSIns { + MIPSI_ADDIU = 0x24000000, + MIPSI_SUB = 0x00000022, + MIPSI_SUBU = 0x00000023, ++ ++#if !LJ_TARGET_MIPSR6 + MIPSI_MUL = 0x70000002, + MIPSI_DIV = 0x0000001a, + MIPSI_DIVU = 0x0000001b, +@@ -232,6 +234,15 @@ typedef enum MIPSIns { + MIPSI_MFHI = 0x00000010, + MIPSI_MFLO = 0x00000012, + MIPSI_MULT = 0x00000018, ++#else ++ MIPSI_MUL = 0x00000098, ++ MIPSI_MUH = 0x000000d8, ++ MIPSI_DIV = 0x0000009a, ++ MIPSI_DIVU = 0x0000009b, ++ ++ MIPSI_SELEQZ = 0x00000035, ++ MIPSI_SELNEZ = 0x00000037, ++#endif + + MIPSI_SLL = 0x00000000, + MIPSI_SRL = 0x00000002, +@@ -245,6 +256,8 @@ typedef enum MIPSIns { + MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ + MIPSI_DROTRV = 0x00000056, + ++ MIPSI_INS = 0x7c000004, /* MIPSXXR2 */ ++ + MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ + MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ + MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ +@@ -253,8 +266,13 @@ typedef enum MIPSIns { + MIPSI_B = 0x10000000, + MIPSI_J = 0x08000000, + MIPSI_JAL = 0x0c000000, ++#if !LJ_TARGET_MIPSR6 + MIPSI_JALX = 0x74000000, + MIPSI_JR = 0x00000008, ++#else ++ MIPSI_JR = 0x00000009, ++ MIPSI_BALC = 0xe8000000, ++#endif + MIPSI_JALR = 0x0000f809, + + MIPSI_BEQ = 0x10000000, +@@ -282,15 +300,23 @@ typedef enum MIPSIns { + + /* MIPS64 instructions. 
*/ + MIPSI_DADD = 0x0000002c, +- MIPSI_DADDI = 0x60000000, + MIPSI_DADDU = 0x0000002d, + MIPSI_DADDIU = 0x64000000, + MIPSI_DSUB = 0x0000002e, + MIPSI_DSUBU = 0x0000002f, ++#if !LJ_TARGET_MIPSR6 + MIPSI_DDIV = 0x0000001e, + MIPSI_DDIVU = 0x0000001f, + MIPSI_DMULT = 0x0000001c, + MIPSI_DMULTU = 0x0000001d, ++#else ++ MIPSI_DDIV = 0x0000009e, ++ MIPSI_DMOD = 0x000000de, ++ MIPSI_DDIVU = 0x0000009f, ++ MIPSI_DMODU = 0x000000df, ++ MIPSI_DMUL = 0x0000009c, ++ MIPSI_DMUH = 0x000000dc, ++#endif + + MIPSI_DSLL = 0x00000038, + MIPSI_DSRL = 0x0000003a, +@@ -308,6 +334,11 @@ typedef enum MIPSIns { + MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, + MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, + MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, ++#if LJ_TARGET_MIPSR6 ++ MIPSI_LSA = 0x00000005, ++ MIPSI_DLSA = 0x00000015, ++ MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA, ++#endif + + /* Extract/insert instructions. */ + MIPSI_DEXTM = 0x7c000001, +@@ -317,18 +348,19 @@ typedef enum MIPSIns { + MIPSI_DINSU = 0x7c000006, + MIPSI_DINS = 0x7c000007, + +- MIPSI_RINT_D = 0x4620001a, +- MIPSI_RINT_S = 0x4600001a, +- MIPSI_RINT = 0x4400001a, + MIPSI_FLOOR_D = 0x4620000b, +- MIPSI_CEIL_D = 0x4620000a, +- MIPSI_ROUND_D = 0x46200008, + + /* FP instructions. */ + MIPSI_MOV_S = 0x46000006, + MIPSI_MOV_D = 0x46200006, ++#if !LJ_TARGET_MIPSR6 + MIPSI_MOVT_D = 0x46210011, + MIPSI_MOVF_D = 0x46200011, ++#else ++ MIPSI_MIN_D = 0x4620001C, ++ MIPSI_MAX_D = 0x4620001E, ++ MIPSI_SEL_D = 0x46200010, ++#endif + + MIPSI_ABS_D = 0x46200005, + MIPSI_NEG_D = 0x46200007, +@@ -363,15 +395,23 @@ typedef enum MIPSIns { + MIPSI_DMTC1 = 0x44a00000, + MIPSI_DMFC1 = 0x44200000, + ++#if !LJ_TARGET_MIPSR6 + MIPSI_BC1F = 0x45000000, + MIPSI_BC1T = 0x45010000, +- + MIPSI_C_EQ_D = 0x46200032, + MIPSI_C_OLT_S = 0x46000034, + MIPSI_C_OLT_D = 0x46200034, + MIPSI_C_ULT_D = 0x46200035, + MIPSI_C_OLE_D = 0x46200036, + MIPSI_C_ULE_D = 0x46200037, ++#else ++ MIPSI_BC1EQZ = 0x45200000, ++ MIPSI_BC1NEZ = 0x45a00000, ++ MIPSI_CMP_EQ_D = 0x46a00002, ++ MIPSI_CMP_LT_S = 0x46800004, ++ MIPSI_CMP_LT_D = 0x46a00004, ++#endif ++ + } MIPSIns; + + #endif +diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h +index c5c991a3..c83dcc5e 100644 +--- a/src/lj_target_ppc.h ++++ b/src/lj_target_ppc.h +@@ -1,6 +1,6 @@ + /* + ** Definitions for PPC CPUs. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TARGET_PPC_H +diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h +index 356f7924..d0ce196c 100644 +--- a/src/lj_target_x86.h ++++ b/src/lj_target_x86.h +@@ -1,6 +1,6 @@ + /* + ** Definitions for x86 and x64 CPUs. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TARGET_X86_H +@@ -38,10 +38,9 @@ enum { + RID_RET = RID_EAX, + #if LJ_64 + RID_FPRET = RID_XMM0, +-#else ++#endif + RID_RETLO = RID_EAX, + RID_RETHI = RID_EDX, +-#endif + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_EDX, /* Interpreter BASE. */ +@@ -165,6 +164,8 @@ typedef struct { + #define EXITSTUB_SPACING (2+2) + #define EXITSTUBS_PER_GROUP 32 + ++#define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */ ++ + /* -- x86 ModRM operand encoding ------------------------------------------ */ + + typedef enum { +@@ -228,16 +229,10 @@ typedef enum { + /* Note: little-endian byte-order! 
*/ + XI_FLDZ = 0xeed9, + XI_FLD1 = 0xe8d9, +- XI_FLDLG2 = 0xecd9, +- XI_FLDLN2 = 0xedd9, + XI_FDUP = 0xc0d9, /* Really fld st0. */ + XI_FPOP = 0xd8dd, /* Really fstp st0. */ + XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ + XI_FRNDINT = 0xfcd9, +- XI_FSIN = 0xfed9, +- XI_FCOS = 0xffd9, +- XI_FPTAN = 0xf2d9, +- XI_FPATAN = 0xf3d9, + XI_FSCALE = 0xfdd9, + XI_FYL2X = 0xf1d9, + +diff --git a/src/lj_trace.c b/src/lj_trace.c +index d85b47f8..43b86e4f 100644 +--- a/src/lj_trace.c ++++ b/src/lj_trace.c +@@ -1,6 +1,6 @@ + /* + ** Trace management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_trace_c +@@ -30,6 +30,7 @@ + #include "lj_vm.h" + #include "lj_vmevent.h" + #include "lj_target.h" ++#include "lj_prng.h" + + /* -- Error handling ------------------------------------------------------ */ + +@@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T) + name++; + else + name = "(string)"; +- lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); ++ lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, ++ "trace PC out of range"); + lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); + if (!fp) { + char fname[40]; +@@ -183,7 +185,7 @@ void lj_trace_reenableproto(GCproto *pt) + { + if ((pt->flags & PROTO_ILOOP)) { + BCIns *bc = proto_bc(pt); +- BCPos i, sizebc = pt->sizebc;; ++ BCPos i, sizebc = pt->sizebc; + pt->flags &= ~PROTO_ILOOP; + if (bc_op(bc[0]) == BC_IFUNCF) + setbc_op(&bc[0], BC_FUNCF); +@@ -205,27 +207,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T) + return; /* No need to unpatch branches in parent traces (yet). */ + switch (bc_op(*pc)) { + case BC_JFORL: +- lua_assert(traceref(J, bc_d(*pc)) == T); ++ lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace"); + *pc = T->startins; + pc += bc_j(T->startins); +- lua_assert(bc_op(*pc) == BC_JFORI); ++ lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI"); + setbc_op(pc, BC_FORI); + break; + case BC_JITERL: + case BC_JLOOP: +- lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op)); ++ lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP || ++ bc_isret(op), "bad original bytecode %d", op); + *pc = T->startins; + break; + case BC_JMP: +- lua_assert(op == BC_ITERL); ++ lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op); + pc += bc_j(*pc)+2; + if (bc_op(*pc) == BC_JITERL) { +- lua_assert(traceref(J, bc_d(*pc)) == T); ++ lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace"); + *pc = T->startins; + } + break; + case BC_JFUNCF: +- lua_assert(op == BC_FUNCF); ++ lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op); + *pc = T->startins; + break; + default: /* Already unpatched. */ +@@ -237,7 +240,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T) + static void trace_flushroot(jit_State *J, GCtrace *T) + { + GCproto *pt = &gcref(T->startpt)->pt; +- lua_assert(T->root == 0 && pt != NULL); ++ lj_assertJ(T->root == 0, "not a root trace"); ++ lj_assertJ(pt != NULL, "trace has no prototype"); + /* First unpatch any modified bytecode. */ + trace_unpatch(J, T); + /* Unlink root trace from chain anchored in prototype. */ +@@ -353,7 +357,8 @@ void lj_trace_freestate(global_State *g) + { /* This assumes all traces have already been freed. 
*/ + ptrdiff_t i; + for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) +- lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); ++ lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL, ++ "trace still allocated"); + } + #endif + lj_mcode_free(J); +@@ -368,8 +373,13 @@ void lj_trace_freestate(global_State *g) + /* Blacklist a bytecode instruction. */ + static void blacklist_pc(GCproto *pt, BCIns *pc) + { +- setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); +- pt->flags |= PROTO_ILOOP; ++ if (bc_op(*pc) == BC_ITERN) { ++ setbc_op(pc, BC_ITERC); ++ setbc_op(pc+1+bc_j(pc[1]), BC_JMP); ++ } else { ++ setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); ++ pt->flags |= PROTO_ILOOP; ++ } + } + + /* Penalize a bytecode instruction. */ +@@ -380,7 +390,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) + if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ + /* First try to bump its hotcount several times. */ + val = ((uint32_t)J->penalty[i].val << 1) + +- LJ_PRNG_BITS(J, PENALTY_RNDBITS); ++ (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1)); + if (val > PENALTY_MAX) { + blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ + return; +@@ -406,10 +416,11 @@ static void trace_start(jit_State *J) + TraceNo traceno; + + if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ +- if (J->parent == 0 && J->exitno == 0) { ++ if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) { + /* Lazy bytecode patching to disable hotcount events. */ +- lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || +- bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); ++ lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || ++ bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, ++ "bad hot bytecode %d", bc_op(*J->pc)); + setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); + J->pt->flags |= PROTO_ILOOP; + } +@@ -420,7 +431,8 @@ static void trace_start(jit_State *J) + /* Get a new trace number. */ + traceno = trace_findfree(J); + if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ +- lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); ++ lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0, ++ "recorder called from GC hook"); + lj_trace_flushall(J->L); + J->state = LJ_TRACE_IDLE; /* Silently ignored. */ + return; +@@ -489,6 +501,7 @@ static void trace_stop(jit_State *J) + J->cur.nextroot = pt->trace; + pt->trace = (TraceNo1)traceno; + break; ++ case BC_ITERN: + case BC_RET: + case BC_RET0: + case BC_RET1: +@@ -496,10 +509,14 @@ static void trace_stop(jit_State *J) + goto addroot; + case BC_JMP: + /* Patch exit branch in parent to side trace entry. */ +- lua_assert(J->parent != 0 && J->cur.root != 0); ++ lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace"); + lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); + /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ +- traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE; ++ { ++ SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; ++ snap->count = SNAPCOUNT_DONE; ++ if (J->cur.topslot > snap->topslot) snap->topslot = J->cur.topslot; ++ } + /* Add to side trace chain in root trace. 
*/ + { + GCtrace *root = traceref(J, J->cur.root); +@@ -515,7 +532,7 @@ static void trace_stop(jit_State *J) + traceref(J, J->exitno)->link = traceno; + break; + default: +- lua_assert(0); ++ lj_assertJ(0, "bad stop bytecode %d", op); + break; + } + +@@ -536,8 +553,8 @@ static void trace_stop(jit_State *J) + static int trace_downrec(jit_State *J) + { + /* Restart recording at the return instruction. */ +- lua_assert(J->pt != NULL); +- lua_assert(bc_isret(bc_op(*J->pc))); ++ lj_assertJ(J->pt != NULL, "no active prototype"); ++ lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode"); + if (bc_op(*J->pc) == BC_RETM) + return 0; /* NYI: down-recursion with RETM. */ + J->parent = 0; +@@ -644,8 +661,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) + J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ + trace_start(J); + lj_dispatch_update(J2G(J)); +- break; ++ if (J->state != LJ_TRACE_RECORD_1ST) ++ break; ++ /* fallthrough */ + ++ case LJ_TRACE_RECORD_1ST: ++ J->state = LJ_TRACE_RECORD; ++ /* fallthrough */ + case LJ_TRACE_RECORD: + trace_pendpatch(J, 0); + setvmstate(J2G(J), RECORD); +@@ -750,7 +772,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc) + isluafunc(curr_func(J->L)) && + snap->count != SNAPCOUNT_DONE && + ++snap->count >= J->param[JIT_P_hotexit]) { +- lua_assert(J->state == LJ_TRACE_IDLE); ++ lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording"); + /* J->parent is non-zero for a side trace. */ + J->state = LJ_TRACE_START; + lj_trace_ins(J, pc); +@@ -782,7 +804,9 @@ typedef struct ExitDataCP { + static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) + { + ExitDataCP *exd = (ExitDataCP *)ud; +- cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ ++ /* Always catch error here and don't call error function. */ ++ cframe_errfunc(L->cframe) = 0; ++ cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue); + exd->pc = lj_snap_restore(exd->J, exd->exptr); + UNUSED(dummy); + return NULL; +@@ -812,7 +836,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex) + } + #endif + +-#ifdef EXITSTATE_PCREG ++#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE) + /* Determine trace number from pc of exit instruction. */ + static TraceNo trace_exit_find(jit_State *J, MCode *pc) + { +@@ -822,7 +846,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc) + if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) + return traceno; + } +- lua_assert(0); ++ lj_assertJ(0, "bad exit pc"); + return 0; + } + #endif +@@ -834,29 +858,39 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) + lua_State *L = J->L; + ExitState *ex = (ExitState *)exptr; + ExitDataCP exd; +- int errcode; ++ int errcode, exitcode = J->exitcode; ++ TValue exiterr; + const BCIns *pc; + void *cf; + GCtrace *T; ++ ++ setnilV(&exiterr); ++ if (exitcode) { /* Trace unwound with error code. */ ++ J->exitcode = 0; ++ copyTV(L, &exiterr, L->top-1); ++ } ++ + #ifdef EXITSTATE_PCREG + J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); + #endif + T = traceref(J, J->parent); UNUSED(T); + #ifdef EXITSTATE_CHECKEXIT + if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. 
*/ +- lua_assert(T->root != 0); ++ lj_assertJ(T->root != 0, "stack check in root trace"); + J->exitno = T->ir[REF_BASE].op2; + J->parent = T->ir[REF_BASE].op1; + T = traceref(J, J->parent); + } + #endif +- lua_assert(T != NULL && J->exitno < T->nsnap); ++ lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number"); + exd.J = J; + exd.exptr = exptr; + errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); + if (errcode) + return -errcode; /* Return negated error code. */ + ++ if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */ ++ + if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) + lj_vmevent_send(L, TEXIT, + lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); +@@ -868,7 +902,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) + pc = exd.pc; + cf = cframe_raw(L->cframe); + setcframe_pc(cf, pc); +- if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { ++ if (exitcode) { ++ return -exitcode; ++ } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { + /* Just exit to interpreter. */ + } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { + if (!(G(L)->hookmask & HOOK_GC)) +@@ -878,13 +914,14 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) + } + if (bc_op(*pc) == BC_JLOOP) { + BCIns *retpc = &traceref(J, bc_d(*pc))->startins; +- if (bc_isret(bc_op(*retpc))) { ++ int isret = bc_isret(bc_op(*retpc)); ++ if (isret || bc_op(*retpc) == BC_ITERN) { + if (J->state == LJ_TRACE_RECORD) { + J->patchins = *pc; + J->patchpc = (BCIns *)pc; + *J->patchpc = *retpc; + J->bcskip = 1; +- } else { ++ } else if (isret) { + pc = retpc; + setcframe_pc(cf, pc); + } +@@ -906,4 +943,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) + } + } + ++#if LJ_UNWIND_JIT ++/* Given an mcode address determine trace exit address for unwinding. */ ++uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep) ++{ ++#if EXITTRACE_VMSTATE ++ TraceNo traceno = J2G(J)->vmstate; ++#else ++ TraceNo traceno = trace_exit_find(J, (MCode *)addr); ++#endif ++ GCtrace *T = traceref(J, traceno); ++ if (T ++#if EXITTRACE_VMSTATE ++ && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode ++#endif ++ ) { ++ SnapShot *snap = T->snap; ++ SnapNo lo = 0, exitno = T->nsnap; ++ uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */ ++ /* Rightmost binary search for mcode offset to determine exit number. */ ++ do { ++ SnapNo mid = (lo+exitno) >> 1; ++ if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1; ++ } while (lo < exitno); ++ exitno--; ++ *ep = exitno; ++#ifdef EXITSTUBS_PER_GROUP ++ return (uintptr_t)exitstub_addr(J, exitno); ++#else ++ return (uintptr_t)exitstub_trace_addr(T, exitno); ++#endif ++ } ++ /* Cannot correlate addr with trace/exit. This will be fatal. */ ++ lj_assertJ(0, "bad exit pc"); ++ return 0; ++} ++#endif ++ + #endif +diff --git a/src/lj_trace.h b/src/lj_trace.h +index 22cae741..e4cf2dc4 100644 +--- a/src/lj_trace.h ++++ b/src/lj_trace.h +@@ -1,6 +1,6 @@ + /* + ** Trace management. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_TRACE_H +@@ -37,6 +37,9 @@ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); + LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); + LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); + LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); ++#if LJ_UNWIND_EXT ++LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep); ++#endif + + /* Signal asynchronous abort of trace or end of trace. */ + #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) +diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h +index 1363c4f3..24dcb5c1 100644 +--- a/src/lj_traceerr.h ++++ b/src/lj_traceerr.h +@@ -1,6 +1,6 @@ + /* + ** Trace compiler error messages. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + /* This file may be included multiple times with different TREDEF macros. */ +diff --git a/src/lj_udata.c b/src/lj_udata.c +index bd0321b8..a0edd0df 100644 +--- a/src/lj_udata.c ++++ b/src/lj_udata.c +@@ -1,6 +1,6 @@ + /* + ** Userdata handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_udata_c +@@ -8,6 +8,7 @@ + + #include "lj_obj.h" + #include "lj_gc.h" ++#include "lj_err.h" + #include "lj_udata.h" + + GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) +@@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud) + lj_mem_free(g, ud, sizeudata(ud)); + } + ++#if LJ_64 ++void *lj_lightud_intern(lua_State *L, void *p) ++{ ++ global_State *g = G(L); ++ uint64_t u = (uint64_t)p; ++ uint32_t up = lightudup(u); ++ uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); ++ MSize segnum = g->gc.lightudnum; ++ if (segmap) { ++ MSize seg; ++ for (seg = 0; seg <= segnum; seg++) ++ if (segmap[seg] == up) /* Fast path. */ ++ return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); ++ segnum++; ++ /* Leave last segment unused to avoid clash with ITERN key. */ ++ if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU); ++ } ++ if (!((segnum-1) & segnum) && segnum != 1) { ++ lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t); ++ setmref(g->gc.lightudseg, segmap); ++ } ++ g->gc.lightudnum = segnum; ++ segmap[segnum] = up; ++ return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); ++} ++#endif ++ +diff --git a/src/lj_udata.h b/src/lj_udata.h +index f271a42d..78522ecc 100644 +--- a/src/lj_udata.h ++++ b/src/lj_udata.h +@@ -1,6 +1,6 @@ + /* + ** Userdata handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_UDATA_H +@@ -10,5 +10,8 @@ + + LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); + LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); ++#if LJ_64 ++LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p); ++#endif + + #endif +diff --git a/src/lj_vm.h b/src/lj_vm.h +index 1cc7eed7..81ee8e28 100644 +--- a/src/lj_vm.h ++++ b/src/lj_vm.h +@@ -1,6 +1,6 @@ + /* + ** Assembler VM interface definitions. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + */ + + #ifndef _LJ_VM_H +@@ -26,6 +26,9 @@ LJ_ASMF void lj_vm_unwind_ff_eh(void); + #if LJ_TARGET_X86ORX64 + LJ_ASMF void lj_vm_unwind_rethrow(void); + #endif ++#if LJ_TARGET_MIPS ++LJ_ASMF void lj_vm_unwind_stub(void); ++#endif + + /* Miscellaneous functions. */ + #if LJ_TARGET_X86ORX64 +@@ -48,6 +51,7 @@ LJ_ASMF void lj_vm_inshook(void); + LJ_ASMF void lj_vm_rethook(void); + LJ_ASMF void lj_vm_callhook(void); + LJ_ASMF void lj_vm_profhook(void); ++LJ_ASMF void lj_vm_IITERN(void); + + /* Trace exit handling. */ + LJ_ASMF void lj_vm_exit_handler(void); +@@ -92,14 +96,10 @@ LJ_ASMF double lj_vm_trunc(double); + LJ_ASMF double lj_vm_trunc_sf(double); + #endif + #endif +-#ifdef LUAJIT_NO_EXP2 +-LJ_ASMF double lj_vm_exp2(double); +-#else +-#define lj_vm_exp2 exp2 +-#endif + #if LJ_HASFFI + LJ_ASMF int lj_vm_errno(void); + #endif ++LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); + #endif + + /* Continuations for metamethods. */ +diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c +index 86640804..45c82096 100644 +--- a/src/lj_vmevent.c ++++ b/src/lj_vmevent.c +@@ -1,6 +1,6 @@ + /* + ** VM event handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #include <stdio.h> +diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h +index 050fb4dd..a9082e7d 100644 +--- a/src/lj_vmevent.h ++++ b/src/lj_vmevent.h +@@ -1,6 +1,6 @@ + /* + ** VM event handling. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LJ_VMEVENT_H +diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c +index b231d3e8..23ef0dd2 100644 +--- a/src/lj_vmmath.c ++++ b/src/lj_vmmath.c +@@ -1,6 +1,6 @@ + /* + ** Math helper functions for assembler VM. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define lj_vmmath_c +@@ -48,10 +48,9 @@ double lj_vm_foldarith(double x, double y, int op) + case IR_NEG - IR_ADD: return -x; break; + case IR_ABS - IR_ADD: return fabs(x); break; + #if LJ_HASJIT +- case IR_ATAN2 - IR_ADD: return atan2(x, y); break; + case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; +- case IR_MIN - IR_ADD: return x > y ? y : x; break; +- case IR_MAX - IR_ADD: return x < y ? y : x; break; ++ case IR_MIN - IR_ADD: return x < y ? x : y; break; ++ case IR_MAX - IR_ADD: return x > y ? x : y; break; + #endif + default: return x; + } +@@ -61,7 +60,8 @@ double lj_vm_foldarith(double x, double y, int op) + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) + { + uint32_t y, ua, ub; +- lua_assert(b != 0); /* This must be checked before using this function. */ ++ /* This must be checked before using this function. */ ++ lj_assertX(b != 0, "modulo with zero divisor"); + ua = a < 0 ? (uint32_t)-a : (uint32_t)a; + ub = b < 0 ? (uint32_t)-b : (uint32_t)b; + y = ua % ub; +@@ -80,19 +80,12 @@ double lj_vm_log2(double a) + } + #endif + +-#ifdef LUAJIT_NO_EXP2 +-double lj_vm_exp2(double a) +-{ +- return exp(a * 0.6931471805599453); +-} +-#endif +- + #if !LJ_TARGET_X86ORX64 + /* Unsigned x^k. 
*/ + static double lj_vm_powui(double x, uint32_t k) + { + double y; +- lua_assert(k != 0); ++ lj_assertX(k != 0, "pow with zero exponent"); + for (; (k & 1) == 0; k >>= 1) x *= x; + y = x; + if ((k >>= 1) != 0) { +@@ -129,15 +122,9 @@ double lj_vm_foldfpm(double x, int fpm) + case IRFPM_CEIL: return lj_vm_ceil(x); + case IRFPM_TRUNC: return lj_vm_trunc(x); + case IRFPM_SQRT: return sqrt(x); +- case IRFPM_EXP: return exp(x); +- case IRFPM_EXP2: return lj_vm_exp2(x); + case IRFPM_LOG: return log(x); + case IRFPM_LOG2: return lj_vm_log2(x); +- case IRFPM_LOG10: return log10(x); +- case IRFPM_SIN: return sin(x); +- case IRFPM_COS: return cos(x); +- case IRFPM_TAN: return tan(x); +- default: lua_assert(0); ++ default: lj_assertX(0, "bad fpm %d", fpm); + } + return 0; + } +diff --git a/src/ljamalg.c b/src/ljamalg.c +index f1f28623..384b3cc1 100644 +--- a/src/ljamalg.c ++++ b/src/ljamalg.c +@@ -1,16 +1,6 @@ + /* + ** LuaJIT core and libraries amalgamation. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +-*/ +- +-/* +-+--------------------------------------------------------------------------+ +-| WARNING: Compiling the amalgamation needs a lot of virtual memory | +-| (around 300 MB with GCC 4.x)! If you don't have enough physical memory | +-| your machine will start swapping to disk and the compile will not finish | +-| within a reasonable amount of time. | +-| So either compile on a bigger machine or use the non-amalgamated build. | +-+--------------------------------------------------------------------------+ ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #define ljamalg_c +@@ -28,6 +18,7 @@ + #include "lua.h" + #include "lauxlib.h" + ++#include "lj_assert.c" + #include "lj_gc.c" + #include "lj_err.c" + #include "lj_char.c" +@@ -40,6 +31,7 @@ + #include "lj_udata.c" + #include "lj_meta.c" + #include "lj_debug.c" ++#include "lj_prng.c" + #include "lj_state.c" + #include "lj_dispatch.c" + #include "lj_vmevent.c" +@@ -47,6 +39,7 @@ + #include "lj_strscan.c" + #include "lj_strfmt.c" + #include "lj_strfmt_num.c" ++#include "lj_serialize.c" + #include "lj_api.c" + #include "lj_profile.c" + #include "lj_lex.c" +@@ -93,5 +86,6 @@ + #include "lib_bit.c" + #include "lib_jit.c" + #include "lib_ffi.c" ++#include "lib_buffer.c" + #include "lib_init.c" + +diff --git a/src/lua.h b/src/lua.h +index 850bd796..6d1634d1 100644 +--- a/src/lua.h ++++ b/src/lua.h +@@ -1,7 +1,7 @@ + /* + ** $Id: lua.h,v 1.218.1.5 2008/08/06 13:30:12 roberto Exp $ + ** Lua - An Extensible Extension Language +-** Lua.org, PUC-Rio, Brazil (http://www.lua.org) ++** Lua.org, PUC-Rio, Brazil (https://www.lua.org) + ** See Copyright Notice at the end of this file + */ + +diff --git a/src/luaconf.h b/src/luaconf.h +index c2d29d94..5ba6eda9 100644 +--- a/src/luaconf.h ++++ b/src/luaconf.h +@@ -1,6 +1,6 @@ + /* + ** Configuration header. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef luaconf_h +@@ -136,7 +136,7 @@ + + #define LUALIB_API LUA_API + +-/* Support for internal assertions. */ ++/* Compatibility support for assertions. */ + #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) + #include <assert.h> + #endif +diff --git a/src/luajit.c b/src/luajit.c +index 1ca24301..6aed5337 100644 +--- a/src/luajit.c ++++ b/src/luajit.c +@@ -1,6 +1,6 @@ + /* + ** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc. +-** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + ** + ** Major portions taken verbatim or adapted from the Lua interpreter. + ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +@@ -150,6 +150,7 @@ static void print_jit_status(lua_State *L) + fputs(s, stdout); + } + putc('\n', stdout); ++ lua_settop(L, 0); /* clear stack */ + } + + static void createargtable(lua_State *L, char **argv, int argc, int argf) +@@ -421,6 +422,7 @@ static int collectargs(char **argv, int *flags) + break; + case 'e': + *flags |= FLAGS_EXEC; ++ /* fallthrough */ + case 'j': /* LuaJIT extension */ + case 'l': + *flags |= FLAGS_OPTION; +diff --git a/src/luajit.h b/src/luajit.h +index 708a5a11..2ee1f908 100644 +--- a/src/luajit.h ++++ b/src/luajit.h +@@ -1,7 +1,7 @@ + /* +-** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ ++** LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/ + ** +-** Copyright (C) 2005-2017 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. + ** + ** Permission is hereby granted, free of charge, to any person obtaining + ** a copy of this software and associated documentation files (the +@@ -22,7 +22,7 @@ + ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + ** +-** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] ++** [ MIT license: https://www.opensource.org/licenses/mit-license.php ] + */ + + #ifndef _LUAJIT_H +@@ -33,8 +33,8 @@ + #define LUAJIT_VERSION "LuaJIT 2.1.0-beta3" + #define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ + #define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3 +-#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2017 Mike Pall" +-#define LUAJIT_URL "http://luajit.org/" ++#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2021 Mike Pall" ++#define LUAJIT_URL "https://luajit.org/" + + /* Modes for luaJIT_setmode. */ + #define LUAJIT_MODE_MASK 0x00ff +diff --git a/src/lualib.h b/src/lualib.h +index bfc130a1..5c18e9ec 100644 +--- a/src/lualib.h ++++ b/src/lualib.h +@@ -1,6 +1,6 @@ + /* + ** Standard library header. +-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + */ + + #ifndef _LUALIB_H +@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L); + LUALIB_API int luaopen_bit(lua_State *L); + LUALIB_API int luaopen_jit(lua_State *L); + LUALIB_API int luaopen_ffi(lua_State *L); ++LUALIB_API int luaopen_string_buffer(lua_State *L); + + LUALIB_API void luaL_openlibs(lua_State *L); + +diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat +index 71bde759..7e1a6e04 100644 +--- a/src/msvcbuild.bat ++++ b/src/msvcbuild.bat +@@ -1,29 +1,31 @@ + @rem Script to build LuaJIT with MSVC. +-@rem Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++@rem Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + @rem +-@rem Either open a "Visual Studio .NET Command Prompt" +-@rem (Note that the Express Edition does not contain an x64 compiler) +-@rem -or- +-@rem Open a "Windows SDK Command Shell" and set the compiler environment: +-@rem setenv /release /x86 +-@rem -or- +-@rem setenv /release /x64 ++@rem Open a "Visual Studio Command Prompt" (either x86 or x64). ++@rem Then cd to this directory and run this script. Use the following ++@rem options (in order), if needed. The default is a dynamic release build. 
+ @rem +-@rem Then cd to this directory and run this script. ++@rem nogc64 disable LJ_GC64 mode for x64 ++@rem debug emit debug symbols ++@rem amalg amalgamated build ++@rem static static linkage + + @if not defined INCLUDE goto :FAIL + + @setlocal ++@rem Add more debug flags here, e.g. DEBUGCFLAGS=/DLUA_USE_APICHECK ++@set DEBUGCFLAGS= + @set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline + @set LJLINK=link /nologo + @set LJMT=mt /nologo + @set LJLIB=lib /nologo /nodefaultlib + @set DASMDIR=..\dynasm + @set DASM=%DASMDIR%\dynasm.lua +-@set DASC=vm_x86.dasc ++@set DASC=vm_x64.dasc + @set LJDLLNAME=lua51.dll + @set LJLIBNAME=lua51.lib +-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c ++@set BUILDTYPE=release ++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c + + %LJCOMPILE% host\minilua.c + @if errorlevel 1 goto :BAD +@@ -36,15 +38,16 @@ if exist minilua.exe.manifest^ + @set LJARCH=x64 + @minilua + @if errorlevel 8 goto :X64 ++@set DASC=vm_x86.dasc + @set DASMFLAGS=-D WIN -D JIT -D FFI + @set LJARCH=x86 + @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 + :X64 +-@if "%1" neq "gc64" goto :NOGC64 ++@if "%1" neq "nogc64" goto :GC64 + @shift +-@set DASC=vm_x64.dasc +-@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_GC64 +-:NOGC64 ++@set DASC=vm_x86.dasc ++@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 ++:GC64 + minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% + @if errorlevel 1 goto :BAD + +@@ -72,9 +75,11 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c + + @if "%1" neq "debug" goto :NODEBUG + @shift +-@set LJCOMPILE=%LJCOMPILE% /Zi +-@set LJLINK=%LJLINK% /debug /opt:ref /opt:icf /incremental:no ++@set BUILDTYPE=debug ++@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% ++@set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no + :NODEBUG ++@set LJLINK=%LJLINK% /%BUILDTYPE% + @if "%1"=="amalg" goto :AMALGDLL + @if "%1"=="static" goto :STATIC + %LJCOMPILE% /MD /DLUA_BUILD_AS_DLL lj_*.c lib_*.c +@@ -118,5 +123,5 @@ if exist luajit.exe.manifest^ + @echo ******************************************************* + @goto :END + :FAIL +-@echo You must open a "Visual Studio .NET Command Prompt" to run this script ++@echo You must open a "Visual Studio Command Prompt" to run this script + :END +diff --git a/src/ps4build.bat b/src/ps4build.bat +index e4a7defe..fdd09d81 100644 +--- a/src/ps4build.bat ++++ b/src/ps4build.bat +@@ -26,13 +26,13 @@ + @set LJMT=mt /nologo + @set DASMDIR=..\dynasm + @set DASM=%DASMDIR%\dynasm.lua +-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c +-@set GC64=-DLUAJIT_ENABLE_GC64 ++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c ++@set GC64= + @set DASC=vm_x64.dasc + + @if "%1" neq "gc32" goto :NOGC32 + @shift +-@set GC64= ++@set GC64=-DLUAJIT_DISABLE_GC64 + @set DASC=vm_x86.dasc + :NOGC32 + +diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat +index 3991dc65..2980e157 100644 +--- a/src/psvitabuild.bat ++++ b/src/psvitabuild.bat +@@ -14,7 +14,7 @@ + @set LJMT=mt /nologo + @set DASMDIR=..\dynasm + @set DASM=%DASMDIR%\dynasm.lua +-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 
++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c + + %LJCOMPILE% host\minilua.c + @if errorlevel 1 goto :BAD +diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc +index 780cc16e..405be30a 100644 +--- a/src/vm_arm.dasc ++++ b/src/vm_arm.dasc +@@ -1,6 +1,6 @@ + |// Low-level VM code for ARM CPUs. + |// Bytecode interpreter, fast functions and helper functions. +-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + | + |.arch arm + |.section code_op, code_sub +@@ -539,13 +539,13 @@ static void build_subroutines(BuildCtx *ctx) + | cmp CARG1, #1 + |.endif + | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. +- | ldr CARG3, LFUNC:CARG3->field_pc + | mvn INS, #~LJ_TNIL + | add CARG2, RA, RC + | str INS, [CARG2, #-4] // Ensure one valid arg. + |.if FFI + | bls >1 + |.endif ++ | ldr CARG3, LFUNC:CARG3->field_pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | // BASE = base, RA = resultptr, CARG4 = meta base + | bx CARG1 +@@ -699,6 +699,7 @@ static void build_subroutines(BuildCtx *ctx) + |->vmeta_tsetr: + | str BASE, L->base + | .IOS mov RC, BASE ++ | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. +@@ -1011,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx) + | cmp TAB:RB, #0 + | beq ->fff_restv + | ldr CARG3, TAB:RB->hmask +- | ldr CARG4, STR:RC->hash ++ | ldr CARG4, STR:RC->sid + | ldr NODE:INS, TAB:RB->node +- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask ++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask + | add CARG3, CARG3, CARG3, lsl #1 + | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 + |3: // Rearranged logic, because we expect _not_ to find the key. +@@ -1110,24 +1111,18 @@ static void build_subroutines(BuildCtx *ctx) + | checktab CARG2, ->fff_fallback + | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. + | ldr PC, [BASE, FRAME_PC] +- | mov CARG2, CARG1 +- | str BASE, L->base // Add frame since C call can throw. +- | mov CARG1, L +- | str BASE, L->top // Dummy frame length is ok. +- | add CARG3, BASE, #8 +- | str PC, SAVE_PC +- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) +- | // Returns 0 at end of traversal. ++ | add CARG2, BASE, #8 ++ | sub CARG3, BASE, #8 ++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // Returns 1=found, 0=end, -1=error. + | .IOS ldr BASE, L->base + | cmp CRET1, #0 +- | mvneq CRET2, #~LJ_TNIL +- | beq ->fff_restv // End of traversal: return nil. +- | ldrd CARG12, [BASE, #8] // Copy key and value to results. +- | ldrd CARG34, [BASE, #16] +- | mov RC, #(2+1)*8 +- | strd CARG12, [BASE, #-8] +- | strd CARG34, [BASE] +- | b ->fff_res ++ | mov RC, #(2+1)*8 ++ | bgt ->fff_res // Found key/value. ++ | bmi ->fff_fallback // Invalid key. ++ | // End of traversal: return nil. 
++ | mvn CRET2, #~LJ_TNIL ++ | b ->fff_restv + | + |.ffunc_1 pairs + | checktab CARG2, ->fff_fallback +@@ -1715,8 +1710,8 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + |.endmacro + | +- | math_minmax math_min, gt, hi +- | math_minmax math_max, lt, lo ++ | math_minmax math_min, gt, pl ++ | math_minmax math_max, lt, le + | + |//-- String library ----------------------------------------------------- + | +@@ -1809,7 +1804,7 @@ static void build_subroutines(BuildCtx *ctx) + | str BASE, L->base + | str PC, SAVE_PC + | str L, SBUF:CARG1->L +- | str CARG4, SBUF:CARG1->p ++ | str CARG4, SBUF:CARG1->w + | bl extern lj_buf_putstr_ .. name + | bl extern lj_buf_tostr + | b ->fff_resstr +@@ -2246,7 +2241,7 @@ static void build_subroutines(BuildCtx *ctx) + |9: // Rethrow error from the right C frame. + | rsb CARG2, CARG1, #0 + | mov CARG1, L +- | bl extern lj_err_throw // (lua_State *L, int errcode) ++ | bl extern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- +@@ -2429,6 +2424,64 @@ static void build_subroutines(BuildCtx *ctx) + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_RES, CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_TMP0, CARG3 ++ |.define NEXT_TMP1, CARG4 ++ |.define NEXT_LIM, r12 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, [sp] ++ |.define NEXT_RES_KEY_I, [sp, #8] ++ |.define NEXT_RES_KEY_IT, [sp, #12] ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT ++ | ldr NEXT_TMP0, NEXT_TAB->array ++ | ldr NEXT_LIM, NEXT_TAB->asize ++ | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 ++ |1: // Traverse array part. ++ | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM ++ | bhs >5 ++ | ldr NEXT_TMP1, [NEXT_TMP0, #4] ++ | str NEXT_IDX, NEXT_RES_KEY_I ++ | add NEXT_TMP0, NEXT_TMP0, #8 ++ | add NEXT_IDX, NEXT_IDX, #1 ++ | checktp NEXT_TMP1, LJ_TNIL ++ | beq <1 // Skip holes in array part. ++ | ldr NEXT_TMP0, [NEXT_TMP0, #-8] ++ | mov NEXT_RES, NEXT_RES_PTR ++ | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too. ++ | mvn NEXT_TMP0, #~LJ_TISNUM ++ | str NEXT_TMP0, NEXT_RES_KEY_IT ++ | bx lr ++ | ++ |5: // Traverse hash part. ++ | ldr NEXT_TMP0, NEXT_TAB->hmask ++ | ldr NODE:NEXT_RES, NEXT_TAB->node ++ | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 ++ | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 ++ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 ++ |6: ++ | cmp NEXT_IDX, NEXT_LIM ++ | bhi >9 ++ | ldr NEXT_TMP1, NODE:NEXT_RES->val.it ++ | checktp NEXT_TMP1, LJ_TNIL ++ | add NEXT_IDX, NEXT_IDX, #1 ++ | bxne lr ++ | // Skip holes in hash part. ++ | add NEXT_RES, NEXT_RES, #sizeof(Node) ++ | b <6 ++ | ++ |9: // End of iteration. Set the key to nil (not the value). 
++ | mvn NEXT_TMP0, #0 ++ | mov NEXT_RES, NEXT_RES_PTR ++ | str NEXT_TMP0, NEXT_RES_KEY_IT ++ | bx lr ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -3499,10 +3552,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TGETS_Z: + | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 + | ldr CARG3, TAB:CARG1->hmask +- | ldr CARG4, STR:RC->hash ++ | ldr CARG4, STR:RC->sid + | ldr NODE:INS, TAB:CARG1->node + | mov TAB:RB, TAB:CARG1 +- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask ++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask + | add CARG3, CARG3, CARG3, lsl #1 + | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 + |1: +@@ -3646,10 +3699,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TSETS_Z: + | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 + | ldr CARG3, TAB:CARG1->hmask +- | ldr CARG4, STR:RC->hash ++ | ldr CARG4, STR:RC->sid + | ldr NODE:INS, TAB:CARG1->node + | mov TAB:RB, TAB:CARG1 +- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask ++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask + | add CARG3, CARG3, CARG3, lsl #1 + | mov CARG4, #0 + | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 +@@ -3919,10 +3972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_ITERN: +- | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT +- | // NYI: add hotloop, record BC_ITERN. ++ | hotloop + |.endif ++ |->vm_IITERN: ++ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA + | ldr TAB:RB, [RA, #-16] + | ldr CARG1, [RA, #-8] // Get index from control var. +@@ -3988,7 +4042,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ins_next1 + | ins_next2 + | mov CARG1, #0 +- | mvn CARG2, #0x00018000 ++ | mvn CARG2, #~LJ_KEYINDEX + | strd CARG1, [RA, #-8] // Initialize control var. + |1: + | ins_next3 +@@ -3997,9 +4051,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | mov OP, #BC_ITERC + | strb CARG1, [PC, #-4] + | sub PC, RC, #0x20000 ++ |.if JIT ++ | ldrb CARG1, [PC] ++ | cmp CARG1, #BC_ITERN ++ | bne >6 ++ |.endif + | strb OP, [PC] // Subsumes ins_next1. + | ins_next2 + | b <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] ++ | ldrh CARG2, [PC, #2] ++ | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] ++ | // Subsumes ins_next1 and ins_next2. ++ | ldr INS, TRACE:CARG1->startins ++ | bfi INS, OP, #0, #8 ++ | str INS, [PC], #4 ++ | b <1 ++ |.endif + break; + + case BC_VARG: +diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc +index 3eaf3763..c7090ca3 100644 +--- a/src/vm_arm64.dasc ++++ b/src/vm_arm64.dasc +@@ -1,6 +1,6 @@ + |// Low-level VM code for ARM64 CPUs. + |// Bytecode interpreter, fast functions and helper functions. +-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + | + |.arch arm64 + |.section code_op, code_sub +@@ -81,47 +81,48 @@ + | + |.define CFRAME_SPACE, 208 + |//----- 16 byte aligned, <-- sp entering interpreter +-|// Unused [sp, #204] // 32 bit values +-|.define SAVE_NRES, [sp, #200] +-|.define SAVE_ERRF, [sp, #196] +-|.define SAVE_MULTRES, [sp, #192] +-|.define TMPD, [sp, #184] // 64 bit values +-|.define SAVE_L, [sp, #176] +-|.define SAVE_PC, [sp, #168] +-|.define SAVE_CFRAME, [sp, #160] +-|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves +-|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves +-|.define SAVE_LR, [sp, #8] +-|.define SAVE_FP, [sp] ++|.define SAVE_FP_LR_, 192 ++|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves ++|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves ++|// Unused [sp, #44] // 32 bit values ++|.define SAVE_NRES, [sp, #40] ++|.define SAVE_ERRF, [sp, #36] ++|.define SAVE_MULTRES, [sp, #32] ++|.define TMPD, [sp, #24] // 64 bit values ++|.define SAVE_L, [sp, #16] ++|.define SAVE_PC, [sp, #8] ++|.define SAVE_CFRAME, [sp, #0] + |//----- 16 byte aligned, <-- sp while in interpreter. + | +-|.define TMPDofs, #184 ++|.define TMPDofs, #24 + | + |.macro save_, gpr1, gpr2, fpr1, fpr2 +-| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] +-| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] ++| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] ++| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] + |.endmacro + |.macro rest_, gpr1, gpr2, fpr1, fpr2 +-| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] +-| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] ++| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] ++| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] + |.endmacro + | + |.macro saveregs +-| stp fp, lr, [sp, #-CFRAME_SPACE]! +-| add fp, sp, #0 +-| stp x19, x20, [sp, # SAVE_GPR_] ++| sub sp, sp, # CFRAME_SPACE ++| stp fp, lr, [sp, # SAVE_FP_LR_] ++| add fp, sp, # SAVE_FP_LR_ ++| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] + | save_ 21, 22, 8, 9 + | save_ 23, 24, 10, 11 + | save_ 25, 26, 12, 13 + | save_ 27, 28, 14, 15 + |.endmacro + |.macro restoreregs +-| ldp x19, x20, [sp, # SAVE_GPR_] ++| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] + | rest_ 21, 22, 8, 9 + | rest_ 23, 24, 10, 11 + | rest_ 25, 26, 12, 13 + | rest_ 27, 28, 14, 15 +-| ldp fp, lr, [sp], # CFRAME_SPACE ++| ldp fp, lr, [sp, # SAVE_FP_LR_] ++| add sp, sp, # CFRAME_SPACE + |.endmacro + | + |// Type definitions. Some of these are only used for documentation. +@@ -500,8 +501,9 @@ static void build_subroutines(BuildCtx *ctx) + | ldr GL, L->glref // Setup pointer to global state. + | mov BASE, CARG2 + | str CARG1, SAVE_PC // Any value outside of bytecode is ok. +- | str RC, SAVE_CFRAME +- | str fp, L->cframe // Add our C frame to cframe chain. ++ | add TMP0, sp, #0 ++ | str RC, SAVE_CFRAME ++ | str TMP0, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | str L, GL->cur_L +@@ -536,8 +538,9 @@ static void build_subroutines(BuildCtx *ctx) + | sub RA, RA, RB // Compute -savestack(L, L->top). + | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. + | str wzr, SAVE_ERRF // No error function. +- | str RC, SAVE_CFRAME +- | str fp, L->cframe // Add our C frame to cframe chain. ++ | add TMP0, sp, #0 ++ | str RC, SAVE_CFRAME ++ | str TMP0, L->cframe // Add our C frame to cframe chain. 
+ | str L, GL->cur_L + | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | mov BASE, CRET1 +@@ -562,12 +565,12 @@ static void build_subroutines(BuildCtx *ctx) + | cmp CARG1, #1 + |.endif + | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. +- | ldr CARG3, LFUNC:CARG3->pc + | add TMP0, RA, RC + | str TISNIL, [TMP0, #-8] // Ensure one valid arg. + |.if FFI + | bls >1 + |.endif ++ | ldr CARG3, LFUNC:CARG3->pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | // BASE = base, RA = resultptr, CARG4 = meta base + | br CARG1 +@@ -711,6 +714,7 @@ static void build_subroutines(BuildCtx *ctx) + |->vmeta_tsetr: + | sxtw CARG3, TMP1w + | str BASE, L->base ++ | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. +@@ -992,9 +996,9 @@ static void build_subroutines(BuildCtx *ctx) + | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] + | cbz TAB:RB, ->fff_restv + | ldr TMP1w, TAB:RB->hmask +- | ldr TMP2w, STR:RC->hash ++ | ldr TMP2w, STR:RC->sid + | ldr NODE:CARG3, TAB:RB->node +- | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask ++ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask + | add TMP1, TMP1, TMP1, lsl #1 + | movn CARG4, #~LJ_TSTR + | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 +@@ -1085,21 +1089,19 @@ static void build_subroutines(BuildCtx *ctx) + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next +- | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback ++ | checktp CARG1, LJ_TTAB, ->fff_fallback + | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. + | ldr PC, [BASE, FRAME_PC] +- | stp BASE, BASE, L->base // Add frame since C call can throw. +- | mov CARG1, L +- | add CARG3, BASE, #8 +- | str PC, SAVE_PC +- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) +- | // Returns 0 at end of traversal. ++ | add CARG2, BASE, #8 ++ | sub CARG3, BASE, #16 ++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // Returns 1=found, 0=end, -1=error. ++ | mov RC, #(2+1)*8 ++ | tbnz CRET1w, #31, ->fff_fallback // Invalid key. ++ | cbnz CRET1, ->fff_res // Found key/value. ++ | // End of traversal: return nil. + | str TISNIL, [BASE, #-16] +- | cbz CRET1, ->fff_res1 // End of traversal: return nil. +- | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results. +- | mov RC, #(2+1)*8 +- | stp CARG1, CARG2, [BASE, #-16] +- | b ->fff_res ++ | b ->fff_res1 + | + |.ffunc_1 pairs + | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback +@@ -1182,15 +1184,16 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc xpcall + | ldp CARG1, CARG2, [BASE] + | ldrb TMP0w, GL->hookmask +- | subs NARGS8:RC, NARGS8:RC, #16 ++ | subs NARGS8:TMP1, NARGS8:RC, #16 + | blo ->fff_fallback + | mov RB, BASE +- | add BASE, BASE, #24 + | asr ITYPE, CARG2, #47 + | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 + | cmn ITYPE, #-LJ_TFUNC + | add PC, TMP0, #24+FRAME_PCALL + | bne ->fff_fallback // Traceback must be a function. ++ | mov NARGS8:RC, NARGS8:TMP1 ++ | add BASE, BASE, #24 + | stp CARG2, CARG1, [RB] // Swap function and traceback. 
+ | cbz NARGS8:RC, ->vm_call_dispatch + | b <1 +@@ -1487,8 +1490,8 @@ static void build_subroutines(BuildCtx *ctx) + | b <6 + |.endmacro + | +- | math_minmax math_min, gt, hi +- | math_minmax math_max, lt, lo ++ | math_minmax math_min, gt, pl ++ | math_minmax math_max, lt, le + | + |//-- String library ----------------------------------------------------- + | +@@ -1587,7 +1590,7 @@ static void build_subroutines(BuildCtx *ctx) + | str BASE, L->base + | str PC, SAVE_PC + | str L, GL->tmpbuf.L +- | str TMP0, GL->tmpbuf.p ++ | str TMP0, GL->tmpbuf.w + | bl extern lj_buf_putstr_ .. name + | bl extern lj_buf_tostr + | b ->fff_resstr +@@ -2033,9 +2036,9 @@ static void build_subroutines(BuildCtx *ctx) + | b <2 + | + |9: // Rethrow error from the right C frame. +- | neg CARG2, CARG1 ++ | neg CARG2w, CARG1w + | mov CARG1, L +- | bl extern lj_err_throw // (lua_State *L, int errcode) ++ | bl extern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- +@@ -2064,6 +2067,63 @@ static void build_subroutines(BuildCtx *ctx) + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_RES, CARG1 ++ |.define NEXT_IDX, CARG2w ++ |.define NEXT_LIM, CARG3w ++ |.define NEXT_TMP0, TMP0 ++ |.define NEXT_TMP0w, TMP0w ++ |.define NEXT_TMP1, TMP1 ++ |.define NEXT_TMP1w, TMP1w ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, [sp] ++ |.define NEXT_RES_KEY, [sp, #8] ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2w. ++ |->vm_next: ++ |.if JIT ++ | ldr NEXT_LIM, NEXT_TAB->asize ++ | ldr NEXT_TMP1, NEXT_TAB->array ++ |1: // Traverse array part. ++ | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM ++ | bhs >5 // Index points after array part? ++ | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3] ++ | cmn NEXT_TMP0, #-LJ_TNIL ++ | cinc NEXT_IDX, NEXT_IDX, eq ++ | beq <1 // Skip holes in array part. ++ | str NEXT_TMP0, NEXT_RES_VAL ++ | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16 ++ | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY ++ | add NEXT_IDX, NEXT_IDX, #1 ++ | mov NEXT_RES, NEXT_RES_PTR ++ |4: ++ | ret ++ | ++ |5: // Traverse hash part. ++ | ldr NEXT_TMP1w, NEXT_TAB->hmask ++ | ldr NODE:NEXT_RES, NEXT_TAB->node ++ | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1 ++ | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w ++ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3 ++ |6: ++ | cmp NEXT_IDX, NEXT_LIM ++ | bhi >9 ++ | ldr NEXT_TMP0, NODE:NEXT_RES->val ++ | cmn NEXT_TMP0, #-LJ_TNIL ++ | add NEXT_IDX, NEXT_IDX, #1 ++ | bne <4 ++ | // Skip holes in hash part. ++ | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node) ++ | b <6 ++ | ++ |9: // End of iteration. Set the key to nil (not the value). ++ | movn NEXT_TMP0, #0 ++ | str NEXT_TMP0, NEXT_RES_KEY ++ | mov NEXT_RES, NEXT_RES_PTR ++ | ret ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -2121,16 +2181,16 @@ static void build_subroutines(BuildCtx *ctx) + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, x19 +- | stp fp, lr, [sp, #-32]! +- | add fp, sp, #0 +- | str CCSTATE, [sp, #16] ++ | stp x20, CCSTATE, [sp, #-32]! 
++ | stp fp, lr, [sp, #16] ++ | add fp, sp, #16 + | mov CCSTATE, x0 + | ldr TMP0w, CCSTATE:x0->spadj + | ldrb TMP1w, CCSTATE->nsp + | add TMP2, CCSTATE, #offsetof(CCallState, stack) + | subs TMP1, TMP1, #1 + | ldr TMP3, CCSTATE->func +- | sub sp, fp, TMP0 ++ | sub sp, sp, TMP0 + | bmi >2 + |1: // Copy stack slots + | ldr TMP0, [TMP2, TMP1, lsl #3] +@@ -2148,12 +2208,12 @@ static void build_subroutines(BuildCtx *ctx) + | ldp d6, d7, CCSTATE->fpr[6] + | ldr x8, CCSTATE->retp + | blr TMP3 +- | mov sp, fp ++ | sub sp, fp, #16 + | stp x0, x1, CCSTATE->gpr[0] + | stp d0, d1, CCSTATE->fpr[0] + | stp d2, d3, CCSTATE->fpr[2] +- | ldr CCSTATE, [sp, #16] +- | ldp fp, lr, [sp], #32 ++ | ldp fp, lr, [sp, #16] ++ | ldp x20, CCSTATE, [sp], #32 + | ret + |.endif + |// Note: vm_ffi_call must be the last function in this object file! +@@ -2779,7 +2839,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |2: // Check if string is white and ensure upvalue is closed. + | ldrb TMP0w, UPVAL:CARG1->closed + | tst TMP1w, #LJ_GC_WHITES // iswhite(str) +- | ccmp TMP0w, #0, #0, ne ++ | ccmp TMP0w, #0, #4, ne + | beq <1 + | // Crossed a write barrier. Move the barrier forward. + | mov CARG1, GL +@@ -2883,7 +2943,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_GGET: + | // RA = dst, RC = str_const (~) + case BC_GSET: +- | // RA = dst, RC = str_const (~) ++ | // RA = src, RC = str_const (~) + | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] + | mvn RC, RC + | and LFUNC:CARG1, CARG1, #LJ_GCVMASK +@@ -2941,9 +3001,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TGETS_Z: + | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst + | ldr TMP1w, TAB:CARG2->hmask +- | ldr TMP2w, STR:RC->hash ++ | ldr TMP2w, STR:RC->sid + | ldr NODE:CARG3, TAB:CARG2->node +- | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask ++ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask + | add TMP1, TMP1, TMP1, lsl #1 + | movn CARG4, #~LJ_TSTR + | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 +@@ -3067,9 +3127,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TSETS_Z: + | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src + | ldr TMP1w, TAB:CARG2->hmask +- | ldr TMP2w, STR:RC->hash ++ | ldr TMP2w, STR:RC->sid + | ldr NODE:CARG3, TAB:CARG2->node +- | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask ++ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask + | add TMP1, TMP1, TMP1, lsl #1 + | movn CARG4, #~LJ_TSTR + | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 +@@ -3320,10 +3380,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_ITERN: +- | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT +- | // NYI: add hotloop, record BC_ITERN. ++ | hotloop + |.endif ++ |->vm_IITERN: ++ | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA, lsl #3 + | ldr TAB:RB, [RA, #-16] + | ldrh TMP3w, [PC, # OFS_RD] +@@ -3382,7 +3443,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ccmp CARG4, TISNIL, #0, eq + | ccmp TMP1w, #FF_next_N, #0, eq + | bne >5 +- | mov TMP0w, #0xfffe7fff ++ | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX + | lsl TMP0, TMP0, #32 + | str TMP0, [RA, #-8] // Initialize control var. + |1: +@@ -3390,11 +3451,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ins_next + | + |5: // Despecialize bytecode if any of the checks fail. 
++ |.if JIT ++ | ldrb TMP2w, [RC, # OFS_OP] ++ |.endif + | mov TMP0, #BC_JMP + | mov TMP1, #BC_ITERC + | strb TMP0w, [PC, #-4+OFS_OP] ++ |.if JIT ++ | cmp TMP2w, #BC_ITERN ++ | bne >6 ++ |.endif + | strb TMP1w, [RC, # OFS_OP] + | b <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | ldr RA, [GL, #GL_J(trace)] ++ | ldrh TMP2w, [RC, # OFS_RD] ++ | ldr TRACE:RA, [RA, TMP2, lsl #3] ++ | ldr TMP2w, TRACE:RA->startins ++ | bfxil TMP2w, TMP1w, #0, #8 ++ | str TMP2w, [RC] ++ | b <1 ++ |.endif + break; + + case BC_VARG: +@@ -3859,7 +3937,7 @@ static int build_backend(BuildCtx *ctx) + static void emit_asm_debug(BuildCtx *ctx) + { + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); +- int i, cf = CFRAME_SIZE >> 3; ++ int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,"",%%progbits\n"); +@@ -3873,7 +3951,7 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. */ +- "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, +@@ -3883,15 +3961,14 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long .Lframe0\n" + "\t.quad .Lbegin\n" + "\t.quad %d\n" +- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ +- "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ +- "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ +- fcofs, CFRAME_SIZE, cf, cf-1); ++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ ++ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ ++ fcofs); + for (i = 19; i <= 28; i++) /* offset x19-x28 */ +- fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); ++ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); + for (i = 8; i <= 15; i++) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", +- 64+i, cf-i-4); ++ 64+i, i+(3+(28-19+1)-8)); + fprintf(ctx->fp, + "\t.align 3\n" + ".LEFDE0:\n\n"); +@@ -3903,10 +3980,10 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long .Lframe0\n" + "\t.quad lj_vm_ffi_call\n" + "\t.quad %d\n" +- "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ +- "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ +- "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ +- "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ ++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ ++ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ ++ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ ++ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.align 3\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); + #endif +@@ -3925,7 +4002,7 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.long lj_err_unwind_dwarf-.\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ +- "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + ".LECIE1:\n\n"); + fprintf(ctx->fp, +@@ -3936,15 +4013,14 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long .Lbegin-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ +- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ +- "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ +- "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ +- fcofs, CFRAME_SIZE, cf, cf-1); ++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ ++ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ ++ fcofs); + for (i = 19; i <= 28; i++) /* offset x19-x28 */ +- fprintf(ctx->fp, "\t.byte 
0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); ++ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); + for (i = 8; i <= 15; i++) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", +- 64+i, cf-i-4); ++ 64+i, i+(3+(28-19+1)-8)); + fprintf(ctx->fp, + "\t.align 3\n" + ".LEFDE2:\n\n"); +@@ -3961,7 +4037,7 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.byte 30\n" /* Return address is in lr. */ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ +- "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + ".LECIE2:\n\n"); + fprintf(ctx->fp, +@@ -3972,14 +4048,106 @@ static void emit_asm_debug(BuildCtx *ctx) + "\t.long lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ +- "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ +- "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ +- "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ +- "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ ++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ ++ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ ++ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ ++ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.align 3\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); + #endif + break; ++#if !LJ_NO_UNWIND ++ case BUILD_machasm: { ++#if LJ_HASFFI ++ int fcsize = 0; ++#endif ++ int j; ++ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); ++ fprintf(ctx->fp, ++ "EH_frame1:\n" ++ "\t.set L$set$x,LECIEX-LSCIEX\n" ++ "\t.long L$set$x\n" ++ "LSCIEX:\n" ++ "\t.long 0\n" ++ "\t.byte 0x1\n" ++ "\t.ascii "zPR\0"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -8\n" ++ "\t.byte 30\n" /* Return address is in lr. */ ++ "\t.uleb128 6\n" /* augmentation length */ ++ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ ++ "\t.long _lj_err_unwind_dwarf@GOT-.\n" ++ "\t.byte 0x1b\n" /* pcrel|sdata4 */ ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ ++ "\t.align 3\n" ++ "LECIEX:\n\n"); ++ for (j = 0; j < ctx->nsym; j++) { ++ const char *name = ctx->sym[j].name; ++ int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs; ++ if (size == 0) continue; ++#if LJ_HASFFI ++ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } ++#endif ++ fprintf(ctx->fp, ++ "LSFDE%d:\n" ++ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" ++ "\t.long L$set$%d\n" ++ "LASFDE%d:\n" ++ "\t.long LASFDE%d-EH_frame1\n" ++ "\t.long %s-.\n" ++ "\t.long %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ ++ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ ++ j, j, j, j, j, j, j, name, size); ++ for (i = 19; i <= 28; i++) /* offset x19-x28 */ ++ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); ++ for (i = 8; i <= 15; i++) /* offset d8-d15 */ ++ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", ++ 64+i, i+(3+(28-19+1)-8)); ++ fprintf(ctx->fp, ++ "\t.align 3\n" ++ "LEFDE%d:\n\n", j); ++ } ++#if LJ_HASFFI ++ if (fcsize) { ++ fprintf(ctx->fp, ++ "EH_frame2:\n" ++ "\t.set L$set$y,LECIEY-LSCIEY\n" ++ "\t.long L$set$y\n" ++ "LSCIEY:\n" ++ "\t.long 0\n" ++ "\t.byte 0x1\n" ++ "\t.ascii "zR\0"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -8\n" ++ "\t.byte 30\n" /* Return address is in lr. 
*/ ++ "\t.uleb128 1\n" /* augmentation length */ ++ "\t.byte 0x1b\n" /* pcrel|sdata4 */ ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ ++ "\t.align 3\n" ++ "LECIEY:\n\n"); ++ fprintf(ctx->fp, ++ "LSFDEY:\n" ++ "\t.set L$set$yy,LEFDEY-LASFDEY\n" ++ "\t.long L$set$yy\n" ++ "LASFDEY:\n" ++ "\t.long LASFDEY-EH_frame2\n" ++ "\t.long _lj_vm_ffi_call-.\n" ++ "\t.long %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ ++ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ ++ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ ++ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ ++ "\t.align 3\n" ++ "LEFDEY:\n\n", fcsize); ++ } ++#endif ++ fprintf(ctx->fp, ".subsections_via_symbols\n"); ++ } ++ break; ++#endif + default: + break; + } +diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc +index 1afd6118..e3cc42a5 100644 +--- a/src/vm_mips.dasc ++++ b/src/vm_mips.dasc +@@ -1,6 +1,6 @@ + |// Low-level VM code for MIPS CPUs. + |// Bytecode interpreter, fast functions and helper functions. +-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + |// + |// MIPS soft-float support contributed by Djordje Kovacevic and + |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. +@@ -190,7 +190,7 @@ + |//----------------------------------------------------------------------- + | + |// Trap for not-yet-implemented parts. +-|.macro NYI; .long 0xf0f0f0f0; .endmacro ++|.macro NYI; .long 0xec1cf0f0; .endmacro + | + |// Macros to mark delay slots. + |.macro ., a; a; .endmacro +@@ -399,7 +399,7 @@ static void build_subroutines(BuildCtx *ctx) + | xori AT, TMP0, FRAME_C + | and TMP2, PC, TMP2 + | bnez AT, ->vm_returnp +- | subu TMP2, BASE, TMP2 // TMP2 = previous base. ++ |. subu TMP2, BASE, TMP2 // TMP2 = previous base. + | + | addiu TMP1, RD, -8 + | sw TMP2, L->base +@@ -501,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx) + | b ->vm_returnc + |. li RD, 16 // 2 results: false + error message. + | ++ |->vm_unwind_stub: // Jump to exit stub from unwinder. ++ | jr CARG1 ++ |. move ra, CARG2 ++ | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -669,11 +673,11 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | lw PC, -16+HI(RB) // Restore PC from [cont|PC]. + | addu TMP2, RA, RD +- | lw TMP1, LFUNC:TMP1->pc + |.if FFI + | bnez AT, >1 + |.endif + |. sw TISNIL, -8+HI(TMP2) // Ensure one valid arg. ++ | lw TMP1, LFUNC:TMP1->pc + | // BASE = base, RA = resultptr, RB = meta base + | jr TMP0 // Jump to continuation. + |. lw KBASE, PC2PROTO(k)(TMP1) +@@ -1152,9 +1156,9 @@ static void build_subroutines(BuildCtx *ctx) + |. li SFARG1HI, LJ_TNIL + | lw TMP0, TAB:SFARG1LO->hmask + | li SFARG1HI, LJ_TTAB // Use metatable as default result. 
+- | lw TMP1, STR:RC->hash ++ | lw TMP1, STR:RC->sid + | lw NODE:TMP2, TAB:SFARG1LO->node +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 +@@ -1258,35 +1262,27 @@ static void build_subroutines(BuildCtx *ctx) + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc next +- | lw CARG1, HI(BASE) +- | lw TAB:CARG2, LO(BASE) ++ | lw CARG2, HI(BASE) ++ | lw TAB:CARG1, LO(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. addu TMP2, BASE, NARGS8:RC + | li AT, LJ_TTAB + | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. +- | bne CARG1, AT, ->fff_fallback ++ | bne CARG2, AT, ->fff_fallback + |. lw PC, FRAME_PC(BASE) + | load_got lj_tab_next +- | sw BASE, L->base // Add frame since C call can throw. +- | sw BASE, L->top // Dummy frame length is ok. +- | addiu CARG3, BASE, 8 +- | sw PC, SAVE_PC +- | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) +- |. move CARG1, L +- | // Returns 0 at end of traversal. ++ | addiu CARG2, BASE, 8 ++ | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ |. addiu CARG3, BASE, -8 ++ | // Returns 1=found, 0=end, -1=error. ++ | addiu RA, BASE, -8 ++ | bgtz CRET1, ->fff_res // Found key/value. ++ |. li RD, (2+1)*8 + | beqz CRET1, ->fff_restv // End of traversal: return nil. + |. li SFARG1HI, LJ_TNIL +- | lw TMP0, 8+HI(BASE) +- | lw TMP1, 8+LO(BASE) +- | addiu RA, BASE, -8 +- | lw TMP2, 16+HI(BASE) +- | lw TMP3, 16+LO(BASE) +- | sw TMP0, HI(RA) +- | sw TMP1, LO(RA) +- | sw TMP2, 8+HI(RA) +- | sw TMP3, 8+LO(RA) +- | b ->fff_res +- |. li RD, (2+1)*8 ++ | lw CFUNC:RB, FRAME_FUNC(BASE) ++ | b ->fff_fallback // Invalid key. ++ |. li RC, 2*8 + | + |.ffunc_1 pairs + | li AT, LJ_TTAB +@@ -1768,7 +1764,7 @@ static void build_subroutines(BuildCtx *ctx) + | b ->fff_res + |. li RD, (2+1)*8 + | +- |.macro math_minmax, name, intins, fpins ++ |.macro math_minmax, name, intins, ismax + | .ffunc_1 name + | addu TMP3, BASE, NARGS8:RC + | bne SFARG1HI, TISNUM, >5 +@@ -1822,13 +1818,21 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + |7: + |.if FPU ++ |.if ismax ++ | c.olt.d FARG1, FRET1 ++ |.else + | c.olt.d FRET1, FARG1 +- | fpins FRET1, FARG1 ++ |.endif ++ | movf.d FRET1, FARG1 ++ |.else ++ |.if ismax ++ | bal ->vm_sfcmpogt + |.else + | bal ->vm_sfcmpolt ++ |.endif + |. nop +- | intins SFARG1LO, SFARG2LO, CRET1 +- | intins SFARG1HI, SFARG2HI, CRET1 ++ | movz SFARG1LO, SFARG2LO, CRET1 ++ | movz SFARG1HI, SFARG2HI, CRET1 + |.endif + | b <6 + |. addiu TMP2, TMP2, 8 +@@ -1849,8 +1853,8 @@ static void build_subroutines(BuildCtx *ctx) + | + |.endmacro + | +- | math_minmax math_min, movz, movf.d +- | math_minmax math_max, movn, movt.d ++ | math_minmax math_min, movz, 0 ++ | math_minmax math_max, movn, 1 + | + |//-- String library ----------------------------------------------------- + | +@@ -1959,7 +1963,7 @@ static void build_subroutines(BuildCtx *ctx) + | lw TMP0, SBUF:CARG1->b + | sw L, SBUF:CARG1->L + | sw BASE, L->base +- | sw TMP0, SBUF:CARG1->p ++ | sw TMP0, SBUF:CARG1->w + | call_intern extern lj_buf_putstr_ .. name + |. sw PC, SAVE_PC + | load_got lj_buf_tostr +@@ -2512,9 +2516,9 @@ static void build_subroutines(BuildCtx *ctx) + |. addu RA, RA, BASE + | + |9: // Rethrow error from the right C frame. 
+- | load_got lj_err_throw +- | negu CARG2, CRET1 +- | call_intern lj_err_throw // (lua_State *L, int errcode) ++ | load_got lj_err_trace ++ | sub CARG2, r0, CRET1 ++ | call_intern lj_err_trace // (lua_State *L, int errcode) + |. move CARG1, L + |.endif + | +@@ -2692,6 +2696,43 @@ static void build_subroutines(BuildCtx *ctx) + |. move CRET1, CRET2 + |.endif + | ++ |->vm_sfcmpogt: ++ |.if not FPU ++ | sll AT, SFARG2HI, 1 ++ | sll TMP0, SFARG1HI, 1 ++ | or CRET1, SFARG2LO, SFARG1LO ++ | or TMP1, AT, TMP0 ++ | or TMP1, TMP1, CRET1 ++ | beqz TMP1, >8 // Both args +-0: return 0. ++ |. sltu CRET1, r0, SFARG2LO ++ | lui TMP1, 0xffe0 ++ | addu AT, AT, CRET1 ++ | sltu CRET1, r0, SFARG1LO ++ | sltu AT, TMP1, AT ++ | addu TMP0, TMP0, CRET1 ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; ++ |. and AT, SFARG2HI, SFARG1HI ++ | bltz AT, >5 // Both args negative? ++ |. nop ++ | beq SFARG2HI, SFARG1HI, >8 ++ |. sltu CRET1, SFARG2LO, SFARG1LO ++ | jr ra ++ |. slt CRET1, SFARG2HI, SFARG1HI ++ |5: // Swap conditions if both operands are negative. ++ | beq SFARG2HI, SFARG1HI, >8 ++ |. sltu CRET1, SFARG1LO, SFARG2LO ++ | jr ra ++ |. slt CRET1, SFARG1HI, SFARG2HI ++ |8: ++ | jr ra ++ |. nop ++ |9: ++ | jr ra ++ |. li CRET1, 0 ++ |.endif ++ | + |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. + |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpolex: +@@ -2734,29 +2775,96 @@ static void build_subroutines(BuildCtx *ctx) + |. li CRET1, 0 + |.endif + | +- |.macro sfmin_max, name, intins ++ |.macro sfmin_max, name, fpcall + |->vm_sf .. name: + |.if JIT and not FPU + | move TMP2, ra +- | bal ->vm_sfcmpolt ++ | bal ->fpcall + |. nop + | move TMP0, CRET1 + | move SFRETHI, SFARG1HI + | move SFRETLO, SFARG1LO + | move ra, TMP2 +- | intins SFRETHI, SFARG2HI, TMP0 ++ | movz SFRETHI, SFARG2HI, TMP0 + | jr ra +- |. intins SFRETLO, SFARG2LO, TMP0 ++ |. movz SFRETLO, SFARG2LO, TMP0 + |.endif + |.endmacro + | +- | sfmin_max min, movz +- | sfmin_max max, movn ++ | sfmin_max min, vm_sfcmpolt ++ | sfmin_max max, vm_sfcmpogt + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_ASIZE, CARG3 ++ |.define NEXT_NIL, CARG4 ++ |.define NEXT_TMP0, r12 ++ |.define NEXT_TMP1, r13 ++ |.define NEXT_TMP2, r14 ++ |.define NEXT_RES_VK, CRET1 ++ |.define NEXT_RES_IDX, CRET2 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL_I, 0(sp) ++ |.define NEXT_RES_VAL_IT, 4(sp) ++ |.define NEXT_RES_KEY_I, 8(sp) ++ |.define NEXT_RES_KEY_IT, 12(sp) ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT and ENDIAN_LE ++ | lw NEXT_ASIZE, NEXT_TAB->asize ++ | lw NEXT_TMP0, NEXT_TAB->array ++ | li NEXT_NIL, LJ_TNIL ++ |1: // Traverse array part. ++ | sltu AT, NEXT_IDX, NEXT_ASIZE ++ | sll NEXT_TMP1, NEXT_IDX, 3 ++ | beqz AT, >5 ++ |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 ++ | lw NEXT_TMP2, 4(NEXT_TMP1) ++ | sw NEXT_IDX, NEXT_RES_KEY_I ++ | beq NEXT_TMP2, NEXT_NIL, <1 ++ |. addiu NEXT_IDX, NEXT_IDX, 1 ++ | lw NEXT_TMP0, 0(NEXT_TMP1) ++ | li AT, LJ_TISNUM ++ | sw NEXT_TMP2, NEXT_RES_VAL_IT ++ | sw AT, NEXT_RES_KEY_IT ++ | sw NEXT_TMP0, NEXT_RES_VAL_I ++ | move NEXT_RES_VK, NEXT_RES_PTR ++ | jr ra ++ |. 
move NEXT_RES_IDX, NEXT_IDX ++ | ++ |5: // Traverse hash part. ++ | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE ++ | lw NODE:NEXT_RES_VK, NEXT_TAB->node ++ | sll NEXT_TMP2, NEXT_RES_IDX, 5 ++ | lw NEXT_TMP0, NEXT_TAB->hmask ++ | sll AT, NEXT_RES_IDX, 3 ++ | subu AT, NEXT_TMP2, AT ++ | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT ++ |6: ++ | sltu AT, NEXT_TMP0, NEXT_RES_IDX ++ | bnez AT, >8 ++ |. nop ++ | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it ++ | bne NEXT_TMP2, NEXT_NIL, >9 ++ |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 ++ | // Skip holes in hash part. ++ | b <6 ++ |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) ++ | ++ |8: // End of iteration. Set the key to nil (not the value). ++ | sw NEXT_NIL, NEXT_RES_KEY_IT ++ | move NEXT_RES_VK, NEXT_RES_PTR ++ |9: ++ | jr ra ++ |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -3984,9 +4092,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask +- | lw TMP1, STR:RC->hash ++ | lw TMP1, STR:RC->sid + | lw NODE:TMP2, TAB:RB->node +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 +@@ -4158,10 +4266,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask +- | lw TMP1, STR:RC->hash ++ | lw TMP1, STR:RC->sid + | lw NODE:TMP2, TAB:RB->node + | sb r0, TAB:RB->nomm // Clear metamethod cache. +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 +@@ -4317,7 +4425,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. +- | barrierback TAB:CARG2, TMP3, TMP0, <2 ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 + break; + + case BC_TSETM: +@@ -4480,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_ITERN: +- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) +- |.if JIT +- | // NYI: add hotloop, record BC_ITERN. ++ |.if JIT and ENDIAN_LE ++ | hotloop + |.endif ++ |->vm_IITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + | addu RA, BASE, RA + | lw TAB:RB, -16+LO(RA) + | lw RC, -8+LO(RA) // Get index from control var. +@@ -4562,9 +4671,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addiu CARG2, CARG2, -FF_next_N + | or CARG2, CARG2, CARG3 + | bnez CARG2, >5 +- |. lui TMP1, 0xfffe ++ |. lui TMP1, (LJ_KEYINDEX >> 16) + | addu PC, TMP0, TMP2 +- | ori TMP1, TMP1, 0x7fff ++ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) + | sw r0, -8+LO(RA) // Initialize control var. + | sw TMP1, -8+HI(RA) + |1: +@@ -4573,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | li TMP3, BC_JMP + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) +- | addu PC, TMP0, TMP2 ++ | addu PC, TMP0, TMP2 ++ |.if JIT ++ | lb TMP0, OFS_OP(PC) ++ | li AT, BC_ITERN ++ | bne TMP0, AT, >6 ++ |. lhu TMP2, OFS_RD(PC) ++ |.endif + | b <1 + |. 
sb TMP1, OFS_OP(PC) ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | lw TMP0, DISPATCH_J(trace)(DISPATCH) ++ | sll TMP2, TMP2, 2 ++ | addu TMP0, TMP0, TMP2 ++ | lw TRACE:TMP2, 0(TMP0) ++ | lw TMP0, TRACE:TMP2->startins ++ | li AT, -256 ++ | and TMP0, TMP0, AT ++ | or TMP0, TMP0, TMP1 ++ | b <1 ++ |. sw TMP0, 0(PC) ++ |.endif + break; + + case BC_VARG: +diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc +index c06270a0..4ddb2f9c 100644 +--- a/src/vm_mips64.dasc ++++ b/src/vm_mips64.dasc +@@ -1,6 +1,6 @@ + |// Low-level VM code for MIPS64 CPUs. + |// Bytecode interpreter, fast functions and helper functions. +-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + |// + |// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. + |// Sponsored by Cisco Systems, Inc. +@@ -83,6 +83,10 @@ + | + |.define FRET1, f0 + |.define FRET2, f2 ++| ++|.define FTMP0, f20 ++|.define FTMP1, f21 ++|.define FTMP2, f22 + |.endif + | + |// Stack layout while in interpreter. Must match with lj_frame.h. +@@ -189,7 +193,7 @@ + |//----------------------------------------------------------------------- + | + |// Trap for not-yet-implemented parts. +-|.macro NYI; .long 0xf0f0f0f0; .endmacro ++|.macro NYI; .long 0xec1cf0f0; .endmacro + | + |// Macros to mark delay slots. + |.macro ., a; a; .endmacro +@@ -310,10 +314,10 @@ + |.endmacro + | + |// Assumes DISPATCH is relative to GL. +-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +-#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +-#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) +-#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) ++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) ++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) ++#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) ++#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) + | + #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) + | +@@ -492,8 +496,15 @@ static void build_subroutines(BuildCtx *ctx) + |7: // Less results wanted. + | subu TMP0, RD, TMP2 + | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. ++ |.if MIPSR6 ++ | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? ++ | seleqz BASE, BASE, TMP2 ++ | b <3 ++ |. or BASE, BASE, TMP0 ++ |.else + | b <3 + |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? ++ |.endif + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: +@@ -545,6 +556,10 @@ static void build_subroutines(BuildCtx *ctx) + | b ->vm_returnc + |. li RD, 16 // 2 results: false + error message. + | ++ |->vm_unwind_stub: // Jump to exit stub from unwinder. ++ | jr CARG1 ++ |. move ra, CARG2 ++ | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -713,11 +728,11 @@ static void build_subroutines(BuildCtx *ctx) + | ld PC, -24(RB) // Restore PC from [cont|PC]. + | cleartp LFUNC:TMP1 + | daddu TMP2, RA, RD +- | ld TMP1, LFUNC:TMP1->pc + |.if FFI + | bnez AT, >1 + |.endif + |. sd TISNIL, -8(TMP2) // Ensure one valid arg. ++ | ld TMP1, LFUNC:TMP1->pc + | // BASE = base, RA = resultptr, RB = meta base + | jr TMP0 // Jump to continuation. + |. 
ld KBASE, PC2PROTO(k)(TMP1) +@@ -1121,11 +1136,16 @@ static void build_subroutines(BuildCtx *ctx) + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! ++ |// MIPSR6: no delay slot, but a forbidden slot. + |.macro ffgccheck + | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | dsubu AT, TMP0, TMP1 ++ |.if MIPSR6 ++ | bgezalc AT, ->fff_gcstep ++ |.else + | bgezal AT, ->fff_gcstep ++ |.endif + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- +@@ -1153,7 +1173,13 @@ static void build_subroutines(BuildCtx *ctx) + | sltu TMP1, TISNUM, TMP0 + | not TMP2, TMP0 + | li TMP3, ~LJ_TISNUM ++ |.if MIPSR6 ++ | selnez TMP2, TMP2, TMP1 ++ | seleqz TMP3, TMP3, TMP1 ++ | or TMP2, TMP2, TMP3 ++ |.else + | movz TMP2, TMP3, TMP1 ++ |.endif + | dsll TMP2, TMP2, 3 + | daddu TMP2, CFUNC:RB, TMP2 + | b ->fff_restv +@@ -1165,7 +1191,11 @@ static void build_subroutines(BuildCtx *ctx) + | gettp TMP2, CARG1 + | daddiu TMP0, TMP2, -LJ_TTAB + | daddiu TMP1, TMP2, -LJ_TUDATA ++ |.if MIPSR6 ++ | selnez TMP0, TMP1, TMP0 ++ |.else + | movn TMP0, TMP1, TMP0 ++ |.endif + | bnez TMP0, >6 + |. cleartp TAB:CARG1 + |1: // Field metatable must be at same offset for GCtab and GCudata! +@@ -1175,9 +1205,9 @@ static void build_subroutines(BuildCtx *ctx) + | beqz TAB:RB, ->fff_restv + |. li CARG1, LJ_TNIL + | lw TMP0, TAB:RB->hmask +- | lw TMP1, STR:RC->hash ++ | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | dsll TMP0, TMP1, 5 + | dsll TMP1, TMP1, 3 + | dsubu TMP1, TMP0, TMP1 +@@ -1204,7 +1234,13 @@ static void build_subroutines(BuildCtx *ctx) + | + |6: + | sltiu AT, TMP2, LJ_TISNUM ++ |.if MIPSR6 ++ | selnez TMP0, TISNUM, AT ++ | seleqz AT, TMP2, AT ++ | or TMP2, TMP0, AT ++ |.else + | movn TMP2, TISNUM, AT ++ |.endif + | dsll TMP2, TMP2, 3 + | dsubu TMP0, DISPATCH, TMP2 + | b <2 +@@ -1266,8 +1302,13 @@ static void build_subroutines(BuildCtx *ctx) + | or TMP0, TMP0, TMP1 + | bnez TMP0, ->fff_fallback + |. sd BASE, L->base // Add frame since C call can throw. ++ |.if MIPSR6 ++ | sd PC, SAVE_PC // Redundant (but a defined value). ++ | ffgccheck ++ |.else + | ffgccheck + |. sd PC, SAVE_PC // Redundant (but a defined value). ++ |.endif + | load_got lj_strfmt_number + | move CARG1, L + | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) +@@ -1281,27 +1322,24 @@ static void build_subroutines(BuildCtx *ctx) + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next +- | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback ++ | checktp CARG1, -LJ_TTAB, ->fff_fallback + | daddu TMP2, BASE, NARGS8:RC + | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. +- | ld PC, FRAME_PC(BASE) + | load_got lj_tab_next +- | sd BASE, L->base // Add frame since C call can throw. +- | sd BASE, L->top // Dummy frame length is ok. +- | daddiu CARG3, BASE, 8 +- | sd PC, SAVE_PC +- | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) +- |. move CARG1, L +- | // Returns 0 at end of traversal. ++ | ld PC, FRAME_PC(BASE) ++ | daddiu CARG2, BASE, 8 ++ | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ |. daddiu CARG3, BASE, -16 ++ | // Returns 1=found, 0=end, -1=error. ++ | daddiu RA, BASE, -16 ++ | bgtz CRET1, ->fff_res // Found key/value. ++ |. li RD, (2+1)*8 + | beqz CRET1, ->fff_restv // End of traversal: return nil. + |. 
move CARG1, TISNIL +- | ld TMP0, 8(BASE) +- | daddiu RA, BASE, -16 +- | ld TMP2, 16(BASE) +- | sd TMP0, 0(RA) +- | sd TMP2, 8(RA) +- | b ->fff_res +- |. li RD, (2+1)*8 ++ | ld CFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp CFUNC:RB ++ | b ->fff_fallback // Invalid key. ++ |. li RC, 2*8 + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback +@@ -1399,15 +1437,16 @@ static void build_subroutines(BuildCtx *ctx) + |. nop + | + |.ffunc xpcall +- | daddiu NARGS8:RC, NARGS8:RC, -16 ++ | daddiu NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) +- | bltz NARGS8:RC, ->fff_fallback ++ | bltz NARGS8:TMP0, ->fff_fallback + |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) + | gettp AT, CARG2 + | daddiu AT, AT, -LJ_TFUNC + | bnez AT, ->fff_fallback // Traceback must be a function. + |. move TMP2, BASE ++ | move NARGS8:RC, NARGS8:TMP0 + | daddiu BASE, BASE, 24 + | // Remember active hook before pcall. + | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT +@@ -1437,8 +1476,15 @@ static void build_subroutines(BuildCtx *ctx) + | addiu AT, TMP0, -LUA_YIELD + | daddu CARG3, CARG2, TMP0 + | daddiu TMP3, CARG2, 8 ++ |.if MIPSR6 ++ | seleqz CARG2, CARG2, AT ++ | selnez TMP3, TMP3, AT ++ | bgtz AT, ->fff_fallback // st > LUA_YIELD? ++ |. or CARG2, TMP3, CARG2 ++ |.else + | bgtz AT, ->fff_fallback // st > LUA_YIELD? + |. movn CARG2, TMP3, AT ++ |.endif + | xor TMP2, TMP2, CARG3 + | bnez TMP1, ->fff_fallback // cframe != 0? + |. or AT, TMP2, TMP0 +@@ -1750,7 +1796,7 @@ static void build_subroutines(BuildCtx *ctx) + | b ->fff_res + |. li RD, (2+1)*8 + | +- |.macro math_minmax, name, intins, fpins ++ |.macro math_minmax, name, intins, intinsc, fpins + | .ffunc_1 name + | daddu TMP3, BASE, NARGS8:RC + | checkint CARG1, >5 +@@ -1762,7 +1808,13 @@ static void build_subroutines(BuildCtx *ctx) + |. sextw CARG1, CARG1 + | lw CARG2, LO(TMP2) + |. slt AT, CARG1, CARG2 ++ |.if MIPSR6 ++ | intins TMP1, CARG2, AT ++ | intinsc CARG1, CARG1, AT ++ | or CARG1, CARG1, TMP1 ++ |.else + | intins CARG1, CARG2, AT ++ |.endif + | daddiu TMP2, TMP2, 8 + | zextw CARG1, CARG1 + | b <1 +@@ -1798,12 +1850,30 @@ static void build_subroutines(BuildCtx *ctx) + |. nop + |7: + |.if FPU ++ |.if MIPSR6 ++ | fpins FRET1, FRET1, FARG1 ++ |.else ++ |.if fpins // ismax ++ | c.olt.d FARG1, FRET1 ++ |.else + | c.olt.d FRET1, FARG1 +- | fpins FRET1, FARG1 ++ |.endif ++ | movf.d FRET1, FARG1 ++ |.endif ++ |.else ++ |.if fpins // ismax ++ | bal ->vm_sfcmpogt + |.else + | bal ->vm_sfcmpolt ++ |.endif + |. nop +- | intins CARG1, CARG2, CRET1 ++ |.if MIPSR6 ++ | seleqz AT, CARG2, CRET1 ++ | selnez CARG1, CARG1, CRET1 ++ | or CARG1, CARG1, AT ++ |.else ++ | movz CARG1, CARG2, CRET1 ++ |.endif + |.endif + | b <6 + |. daddiu TMP2, TMP2, 8 +@@ -1824,8 +1894,13 @@ static void build_subroutines(BuildCtx *ctx) + | + |.endmacro + | +- | math_minmax math_min, movz, movf.d +- | math_minmax math_max, movn, movt.d ++ |.if MIPSR6 ++ | math_minmax math_min, seleqz, selnez, min.d ++ | math_minmax math_max, selnez, seleqz, max.d ++ |.else ++ | math_minmax math_min, movz, _, 0 ++ | math_minmax math_max, movn, _, 1 ++ |.endif + | + |//-- String library ----------------------------------------------------- + | +@@ -1850,7 +1925,9 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck ++ |.if not MIPSR6 + |. nop ++ |.endif + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori AT, NARGS8:RC, 8 // Exactly 1 argument. 
+@@ -1880,7 +1957,9 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc string_sub + | ffgccheck ++ |.if not MIPSR6 + |. nop ++ |.endif + | addiu AT, NARGS8:RC, -16 + | ld TMP0, 0(BASE) + | bltz AT, ->fff_fallback +@@ -1903,8 +1982,30 @@ static void build_subroutines(BuildCtx *ctx) + | addiu TMP0, CARG2, 1 + | addu TMP1, CARG4, TMP0 + | slt TMP3, CARG3, r0 ++ |.if MIPSR6 ++ | seleqz CARG4, CARG4, AT ++ | selnez TMP1, TMP1, AT ++ | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 ++ |.else + | movn CARG4, TMP1, AT // if (end < 0) end += len+1 ++ |.endif + | addu TMP1, CARG3, TMP0 ++ |.if MIPSR6 ++ | selnez TMP1, TMP1, TMP3 ++ | seleqz CARG3, CARG3, TMP3 ++ | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 ++ | li TMP2, 1 ++ | slt AT, CARG4, r0 ++ | slt TMP3, r0, CARG3 ++ | seleqz CARG4, CARG4, AT // if (end < 0) end = 0 ++ | selnez CARG3, CARG3, TMP3 ++ | seleqz TMP2, TMP2, TMP3 ++ | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 ++ | slt AT, CARG2, CARG4 ++ | seleqz CARG4, CARG4, AT ++ | selnez CARG2, CARG2, AT ++ | or CARG4, CARG2, CARG4 // if (end > len) end = len ++ |.else + | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 + | li TMP2, 1 + | slt AT, CARG4, r0 +@@ -1913,6 +2014,7 @@ static void build_subroutines(BuildCtx *ctx) + | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 + | slt AT, CARG2, CARG4 + | movn CARG4, CARG2, AT // if (end > len) end = len ++ |.endif + | daddu CARG2, STR:CARG1, CARG3 + | subu CARG3, CARG4, CARG3 // len = end - start + | daddiu CARG2, CARG2, sizeof(GCstr)-1 +@@ -1936,7 +2038,7 @@ static void build_subroutines(BuildCtx *ctx) + | ld TMP0, SBUF:CARG1->b + | sd L, SBUF:CARG1->L + | sd BASE, L->base +- | sd TMP0, SBUF:CARG1->p ++ | sd TMP0, SBUF:CARG1->w + | call_intern extern lj_buf_putstr_ .. name + |. sd PC, SAVE_PC + | load_got lj_buf_tostr +@@ -1974,12 +2076,63 @@ static void build_subroutines(BuildCtx *ctx) + | slt AT, CARG1, r0 + | dsrlv CRET1, TMP0, CARG3 + | dsubu TMP0, r0, CRET1 ++ |.if MIPSR6 ++ | selnez TMP0, TMP0, AT ++ | seleqz CRET1, CRET1, AT ++ | or CRET1, CRET1, TMP0 ++ |.else + | movn CRET1, TMP0, AT ++ |.endif + | jr ra + |. zextw CRET1, CRET1 + |1: + | jr ra + |. move CRET1, r0 ++ | ++ |// FP number to int conversion with a check for soft-float. ++ |// Modifies CARG1, CRET1, CRET2, TMP0, AT. ++ |->vm_tointg: ++ |.if JIT ++ | dsll CRET2, CARG1, 1 ++ | beqz CRET2, >2 ++ |. li TMP0, 1076 ++ | dsrl AT, CRET2, 53 ++ | dsubu TMP0, TMP0, AT ++ | sltiu AT, TMP0, 54 ++ | beqz AT, >1 ++ |. dextm CRET2, CRET2, 0, 20 ++ | dinsu CRET2, AT, 21, 21 ++ | slt AT, CARG1, r0 ++ | dsrlv CRET1, CRET2, TMP0 ++ | dsubu CARG1, r0, CRET1 ++ |.if MIPSR6 ++ | seleqz CRET1, CRET1, AT ++ | selnez CARG1, CARG1, AT ++ | or CRET1, CRET1, CARG1 ++ |.else ++ | movn CRET1, CARG1, AT ++ |.endif ++ | li CARG1, 64 ++ | subu TMP0, CARG1, TMP0 ++ | dsllv CRET2, CRET2, TMP0 // Integer check. ++ | sextw AT, CRET1 ++ | xor AT, CRET1, AT // Range check. ++ |.if MIPSR6 ++ | seleqz AT, AT, CRET2 ++ | selnez CRET2, CRET2, CRET2 ++ | jr ra ++ |. or CRET2, AT, CRET2 ++ |.else ++ | jr ra ++ |. movz CRET2, AT, CRET2 ++ |.endif ++ |1: ++ | jr ra ++ |. li CRET2, 1 ++ |2: ++ | jr ra ++ |. move CRET1, r0 ++ |.endif + |.endif + | + |.macro .ffunc_bit, name +@@ -2470,9 +2623,9 @@ static void build_subroutines(BuildCtx *ctx) + |. daddu RA, RA, BASE + | + |9: // Rethrow error from the right C frame. 
+- | load_got lj_err_throw +- | negu CARG2, CRET1 +- | call_intern lj_err_throw // (lua_State *L, int errcode) ++ | load_got lj_err_trace ++ | sub CARG2, r0, CRET1 ++ | call_intern lj_err_trace // (lua_State *L, int errcode) + |. move CARG1, L + |.endif + | +@@ -2482,15 +2635,22 @@ static void build_subroutines(BuildCtx *ctx) + | + |// Hard-float round to integer. + |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. ++ |// MIPSR6: Modifies FTMP1, too. + |.macro vm_round_hf, func + | lui TMP0, 0x4330 // Hiword of 2^52 (double). + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | abs.d FRET2, FARG1 // |x| + | dmfc1 AT, FARG1 ++ |.if MIPSR6 ++ | cmp.lt.d FTMP1, FRET2, f4 ++ | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 ++ | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52. ++ |.else + | c.olt.d 0, FRET2, f4 + | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 + | bc1f 0, >1 // Truncate only if |x| < 2^52. ++ |.endif + |. sub.d FRET1, FRET1, f4 + | slt AT, AT, r0 + |.if "func" == "ceil" +@@ -2501,16 +2661,38 @@ static void build_subroutines(BuildCtx *ctx) + |.if "func" == "trunc" + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 ++ |.if MIPSR6 ++ | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result? ++ | sub.d FRET2, FRET1, f4 ++ | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. ++ | dmtc1 AT, FRET1 ++ | neg.d FRET2, FTMP1 ++ | jr ra ++ |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in. ++ |.else + | c.olt.d 0, FRET2, FRET1 // |x| < result? + | sub.d FRET2, FRET1, f4 + | movt.d FRET1, FRET2, 0 // If yes, subtract +1. + | neg.d FRET2, FRET1 + | jr ra + |. movn.d FRET1, FRET2, AT // Merge sign bit back in. ++ |.endif + |.else + | neg.d FRET2, FRET1 + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 ++ |.if MIPSR6 ++ | dmtc1 AT, FTMP1 ++ | sel.d FTMP1, FRET1, FRET2 ++ |.if "func" == "ceil" ++ | cmp.lt.d FRET1, FTMP1, FARG1 // x > result? ++ |.else ++ | cmp.lt.d FRET1, FARG1, FTMP1 // x < result? ++ |.endif ++ | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1. ++ | jr ra ++ |. sel.d FRET1, FTMP1, FRET2 ++ |.else + | movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.if "func" == "ceil" + | c.olt.d 0, FRET1, FARG1 // x > result? +@@ -2521,6 +2703,7 @@ static void build_subroutines(BuildCtx *ctx) + | jr ra + |. movt.d FRET1, FRET2, 0 + |.endif ++ |.endif + |1: + | jr ra + |. mov.d FRET1, FARG1 +@@ -2628,12 +2811,40 @@ static void build_subroutines(BuildCtx *ctx) + |. slt CRET1, CARG2, CARG1 + |8: + | jr ra +- |. nop ++ |. li CRET1, 0 + |9: + | jr ra + |. move CRET1, CRET2 + |.endif + | ++ |->vm_sfcmpogt: ++ |.if not FPU ++ | dsll AT, CARG2, 1 ++ | dsll TMP0, CARG1, 1 ++ | or TMP1, AT, TMP0 ++ | beqz TMP1, >8 // Both args +-0: return 0. ++ |. lui TMP1, 0xffe0 ++ | dsll TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; ++ |. and AT, CARG2, CARG1 ++ | bltz AT, >5 // Both args negative? ++ |. nop ++ | jr ra ++ |. slt CRET1, CARG2, CARG1 ++ |5: // Swap conditions if both operands are negative. ++ | jr ra ++ |. slt CRET1, CARG1, CARG2 ++ |8: ++ | jr ra ++ |. li CRET1, 0 ++ |9: ++ | jr ra ++ |. li CRET1, 0 ++ |.endif ++ | + |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. + |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpolex: +@@ -2665,10 +2876,98 @@ static void build_subroutines(BuildCtx *ctx) + |. li CRET1, 0 + |.endif + | ++ |.macro sfmin_max, name, fpcall ++ |->vm_sf .. name: ++ |.if JIT and not FPU ++ | move TMP2, ra ++ | bal ->fpcall ++ |. 
nop ++ | move ra, TMP2 ++ | move TMP0, CRET1 ++ | move CRET1, CARG1 ++ |.if MIPSR6 ++ | selnez CRET1, CRET1, TMP0 ++ | seleqz TMP0, CARG2, TMP0 ++ | jr ra ++ |. or CRET1, CRET1, TMP0 ++ |.else ++ | jr ra ++ |. movz CRET1, CARG2, TMP0 ++ |.endif ++ |.endif ++ |.endmacro ++ | ++ | sfmin_max min, vm_sfcmpolt ++ | sfmin_max max, vm_sfcmpogt ++ | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_ASIZE, CARG3 ++ |.define NEXT_NIL, CARG4 ++ |.define NEXT_TMP0, r12 ++ |.define NEXT_TMP1, r13 ++ |.define NEXT_TMP2, r14 ++ |.define NEXT_RES_VK, CRET1 ++ |.define NEXT_RES_IDX, CRET2 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, 0(sp) ++ |.define NEXT_RES_KEY, 8(sp) ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT and ENDIAN_LE ++ | lw NEXT_ASIZE, NEXT_TAB->asize ++ | ld NEXT_TMP0, NEXT_TAB->array ++ | li NEXT_NIL, LJ_TNIL ++ |1: // Traverse array part. ++ | sltu AT, NEXT_IDX, NEXT_ASIZE ++ | sll NEXT_TMP1, NEXT_IDX, 3 ++ | beqz AT, >5 ++ |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 ++ | li AT, LJ_TISNUM ++ | ld NEXT_TMP2, 0(NEXT_TMP1) ++ | dsll AT, AT, 47 ++ | or NEXT_TMP1, NEXT_IDX, AT ++ | beq NEXT_TMP2, NEXT_NIL, <1 ++ |. addiu NEXT_IDX, NEXT_IDX, 1 ++ | sd NEXT_TMP2, NEXT_RES_VAL ++ | sd NEXT_TMP1, NEXT_RES_KEY ++ | move NEXT_RES_VK, NEXT_RES_PTR ++ | jr ra ++ |. move NEXT_RES_IDX, NEXT_IDX ++ | ++ |5: // Traverse hash part. ++ | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE ++ | ld NODE:NEXT_RES_VK, NEXT_TAB->node ++ | sll NEXT_TMP2, NEXT_RES_IDX, 5 ++ | lw NEXT_TMP0, NEXT_TAB->hmask ++ | sll AT, NEXT_RES_IDX, 3 ++ | subu AT, NEXT_TMP2, AT ++ | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT ++ |6: ++ | sltu AT, NEXT_TMP0, NEXT_RES_IDX ++ | bnez AT, >8 ++ |. nop ++ | ld NEXT_TMP2, NODE:NEXT_RES_VK->val ++ | bne NEXT_TMP2, NEXT_NIL, >9 ++ |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 ++ | // Skip holes in hash part. ++ | b <6 ++ |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) ++ | ++ |8: // End of iteration. Set the key to nil (not the value). ++ | sd NEXT_NIL, NEXT_RES_KEY ++ | move NEXT_RES_VK, NEXT_RES_PTR ++ |9: ++ | jr ra ++ |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -2832,7 +3131,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | slt AT, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 ++ |.if MIPSR6 ++ | movop TMP2, TMP2, AT ++ |.else + | movop TMP2, r0, AT ++ |.endif + |1: + | daddu PC, PC, TMP2 + | ins_next +@@ -2850,16 +3153,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + |3: // RA and RD are both numbers. + |.if FPU +- | fcomp f20, f22 ++ |.if MIPSR6 ++ | fcomp FTMP0, FTMP0, FTMP2 ++ | addu TMP2, TMP2, TMP3 ++ | mfc1 TMP3, FTMP0 ++ | b <1 ++ |. fmovop TMP2, TMP2, TMP3 ++ |.else ++ | fcomp FTMP0, FTMP2 + | addu TMP2, TMP2, TMP3 + | b <1 + |. fmovop TMP2, r0 ++ |.endif + |.else + | bal sfcomp + |. addu TMP2, TMP2, TMP3 + | b <1 ++ |.if MIPSR6 ++ |. movop TMP2, TMP2, CRET1 ++ |.else + |. 
movop TMP2, r0, CRET1 + |.endif ++ |.endif + | + |4: // RA is a number, RD is not a number. + | bne CARG4, TISNUM, ->vmeta_comp +@@ -2906,15 +3221,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + |.endmacro + | ++ |.if MIPSR6 + if (op == BC_ISLT) { +- | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { +- | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { +- | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult + } else { +- | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult + } ++ |.else ++ if (op == BC_ISLT) { ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt ++ } else if (op == BC_ISGE) { ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt ++ } else if (op == BC_ISLE) { ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult ++ } else { ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult ++ } ++ |.endif + break; + + case BC_ISEQV: case BC_ISNEV: +@@ -2960,7 +3287,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |2: // Check if the tags are the same and it's a table or userdata. + | xor AT, CARG3, CARG4 // Same type? + | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? ++ |.if MIPSR6 ++ | seleqz TMP0, TMP0, AT ++ |.else + | movn TMP0, r0, AT ++ |.endif + if (vk) { + | beqz TMP0, <1 + } else { +@@ -3010,11 +3341,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | xor TMP1, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 ++ |.if MIPSR6 ++ if (vk) { ++ | seleqz TMP2, TMP2, TMP1 ++ } else { ++ | selnez TMP2, TMP2, TMP1 ++ } ++ |.else + if (vk) { + | movn TMP2, r0, TMP1 + } else { + | movz TMP2, r0, TMP1 + } ++ |.endif + | daddu PC, PC, TMP2 + | ins_next + break; +@@ -3041,6 +3380,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bne CARG4, TISNUM, >6 + |. addu TMP2, TMP2, TMP3 + | xor AT, CARG1, CARG2 ++ |.if MIPSR6 ++ if (vk) { ++ | seleqz TMP2, TMP2, AT ++ |1: ++ | daddu PC, PC, TMP2 ++ |2: ++ } else { ++ | selnez TMP2, TMP2, AT ++ |1: ++ |2: ++ | daddu PC, PC, TMP2 ++ } ++ |.else + if (vk) { + | movn TMP2, r0, AT + |1: +@@ -3052,6 +3404,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |2: + | daddu PC, PC, TMP2 + } ++ |.endif + | ins_next + | + |3: // RA is not an integer. +@@ -3064,30 +3417,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |. addu TMP2, TMP2, TMP3 + | sltu AT, CARG4, TISNUM + |.if FPU +- | ldc1 f20, 0(RA) +- | ldc1 f22, 0(RD) ++ | ldc1 FTMP0, 0(RA) ++ | ldc1 FTMP2, 0(RD) + |.endif + | beqz AT, >5 + |. nop + |4: // RA and RD are both numbers. + |.if FPU +- | c.eq.d f20, f22 ++ |.if MIPSR6 ++ | cmp.eq.d FTMP0, FTMP0, FTMP2 ++ | dmfc1 TMP1, FTMP0 ++ | b <1 ++ if (vk) { ++ |. selnez TMP2, TMP2, TMP1 ++ } else { ++ |. seleqz TMP2, TMP2, TMP1 ++ } ++ |.else ++ | c.eq.d FTMP0, FTMP2 + | b <1 + if (vk) { + |. movf TMP2, r0 + } else { + |. movt TMP2, r0 + } ++ |.endif + |.else + | bal ->vm_sfcmpeq + |. nop + | b <1 ++ |.if MIPSR6 ++ if (vk) { ++ |. selnez TMP2, TMP2, CRET1 ++ } else { ++ |. 
seleqz TMP2, TMP2, CRET1 ++ } ++ |.else + if (vk) { + |. movz TMP2, r0, CRET1 + } else { + |. movn TMP2, r0, CRET1 + } + |.endif ++ |.endif + | + |5: // RA is a number, RD is not a number. + |.if FFI +@@ -3097,9 +3469,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU +- |. lwc1 f22, LO(RD) ++ |. lwc1 FTMP2, LO(RD) + | b <4 +- |. cvt.d.w f22, f22 ++ |. cvt.d.w FTMP2, FTMP2 + |.else + |. sextw CARG2, CARG2 + | bal ->vm_sfi2d_2 +@@ -3117,10 +3489,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU +- |. lwc1 f20, LO(RA) +- | ldc1 f22, 0(RD) ++ |. lwc1 FTMP0, LO(RA) ++ | ldc1 FTMP2, 0(RD) + | b <4 +- | cvt.d.w f20, f20 ++ | cvt.d.w FTMP0, FTMP0 + |.else + |. sextw CARG1, CARG1 + | bal ->vm_sfi2d_1 +@@ -3163,11 +3535,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 ++ |.if MIPSR6 ++ if (vk) { ++ | seleqz TMP2, TMP2, TMP0 ++ } else { ++ | selnez TMP2, TMP2, TMP0 ++ } ++ |.else + if (vk) { + | movn TMP2, r0, TMP0 + } else { + | movz TMP2, r0, TMP0 + } ++ |.endif + | daddu PC, PC, TMP2 + | ins_next + break; +@@ -3186,11 +3566,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 ++ |.if MIPSR6 ++ if (op == BC_IST) { ++ | selnez TMP2, TMP2, TMP0; ++ } else { ++ | seleqz TMP2, TMP2, TMP0; ++ } ++ |.else + if (op == BC_IST) { + | movz TMP2, r0, TMP0 + } else { + | movn TMP2, r0, TMP0 + } ++ |.endif + | daddu PC, PC, TMP2 + } else { + | ld CRET1, 0(RD) +@@ -3433,9 +3821,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bltz TMP1, ->vmeta_arith + |. daddu RA, BASE, RA + |.elif "intins" == "mult" ++ |.if MIPSR6 ++ |. nop ++ | mul CRET1, CARG3, CARG4 ++ | muh TMP2, CARG3, CARG4 ++ |.else + |. intins CARG3, CARG4 + | mflo CRET1 + | mfhi TMP2 ++ |.endif + | sra TMP1, CRET1, 31 + | bne TMP1, TMP2, ->vmeta_arith + |. daddu RA, BASE, RA +@@ -3458,16 +3852,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | + |5: // Check for two numbers. +- | .FPU ldc1 f20, 0(RB) ++ | .FPU ldc1 FTMP0, 0(RB) + | sltu AT, TMP0, TISNUM + | sltu TMP0, TMP1, TISNUM +- | .FPU ldc1 f22, 0(RC) ++ | .FPU ldc1 FTMP2, 0(RC) + | and AT, AT, TMP0 + | beqz AT, ->vmeta_arith + |. daddu RA, BASE, RA + | + |.if FPU +- | fpins FRET1, f20, f22 ++ | fpins FRET1, FTMP0, FTMP2 + |.elif "fpcall" == "sfpmod" + | sfpmod + |.else +@@ -3797,7 +4191,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | li TMP0, 0x801 + | addiu AT, CARG2, -0x7ff + | srl CARG3, RD, 14 ++ |.if MIPSR6 ++ | seleqz TMP0, TMP0, AT ++ | selnez CARG2, CARG2, AT ++ | or CARG2, CARG2, TMP0 ++ |.else + | movz CARG2, TMP0, AT ++ |.endif + | // (lua_State *L, int32_t asize, uint32_t hbits) + | call_intern lj_tab_new + |. 
move CARG1, L +@@ -3904,9 +4304,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask +- | lw TMP1, STR:RC->hash ++ | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 +@@ -4067,10 +4467,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask +- | lw TMP1, STR:RC->hash ++ | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | sb r0, TAB:RB->nomm // Clear metamethod cache. +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 +@@ -4078,7 +4478,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |.if FPU +- | ldc1 f20, 0(RA) ++ | ldc1 FTMP0, 0(RA) + |.else + | ld CRET1, 0(RA) + |.endif +@@ -4094,7 +4494,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |.if FPU +- |. sdc1 f20, NODE:TMP2->val ++ |. sdc1 FTMP0, NODE:TMP2->val + |.else + |. sd CRET1, NODE:TMP2->val + |.endif +@@ -4135,7 +4535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ld BASE, L->base + |.if FPU + | b <3 // No 2nd write barrier needed. +- |. sdc1 f20, 0(CRET1) ++ |. sdc1 FTMP0, 0(CRET1) + |.else + | ld CARG1, 0(RA) + | b <3 // No 2nd write barrier needed. +@@ -4213,7 +4613,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. +- | barrierback TAB:CARG2, TMP3, TMP0, <2 ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 + break; + + case BC_TSETM: +@@ -4364,10 +4764,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_ITERN: +- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) +- |.if JIT +- | // NYI: add hotloop, record BC_ITERN. ++ |.if JIT and ENDIAN_LE ++ | hotloop + |.endif ++ |->vm_IITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + | daddu RA, BASE, RA + | ld TAB:RB, -16(RA) + | lw RC, -8+LO(RA) // Get index from control var. +@@ -4388,11 +4789,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |. addiu RC, RC, 1 + | sd TMP2, 0(RA) + | sd CARG1, 8(RA) +- | or TMP0, RC, CARG3 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | decode_RD4b RD + | daddu RD, RD, TMP3 +- | sw TMP0, -8+LO(RA) // Update control var. ++ | sw RC, -8+LO(RA) // Update control var. + | daddu PC, PC, RD + |3: + | ins_next +@@ -4442,9 +4842,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | daddiu TMP1, TMP1, -FF_next_N + | or AT, AT, TMP1 + | bnez AT, >5 +- |. lui TMP1, 0xfffe ++ |. lui TMP1, (LJ_KEYINDEX >> 16) + | daddu PC, TMP0, TMP2 +- | ori TMP1, TMP1, 0x7fff ++ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) + | dsll TMP1, TMP1, 32 + | sd TMP1, -8(RA) + |1: +@@ -4454,8 +4854,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | daddu PC, TMP0, TMP2 ++ |.if JIT ++ | lb TMP0, OFS_OP(PC) ++ | li AT, BC_ITERN ++ | bne TMP0, AT, >6 ++ |. 
lhu TMP2, OFS_RD(PC) ++ |.endif + | b <1 + |. sb TMP1, OFS_OP(PC) ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | ld TMP0, DISPATCH_J(trace)(DISPATCH) ++ | sll TMP2, TMP2, 3 ++ | daddu TMP0, TMP0, TMP2 ++ | ld TRACE:TMP2, 0(TMP0) ++ | lw TMP0, TRACE:TMP2->startins ++ | li AT, -256 ++ | and TMP0, TMP0, AT ++ | or TMP0, TMP0, TMP1 ++ | b <1 ++ |. sw TMP0, 0(PC) ++ |.endif + break; + + case BC_VARG: +@@ -4478,7 +4897,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ld CARG1, 0(RC) + | sltu AT, RC, TMP3 + | daddiu RC, RC, 8 ++ |.if MIPSR6 ++ | selnez CARG1, CARG1, AT ++ | seleqz AT, TISNIL, AT ++ | or CARG1, CARG1, AT ++ |.else + | movz CARG1, TISNIL, AT ++ |.endif + | sd CARG1, 0(RA) + | sltu AT, RA, TMP2 + | bnez AT, <1 +@@ -4667,7 +5092,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | dext AT, CRET1, 31, 0 + | slt CRET1, CARG2, CARG3 + | slt TMP1, CARG3, CARG2 ++ |.if MIPSR6 ++ | selnez TMP1, TMP1, AT ++ | seleqz CRET1, CRET1, AT ++ | or CRET1, CRET1, TMP1 ++ |.else + | movn CRET1, TMP1, AT ++ |.endif + } else { + | bne CARG3, TISNUM, >5 + |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type +@@ -4683,20 +5114,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slt CRET1, CRET1, CARG1 + | slt AT, CARG2, r0 + | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. ++ |.if MIPSR6 ++ | selnez TMP1, TMP1, AT ++ | seleqz CRET1, CRET1, AT ++ | or CRET1, CRET1, TMP1 ++ |.else + | movn CRET1, TMP1, AT ++ |.endif + | or CRET1, CRET1, TMP0 + | zextw CARG1, CARG1 + | settp CARG1, TISNUM + } + |1: + if (op == BC_FORI) { ++ |.if MIPSR6 ++ | selnez TMP2, TMP2, CRET1 ++ |.else + | movz TMP2, r0, CRET1 ++ |.endif + | daddu PC, PC, TMP2 + } else if (op == BC_JFORI) { + | daddu PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { ++ |.if MIPSR6 ++ | seleqz TMP2, TMP2, CRET1 ++ |.else + | movn TMP2, r0, CRET1 ++ |.endif + | daddu PC, PC, TMP2 + } + if (vk) { +@@ -4726,6 +5171,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. slt TMP3, TMP3, r0 ++ |.if MIPSR6 ++ | dmtc1 TMP3, FTMP2 ++ | cmp.lt.d FTMP0, f0, f2 ++ | cmp.lt.d FTMP1, f2, f0 ++ | sel.d FTMP2, FTMP1, FTMP0 ++ | b <1 ++ |. dmfc1 CRET1, FTMP2 ++ |.else + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | li CRET1, 1 +@@ -4733,12 +5186,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | movt AT, r0, 1 + | b <1 + |. movn CRET1, AT, TMP3 ++ |.endif + } else { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f4, FORL_STEP*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | ld TMP3, FORL_STEP*8(RA) + | add.d f0, f0, f4 ++ |.if MIPSR6 ++ | slt TMP3, TMP3, r0 ++ | dmtc1 TMP3, FTMP2 ++ | cmp.lt.d FTMP0, f0, f2 ++ | cmp.lt.d FTMP1, f2, f0 ++ | sel.d FTMP2, FTMP1, FTMP0 ++ | dmfc1 CRET1, FTMP2 ++ if (op == BC_IFORL) { ++ | seleqz TMP2, TMP2, CRET1 ++ | daddu PC, PC, TMP2 ++ } ++ |.else + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | slt TMP3, TMP3, r0 +@@ -4751,6 +5217,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | movn TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } ++ |.endif + | sdc1 f0, FORL_IDX*8(RA) + | ins_next1 + | b <2 +@@ -4926,8 +5393,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ld TMP0, 0(RA) + | sltu AT, RA, RC // Less args than parameters? + | move CARG1, TMP0 ++ |.if MIPSR6 ++ | selnez TMP0, TMP0, AT ++ | seleqz TMP3, TISNIL, AT ++ | or TMP0, TMP0, TMP3 ++ | seleqz TMP3, CARG1, AT ++ | selnez CARG1, TISNIL, AT ++ | or CARG1, CARG1, TMP3 ++ |.else + | movz TMP0, TISNIL, AT // Clear missing parameters. 
+ | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). ++ |.endif + | addiu TMP2, TMP2, -1 + | sd TMP0, 16(TMP1) + | daddiu TMP1, TMP1, 8 +diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc +index b4260ebc..a376c849 100644 +--- a/src/vm_ppc.dasc ++++ b/src/vm_ppc.dasc +@@ -1,6 +1,6 @@ + |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. + |// Bytecode interpreter, fast functions and helper functions. +-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + | + |.arch ppc + |.section code_op, code_sub +@@ -18,7 +18,6 @@ + |// DynASM defines used by the PPC port: + |// + |// P64 64 bit pointers (only for GPR64 testing). +-|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port. + |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). + |// Affects reg saves, stack layout, carry/overflow/dot flags etc. + |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). +@@ -103,6 +102,18 @@ + |// Fixed register assignments for the interpreter. + |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) + | ++|.macro .FPU, a, b ++|.if FPU ++| a, b ++|.endif ++|.endmacro ++| ++|.macro .FPU, a, b, c ++|.if FPU ++| a, b, c ++|.endif ++|.endmacro ++| + |// The following must be C callee-save (but BASE is often refetched). + |.define BASE, r14 // Base of current Lua stack frame. + |.define KBASE, r15 // Constants of current Lua function. +@@ -116,8 +127,10 @@ + |.define TISNUM, r22 + |.define TISNIL, r23 + |.define ZERO, r24 ++|.if FPU + |.define TOBIT, f30 // 2^52 + 2^51. + |.define TONUM, f31 // 2^52 + 2^51 + 2^31. ++|.endif + | + |// The following temporaries are not saved across C calls, except for RA. + |.define RA, r20 // Callee-save. +@@ -133,6 +146,7 @@ + | + |// Saved temporaries. + |.define SAVE0, r21 ++|.define SAVE1, r25 + | + |// Calling conventions. + |.define CARG1, r3 +@@ -141,8 +155,10 @@ + |.define CARG4, r6 // Overlaps TMP3. + |.define CARG5, r7 // Overlaps INS. + | ++|.if FPU + |.define FARG1, f1 + |.define FARG2, f2 ++|.endif + | + |.define CRET1, r3 + |.define CRET2, r4 +@@ -213,10 +229,16 @@ + |.endif + |.else + | ++|.if FPU + |.define SAVE_LR, 276(sp) + |.define CFRAME_SPACE, 272 // Delta for sp. + |// Back chain for sp: 272(sp) <-- sp entering interpreter + |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. ++|.else ++|.define SAVE_LR, 132(sp) ++|.define CFRAME_SPACE, 128 // Delta for sp. ++|// Back chain for sp: 128(sp) <-- sp entering interpreter ++|.endif + |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. + |.define SAVE_CR, 52(sp) // 32 bit CR save. + |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 
+@@ -226,16 +248,25 @@ + |.define SAVE_PC, 32(sp) + |.define SAVE_MULTRES, 28(sp) + |.define UNUSED1, 24(sp) ++|.if FPU + |.define TMPD_LO, 20(sp) + |.define TMPD_HI, 16(sp) + |.define TONUM_LO, 12(sp) + |.define TONUM_HI, 8(sp) ++|.else ++|.define SFSAVE_4, 20(sp) ++|.define SFSAVE_3, 16(sp) ++|.define SFSAVE_2, 12(sp) ++|.define SFSAVE_1, 8(sp) ++|.endif + |// Next frame lr: 4(sp) + |// Back chain for sp: 0(sp) <-- sp while in interpreter + | ++|.if FPU + |.define TMPD_BLO, 23(sp) + |.define TMPD, TMPD_HI + |.define TONUM_D, TONUM_HI ++|.endif + | + |.endif + | +@@ -245,7 +276,7 @@ + |.else + | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) + |.endif +-| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) ++| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) + |.endmacro + |.macro rest_, reg + |.if GPR64 +@@ -253,7 +284,7 @@ + |.else + | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) + |.endif +-| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) ++| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) + |.endmacro + | + |.macro saveregs +@@ -323,6 +354,7 @@ + |// Trap for not-yet-implemented parts. + |.macro NYI; tw 4, sp, sp; .endmacro + | ++|.if FPU + |// int/FP conversions. + |.macro tonum_i, freg, reg + | xoris reg, reg, 0x8000 +@@ -346,6 +378,7 @@ + |.macro toint, reg, freg + | toint reg, freg, freg + |.endmacro ++|.endif + | + |//----------------------------------------------------------------------- + | +@@ -533,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx) + | beq >2 + |1: + | addic. TMP1, TMP1, -8 ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ |.endif + | addi RA, RA, 8 ++ |.if FPU + | stfd f0, 0(BASE) ++ |.else ++ | stw CARG1, 0(BASE) ++ | stw CARG2, 4(BASE) ++ |.endif + | addi BASE, BASE, 8 + | bney <1 + | +@@ -613,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx) + | .toc ld TOCREG, SAVE_TOC + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp BASE, L->base +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | lwz DISPATCH, L->glref // Setup pointer to dispatch table. + | li ZERO, 0 +- | stw TMP3, TMPD ++ | .FPU stw TMP3, TMPD + | li TMP1, LJ_TFALSE +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | li TISNIL, LJ_TNIL + | li_vmstate INTERP +- | lfs TOBIT, TMPD ++ | .FPU lfs TOBIT, TMPD + | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | la RA, -8(BASE) // Results start at BASE-8. +- | stw TMP3, TMPD ++ | .FPU stw TMP3, TMPD + | addi DISPATCH, DISPATCH, GG_G2DISP + | stw TMP1, 0(RA) // Prepend false to error message. + | li RD, 16 // 2 results: false + error message. + | st_vmstate +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | b ->vm_returnc + | + |//----------------------------------------------------------------------- +@@ -690,22 +733,22 @@ static void build_subroutines(BuildCtx *ctx) + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp TMP1, L->top + | lwz PC, FRAME_PC(BASE) +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | stb CARG3, L->status +- | stw TMP3, TMPD +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). +- | lfs TOBIT, TMPD ++ | .FPU stw TMP3, TMPD ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
++ | .FPU lfs TOBIT, TMPD + | sub RD, TMP1, BASE +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | addi RD, RD, 8 +- | stw TMP0, TONUM_HI ++ | .FPU stw TMP0, TONUM_HI + | li_vmstate INTERP + | li ZERO, 0 + | st_vmstate + | andix. TMP0, PC, FRAME_TYPE + | mr MULTRES, RD +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | li TISNIL, LJ_TNIL + | beq ->BC_RET_Z + | b ->vm_return +@@ -739,19 +782,19 @@ static void build_subroutines(BuildCtx *ctx) + | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp TMP1, L->top +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | add PC, PC, BASE +- | stw TMP3, TMPD ++ | .FPU stw TMP3, TMPD + | li ZERO, 0 +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). +- | lfs TOBIT, TMPD ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). ++ | .FPU lfs TOBIT, TMPD + | sub PC, PC, TMP2 // PC = frame delta + frame type +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | sub NARGS8:RC, TMP1, BASE +- | stw TMP0, TONUM_HI ++ | .FPU stw TMP0, TONUM_HI + | li_vmstate INTERP +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | li TISNIL, LJ_TNIL + | st_vmstate + | +@@ -816,11 +859,11 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | lwz PC, -16(RB) // Restore PC from [cont|PC]. + | subi TMP2, RD, 8 +- | lwz TMP1, LFUNC:TMP1->pc + | stwx TISNIL, RA, TMP2 // Ensure one valid arg. + |.if FFI + | ble >1 + |.endif ++ | lwz TMP1, LFUNC:TMP1->pc + | lwz KBASE, PC2PROTO(k)(TMP1) + | // BASE = base, RA = resultptr, RB = meta base + | mtctr TMP0 +@@ -839,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx) + | lwz INS, -4(PC) + | subi CARG2, RB, 16 + | decode_RB8 SAVE0, INS ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz TMP2, 0(RA) ++ | lwz TMP3, 4(RA) ++ |.endif + | add TMP1, BASE, SAVE0 + | stp BASE, L->base + | cmplw TMP1, CARG2 + | sub CARG3, CARG2, TMP1 + | decode_RA8 RA, INS ++ |.if FPU + | stfd f0, 0(CARG2) ++ |.else ++ | stw TMP2, 0(CARG2) ++ | stw TMP3, 4(CARG2) ++ |.endif + | bney ->BC_CAT_Z ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux TMP2, RA, BASE ++ | stw TMP3, 4(RA) ++ |.endif + | b ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- +@@ -900,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx) + | // Returns TValue * (finished) or NULL (metamethod). + | cmplwi CRET1, 0 + | beq >3 ++ |.if FPU + | lfd f0, 0(CRET1) ++ |.else ++ | lwz TMP0, 0(CRET1) ++ | lwz TMP1, 4(CRET1) ++ |.endif + | ins_next1 ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + | + |3: // Call __index metamethod. +@@ -920,7 +988,12 @@ static void build_subroutines(BuildCtx *ctx) + | // Returns cTValue * or NULL. + | cmplwi CRET1, 0 + | beq >1 ++ |.if FPU + | lfd f14, 0(CRET1) ++ |.else ++ | lwz SAVE0, 0(CRET1) ++ | lwz SAVE1, 4(CRET1) ++ |.endif + | b ->BC_TGETR_Z + |1: + | stwx TISNIL, BASE, RA +@@ -975,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx) + | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). 
+ | cmplwi CRET1, 0 ++ |.if FPU + | lfdx f0, BASE, RA ++ |.else ++ | lwzux TMP2, RA, BASE ++ | lwz TMP3, 4(RA) ++ |.endif + | beq >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | ins_next1 ++ |.if FPU + | stfd f0, 0(CRET1) ++ |.else ++ | stw TMP2, 0(CRET1) ++ | stw TMP3, 4(CRET1) ++ |.endif + | ins_next2 + | + |3: // Call __newindex metamethod. +@@ -990,15 +1073,26 @@ static void build_subroutines(BuildCtx *ctx) + | add PC, TMP1, BASE + | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | li NARGS8:RC, 24 // 3 args for func(t, k, v) ++ |.if FPU + | stfd f0, 16(BASE) // Copy value to third argument. ++ |.else ++ | stw TMP2, 16(BASE) ++ | stw TMP3, 20(BASE) ++ |.endif + | b ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | stp BASE, L->base ++ | mr CARG1, L + | stw PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. ++ |.if FPU + | stfd f14, 0(CRET1) ++ |.else ++ | stw SAVE0, 0(CRET1) ++ | stw SAVE1, 4(CRET1) ++ |.endif + | b ->cont_nop + | + |//-- Comparison metamethods --------------------------------------------- +@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx) + | + |->cont_ra: // RA = resultptr + | lwz INS, -4(PC) ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ |.endif + | decode_RA8 TMP1, INS ++ |.if FPU + | stfdx f0, BASE, TMP1 ++ |.else ++ | stwux CARG1, TMP1, BASE ++ | stw CARG2, 4(TMP1) ++ |.endif + | b ->cont_nop + | + |->cont_condt: // RA = resultptr +@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx) + |.macro .ffunc_n, name + |->ff_ .. name: + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) ++ | lwz CARG1, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ |.endif + | blt ->fff_fallback +- | checknum CARG3; bge ->fff_fallback ++ | checknum CARG1; bge ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + |->ff_ .. name: + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) ++ | lwz CARG1, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) +- | lwz CARG4, 8(BASE) ++ | lwz CARG3, 8(BASE) + | lfd FARG2, 8(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ | lwz CARG3, 8(BASE) ++ | lwz CARG4, 12(BASE) ++ |.endif + | blt ->fff_fallback ++ | checknum CARG1; bge ->fff_fallback + | checknum CARG3; bge ->fff_fallback +- | checknum CARG4; bge ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. +@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx) + | bge cr1, ->fff_fallback + | stw CARG3, 0(RA) + | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. ++ | addi TMP1, BASE, 8 ++ | add TMP2, RA, NARGS8:RC + | stw CARG1, 4(RA) + | beq ->fff_res // Done if exactly 1 argument. 
+- | li TMP1, 8 +- | subi RC, RC, 8 + |1: +- | cmplw TMP1, RC +- | lfdx f0, BASE, TMP1 +- | stfdx f0, RA, TMP1 ++ | cmplw TMP1, TMP2 ++ |.if FPU ++ | lfd f0, 0(TMP1) ++ | stfd f0, 0(TMP1) ++ |.else ++ | lwz CARG1, 0(TMP1) ++ | lwz CARG2, 4(TMP1) ++ | stw CARG1, -8(TMP1) ++ | stw CARG2, -4(TMP1) ++ |.endif + | addi TMP1, TMP1, 8 + | bney <1 + | b ->fff_res +@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx) + | orc TMP1, TMP2, TMP0 + | addi TMP1, TMP1, ~LJ_TISNUM+1 + | slwi TMP1, TMP1, 3 ++ |.if FPU + | la TMP2, CFUNC:RB->upvalue + | lfdx FARG1, TMP2, TMP1 ++ |.else ++ | add TMP1, CFUNC:RB, TMP1 ++ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi ++ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo ++ |.endif + | b ->fff_resn + | + |//-- Base library: getters and setters --------------------------------- +@@ -1320,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx) + | beq ->fff_restv + | lwz TMP0, TAB:CARG1->hmask + | li CARG3, LJ_TTAB // Use metatable as default result. +- | lwz TMP1, STR:RC->hash ++ | lwz TMP1, STR:RC->sid + | lwz NODE:TMP2, TAB:CARG1->node +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slwi TMP0, TMP1, 5 + | slwi TMP1, TMP1, 3 + | sub TMP1, TMP0, TMP1 +@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx) + | mr CARG1, L + | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. ++ |.if FPU + | lfd FARG1, 0(CRET1) ++ |.else ++ | lwz CARG2, 4(CRET1) ++ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. ++ |.endif + | b ->fff_resn + | + |//-- Base library: conversions ------------------------------------------ +@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx) + | // Only handles the number case inline (without a base argument). + | cmplwi NARGS8:RC, 8 + | lwz CARG1, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ |.endif + | bne ->fff_fallback // Exactly one argument. + | checknum CARG1; bgt ->fff_fallback + | b ->fff_resn +@@ -1423,32 +1559,24 @@ static void build_subroutines(BuildCtx *ctx) + | + |//-- Base library: iterators ------------------------------------------- + | +- |.ffunc next +- | cmplwi NARGS8:RC, 8 +- | lwz CARG1, 0(BASE) +- | lwz TAB:CARG2, 4(BASE) +- | blt ->fff_fallback ++ |.ffunc_1 next + | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. +- | checktab CARG1 ++ | checktab CARG3 + | lwz PC, FRAME_PC(BASE) + | bne ->fff_fallback +- | stp BASE, L->base // Add frame since C call can throw. +- | mr CARG1, L +- | stp BASE, L->top // Dummy frame length is ok. +- | la CARG3, 8(BASE) +- | stw PC, SAVE_PC +- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) +- | // Returns 0 at end of traversal. +- | cmplwi CRET1, 0 +- | li CARG3, LJ_TNIL +- | beq ->fff_restv // End of traversal: return nil. +- | lfd f0, 8(BASE) // Copy key and value to results. ++ | la CARG2, 8(BASE) ++ | la CARG3, -8(BASE) ++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // Returns 1=found, 0=end, -1=error. ++ | cmpwi CRET1, 0 + | la RA, -8(BASE) +- | lfd f1, 16(BASE) +- | stfd f0, 0(RA) + | li RD, (2+1)*8 +- | stfd f1, 8(RA) +- | b ->fff_res ++ | bgt ->fff_res // Found key/value. ++ | li CARG3, LJ_TNIL ++ | beq ->fff_restv // End of traversal: return nil. ++ | lwz CFUNC:RB, FRAME_FUNC(BASE) ++ | li NARGS8:RC, 2*8 ++ | b ->fff_fallback // Invalid key. 
+ | + |.ffunc_1 pairs + | checktab CARG3 +@@ -1456,17 +1584,32 @@ static void build_subroutines(BuildCtx *ctx) + | bne ->fff_fallback + #if LJ_52 + | lwz TAB:TMP2, TAB:CARG1->metatable ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | cmplwi TAB:TMP2, 0 + | la RA, -8(BASE) + | bne ->fff_fallback + #else ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | la RA, -8(BASE) + #endif + | stw TISNIL, 8(BASE) + | li RD, (3+1)*8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw TMP0, 0(RA) ++ | stw TMP1, 4(RA) ++ |.endif + | b ->fff_res + | + |.ffunc ipairs_aux +@@ -1512,14 +1655,24 @@ static void build_subroutines(BuildCtx *ctx) + | stfd FARG2, 0(RA) + |.endif + | ble >2 // Not in array part? ++ |.if FPU + | lwzx TMP2, TMP1, TMP3 + | lfdx f0, TMP1, TMP3 ++ |.else ++ | lwzux TMP2, TMP1, TMP3 ++ | lwz TMP3, 4(TMP1) ++ |.endif + |1: + | checknil TMP2 + | li RD, (0+1)*8 + | beq ->fff_res // End of iteration, return 0 results. + | li RD, (2+1)*8 ++ |.if FPU + | stfd f0, 8(RA) ++ |.else ++ | stw TMP2, 8(RA) ++ | stw TMP3, 12(RA) ++ |.endif + | b ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | lwz TMP0, TAB:CARG1->hmask +@@ -1533,7 +1686,11 @@ static void build_subroutines(BuildCtx *ctx) + | li RD, (0+1)*8 + | beq ->fff_res + | lwz TMP2, 0(CRET1) ++ |.if FPU + | lfd f0, 0(CRET1) ++ |.else ++ | lwz TMP3, 4(CRET1) ++ |.endif + | b <1 + | + |.ffunc_1 ipairs +@@ -1542,12 +1699,22 @@ static void build_subroutines(BuildCtx *ctx) + | bne ->fff_fallback + #if LJ_52 + | lwz TAB:TMP2, TAB:CARG1->metatable ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | cmplwi TAB:TMP2, 0 + | la RA, -8(BASE) + | bne ->fff_fallback + #else ++ |.if FPU + | lfd f0, CFUNC:RB->upvalue[0] ++ |.else ++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi ++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo ++ |.endif + | la RA, -8(BASE) + #endif + |.if DUALNUM +@@ -1557,7 +1724,12 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | stw ZERO, 12(BASE) + | li RD, (3+1)*8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw TMP0, 0(RA) ++ | stw TMP1, 4(RA) ++ |.endif + | b ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- +@@ -1576,19 +1748,32 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc xpcall + | cmplwi NARGS8:RC, 16 +- | lwz CARG4, 8(BASE) ++ | lwz CARG3, 8(BASE) ++ |.if FPU + | lfd FARG2, 8(BASE) + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ | lwz CARG4, 12(BASE) ++ |.endif + | blt ->fff_fallback + | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) + | mr TMP2, BASE +- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. ++ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. + | la BASE, 16(BASE) + | // Remember active hook before pcall. + | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 ++ |.if FPU + | stfd FARG2, 0(TMP2) // Swap function and traceback. 
+- | subi NARGS8:RC, NARGS8:RC, 16 + | stfd FARG1, 8(TMP2) ++ |.else ++ | stw CARG3, 0(TMP2) ++ | stw CARG4, 4(TMP2) ++ | stw CARG1, 8(TMP2) ++ | stw CARG2, 12(TMP2) ++ |.endif ++ | subi NARGS8:RC, NARGS8:RC, 16 + | addi PC, TMP1, 16+FRAME_PCALL + | b ->vm_call_dispatch + | +@@ -1631,9 +1816,21 @@ static void build_subroutines(BuildCtx *ctx) + | stp BASE, L->top + |2: // Move args to coroutine. + | cmpw TMP1, NARGS8:RC ++ |.if FPU + | lfdx f0, BASE, TMP1 ++ |.else ++ | add CARG3, BASE, TMP1 ++ | lwz TMP2, 0(CARG3) ++ | lwz TMP3, 4(CARG3) ++ |.endif + | beq >3 ++ |.if FPU + | stfdx f0, CARG2, TMP1 ++ |.else ++ | add CARG3, CARG2, TMP1 ++ | stw TMP2, 0(CARG3) ++ | stw TMP3, 4(CARG3) ++ |.endif + | addi TMP1, TMP1, 8 + | b <2 + |3: +@@ -1664,8 +1861,17 @@ static void build_subroutines(BuildCtx *ctx) + | stp TMP2, L:SAVE0->top // Clear coroutine stack. + |5: // Move results from coroutine. + | cmplw TMP1, TMP3 ++ |.if FPU + | lfdx f0, TMP2, TMP1 + | stfdx f0, BASE, TMP1 ++ |.else ++ | add CARG3, TMP2, TMP1 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ | add CARG3, BASE, TMP1 ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | addi TMP1, TMP1, 8 + | bne <5 + |6: +@@ -1690,12 +1896,22 @@ static void build_subroutines(BuildCtx *ctx) + | andix. TMP0, PC, FRAME_TYPE + | la TMP3, -8(TMP3) + | li TMP1, LJ_TFALSE ++ |.if FPU + | lfd f0, 0(TMP3) ++ |.else ++ | lwz CARG1, 0(TMP3) ++ | lwz CARG2, 4(TMP3) ++ |.endif + | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. + | li RD, (2+1)*8 + | stw TMP1, -8(BASE) // Prepend false to results. + | la RA, -8(BASE) ++ |.if FPU + | stfd f0, 0(BASE) // Copy error message. ++ |.else ++ | stw CARG1, 0(BASE) // Copy error message. ++ | stw CARG2, 4(BASE) ++ |.endif + | b <7 + |.else + | mr CARG1, L +@@ -1874,7 +2090,12 @@ static void build_subroutines(BuildCtx *ctx) + | lus CARG1, 0x8000 // -(2^31). + | beqy ->fff_resi + |5: ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ |.endif + | blex func + | b ->fff_resn + |.endmacro +@@ -1898,10 +2119,14 @@ static void build_subroutines(BuildCtx *ctx) + | + |.ffunc math_log + | cmplwi NARGS8:RC, 8 +- | lwz CARG3, 0(BASE) +- | lfd FARG1, 0(BASE) ++ | lwz CARG1, 0(BASE) + | bne ->fff_fallback // Need exactly 1 argument. 
+- | checknum CARG3; bge ->fff_fallback ++ | checknum CARG1; bge ->fff_fallback ++ |.if FPU ++ | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG2, 4(BASE) ++ |.endif + | blex log + | b ->fff_resn + | +@@ -1923,17 +2148,24 @@ static void build_subroutines(BuildCtx *ctx) + |.if DUALNUM + |.ffunc math_ldexp + | cmplwi NARGS8:RC, 16 +- | lwz CARG3, 0(BASE) ++ | lwz TMP0, 0(BASE) ++ |.if FPU + | lfd FARG1, 0(BASE) +- | lwz CARG4, 8(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ |.endif ++ | lwz TMP1, 8(BASE) + |.if GPR64 + | lwz CARG2, 12(BASE) +- |.else ++ |.elif FPU + | lwz CARG1, 12(BASE) ++ |.else ++ | lwz CARG3, 12(BASE) + |.endif + | blt ->fff_fallback +- | checknum CARG3; bge ->fff_fallback +- | checknum CARG4; bne ->fff_fallback ++ | checknum TMP0; bge ->fff_fallback ++ | checknum TMP1; bne ->fff_fallback + |.else + |.ffunc_nn math_ldexp + |.if GPR64 +@@ -1948,8 +2180,10 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc_n math_frexp + |.if GPR64 + | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) +- |.else ++ |.elif FPU + | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) ++ |.else ++ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) + |.endif + | lwz PC, FRAME_PC(BASE) + | blex frexp +@@ -1958,7 +2192,12 @@ static void build_subroutines(BuildCtx *ctx) + |.if not DUALNUM + | tonum_i FARG2, TMP1 + |.endif ++ |.if FPU + | stfd FARG1, 0(RA) ++ |.else ++ | stw CRET1, 0(RA) ++ | stw CRET2, 4(RA) ++ |.endif + | li RD, (2+1)*8 + |.if DUALNUM + | stw TISNUM, 8(RA) +@@ -1971,13 +2210,20 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc_n math_modf + |.if GPR64 + | la CARG2, -8(BASE) +- |.else ++ |.elif FPU + | la CARG1, -8(BASE) ++ |.else ++ | la CARG3, -8(BASE) + |.endif + | lwz PC, FRAME_PC(BASE) + | blex modf + | la RA, -8(BASE) ++ |.if FPU + | stfd FARG1, 0(BASE) ++ |.else ++ | stw CRET1, 0(BASE) ++ | stw CRET2, 4(BASE) ++ |.endif + | li RD, (2+1)*8 + | b ->fff_res + | +@@ -1985,13 +2231,13 @@ static void build_subroutines(BuildCtx *ctx) + |.if DUALNUM + | .ffunc_1 name + | checknum CARG3 +- | addi TMP1, BASE, 8 +- | add TMP2, BASE, NARGS8:RC ++ | addi SAVE0, BASE, 8 ++ | add SAVE1, BASE, NARGS8:RC + | bne >4 + |1: // Handle integers. +- | lwz CARG4, 0(TMP1) +- | cmplw cr1, TMP1, TMP2 +- | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 0(SAVE0) ++ | cmplw cr1, SAVE0, SAVE1 ++ | lwz CARG2, 4(SAVE0) + | bge cr1, ->fff_resi + | checknum CARG4 + | xoris TMP0, CARG1, 0x8000 +@@ -2008,36 +2254,76 @@ static void build_subroutines(BuildCtx *ctx) + |.if GPR64 + | rldicl CARG1, CARG1, 0, 32 + |.endif +- | addi TMP1, TMP1, 8 ++ | addi SAVE0, SAVE0, 8 + | b <1 + |3: + | bge ->fff_fallback + | // Convert intermediate result to number and continue below. ++ |.if FPU + | tonum_i FARG1, CARG1 +- | lfd FARG2, 0(TMP1) ++ | lfd FARG2, 0(SAVE0) ++ |.else ++ | mr CARG2, CARG1 ++ | bl ->vm_sfi2d_1 ++ | lwz CARG3, 0(SAVE0) ++ | lwz CARG4, 4(SAVE0) ++ |.endif + | b >6 + |4: ++ |.if FPU + | lfd FARG1, 0(BASE) ++ |.else ++ | lwz CARG1, 0(BASE) ++ | lwz CARG2, 4(BASE) ++ |.endif + | bge ->fff_fallback + |5: // Handle numbers. 
+- | lwz CARG4, 0(TMP1) +- | cmplw cr1, TMP1, TMP2 +- | lfd FARG2, 0(TMP1) ++ | lwz CARG3, 0(SAVE0) ++ | cmplw cr1, SAVE0, SAVE1 ++ |.if FPU ++ | lfd FARG2, 0(SAVE0) ++ |.else ++ | lwz CARG4, 4(SAVE0) ++ |.endif + | bge cr1, ->fff_resn +- | checknum CARG4; bge >7 ++ | checknum CARG3; bge >7 + |6: +- | fsub f0, FARG1, FARG2 +- | addi TMP1, TMP1, 8 ++ | addi SAVE0, SAVE0, 8 ++ |.if FPU + |.if ismax ++ | fsub f0, FARG1, FARG2 ++ |.else ++ | fsub f0, FARG2, FARG1 ++ |.endif + | fsel FARG1, f0, FARG1, FARG2 + |.else +- | fsel FARG1, f0, FARG2, FARG1 ++ | stw CARG1, SFSAVE_1 ++ | stw CARG2, SFSAVE_2 ++ | stw CARG3, SFSAVE_3 ++ | stw CARG4, SFSAVE_4 ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ |.if ismax ++ | blt >8 ++ |.else ++ | bge >8 ++ |.endif ++ | lwz CARG1, SFSAVE_1 ++ | lwz CARG2, SFSAVE_2 ++ | b <5 ++ |8: ++ | lwz CARG1, SFSAVE_3 ++ | lwz CARG2, SFSAVE_4 + |.endif + | b <5 + |7: // Convert integer to number and continue above. +- | lwz CARG2, 4(TMP1) ++ | lwz CARG3, 4(SAVE0) + | bne ->fff_fallback +- | tonum_i FARG2, CARG2 ++ |.if FPU ++ | tonum_i FARG2, CARG3 ++ |.else ++ | bl ->vm_sfi2d_2 ++ |.endif + | b <6 + |.else + | .ffunc_n name +@@ -2049,13 +2335,13 @@ static void build_subroutines(BuildCtx *ctx) + | checknum CARG2 + | bge cr1, ->fff_resn + | bge ->fff_fallback +- | fsub f0, FARG1, FARG2 +- | addi TMP1, TMP1, 8 + |.if ismax +- | fsel FARG1, f0, FARG1, FARG2 ++ | fsub f0, FARG1, FARG2 + |.else +- | fsel FARG1, f0, FARG2, FARG1 ++ | fsub f0, FARG2, FARG1 + |.endif ++ | addi TMP1, TMP1, 8 ++ | fsel FARG1, f0, FARG1, FARG2 + | b <1 + |.endif + |.endmacro +@@ -2211,7 +2497,7 @@ static void build_subroutines(BuildCtx *ctx) + | stw L, SBUF:CARG1->L + | stp BASE, L->base + | stw PC, SAVE_PC +- | stw TMP0, SBUF:CARG1->p ++ | stw TMP0, SBUF:CARG1->w + | bl extern lj_buf_putstr_ .. name + | bl extern lj_buf_tostr + | b ->fff_resstr +@@ -2237,28 +2523,37 @@ static void build_subroutines(BuildCtx *ctx) + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name +- | addi TMP1, BASE, 8 +- | add TMP2, BASE, NARGS8:RC ++ | addi SAVE0, BASE, 8 ++ | add SAVE1, BASE, NARGS8:RC + |1: +- | lwz CARG4, 0(TMP1) +- | cmplw cr1, TMP1, TMP2 ++ | lwz CARG4, 0(SAVE0) ++ | cmplw cr1, SAVE0, SAVE1 + |.if DUALNUM +- | lwz CARG2, 4(TMP1) ++ | lwz CARG2, 4(SAVE0) + |.else +- | lfd FARG1, 0(TMP1) ++ | lfd FARG1, 0(SAVE0) + |.endif + | bgey cr1, ->fff_resi + | checknum CARG4 + |.if DUALNUM ++ |.if FPU + | bnel ->fff_bitop_fb + |.else ++ | beq >3 ++ | stw CARG1, SFSAVE_1 ++ | bl ->fff_bitop_fb ++ | mr CARG2, CARG1 ++ | lwz CARG1, SFSAVE_1 ++ |3: ++ |.endif ++ |.else + | fadd FARG1, FARG1, TOBIT + | bge ->fff_fallback + | stfd FARG1, TMPD + | lwz CARG2, TMPD_LO + |.endif + | ins CARG1, CARG1, CARG2 +- | addi TMP1, TMP1, 8 ++ | addi SAVE0, SAVE0, 8 + | b <1 + |.endmacro + | +@@ -2280,7 +2575,14 @@ static void build_subroutines(BuildCtx *ctx) + |.macro .ffunc_bit_sh, name, ins, shmod + |.if DUALNUM + | .ffunc_2 bit_..name ++ |.if FPU + | checknum CARG3; bnel ->fff_tobit_fb ++ |.else ++ | checknum CARG3; beq >1 ++ | bl ->fff_tobit_fb ++ | lwz CARG2, 12(BASE) // Conversion polluted CARG2. ++ |1: ++ |.endif + | // Note: no inline conversion from number for 2nd argument! + | checknum CARG4; bne ->fff_fallback + |.else +@@ -2317,27 +2619,77 @@ static void build_subroutines(BuildCtx *ctx) + |->fff_resn: + | lwz PC, FRAME_PC(BASE) + | la RA, -8(BASE) ++ |.if FPU + | stfd FARG1, -8(BASE) ++ |.else ++ | stw CARG1, -8(BASE) ++ | stw CARG2, -4(BASE) ++ |.endif + | b ->fff_res1 + | + |// Fallback FP number to bit conversion. 
+ |->fff_tobit_fb: + |.if DUALNUM ++ |.if FPU + | lfd FARG1, 0(BASE) + | bgt ->fff_fallback + | fadd FARG1, FARG1, TOBIT + | stfd FARG1, TMPD + | lwz CARG1, TMPD_LO + | blr ++ |.else ++ | bgt ->fff_fallback ++ | mr CARG2, CARG1 ++ | mr CARG1, CARG3 ++ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. ++ |->vm_tobit: ++ | slwi TMP2, CARG1, 1 ++ | addis TMP2, TMP2, 0x0020 ++ | cmpwi TMP2, 0 ++ | bge >2 ++ | li TMP1, 0x3e0 ++ | srawi TMP2, TMP2, 21 ++ | not TMP1, TMP1 ++ | sub. TMP2, TMP1, TMP2 ++ | cmpwi cr7, CARG1, 0 ++ | blt >1 ++ | slwi TMP1, CARG1, 11 ++ | srwi TMP0, CARG2, 21 ++ | oris TMP1, TMP1, 0x8000 ++ | or TMP1, TMP1, TMP0 ++ | srw CARG1, TMP1, TMP2 ++ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. ++ | neg CARG1, CARG1 ++ | blr ++ |1: ++ | addi TMP2, TMP2, 21 ++ | srw TMP1, CARG2, TMP2 ++ | slwi CARG2, CARG1, 12 ++ | subfic TMP2, TMP2, 20 ++ | slw TMP0, CARG2, TMP2 ++ | or CARG1, TMP1, TMP0 ++ | bclr 4, 28 // Return if cr7[lt] == 0, no hint. ++ | neg CARG1, CARG1 ++ | blr ++ |2: ++ | li CARG1, 0 ++ | blr ++ |.endif + |.endif + |->fff_bitop_fb: + |.if DUALNUM +- | lfd FARG1, 0(TMP1) ++ |.if FPU ++ | lfd FARG1, 0(SAVE0) + | bgt ->fff_fallback + | fadd FARG1, FARG1, TOBIT + | stfd FARG1, TMPD + | lwz CARG2, TMPD_LO + | blr ++ |.else ++ | bgt ->fff_fallback ++ | mr CARG1, CARG4 ++ | b ->vm_tobit ++ |.endif + |.endif + | + |//----------------------------------------------------------------------- +@@ -2530,10 +2882,21 @@ static void build_subroutines(BuildCtx *ctx) + | decode_RA8 RC, INS // Call base. + | beq >2 + |1: // Move results down. ++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ |.endif + | addic. TMP1, TMP1, -8 + | addi RA, RA, 8 ++ |.if FPU + | stfdx f0, BASE, RC ++ |.else ++ | add CARG3, BASE, RC ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | addi RC, RC, 8 + | bne <1 + |2: +@@ -2586,10 +2949,12 @@ static void build_subroutines(BuildCtx *ctx) + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b, c, d ++ |.if FPU + | stfd f..a, 16+a*8(sp) + | stfd f..b, 16+b*8(sp) + | stfd f..c, 16+c*8(sp) + | stfd f..d, 16+d*8(sp) ++ |.endif + |.endmacro + | + |->vm_exit_handler: +@@ -2661,16 +3026,16 @@ static void build_subroutines(BuildCtx *ctx) + | lwz KBASE, PC2PROTO(k)(TMP1) + | // Setup type comparison constants. + | li TISNUM, LJ_TISNUM +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). +- | stw TMP3, TMPD ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU stw TMP3, TMPD + | li ZERO, 0 +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). +- | lfs TOBIT, TMPD +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). ++ | .FPU lfs TOBIT, TMPD ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | li TISNIL, LJ_TNIL +- | stw TMP0, TONUM_HI +- | lfs TONUM, TMPD ++ | .FPU stw TMP0, TONUM_HI ++ | .FPU lfs TONUM, TMPD + | // Modified copy of ins_next which handles function header dispatch, too. + | lwz INS, 0(PC) + | addi PC, PC, 4 +@@ -2708,14 +3073,42 @@ static void build_subroutines(BuildCtx *ctx) + |9: // Rethrow error from the right C frame. 
+ | neg CARG2, CARG1 + | mr CARG1, L +- | bl extern lj_err_throw // (lua_State *L, int errcode) ++ | bl extern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | +- |// NYI: Use internal implementations of floor, ceil, trunc. ++ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. ++ | ++ |.macro sfi2d, AHI, ALO ++ |.if not FPU ++ | mr. AHI, ALO ++ | bclr 12, 2 // Handle zero first. ++ | srawi TMP0, ALO, 31 ++ | xor TMP1, ALO, TMP0 ++ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. ++ | cntlzw AHI, TMP1 ++ | andix. TMP0, TMP0, 0x800 // Mask sign bit. ++ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. ++ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. ++ | slwi ALO, TMP1, 21 ++ | or AHI, AHI, TMP0 // Sign | Exponent. ++ | srwi TMP1, TMP1, 11 ++ | slwi AHI, AHI, 20 // Align left. ++ | add AHI, AHI, TMP1 // Add mantissa, increment exponent. ++ | blr ++ |.endif ++ |.endmacro ++ | ++ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. ++ |->vm_sfi2d_1: ++ | sfi2d CARG1, CARG2 ++ | ++ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. ++ |->vm_sfi2d_2: ++ | sfi2d CARG3, CARG4 + | + |->vm_modi: + | divwo. TMP0, CARG1, CARG2 +@@ -2770,6 +3163,11 @@ static void build_subroutines(BuildCtx *ctx) + | blr + |.endif + | ++ |->vm_next: ++ |.if JIT ++ | NYI // On big-endian. ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -2783,21 +3181,21 @@ static void build_subroutines(BuildCtx *ctx) + | addi DISPATCH, r12, GG_G2DISP + | stw r11, CTSTATE->cb.slot + | stw r3, CTSTATE->cb.gpr[0] +- | stfd f1, CTSTATE->cb.fpr[0] ++ | .FPU stfd f1, CTSTATE->cb.fpr[0] + | stw r4, CTSTATE->cb.gpr[1] +- | stfd f2, CTSTATE->cb.fpr[1] ++ | .FPU stfd f2, CTSTATE->cb.fpr[1] + | stw r5, CTSTATE->cb.gpr[2] +- | stfd f3, CTSTATE->cb.fpr[2] ++ | .FPU stfd f3, CTSTATE->cb.fpr[2] + | stw r6, CTSTATE->cb.gpr[3] +- | stfd f4, CTSTATE->cb.fpr[3] ++ | .FPU stfd f4, CTSTATE->cb.fpr[3] + | stw r7, CTSTATE->cb.gpr[4] +- | stfd f5, CTSTATE->cb.fpr[4] ++ | .FPU stfd f5, CTSTATE->cb.fpr[4] + | stw r8, CTSTATE->cb.gpr[5] +- | stfd f6, CTSTATE->cb.fpr[5] ++ | .FPU stfd f6, CTSTATE->cb.fpr[5] + | stw r9, CTSTATE->cb.gpr[6] +- | stfd f7, CTSTATE->cb.fpr[6] ++ | .FPU stfd f7, CTSTATE->cb.fpr[6] + | stw r10, CTSTATE->cb.gpr[7] +- | stfd f8, CTSTATE->cb.fpr[7] ++ | .FPU stfd f8, CTSTATE->cb.fpr[7] + | addi TMP0, sp, CFRAME_SPACE+8 + | stw TMP0, CTSTATE->cb.stack + | mr CARG1, CTSTATE +@@ -2808,21 +3206,21 @@ static void build_subroutines(BuildCtx *ctx) + | lp BASE, L:CRET1->base + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | lp RC, L:CRET1->top +- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li ZERO, 0 + | mr L, CRET1 +- | stw TMP3, TMPD +- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) ++ | .FPU stw TMP3, TMPD ++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | lwz LFUNC:RB, FRAME_FUNC(BASE) +- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
+- | stw TMP0, TONUM_HI ++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). ++ | .FPU stw TMP0, TONUM_HI + | li TISNIL, LJ_TNIL + | li_vmstate INTERP +- | lfs TOBIT, TMPD +- | stw TMP3, TMPD ++ | .FPU lfs TOBIT, TMPD ++ | .FPU stw TMP3, TMPD + | sub RC, RC, BASE + | st_vmstate +- | lfs TONUM, TMPD ++ | .FPU lfs TONUM, TMPD + | ins_callt + |.endif + | +@@ -2836,7 +3234,7 @@ static void build_subroutines(BuildCtx *ctx) + | mr CARG2, RA + | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) + | lwz CRET1, CTSTATE->cb.gpr[0] +- | lfd FARG1, CTSTATE->cb.fpr[0] ++ | .FPU lfd FARG1, CTSTATE->cb.fpr[0] + | lwz CRET2, CTSTATE->cb.gpr[1] + | b ->vm_leave_unw + |.endif +@@ -2870,14 +3268,14 @@ static void build_subroutines(BuildCtx *ctx) + | bge <1 + |2: + | bney cr1, >3 +- | lfd f1, CCSTATE->fpr[0] +- | lfd f2, CCSTATE->fpr[1] +- | lfd f3, CCSTATE->fpr[2] +- | lfd f4, CCSTATE->fpr[3] +- | lfd f5, CCSTATE->fpr[4] +- | lfd f6, CCSTATE->fpr[5] +- | lfd f7, CCSTATE->fpr[6] +- | lfd f8, CCSTATE->fpr[7] ++ | .FPU lfd f1, CCSTATE->fpr[0] ++ | .FPU lfd f2, CCSTATE->fpr[1] ++ | .FPU lfd f3, CCSTATE->fpr[2] ++ | .FPU lfd f4, CCSTATE->fpr[3] ++ | .FPU lfd f5, CCSTATE->fpr[4] ++ | .FPU lfd f6, CCSTATE->fpr[5] ++ | .FPU lfd f7, CCSTATE->fpr[6] ++ | .FPU lfd f8, CCSTATE->fpr[7] + |3: + | lp TMP0, CCSTATE->func + | lwz CARG2, CCSTATE->gpr[1] +@@ -2894,7 +3292,7 @@ static void build_subroutines(BuildCtx *ctx) + | lwz TMP2, -4(r14) + | lwz TMP0, 4(r14) + | stw CARG1, CCSTATE:TMP1->gpr[0] +- | stfd FARG1, CCSTATE:TMP1->fpr[0] ++ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] + | stw CARG2, CCSTATE:TMP1->gpr[1] + | mtlr TMP0 + | stw CARG3, CCSTATE:TMP1->gpr[2] +@@ -2923,19 +3321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzux CARG1, RA, BASE + | addi PC, PC, 4 + | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, BASE ++ | lwzux CARG3, RD, BASE + | lwz TMP2, -4(PC) +- | checknum cr0, TMP0 +- | lwz CARG3, 4(RD) ++ | checknum cr0, CARG1 ++ | lwz CARG4, 4(RD) + | decode_RD4 TMP2, TMP2 +- | checknum cr1, TMP1 +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | checknum cr1, CARG3 ++ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) + | bne cr0, >7 + | bne cr1, >8 +- | cmpw CARG2, CARG3 ++ | cmpw CARG2, CARG4 + if (op == BC_ISLT) { + | bge >2 + } else if (op == BC_ISGE) { +@@ -2946,28 +3344,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ble >2 + } + |1: +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |2: + | ins_next + | + |7: // RA is not an integer. + | bgt cr0, ->vmeta_comp + | // RA is a number. +- | lfd f0, 0(RA) ++ | .FPU lfd f0, 0(RA) + | bgt cr1, ->vmeta_comp + | blt cr1, >4 + | // RA is a number, RD is an integer. +- | tonum_i f1, CARG3 ++ |.if FPU ++ | tonum_i f1, CARG4 ++ |.else ++ | bl ->vm_sfi2d_2 ++ |.endif + | b >5 + | + |8: // RA is an integer, RD is not an integer. + | bgt cr1, ->vmeta_comp + | // RA is an integer, RD is a number. 
++ |.if FPU + | tonum_i f0, CARG2 ++ |.else ++ | bl ->vm_sfi2d_1 ++ |.endif + |4: +- | lfd f1, 0(RD) ++ | .FPU lfd f1, 0(RD) + |5: ++ |.if FPU + | fcmpu cr0, f0, f1 ++ |.else ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ |.endif + if (op == BC_ISLT) { + | bge <2 + } else if (op == BC_ISGE) { +@@ -3015,42 +3426,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzux CARG1, RA, BASE + | addi PC, PC, 4 + | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, BASE +- | checknum cr0, TMP0 +- | lwz TMP2, -4(PC) +- | checknum cr1, TMP1 +- | decode_RD4 TMP2, TMP2 +- | lwz CARG3, 4(RD) ++ | lwzux CARG3, RD, BASE ++ | checknum cr0, CARG1 ++ | lwz SAVE0, -4(PC) ++ | checknum cr1, CARG3 ++ | decode_RD4 SAVE0, SAVE0 ++ | lwz CARG4, 4(RD) + | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + if (vk) { + | ble cr7, ->BC_ISEQN_Z + } else { + | ble cr7, ->BC_ISNEN_Z + } + |.else +- | lwzux TMP0, RA, BASE +- | lwz TMP2, 0(PC) ++ | lwzux CARG1, RA, BASE ++ | lwz SAVE0, 0(PC) + | lfd f0, 0(RA) + | addi PC, PC, 4 +- | lwzux TMP1, RD, BASE +- | checknum cr0, TMP0 +- | decode_RD4 TMP2, TMP2 ++ | lwzux CARG3, RD, BASE ++ | checknum cr0, CARG1 ++ | decode_RD4 SAVE0, SAVE0 + | lfd f1, 0(RD) +- | checknum cr1, TMP1 +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | checknum cr1, CARG3 ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + | bge cr0, >5 + | bge cr1, >5 + | fcmpu cr0, f0, f1 + if (vk) { + | bne >1 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + } else { + | beq >1 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + } + |1: + | ins_next +@@ -3058,36 +3469,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |5: // Either or both types are not numbers. + |.if not DUALNUM + | lwz CARG2, 4(RA) +- | lwz CARG3, 4(RD) ++ | lwz CARG4, 4(RD) + |.endif + |.if FFI +- | cmpwi cr7, TMP0, LJ_TCDATA +- | cmpwi cr5, TMP1, LJ_TCDATA ++ | cmpwi cr7, CARG1, LJ_TCDATA ++ | cmpwi cr5, CARG3, LJ_TCDATA + |.endif +- | not TMP3, TMP0 +- | cmplw TMP0, TMP1 +- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? ++ | not TMP2, CARG1 ++ | cmplw CARG1, CARG3 ++ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? + |.if FFI + | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq + |.endif +- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? ++ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? + |.if FFI + | beq cr7, ->vmeta_equal_cd + |.endif +- | cmplw cr5, CARG2, CARG3 ++ | cmplw cr5, CARG2, CARG4 + | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. + | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. + | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. +- | mr SAVE0, PC ++ | mr SAVE1, PC + | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. + | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. + if (vk) { + | bne cr0, >6 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |6: + } else { + | beq cr0, >6 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |6: + } + |.if DUALNUM +@@ -3102,6 +3513,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! ++ | mr CARG3, CARG4 + | lwz TAB:TMP2, TAB:CARG2->metatable + | li CARG4, 1-vk // ne = 0 or 1. + | cmplwi TAB:TMP2, 0 +@@ -3109,7 +3521,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lbz TMP2, TAB:TMP2->nomm + | andix. 
TMP2, TMP2, 1<<MM_eq + | bne <1 // Or 'no __eq' flag set? +- | mr PC, SAVE0 // Restore old PC. ++ | mr PC, SAVE1 // Restore old PC. + | b ->vmeta_equal // Handle __eq metamethod. + break; + +@@ -3150,16 +3562,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + |.if DUALNUM +- | lwzux TMP0, RA, BASE ++ | lwzux CARG1, RA, BASE + | addi PC, PC, 4 + | lwz CARG2, 4(RA) +- | lwzux TMP1, RD, KBASE +- | checknum cr0, TMP0 +- | lwz TMP2, -4(PC) +- | checknum cr1, TMP1 +- | decode_RD4 TMP2, TMP2 +- | lwz CARG3, 4(RD) +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | lwzux CARG3, RD, KBASE ++ | checknum cr0, CARG1 ++ | lwz SAVE0, -4(PC) ++ | checknum cr1, CARG3 ++ | decode_RD4 SAVE0, SAVE0 ++ | lwz CARG4, 4(RD) ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + if (vk) { + |->BC_ISEQN_Z: + } else { +@@ -3167,7 +3579,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } + | bne cr0, >7 + | bne cr1, >8 +- | cmpw CARG2, CARG3 ++ | cmpw CARG2, CARG4 + |4: + |.else + if (vk) { +@@ -3175,20 +3587,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } else { + |->BC_ISNEN_Z: // Dummy label. + } +- | lwzx TMP0, BASE, RA ++ | lwzx CARG1, BASE, RA + | addi PC, PC, 4 + | lfdx f0, BASE, RA +- | lwz TMP2, -4(PC) ++ | lwz SAVE0, -4(PC) + | lfdx f1, KBASE, RD +- | decode_RD4 TMP2, TMP2 +- | checknum TMP0 +- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) ++ | decode_RD4 SAVE0, SAVE0 ++ | checknum CARG1 ++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) + | bge >3 + | fcmpu cr0, f0, f1 + |.endif + if (vk) { + | bne >1 +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |1: + |.if not FFI + |3: +@@ -3199,13 +3611,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.if not FFI + |3: + |.endif +- | add PC, PC, TMP2 ++ | add PC, PC, SAVE0 + |2: + } + | ins_next + |.if FFI + |3: +- | cmpwi TMP0, LJ_TCDATA ++ | cmpwi CARG1, LJ_TCDATA + | beq ->vmeta_equal_cd + | b <1 + |.endif +@@ -3213,18 +3625,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |7: // RA is not an integer. + | bge cr0, <3 + | // RA is a number. +- | lfd f0, 0(RA) ++ | .FPU lfd f0, 0(RA) + | blt cr1, >1 + | // RA is a number, RD is an integer. +- | tonum_i f1, CARG3 ++ |.if FPU ++ | tonum_i f1, CARG4 ++ |.else ++ | bl ->vm_sfi2d_2 ++ |.endif + | b >2 + | + |8: // RA is an integer, RD is a number. 
++ |.if FPU + | tonum_i f0, CARG2 ++ |.else ++ | bl ->vm_sfi2d_1 ++ |.endif + |1: +- | lfd f1, 0(RD) ++ | .FPU lfd f1, 0(RD) + |2: ++ |.if FPU + | fcmpu cr0, f0, f1 ++ |.else ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ |.endif + | b <4 + |.endif + break; +@@ -3279,7 +3704,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add PC, PC, TMP2 + } else { + | li TMP1, LJ_TFALSE ++ |.if FPU + | lfdx f0, BASE, RD ++ |.else ++ | lwzux CARG1, RD, BASE ++ | lwz CARG2, 4(RD) ++ |.endif + | cmplw TMP0, TMP1 + if (op == BC_ISTC) { + | bge >1 +@@ -3288,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + } + | addis PC, PC, -(BCBIAS_J*4 >> 16) + | decode_RD4 TMP2, INS ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux CARG1, RA, BASE ++ | stw CARG2, 4(RA) ++ |.endif + | add PC, PC, TMP2 + |1: + } +@@ -3323,8 +3758,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_MOV: + | // RA = dst*8, RD = src*8 + | ins_next1 ++ |.if FPU + | lfdx f0, BASE, RD + | stfdx f0, BASE, RA ++ |.else ++ | lwzux TMP0, RD, BASE ++ | lwz TMP1, 4(RD) ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + break; + case BC_NOT: +@@ -3426,44 +3868,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: +- | lwzx TMP1, BASE, RB ++ | lwzx CARG1, BASE, RB + | .if DUALNUM +- | lwzx TMP2, KBASE, RC ++ | lwzx CARG3, KBASE, RC + | .endif ++ | .if FPU + | lfdx f14, BASE, RB + | lfdx f15, KBASE, RC ++ | .else ++ | add TMP1, BASE, RB ++ | add TMP2, KBASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ | .endif + | .if DUALNUM +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_vn + | .else +- | checknum TMP1; bge ->vmeta_arith_vn ++ | checknum CARG1; bge ->vmeta_arith_vn + | .endif + || break; + ||case 1: +- | lwzx TMP1, BASE, RB ++ | lwzx CARG1, BASE, RB + | .if DUALNUM +- | lwzx TMP2, KBASE, RC ++ | lwzx CARG3, KBASE, RC + | .endif ++ | .if FPU + | lfdx f15, BASE, RB + | lfdx f14, KBASE, RC ++ | .else ++ | add TMP1, BASE, RB ++ | add TMP2, KBASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ | .endif + | .if DUALNUM +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_nv + | .else +- | checknum TMP1; bge ->vmeta_arith_nv ++ | checknum CARG1; bge ->vmeta_arith_nv + | .endif + || break; + ||default: +- | lwzx TMP1, BASE, RB +- | lwzx TMP2, BASE, RC ++ | lwzx CARG1, BASE, RB ++ | lwzx CARG3, BASE, RC ++ | .if FPU + | lfdx f14, BASE, RB + | lfdx f15, BASE, RC +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ | .else ++ | add TMP1, BASE, RB ++ | add TMP2, BASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ | .endif ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_vv + || break; +@@ -3497,48 +3960,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | fsub a, b, a // b - floor(b/c)*c + |.endmacro + | ++ |.macro sfpmod ++ |->BC_MODVN_Z: ++ | stw CARG1, SFSAVE_1 ++ | stw CARG2, SFSAVE_2 ++ | mr SAVE0, CARG3 ++ | mr SAVE1, CARG4 ++ | blex __divdf3 ++ | blex floor ++ | mr CARG3, SAVE0 ++ | mr CARG4, SAVE1 ++ | blex __muldf3 ++ | mr CARG3, CRET1 ++ | mr CARG4, CRET2 ++ | lwz CARG1, SFSAVE_1 ++ | lwz CARG2, SFSAVE_2 ++ | blex __subdf3 ++ |.endmacro ++ | + |.macro ins_arithfp, fpins 
+ | ins_arithpre + |.if "fpins" == "fpmod_" + | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. +- |.else ++ |.elif FPU + | fpins f0, f14, f15 + | ins_next1 + | stfdx f0, BASE, RA + | ins_next2 ++ |.else ++ | blex __divdf3 // Only soft-float div uses this macro. ++ | ins_next1 ++ | stwux CRET1, RA, BASE ++ | stw CRET2, 4(RA) ++ | ins_next2 + |.endif + |.endmacro + | +- |.macro ins_arithdn, intins, fpins ++ |.macro ins_arithdn, intins, fpins, fpcall + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, KBASE +- | lwz CARG1, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG2, 4(RC) ++ | lwzux CARG1, RB, BASE ++ | lwzux CARG3, RC, KBASE ++ | lwz CARG2, 4(RB) ++ | checknum cr0, CARG1 ++ | lwz CARG4, 4(RC) ++ | checknum cr1, CARG3 + || break; + ||case 1: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, KBASE +- | lwz CARG2, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG1, 4(RC) ++ | lwzux CARG3, RB, BASE ++ | lwzux CARG1, RC, KBASE ++ | lwz CARG4, 4(RB) ++ | checknum cr0, CARG3 ++ | lwz CARG2, 4(RC) ++ | checknum cr1, CARG1 + || break; + ||default: +- | lwzux TMP1, RB, BASE +- | lwzux TMP2, RC, BASE +- | lwz CARG1, 4(RB) +- | checknum cr0, TMP1 +- | lwz CARG2, 4(RC) ++ | lwzux CARG1, RB, BASE ++ | lwzux CARG3, RC, BASE ++ | lwz CARG2, 4(RB) ++ | checknum cr0, CARG1 ++ | lwz CARG4, 4(RC) ++ | checknum cr1, CARG3 + || break; + ||} +- | checknum cr1, TMP2 + | bne >5 + | bne cr1, >5 +- | intins CARG1, CARG1, CARG2 ++ |.if "intins" == "intmod" ++ | mr CARG1, CARG2 ++ | mr CARG2, CARG4 ++ |.endif ++ | intins CARG1, CARG2, CARG4 + | bso >4 + |1: + | ins_next1 +@@ -3550,29 +4043,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | checkov TMP0, <1 // Ignore unrelated overflow. + | ins_arithfallback b + |5: // FP variant. ++ |.if FPU + ||if (vk == 1) { + | lfd f15, 0(RB) +- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | lfd f14, 0(RC) + ||} else { + | lfd f14, 0(RB) +- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | lfd f15, 0(RC) + ||} ++ |.endif ++ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | ins_arithfallback bge + |.if "fpins" == "fpmod_" + | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. + |.else ++ |.if FPU + | fpins f0, f14, f15 +- | ins_next1 + | stfdx f0, BASE, RA ++ |.else ++ |.if "fpcall" == "sfpmod" ++ | sfpmod ++ |.else ++ | blex fpcall ++ |.endif ++ | stwux CRET1, RA, BASE ++ | stw CRET2, 4(RA) ++ |.endif ++ | ins_next1 + | b <2 + |.endif + |.endmacro + | +- |.macro ins_arith, intins, fpins ++ |.macro ins_arith, intins, fpins, fpcall + |.if DUALNUM +- | ins_arithdn intins, fpins ++ | ins_arithdn intins, fpins, fpcall + |.else + | ins_arithfp fpins + |.endif +@@ -3583,13 +4087,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.macro addo32., y, a, b + | // Need to check overflow for (a<<32) + (b<<32). + | rldicr TMP0, a, 32, 31 +- | rldicr TMP3, b, 32, 31 +- | addo. TMP0, TMP0, TMP3 ++ | rldicr TMP1, b, 32, 31 ++ | addo. TMP0, TMP0, TMP1 + | add y, a, b + |.endmacro +- | ins_arith addo32., fadd ++ | ins_arith addo32., fadd, __adddf3 + |.else +- | ins_arith addo., fadd ++ | ins_arith addo., fadd, __adddf3 + |.endif + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: +@@ -3597,40 +4101,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.macro subo32., y, a, b + | // Need to check overflow for (a<<32) - (b<<32). + | rldicr TMP0, a, 32, 31 +- | rldicr TMP3, b, 32, 31 +- | subo. TMP0, TMP0, TMP3 ++ | rldicr TMP1, b, 32, 31 ++ | subo. 
TMP0, TMP0, TMP1 + | sub y, a, b + |.endmacro +- | ins_arith subo32., fsub ++ | ins_arith subo32., fsub, __subdf3 + |.else +- | ins_arith subo., fsub ++ | ins_arith subo., fsub, __subdf3 + |.endif + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: +- | ins_arith mullwo., fmul ++ | ins_arith mullwo., fmul, __muldf3 + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp fdiv + break; + case BC_MODVN: +- | ins_arith intmod, fpmod ++ | ins_arith intmod, fpmod, sfpmod + break; + case BC_MODNV: case BC_MODVV: +- | ins_arith intmod, fpmod_ ++ | ins_arith intmod, fpmod_, sfpmod + break; + case BC_POW: + | // NYI: (partial) integer arithmetic. +- | lwzx TMP1, BASE, RB ++ | lwzx CARG1, BASE, RB ++ | lwzx CARG3, BASE, RC ++ |.if FPU + | lfdx FARG1, BASE, RB +- | lwzx TMP2, BASE, RC + | lfdx FARG2, BASE, RC +- | checknum cr0, TMP1 +- | checknum cr1, TMP2 ++ |.else ++ | add TMP1, BASE, RB ++ | add TMP2, BASE, RC ++ | lwz CARG2, 4(TMP1) ++ | lwz CARG4, 4(TMP2) ++ |.endif ++ | checknum cr0, CARG1 ++ | checknum cr1, CARG3 + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + | bge ->vmeta_arith_vv + | blex pow + | ins_next1 ++ |.if FPU + | stfdx FARG1, BASE, RA ++ |.else ++ | stwux CARG1, RA, BASE ++ | stw CARG2, 4(RA) ++ |.endif + | ins_next2 + break; + +@@ -3650,8 +4166,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lp BASE, L->base + | bne ->vmeta_binop + | ins_next1 ++ |.if FPU + | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. + | stfdx f0, BASE, RA ++ |.else ++ | lwzux TMP0, SAVE0, BASE ++ | lwz TMP1, 4(SAVE0) ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + break; + +@@ -3714,8 +4237,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | ins_next1 ++ |.if FPU + | lfdx f0, KBASE, RD + | stfdx f0, BASE, RA ++ |.else ++ | lwzux TMP0, RD, KBASE ++ | lwz TMP1, 4(RD) ++ | stwux TMP0, RA, BASE ++ | stw TMP1, 4(RA) ++ |.endif + | ins_next2 + break; + case BC_KPRI: +@@ -3748,8 +4278,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwzx UPVAL:RB, LFUNC:RB, RD + | ins_next1 + | lwz TMP1, UPVAL:RB->v ++ |.if FPU + | lfd f0, 0(TMP1) + | stfdx f0, BASE, RA ++ |.else ++ | lwz TMP2, 0(TMP1) ++ | lwz TMP3, 4(TMP1) ++ | stwux TMP2, RA, BASE ++ | stw TMP3, 4(RA) ++ |.endif + | ins_next2 + break; + case BC_USETV: +@@ -3757,14 +4294,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz LFUNC:RB, FRAME_FUNC(BASE) + | srwi RA, RA, 1 + | addi RA, RA, offsetof(GCfuncL, uvptr) ++ |.if FPU + | lfdux f0, RD, BASE ++ |.else ++ | lwzux CARG1, RD, BASE ++ | lwz CARG3, 4(RD) ++ |.endif + | lwzx UPVAL:RB, LFUNC:RB, RA + | lbz TMP3, UPVAL:RB->marked + | lwz CARG2, UPVAL:RB->v + | andix. 
TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbz TMP0, UPVAL:RB->closed + | lwz TMP2, 0(RD) ++ |.if FPU + | stfd f0, 0(CARG2) ++ |.else ++ | stw CARG1, 0(CARG2) ++ | stw CARG3, 4(CARG2) ++ |.endif + | cmplwi cr1, TMP0, 0 + | lwz TMP1, 4(RD) + | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq +@@ -3820,11 +4367,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz LFUNC:RB, FRAME_FUNC(BASE) + | srwi RA, RA, 1 + | addi RA, RA, offsetof(GCfuncL, uvptr) ++ |.if FPU + | lfdx f0, KBASE, RD ++ |.else ++ | lwzux TMP2, RD, KBASE ++ | lwz TMP3, 4(RD) ++ |.endif + | lwzx UPVAL:RB, LFUNC:RB, RA + | ins_next1 + | lwz TMP1, UPVAL:RB->v ++ |.if FPU + | stfd f0, 0(TMP1) ++ |.else ++ | stw TMP2, 0(TMP1) ++ | stw TMP3, 4(TMP1) ++ |.endif + | ins_next2 + break; + case BC_USETP: +@@ -3972,11 +4529,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | ble ->vmeta_tgetv // Integer key and in array part? + | lwzx TMP0, TMP1, TMP2 ++ |.if FPU + | lfdx f14, TMP1, TMP2 ++ |.else ++ | lwzux SAVE0, TMP1, TMP2 ++ | lwz SAVE1, 4(TMP1) ++ |.endif + | checknil TMP0; beq >2 + |1: + | ins_next1 ++ |.if FPU + | stfdx f14, BASE, RA ++ |.else ++ | stwux SAVE0, RA, BASE ++ | stw SAVE1, 4(RA) ++ |.endif + | ins_next2 + | + |2: // Check for __index if table value is nil. +@@ -4007,9 +4574,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lwz TMP0, TAB:RB->hmask +- | lwz TMP1, STR:RC->hash ++ | lwz TMP1, STR:RC->sid + | lwz NODE:TMP2, TAB:RB->node +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slwi TMP0, TMP1, 5 + | slwi TMP1, TMP1, 3 + | sub TMP1, TMP0, TMP1 +@@ -4052,12 +4619,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz TMP1, TAB:RB->asize + | lwz TMP2, TAB:RB->array + | cmplw TMP0, TMP1; bge ->vmeta_tgetb ++ |.if FPU + | lwzx TMP1, TMP2, RC + | lfdx f0, TMP2, RC ++ |.else ++ | lwzux TMP1, TMP2, RC ++ | lwz TMP3, 4(TMP2) ++ |.endif + | checknil TMP1; beq >5 + |1: + | ins_next1 ++ |.if FPU + | stfdx f0, BASE, RA ++ |.else ++ | stwux TMP1, RA, BASE ++ | stw TMP3, 4(RA) ++ |.endif + | ins_next2 + | + |5: // Check for __index if table value is nil. +@@ -4087,10 +4664,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | cmplw TMP0, CARG2 + | slwi TMP2, CARG2, 3 + | ble ->vmeta_tgetr // In array part? ++ |.if FPU + | lfdx f14, TMP1, TMP2 ++ |.else ++ | lwzux SAVE0, TMP2, TMP1 ++ | lwz SAVE1, 4(TMP2) ++ |.endif + |->BC_TGETR_Z: + | ins_next1 ++ |.if FPU + | stfdx f14, BASE, RA ++ |.else ++ | stwux SAVE0, RA, BASE ++ | stw SAVE1, 4(RA) ++ |.endif + | ins_next2 + break; + +@@ -4131,11 +4718,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ble ->vmeta_tsetv // Integer key and in array part? + | lwzx TMP2, TMP1, TMP0 + | lbz TMP3, TAB:RB->marked ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | add SAVE1, BASE, RA ++ | lwz SAVE0, 0(SAVE1) ++ | lwz SAVE1, 4(SAVE1) ++ |.endif + | checknil TMP2; beq >3 + |1: + | andix. 
TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU + | stfdx f14, TMP1, TMP0 ++ |.else ++ | stwux SAVE0, TMP1, TMP0 ++ | stw SAVE1, 4(TMP1) ++ |.endif + | bne >7 + |2: + | ins_next +@@ -4172,11 +4770,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 + | lwz TMP0, TAB:RB->hmask +- | lwz TMP1, STR:RC->hash ++ | lwz TMP1, STR:RC->sid + | lwz NODE:TMP2, TAB:RB->node + | stb ZERO, TAB:RB->nomm // Clear metamethod cache. +- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | add CARG2, BASE, RA ++ | lwz SAVE0, 0(CARG2) ++ | lwz SAVE1, 4(CARG2) ++ |.endif + | slwi TMP0, TMP1, 5 + | slwi TMP1, TMP1, 3 + | sub TMP1, TMP0, TMP1 +@@ -4192,7 +4796,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | checknil CARG2; beq >4 // Key found, but nil value? + |2: + | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU + | stfd f14, NODE:TMP2->val ++ |.else ++ | stw SAVE0, NODE:TMP2->val.u32.hi ++ | stw SAVE1, NODE:TMP2->val.u32.lo ++ |.endif + | bne >7 + |3: + | ins_next +@@ -4231,7 +4840,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Returns TValue *. + | lp BASE, L->base ++ |.if FPU + | stfd f14, 0(CRET1) ++ |.else ++ | stw SAVE0, 0(CRET1) ++ | stw SAVE1, 4(CRET1) ++ |.endif + | b <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. +@@ -4248,13 +4862,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | lwz TMP2, TAB:RB->array + | lbz TMP3, TAB:RB->marked + | cmplw TMP0, TMP1 ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | add CARG2, BASE, RA ++ | lwz SAVE0, 0(CARG2) ++ | lwz SAVE1, 4(CARG2) ++ |.endif + | bge ->vmeta_tsetb + | lwzx TMP1, TMP2, RC + | checknil TMP1; beq >5 + |1: + | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU + | stfdx f14, TMP2, RC ++ |.else ++ | stwux SAVE0, RC, TMP2 ++ | stw SAVE1, 4(RC) ++ |.endif + | bne >7 + |2: + | ins_next +@@ -4294,10 +4919,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |2: + | cmplw TMP0, CARG3 + | slwi TMP2, CARG3, 3 ++ |.if FPU + | lfdx f14, BASE, RA ++ |.else ++ | lwzux SAVE0, RA, BASE ++ | lwz SAVE1, 4(RA) ++ |.endif + | ble ->vmeta_tsetr // In array part? + | ins_next1 ++ |.if FPU + | stfdx f14, TMP1, TMP2 ++ |.else ++ | stwux SAVE0, TMP1, TMP2 ++ | stw SAVE1, 4(TMP1) ++ |.endif + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. +@@ -4327,10 +4962,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add TMP1, TMP1, TMP0 + | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. 
++ |.if FPU + | lfd f0, 0(RA) ++ |.else ++ | lwz SAVE0, 0(RA) ++ | lwz SAVE1, 4(RA) ++ |.endif + | addi RA, RA, 8 + | cmpw cr1, RA, TMP2 ++ |.if FPU + | stfd f0, 0(TMP1) ++ |.else ++ | stw SAVE0, 0(TMP1) ++ | stw SAVE1, 4(TMP1) ++ |.endif + | addi TMP1, TMP1, 8 + | blt cr1, <3 + | bne >7 +@@ -4397,9 +5042,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | beq cr1, >3 + |2: + | addi TMP3, TMP2, 8 ++ |.if FPU + | lfdx f0, RA, TMP2 ++ |.else ++ | add CARG3, RA, TMP2 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | cmplw cr1, TMP3, NARGS8:RC ++ |.if FPU + | stfdx f0, BASE, TMP2 ++ |.else ++ | stwux CARG1, TMP2, BASE ++ | stw CARG2, 4(TMP2) ++ |.endif + | mr TMP2, TMP3 + | bne cr1, <2 + |3: +@@ -4432,14 +5088,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add BASE, BASE, RA + | lwz TMP1, -24(BASE) + | lwz LFUNC:RB, -20(BASE) ++ |.if FPU + | lfd f1, -8(BASE) + | lfd f0, -16(BASE) ++ |.else ++ | lwz CARG1, -8(BASE) ++ | lwz CARG2, -4(BASE) ++ | lwz CARG3, -16(BASE) ++ | lwz CARG4, -12(BASE) ++ |.endif + | stw TMP1, 0(BASE) // Copy callable. + | stw LFUNC:RB, 4(BASE) + | checkfunc TMP1 +- | stfd f1, 16(BASE) // Copy control var. + | li NARGS8:RC, 16 // Iterators get 2 arguments. ++ |.if FPU ++ | stfd f1, 16(BASE) // Copy control var. + | stfdu f0, 8(BASE) // Copy state. ++ |.else ++ | stw CARG1, 16(BASE) // Copy control var. ++ | stw CARG2, 20(BASE) ++ | stwu CARG3, 8(BASE) // Copy state. ++ | stw CARG4, 4(BASE) ++ |.endif + | bne ->vmeta_call + | ins_call + break; +@@ -4447,8 +5117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + case BC_ITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + |.if JIT +- | // NYI: add hotloop, record BC_ITERN. ++ | // NYI on big-endian + |.endif ++ |->vm_IITERN: + | add RA, BASE, RA + | lwz TAB:RB, -12(RA) + | lwz RC, -4(RA) // Get index from control var. +@@ -4460,7 +5131,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slwi TMP3, RC, 3 + | bge >5 // Index points after array part? + | lwzx TMP2, TMP1, TMP3 ++ |.if FPU + | lfdx f0, TMP1, TMP3 ++ |.else ++ | lwzux CARG1, TMP3, TMP1 ++ | lwz CARG2, 4(TMP3) ++ |.endif + | checknil TMP2 + | lwz INS, -4(PC) + | beq >4 +@@ -4472,7 +5148,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |.endif + | addi RC, RC, 1 + | addis TMP3, PC, -(BCBIAS_J*4 >> 16) ++ |.if FPU + | stfd f0, 8(RA) ++ |.else ++ | stw CARG1, 8(RA) ++ | stw CARG2, 12(RA) ++ |.endif + | decode_RD4 TMP1, INS + | stw RC, -4(RA) // Update control var. + | add PC, TMP1, TMP3 +@@ -4497,17 +5178,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | slwi RB, RC, 3 + | sub TMP3, TMP3, RB + | lwzx RB, TMP2, TMP3 ++ |.if FPU + | lfdx f0, TMP2, TMP3 ++ |.else ++ | add CARG3, TMP2, TMP3 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | add NODE:TMP3, TMP2, TMP3 + | checknil RB + | lwz INS, -4(PC) + | beq >7 ++ |.if FPU + | lfd f1, NODE:TMP3->key ++ |.else ++ | lwz CARG3, NODE:TMP3->key.u32.hi ++ | lwz CARG4, NODE:TMP3->key.u32.lo ++ |.endif + | addis TMP2, PC, -(BCBIAS_J*4 >> 16) ++ |.if FPU + | stfd f0, 8(RA) ++ |.else ++ | stw CARG1, 8(RA) ++ | stw CARG2, 12(RA) ++ |.endif + | add RC, RC, TMP0 + | decode_RD4 TMP1, INS ++ |.if FPU + | stfd f1, 0(RA) ++ |.else ++ | stw CARG3, 0(RA) ++ | stw CARG4, 4(RA) ++ |.endif + | addi RC, RC, 1 + | add PC, TMP1, TMP2 + | stw RC, -4(RA) // Update control var. 
+@@ -4536,8 +5238,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq + | add TMP3, PC, TMP0 + | bne cr0, >5 +- | lus TMP1, 0xfffe +- | ori TMP1, TMP1, 0x7fff ++ | lus TMP1, (LJ_KEYINDEX >> 16) ++ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) + | stw ZERO, -4(RA) // Initialize control var. + | stw TMP1, -8(RA) + | addis PC, TMP3, -(BCBIAS_J*4 >> 16) +@@ -4548,6 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | li TMP1, BC_ITERC + | stb TMP0, -1(PC) + | addis PC, TMP3, -(BCBIAS_J*4 >> 16) ++ | // NYI on big-endian: unpatch JLOOP. + | stb TMP1, 3(PC) + | b <1 + break; +@@ -4573,9 +5276,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | subi TMP2, TMP2, 16 + | ble >2 // No vararg slots? + |1: // Copy vararg slots to destination slots. ++ |.if FPU + | lfd f0, 0(RC) ++ |.else ++ | lwz CARG1, 0(RC) ++ | lwz CARG2, 4(RC) ++ |.endif + | addi RC, RC, 8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw CARG1, 0(RA) ++ | stw CARG2, 4(RA) ++ |.endif + | cmplw RA, TMP2 + | cmplw cr1, RC, TMP3 + | bge >3 // All destination slots filled? +@@ -4598,9 +5311,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addi MULTRES, TMP1, 8 + | bgt >7 + |6: ++ |.if FPU + | lfd f0, 0(RC) ++ |.else ++ | lwz CARG1, 0(RC) ++ | lwz CARG2, 4(RC) ++ |.endif + | addi RC, RC, 8 ++ |.if FPU + | stfd f0, 0(RA) ++ |.else ++ | stw CARG1, 0(RA) ++ | stw CARG2, 4(RA) ++ |.endif + | cmplw RC, TMP3 + | addi RA, RA, 8 + | blt <6 // More vararg slots? +@@ -4651,14 +5374,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | li TMP1, 0 + |2: + | addi TMP3, TMP1, 8 ++ |.if FPU + | lfdx f0, RA, TMP1 ++ |.else ++ | add CARG3, RA, TMP1 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | cmpw TMP3, RC ++ |.if FPU + | stfdx f0, TMP2, TMP1 ++ |.else ++ | add CARG3, TMP2, TMP1 ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | beq >3 + | addi TMP1, TMP3, 8 ++ |.if FPU + | lfdx f1, RA, TMP3 ++ |.else ++ | add CARG3, RA, TMP3 ++ | lwz CARG1, 0(CARG3) ++ | lwz CARG2, 4(CARG3) ++ |.endif + | cmpw TMP1, RC ++ |.if FPU + | stfdx f1, TMP2, TMP3 ++ |.else ++ | add CARG3, TMP2, TMP3 ++ | stw CARG1, 0(CARG3) ++ | stw CARG2, 4(CARG3) ++ |.endif + | bne <2 + |3: + |5: +@@ -4700,8 +5447,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | subi TMP2, BASE, 8 + | decode_RB8 RB, INS + if (op == BC_RET1) { ++ |.if FPU + | lfd f0, 0(RA) + | stfd f0, 0(TMP2) ++ |.else ++ | lwz CARG1, 0(RA) ++ | lwz CARG2, 4(RA) ++ | stw CARG1, 0(TMP2) ++ | stw CARG2, 4(TMP2) ++ |.endif + } + |5: + | cmplw RB, RD +@@ -4762,11 +5516,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + |4: + | stw CARG1, FORL_IDX*8+4(RA) + } else { +- | lwz TMP3, FORL_STEP*8(RA) ++ | lwz SAVE0, FORL_STEP*8(RA) + | lwz CARG3, FORL_STEP*8+4(RA) + | lwz TMP2, FORL_STOP*8(RA) + | lwz CARG2, FORL_STOP*8+4(RA) +- | cmplw cr7, TMP3, TISNUM ++ | cmplw cr7, SAVE0, TISNUM + | cmplw cr1, TMP2, TISNUM + | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq + | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq +@@ -4809,41 +5563,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + if (vk) { + |.if DUALNUM + |9: // FP loop. 
++ |.if FPU + | lfd f1, FORL_IDX*8(RA) + |.else ++ | lwz CARG1, FORL_IDX*8(RA) ++ | lwz CARG2, FORL_IDX*8+4(RA) ++ |.endif ++ |.else + | lfdux f1, RA, BASE + |.endif ++ |.if FPU + | lfd f3, FORL_STEP*8(RA) + | lfd f2, FORL_STOP*8(RA) +- | lwz TMP3, FORL_STEP*8(RA) + | fadd f1, f1, f3 + | stfd f1, FORL_IDX*8(RA) ++ |.else ++ | lwz CARG3, FORL_STEP*8(RA) ++ | lwz CARG4, FORL_STEP*8+4(RA) ++ | mr SAVE1, RD ++ | blex __adddf3 ++ | mr RD, SAVE1 ++ | stw CRET1, FORL_IDX*8(RA) ++ | stw CRET2, FORL_IDX*8+4(RA) ++ | lwz CARG3, FORL_STOP*8(RA) ++ | lwz CARG4, FORL_STOP*8+4(RA) ++ |.endif ++ | lwz SAVE0, FORL_STEP*8(RA) + } else { + |.if DUALNUM + |9: // FP loop. + |.else + | lwzux TMP1, RA, BASE +- | lwz TMP3, FORL_STEP*8(RA) ++ | lwz SAVE0, FORL_STEP*8(RA) + | lwz TMP2, FORL_STOP*8(RA) + | cmplw cr0, TMP1, TISNUM +- | cmplw cr7, TMP3, TISNUM ++ | cmplw cr7, SAVE0, TISNUM + | cmplw cr1, TMP2, TISNUM + |.endif ++ |.if FPU + | lfd f1, FORL_IDX*8(RA) ++ |.else ++ | lwz CARG1, FORL_IDX*8(RA) ++ | lwz CARG2, FORL_IDX*8+4(RA) ++ |.endif + | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt ++ |.if FPU + | lfd f2, FORL_STOP*8(RA) ++ |.else ++ | lwz CARG3, FORL_STOP*8(RA) ++ | lwz CARG4, FORL_STOP*8+4(RA) ++ |.endif + | bge ->vmeta_for + } +- | cmpwi cr6, TMP3, 0 ++ | cmpwi cr6, SAVE0, 0 + if (op != BC_JFORL) { + | srwi RD, RD, 1 + } ++ |.if FPU + | stfd f1, FORL_EXT*8(RA) ++ |.else ++ | stw CARG1, FORL_EXT*8(RA) ++ | stw CARG2, FORL_EXT*8+4(RA) ++ |.endif + if (op != BC_JFORL) { + | add RD, PC, RD + } ++ |.if FPU + | fcmpu cr0, f1, f2 ++ |.else ++ | mr SAVE1, RD ++ | blex __ledf2 ++ | cmpwi CRET1, 0 ++ | mr RD, SAVE1 ++ |.endif + if (op == BC_JFORI) { + | addis PC, RD, -(BCBIAS_J*4 >> 16) + } +diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc +index a003fb4f..fdffd4b6 100644 +--- a/src/vm_x64.dasc ++++ b/src/vm_x64.dasc +@@ -1,6 +1,6 @@ + |// Low-level VM code for x64 CPUs in LJ_GC64 mode. + |// Bytecode interpreter, fast functions and helper functions. +-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h + | + |.arch x64 + |.section code_op, code_sub +@@ -1230,7 +1230,7 @@ static void build_subroutines(BuildCtx *ctx) + | mov [BASE-16], TAB:RC // Store metatable as default result. + | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] + | mov RAd, TAB:RB->hmask +- | and RAd, STR:RC->hash ++ | and RAd, STR:RC->sid + | settp STR:RC, LJ_TSTR + | imul RAd, #NODE + | add NODE:RA, TAB:RB->node +@@ -1346,44 +1346,28 @@ static void build_subroutines(BuildCtx *ctx) + |.ffunc_1 next + | je >2 // Missing 2nd arg? + |1: +- |.if X64WIN +- | mov RA, [BASE] +- | checktab RA, ->fff_fallback +- |.else +- | mov CARG2, [BASE] +- | checktab CARG2, ->fff_fallback +- |.endif +- | mov L:RB, SAVE_L +- | mov L:RB->base, BASE // Add frame since C call can throw. +- | mov L:RB->top, BASE // Dummy frame length is ok. ++ | mov CARG1, [BASE] + | mov PC, [BASE-8] ++ | checktab CARG1, ->fff_fallback ++ | mov RB, BASE // Save BASE. + |.if X64WIN +- | lea CARG3, [BASE+8] +- | mov CARG2, RA // Caveat: CARG2 == BASE. +- | mov CARG1, L:RB ++ | lea CARG3, [BASE-16] ++ | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE. + |.else +- | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. +- | mov CARG1, L:RB ++ | lea CARG2, [BASE+8] ++ | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE. + |.endif +- | mov SAVE_PC, PC // Needed for ITERN fallback. 
+- | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) +- | // Flag returned in eax (RD). +- | mov BASE, L:RB->base +- | test RDd, RDd; jz >3 // End of traversal? +- | // Copy key and value to results. +- | mov RB, [BASE+8] +- | mov RD, [BASE+16] +- | mov [BASE-16], RB +- | mov [BASE-8], RD +- |->fff_res2: +- | mov RDd, 1+2 +- | jmp ->fff_res ++ | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // 1=found, 0=end, -1=error returned in eax (RD). ++ | mov BASE, RB // Restore BASE. ++ | test RDd, RDd; jg ->fff_res2 // Found key/value. ++ | js ->fff_fallback_2 // Invalid key. ++ | // End of traversal: return nil. ++ | mov aword [BASE-16], LJ_TNIL ++ | jmp ->fff_res1 + |2: // Set missing 2nd arg to nil. + | mov aword [BASE+8], LJ_TNIL + | jmp <1 +- |3: // End of traversal: return nil. +- | mov aword [BASE-16], LJ_TNIL +- | jmp ->fff_res1 + | + |.ffunc_1 pairs + | mov TAB:RB, [BASE] +@@ -1432,7 +1416,9 @@ static void build_subroutines(BuildCtx *ctx) + | // Copy array slot. + | mov RB, [RD] + | mov [BASE-8], RB +- | jmp ->fff_res2 ++ |->fff_res2: ++ | mov RDd, 1+2 ++ | jmp ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | cmp dword TAB:RB->hmask, 0; je ->fff_res0 + |.if X64WIN +@@ -1840,7 +1826,7 @@ static void build_subroutines(BuildCtx *ctx) + | jmp ->fff_res + | + |.macro math_minmax, name, cmovop, sseop +- | .ffunc name ++ | .ffunc_1 name + | mov RAd, 2 + |.if DUALNUM + | mov RB, [BASE] +@@ -2011,7 +1997,7 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | mov RC, SBUF:CARG1->b + | mov SBUF:CARG1->L, L:RB +- | mov SBUF:CARG1->p, RC ++ | mov SBUF:CARG1->w, RC + | mov SAVE_PC, PC + | call extern lj_buf_putstr_ .. name + | mov CARG1, rax +@@ -2509,10 +2495,10 @@ static void build_subroutines(BuildCtx *ctx) + | jmp <2 + | + |9: // Rethrow error from the right C frame. +- | neg RD ++ | mov CARG2d, RDd + | mov CARG1, L:RB +- | mov CARG2, RD +- | call extern lj_err_throw // (lua_State *L, int errcode) ++ | neg CARG2d ++ | call extern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- +@@ -2647,6 +2633,67 @@ static void build_subroutines(BuildCtx *ctx) + | .if X64WIN; pop rsi; .endif + | ret + | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2d ++ |.define NEXT_IDXa, CARG2 ++ |.define NEXT_PTR, RC ++ |.define NEXT_PTRd, RCd ++ |.define NEXT_TMP, CARG3 ++ |.define NEXT_ASIZE, CARG4d ++ |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro ++ |.if X64WIN ++ |.define NEXT_RES_PTR, [rsp+aword*5] ++ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro ++ |.else ++ |.define NEXT_RES_PTR, [rsp+aword*1] ++ |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro ++ |.endif ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in edx. ++ |->vm_next: ++ |.if JIT ++ | mov NEXT_ASIZE, NEXT_TAB->asize ++ |1: // Traverse array part. ++ | cmp NEXT_IDX, NEXT_ASIZE; jae >5 ++ | mov NEXT_TMP, NEXT_TAB->array ++ | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] ++ | cmp NEXT_TMP, LJ_TNIL; je >2 ++ | lea NEXT_PTR, NEXT_RES_PTR ++ | mov qword [NEXT_PTR], NEXT_TMP ++ |.if DUALNUM ++ | setint NEXT_TMP, NEXT_IDXa ++ | mov qword [NEXT_PTR+qword*1], NEXT_TMP ++ |.else ++ | cvtsi2sd xmm0, NEXT_IDX ++ | movsd qword [NEXT_PTR+qword*1], xmm0 ++ |.endif ++ | NEXT_RES_IDX 1 ++ | ret ++ |2: // Skip holes in array part. ++ | add NEXT_IDX, 1 ++ | jmp <1 ++ | ++ |5: // Traverse hash part. 
++ | sub NEXT_IDX, NEXT_ASIZE ++ |6: ++ | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 ++ | imul NEXT_PTRd, NEXT_IDX, #NODE ++ | add NODE:NEXT_PTR, NEXT_TAB->node ++ | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7 ++ | NEXT_RES_IDXL NEXT_ASIZE+1 ++ | ret ++ |7: // Skip holes in hash part. ++ | add NEXT_IDX, 1 ++ | jmp <6 ++ | ++ |9: // End of iteration. Set the key to nil (not the value). ++ | NEXT_RES_IDX NEXT_ASIZE ++ | lea NEXT_PTR, NEXT_RES_PTR ++ | mov qword [NEXT_PTR+qword*1], LJ_TNIL ++ | ret ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- Assertions --------------------------------------------------------- + |//----------------------------------------------------------------------- +@@ -3674,7 +3721,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | checktab TAB:RB, ->vmeta_tgets + |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * + | mov TMPRd, TAB:RB->hmask +- | and TMPRd, STR:RC->hash ++ | and TMPRd, STR:RC->sid + | imul TMPRd, #NODE + | add NODE:TMPR, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR +@@ -3806,7 +3853,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | checktab TAB:RB, ->vmeta_tsets + |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * + | mov TMPRd, TAB:RB->hmask +- | and TMPRd, STR:RC->hash ++ | and TMPRd, STR:RC->sid + | imul TMPRd, #NODE + | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. + | add NODE:TMPR, TAB:RB->node +@@ -4058,10 +4105,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_ITERN: +- | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT +- | // NYI: add hotloop, record BC_ITERN. ++ | hotloop RBd + |.endif ++ |->vm_IITERN: ++ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | mov TAB:RB, [BASE+RA*8-16] + | cleartp TAB:RB + | mov RCd, [BASE+RA*8-8] // Get index from control var. +@@ -4125,15 +4173,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 + | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 + | branchPC RD +- | mov64 TMPR, U64x(fffe7fff, 00000000) ++ | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32) + | mov [BASE+RA*8-8], TMPR // Initialize control var. + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | mov PC_OP, BC_JMP + | branchPC RD ++ |.if JIT ++ | cmp byte [PC], BC_ITERN ++ | jne >6 ++ |.endif + | mov byte [PC], BC_ITERC + | jmp <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | mov RA, [DISPATCH+DISPATCH_J(trace)] ++ | movzx RCd, word [PC+2] ++ | mov TRACE:RA, [RA+RC*8] ++ | mov eax, TRACE:RA->startins ++ | mov al, BC_ITERC ++ | mov dword [PC], eax ++ | jmp <1 ++ |.endif + break; + + case BC_VARG: +@@ -4734,7 +4796,7 @@ static void emit_asm_debug(BuildCtx *ctx) + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); + #endif + #if !LJ_NO_UNWIND +-#if (defined(__sun__) && defined(__svr4__)) ++#if LJ_TARGET_SOLARIS + fprintf(ctx->fp, "\t.section .eh_frame,"a",@unwind\n"); + #else + fprintf(ctx->fp, "\t.section .eh_frame,"a",@progbits\n"); +diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc +index 211ae7b9..cbf0810c 100644 +--- a/src/vm_x86.dasc ++++ b/src/vm_x86.dasc +@@ -1,6 +1,6 @@ + |// Low-level VM code for x86 CPUs. + |// Bytecode interpreter, fast functions and helper functions. +-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h + | + |.if P64 + |.arch x64 +@@ -1372,7 +1372,11 @@ static void build_subroutines(BuildCtx *ctx) + | mov LFUNC:RB, [RA-8] + | add NARGS:RD, 1 + | // This is fragile. L->base must not move, KBASE must always be defined. ++ |.if x64 ++ | cmp KBASEa, rdx // Continue with CALLT if flag set. ++ |.else + | cmp KBASE, BASE // Continue with CALLT if flag set. ++ |.endif + | je ->BC_CALLT_Z + | mov BASE, RA + | ins_call // Otherwise call resolved metamethod. +@@ -1522,7 +1526,7 @@ static void build_subroutines(BuildCtx *ctx) + | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. + | mov [BASE-8], TAB:RB + | mov RA, TAB:RB->hmask +- | and RA, STR:RC->hash ++ | and RA, STR:RC->sid + | imul RA, #NODE + | add NODE:RA, TAB:RB->node + |3: // Rearranged logic, because we expect _not_ to find the key. +@@ -1669,55 +1673,35 @@ static void build_subroutines(BuildCtx *ctx) + | je >2 // Missing 2nd arg? + |1: + | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback +- | mov L:RB, SAVE_L +- | mov L:RB->base, BASE // Add frame since C call can throw. +- | mov L:RB->top, BASE // Dummy frame length is ok. + | mov PC, [BASE-4] ++ | mov RB, BASE // Save BASE. + |.if X64WIN +- | lea CARG3d, [BASE+8] +- | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. +- | mov CARG1d, L:RB ++ | mov CARG1d, [BASE] ++ | lea CARG3d, [BASE-8] ++ | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE. + |.elif X64 +- | mov CARG2d, [BASE] +- | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. +- | mov CARG1d, L:RB ++ | mov CARG1d, [BASE] ++ | lea CARG2d, [BASE+8] ++ | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE. + |.else + | mov TAB:RD, [BASE] +- | mov ARG2, TAB:RD +- | mov ARG1, L:RB ++ | mov ARG1, TAB:RD + | add BASE, 8 ++ | mov ARG2, BASE ++ | sub BASE, 8+8 + | mov ARG3, BASE + |.endif +- | mov SAVE_PC, PC // Needed for ITERN fallback. +- | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) +- | // Flag returned in eax (RD). +- | mov BASE, L:RB->base +- | test RD, RD; jz >3 // End of traversal? +- | // Copy key and value to results. +- |.if X64 +- | mov RBa, [BASE+8] +- | mov RDa, [BASE+16] +- | mov [BASE-8], RBa +- | mov [BASE], RDa +- |.else +- | mov RB, [BASE+8] +- | mov RD, [BASE+12] +- | mov [BASE-8], RB +- | mov [BASE-4], RD +- | mov RB, [BASE+16] +- | mov RD, [BASE+20] +- | mov [BASE], RB +- | mov [BASE+4], RD +- |.endif +- |->fff_res2: +- | mov RD, 1+2 +- | jmp ->fff_res ++ | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // 1=found, 0=end, -1=error returned in eax (RD). ++ | mov BASE, RB // Restore BASE. ++ | test RD, RD; jg ->fff_res2 // Found key/value. ++ | js ->fff_fallback_2 // Invalid key. ++ | // End of traversal: return nil. ++ | mov dword [BASE-4], LJ_TNIL ++ | jmp ->fff_res1 + |2: // Set missing 2nd arg to nil. + | mov dword [BASE+12], LJ_TNIL + | jmp <1 +- |3: // End of traversal: return nil. +- | mov dword [BASE-4], LJ_TNIL +- | jmp ->fff_res1 + | + |.ffunc_1 pairs + | mov TAB:RB, [BASE] +@@ -1771,7 +1755,9 @@ static void build_subroutines(BuildCtx *ctx) + | mov [BASE], RB + | mov [BASE+4], RD + |.endif +- | jmp ->fff_res2 ++ |->fff_res2: ++ | mov RD, 1+2 ++ | jmp ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | cmp dword TAB:RB->hmask, 0; je ->fff_res0 + | mov FCARG1, TAB:RB +@@ -2233,7 +2219,7 @@ static void build_subroutines(BuildCtx *ctx) + | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 
+ | + |.macro math_minmax, name, cmovop, sseop +- | .ffunc name ++ | .ffunc_1 name + | mov RA, 2 + | cmp dword [BASE+4], LJ_TISNUM + |.if DUALNUM +@@ -2419,9 +2405,9 @@ static void build_subroutines(BuildCtx *ctx) + | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] + | mov L:RB->base, BASE + | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE +- | mov RC, SBUF:FCARG1->b ++ | mov RCa, SBUF:FCARG1->b + | mov SBUF:FCARG1->L, L:RB +- | mov SBUF:FCARG1->p, RC ++ | mov SBUF:FCARG1->w, RCa + | mov SAVE_PC, PC + | call extern lj_buf_putstr_ .. name .. @8 + | mov FCARG1, eax +@@ -2960,10 +2946,10 @@ static void build_subroutines(BuildCtx *ctx) + | jmp <2 + | + |9: // Rethrow error from the right C frame. +- | neg RD +- | mov FCARG1, L:RB + | mov FCARG2, RD +- | call extern lj_err_throw@8 // (lua_State *L, int errcode) ++ | mov FCARG1, L:RB ++ | neg FCARG2 ++ | call extern lj_err_trace@8 // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- +@@ -3134,6 +3120,86 @@ static void build_subroutines(BuildCtx *ctx) + | ret + |.endif + | ++ |.define NEXT_TAB, TAB:FCARG1 ++ |.define NEXT_IDX, FCARG2 ++ |.define NEXT_PTR, RCa ++ |.define NEXT_PTRd, RC ++ |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro ++ |.if X64 ++ |.define NEXT_TMP, CARG3d ++ |.define NEXT_TMPq, CARG3 ++ |.define NEXT_ASIZE, CARG4d ++ |.macro NEXT_ENTER; .endmacro ++ |.macro NEXT_LEAVE; ret; .endmacro ++ |.if X64WIN ++ |.define NEXT_RES_PTR, [rsp+aword*5] ++ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro ++ |.else ++ |.define NEXT_RES_PTR, [rsp+aword*1] ++ |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro ++ |.endif ++ |.else ++ |.define NEXT_ASIZE, esi ++ |.define NEXT_TMP, edi ++ |.macro NEXT_ENTER; push esi; push edi; .endmacro ++ |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro ++ |.define NEXT_RES_PTR, [esp+dword*3] ++ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro ++ |.endif ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in edx. ++ |->vm_next: ++ |.if JIT ++ | NEXT_ENTER ++ | mov NEXT_ASIZE, NEXT_TAB->asize ++ |1: // Traverse array part. ++ | cmp NEXT_IDX, NEXT_ASIZE; jae >5 ++ | mov NEXT_TMP, NEXT_TAB->array ++ | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 ++ | lea NEXT_PTR, NEXT_RES_PTR ++ |.if X64 ++ | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] ++ | mov qword [NEXT_PTR], NEXT_TMPq ++ |.else ++ | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] ++ | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] ++ | mov dword [NEXT_PTR+4], NEXT_ASIZE ++ | mov dword [NEXT_PTR], NEXT_TMP ++ |.endif ++ |.if DUALNUM ++ | mov dword [NEXT_PTR+dword*3], LJ_TISNUM ++ | mov dword [NEXT_PTR+dword*2], NEXT_IDX ++ |.else ++ | cvtsi2sd xmm0, NEXT_IDX ++ | movsd qword [NEXT_PTR+dword*2], xmm0 ++ |.endif ++ | NEXT_RES_IDX 1 ++ | NEXT_LEAVE ++ |2: // Skip holes in array part. ++ | add NEXT_IDX, 1 ++ | jmp <1 ++ | ++ |5: // Traverse hash part. ++ | sub NEXT_IDX, NEXT_ASIZE ++ |6: ++ | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 ++ | imul NEXT_PTRd, NEXT_IDX, #NODE ++ | add NODE:NEXT_PTRd, dword NEXT_TAB->node ++ | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 ++ | NEXT_RES_IDXL NEXT_ASIZE+1 ++ | NEXT_LEAVE ++ |7: // Skip holes in hash part. ++ | add NEXT_IDX, 1 ++ | jmp <6 ++ | ++ |9: // End of iteration. Set the key to nil (not the value). 
++ | NEXT_RES_IDX NEXT_ASIZE ++ | lea NEXT_PTR, NEXT_RES_PTR ++ | mov dword [NEXT_PTR+dword*3], LJ_TNIL ++ | NEXT_LEAVE ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- Assertions --------------------------------------------------------- + |//----------------------------------------------------------------------- +@@ -4286,7 +4352,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | mov TAB:RB, [BASE+RB*8] + |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. + | mov RA, TAB:RB->hmask +- | and RA, STR:RC->hash ++ | and RA, STR:RC->sid + | imul RA, #NODE + | add NODE:RA, TAB:RB->node + |1: +@@ -4457,7 +4523,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | mov TAB:RB, [BASE+RB*8] + |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. + | mov RA, TAB:RB->hmask +- | and RA, STR:RC->hash ++ | and RA, STR:RC->sid + | imul RA, #NODE + | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. + | add NODE:RA, TAB:RB->node +@@ -4785,10 +4851,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_ITERN: +- | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT +- | // NYI: add hotloop, record BC_ITERN. ++ | hotloop RB + |.endif ++ |->vm_IITERN: ++ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | mov TMP1, KBASE // Need two more free registers. + | mov TMP2, DISPATCH + | mov TAB:RB, [BASE+RA*8-16] +@@ -4876,14 +4943,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 + | branchPC RD + | mov dword [BASE+RA*8-8], 0 // Initialize control var. +- | mov dword [BASE+RA*8-4], 0xfffe7fff ++ | mov dword [BASE+RA*8-4], LJ_KEYINDEX + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | mov PC_OP, BC_JMP + | branchPC RD ++ |.if JIT ++ | cmp byte [PC], BC_ITERN ++ | jne >6 ++ |.endif + | mov byte [PC], BC_ITERC + | jmp <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. 
++ | mov RA, [DISPATCH+DISPATCH_J(trace)] ++ | movzx RC, word [PC+2] ++ | mov TRACE:RA, [RA+RC*4] ++ | mov eax, TRACE:RA->startins ++ | mov al, BC_ITERC ++ | mov dword [PC], eax ++ | jmp <1 ++ |.endif + break; + + case BC_VARG: +@@ -5548,7 +5629,7 @@ static void emit_asm_debug(BuildCtx *ctx) + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); + #endif + #if !LJ_NO_UNWIND +-#if (defined(__sun__) && defined(__svr4__)) ++#if LJ_TARGET_SOLARIS + #if LJ_64 + fprintf(ctx->fp, "\t.section .eh_frame,"a",@unwind\n"); + #else +diff --git a/src/xb1build.bat b/src/xb1build.bat +index 847e84a5..2eb68171 100644 +--- a/src/xb1build.bat ++++ b/src/xb1build.bat +@@ -9,12 +9,12 @@ + + @setlocal + @echo ---- Host compiler ---- +-@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64 ++@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE + @set LJLINK=link /nologo + @set LJMT=mt /nologo + @set DASMDIR=..\dynasm + @set DASM=%DASMDIR%\dynasm.lua +-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c ++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c + + %LJCOMPILE% host\minilua.c + @if errorlevel 1 goto :BAD +diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat +index 240ec878..37322d03 100644 +--- a/src/xedkbuild.bat ++++ b/src/xedkbuild.bat +@@ -14,7 +14,7 @@ + @set LJMT=mt /nologo + @set DASMDIR=..\dynasm + @set DASM=%DASMDIR%\dynasm.lua +-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c ++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c + + %LJCOMPILE% host\minilua.c + @if errorlevel 1 goto :BAD diff --git a/luajit-openresty-features.patch b/luajit-openresty-features.patch deleted file mode 100644 index 8a9d90c..0000000 --- a/luajit-openresty-features.patch +++ /dev/null @@ -1,824 +0,0 @@ -From a6879cb3982f02744dd77b6663ae6bc14162e652 Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" agentzh@gmail.com -Date: Sat, 19 Dec 2015 10:43:32 -0800 -Subject: [PATCH 02/13] Makefile: ensure we always install the symlink for - "luajit". - ---- - Makefile | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/Makefile b/Makefile -index 923bf72b..f4b84081 100644 ---- a/Makefile -+++ b/Makefile -@@ -130,13 +130,8 @@ install: $(LUAJIT_BIN) - $(RM) $(FILE_PC).tmp - cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) - cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) -+ $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) - @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" -- @echo "" -- @echo "Note: the development releases deliberately do NOT install a symlink for luajit" -- @echo "You can do this now by running this command (with sudo):" -- @echo "" -- @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)" -- @echo "" - - - uninstall: --- -2.21.0 - - -From e29e78dd64573947777e8ca7741d46d1c0ba2f7b Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" agentzh@gmail.com -Date: Tue, 14 Mar 2017 14:26:48 -0700 -Subject: [PATCH 03/13] optimize: lj_str_new: tests the full hash value before - doing the full string comparison on hash collisions. thanks Shuxin Yang for - the patch. 
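The change is small but worth spelling out: when an interned string is looked up in its hash chain, comparing the cached length and hash first rejects almost every collision without touching the string bytes, and the patch below simply adds that sx->hash == h test ahead of str_fastcmp/memcmp. Here is a minimal standalone sketch of the same idea, using hypothetical Str and chain_lookup names rather than LuaJIT's real GCstr/lj_str_new internals:

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical interned-string node; stands in for LuaJIT's GCstr. */
    typedef struct Str {
      struct Str *next;   /* next node in the same hash chain */
      uint32_t hash;      /* hash cached when the string was interned */
      uint32_t len;
      const char *data;
    } Str;

    /* Look up a string in one hash chain.  The cached length and hash
     * reject nearly all collisions cheaply; memcmp() only runs when
     * both already match. */
    static Str *chain_lookup(Str *chain, const char *s, uint32_t len,
                             uint32_t h)
    {
      Str *o;
      for (o = chain; o != NULL; o = o->next) {
        if (o->len == len && o->hash == h && memcmp(s, o->data, len) == 0)
          return o;  /* existing interned string */
      }
      return NULL;   /* caller allocates and links a new entry */
    }

A rejected collision then never has to read the candidate string's bytes at all, while the extra compare on a hit is a single load from the node header.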
- ---- - src/lj_str.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/lj_str.c b/src/lj_str.c -index 264dedc1..f1b5fb5d 100644 ---- a/src/lj_str.c -+++ b/src/lj_str.c -@@ -152,7 +152,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) - if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { - while (o != NULL) { - GCstr *sx = gco2str(o); -- if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { -+ if (sx->len == len && sx->hash == h && str_fastcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ -@@ -162,7 +162,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) - } else { /* Slow path: end of string is too close to a page boundary. */ - while (o != NULL) { - GCstr *sx = gco2str(o); -- if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { -+ if (sx->len == len && sx->hash == h && memcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ --- -2.21.0 - - -From 555ee4e814f799937ca505423fc05c0b0402f81c Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" yichun@openresty.com -Date: Tue, 15 Jan 2019 12:17:50 -0800 -Subject: [PATCH 04/13] bugfix: fixed assertion failure "lj_record.c:92: - rec_check_slots: Assertion `nslots <= 250' failed" found by stressing our - edgelang compiler. - ---- - src/lj_record.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lj_record.c b/src/lj_record.c -index 7f37d6c6..4a50de1b 100644 ---- a/src/lj_record.c -+++ b/src/lj_record.c -@@ -1860,6 +1860,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) - lj_trace_err_info(J, LJ_TRERR_NYIBC); - } - } -+ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) -+ lj_trace_err(J, LJ_TRERR_STACKOV); - } - - /* -- Record allocations -------------------------------------------------- */ --- -2.21.0 - - -From 58e9941b6268202f7953a5534e0c662ad90b2510 Mon Sep 17 00:00:00 2001 -From: doujiang24 doujiang24@gmail.com -Date: Sun, 12 Mar 2017 21:04:50 +0800 -Subject: [PATCH 05/13] feature: added the bytecode option `L` to display lua - source line numbers. - -Signed-off-by: Yichun Zhang (agentzh) agentzh@gmail.com ---- - src/jit/bc.lua | 20 +++++++++++++------- - src/jit/bcsave.lua | 11 ++++++++--- - src/lib_jit.c | 6 ++++++ - 3 files changed, 27 insertions(+), 10 deletions(-) - -diff --git a/src/jit/bc.lua b/src/jit/bc.lua -index 193cf01f..80f92689 100644 ---- a/src/jit/bc.lua -+++ b/src/jit/bc.lua -@@ -63,15 +63,21 @@ local function ctlsub(c) - end - - -- Return one bytecode line. 
--local function bcline(func, pc, prefix) -- local ins, m = funcbc(func, pc) -+local function bcline(func, pc, prefix, lineinfo) -+ local ins, m, l = funcbc(func, pc, lineinfo and 1 or 0) - if not ins then return end - local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128) - local a = band(shr(ins, 8), 0xff) - local oidx = 6*band(ins, 0xff) - local op = sub(bcnames, oidx+1, oidx+6) -- local s = format("%04d %s %-6s %3s ", -- pc, prefix or " ", op, ma == 0 and "" or a) -+ local s -+ if lineinfo then -+ s = format("%04d %7s %s %-6s %3s ", -+ pc, "["..l.."]", prefix or " ", op, ma == 0 and "" or a) -+ else -+ s = format("%04d %s %-6s %3s ", -+ pc, prefix or " ", op, ma == 0 and "" or a) -+ end - local d = shr(ins, 16) - if mc == 13*128 then -- BCMjump - return format("%s=> %04d\n", s, pc+d-0x7fff) -@@ -124,20 +130,20 @@ local function bctargets(func) - end - - -- Dump bytecode instructions of a function. --local function bcdump(func, out, all) -+local function bcdump(func, out, all, lineinfo) - if not out then out = stdout end - local fi = funcinfo(func) - if all and fi.children then - for n=-1,-1000000000,-1 do - local k = funck(func, n) - if not k then break end -- if type(k) == "proto" then bcdump(k, out, true) end -+ if type(k) == "proto" then bcdump(k, out, true, lineinfo) end - end - end - out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined)) - local target = bctargets(func) - for pc=1,1000000000 do -- local s = bcline(func, pc, target[pc] and "=>") -+ local s = bcline(func, pc, target[pc] and "=>", lineinfo) - if not s then break end - out:write(s) - end -diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua -index 2553d97e..9c6146c2 100644 ---- a/src/jit/bcsave.lua -+++ b/src/jit/bcsave.lua -@@ -23,6 +23,7 @@ local function usage() - io.stderr:write[[ - Save LuaJIT bytecode: luajit -b[options] input output - -l Only list bytecode. -+ -L Only list bytecode with lineinfo. - -s Strip debug info (default). - -g Keep debug info. - -n name Set module name (default: auto-detect from input name). -@@ -575,9 +576,9 @@ end - - ------------------------------------------------------------------------------ - --local function bclist(input, output) -+local function bclist(input, output, lineinfo) - local f = readfile(input) -- require("jit.bc").dump(f, savefile(output, "w"), true) -+ require("jit.bc").dump(f, savefile(output, "w"), true, lineinfo) - end - - local function bcsave(ctx, input, output) -@@ -604,6 +605,7 @@ local function docmd(...) - local arg = {...} - local n = 1 - local list = false -+ local lineinfo = false - local ctx = { - strip = true, arch = jit.arch, os = string.lower(jit.os), - type = false, modname = false, -@@ -617,6 +619,9 @@ local function docmd(...) - local opt = string.sub(a, m, m) - if opt == "l" then - list = true -+ elseif opt == "L" then -+ list = true -+ lineinfo = true - elseif opt == "s" then - ctx.strip = true - elseif opt == "g" then -@@ -645,7 +650,7 @@ local function docmd(...) 
- end - if list then - if #arg == 0 or #arg > 2 then usage() end -- bclist(arg[1], arg[2] or "-") -+ bclist(arg[1], arg[2] or "-", lineinfo) - else - if #arg ~= 2 then usage() end - bcsave(ctx, arg[1], arg[2]) -diff --git a/src/lib_jit.c b/src/lib_jit.c -index 6e265fdb..6972550b 100644 ---- a/src/lib_jit.c -+++ b/src/lib_jit.c -@@ -224,6 +224,7 @@ LJLIB_CF(jit_util_funcbc) - { - GCproto *pt = check_Lproto(L, 0); - BCPos pc = (BCPos)lj_lib_checkint(L, 2); -+ int lineinfo = lj_lib_optint(L, 3, 0); - if (pc < pt->sizebc) { - BCIns ins = proto_bc(pt)[pc]; - BCOp op = bc_op(ins); -@@ -231,6 +232,11 @@ LJLIB_CF(jit_util_funcbc) - setintV(L->top, ins); - setintV(L->top+1, lj_bc_mode[op]); - L->top += 2; -+ if (lineinfo) { -+ setintV(L->top, lj_debug_line(pt, pc)); -+ L->top += 1; -+ return 3; -+ } - return 2; - } - return 0; --- -2.21.0 - - -From a61c93d0784c532db4ec0797475a0e0ad93dda4c Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" yichun@openresty.com -Date: Wed, 27 Feb 2019 17:20:19 -0800 -Subject: [PATCH 06/13] bugfix: ffi.C.FUNC(): it lacked a write barrier which - might lead to use-after-free issues and memory corruptions. - -Fix #42. ---- - src/lj_clib.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/lj_clib.c b/src/lj_clib.c -index f016b06b..a8672052 100644 ---- a/src/lj_clib.c -+++ b/src/lj_clib.c -@@ -384,6 +384,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) - cd = lj_cdata_new(cts, id, CTSIZE_PTR); - *(void **)cdataptr(cd) = p; - setcdataV(L, tv, cd); -+ lj_gc_anybarriert(L, cl->cache); - } - } - return tv; --- -2.21.0 - - -From 3086b483e76ad12ae0a0dfab60960c1175b69dab Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" agentzh@gmail.com -Date: Thu, 15 May 2014 16:03:29 -0700 -Subject: [PATCH 07/13] feature: added internal memory-buffer-based trace - entry/exit/start-recording event logging, mainly for debugging bugs in the - JIT compiler. it requires -DLUA_USE_TRACE_LOGS when building. 
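The event log this patch introduces is just a fixed-capacity ring buffer in static memory: the newest record overwrites the oldest once the buffer wraps, which keeps logging cheap enough to leave enabled while hunting JIT bugs. A rough sketch of that buffering scheme by itself, with a generic Event type and log_event name instead of the patch's lj_trace_event_record_t/lj_trace_log_event (which also record the trace number, exit number and current thread):

    #define MAX_EVENTS 64

    typedef struct {
      int event;         /* enter / exit / start-of-recording */
      unsigned traceno;
      const void *pc;    /* bytecode position when the event fired */
    } Event;

    static Event ring[MAX_EVENTS];
    static int rb_start = 0;  /* oldest live entry once the buffer wraps */
    static int rb_end   = 0;  /* next slot to be written */
    static int rb_full  = 0;  /* nonzero after the first wrap-around */

    /* Append one event, overwriting the oldest record when full. */
    static void log_event(const Event *ev)
    {
      ring[rb_end] = *ev;
      rb_end = (rb_end + 1) % MAX_EVENTS;
      if (rb_full)
        rb_start = rb_end;        /* the oldest entry was just clobbered */
      else if (rb_end == 0)
        rb_full = MAX_EVENTS;     /* first wrap: buffer is now full */
    }

A post-mortem dump can then walk ring[] starting at rb_start for rb_full ? MAX_EVENTS : rb_end records to recover the most recent trace activity.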
- ---- - src/lj_debug.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++ - src/lj_debug.h | 11 +++++ - src/lj_trace.c | 9 ++++ - src/vm_x86.dasc | 24 ++++++++++ - 4 files changed, 169 insertions(+) - -diff --git a/src/lj_debug.c b/src/lj_debug.c -index 959dc289..7f4f793a 100644 ---- a/src/lj_debug.c -+++ b/src/lj_debug.c -@@ -697,3 +697,128 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, - lua_concat(L, (int)(L->top - L->base) - top); - } - -+#ifdef LUA_USE_TRACE_LOGS -+ -+#include "lj_dispatch.h" -+ -+#define MAX_TRACE_EVENTS 64 -+ -+enum { -+ LJ_TRACE_EVENT_ENTER, -+ LJ_TRACE_EVENT_EXIT, -+ LJ_TRACE_EVENT_START -+}; -+ -+typedef struct { -+ int event; -+ unsigned traceno; -+ unsigned exitno; -+ int directexit; -+ const BCIns *ins; -+ lua_State *thread; -+ GCfunc *fn; -+} lj_trace_event_record_t; -+ -+static lj_trace_event_record_t lj_trace_events[MAX_TRACE_EVENTS]; -+ -+static int rb_start = 0; -+static int rb_end = 0; -+static int rb_full = 0; -+ -+static void -+lj_trace_log_event(lj_trace_event_record_t *rec) -+{ -+ lj_trace_events[rb_end] = *rec; -+ -+ if (rb_full) { -+ rb_end++; -+ if (rb_end == MAX_TRACE_EVENTS) { -+ rb_end = 0; -+ } -+ rb_start = rb_end; -+ -+ } else { -+ rb_end++; -+ if (rb_end == MAX_TRACE_EVENTS) { -+ rb_end = 0; -+ rb_full = MAX_TRACE_EVENTS; -+ } -+ } -+} -+ -+static GCfunc* -+lj_debug_top_frame_fn(lua_State *L, const BCIns *pc) -+{ -+ int size; -+ cTValue *frame; -+ -+ frame = lj_debug_frame(L, 0, &size); -+ if (frame == NULL) { -+ return NULL; -+ } -+ -+ return frame_func(frame); -+} -+ -+void -+lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc, -+ GCfunc *fn) -+{ -+ lj_trace_event_record_t r; -+ -+ r.event = LJ_TRACE_EVENT_START; -+ r.thread = L; -+ r.ins = pc; -+ r.traceno = traceno; -+ r.fn = fn; -+ -+ lj_trace_log_event(&r); -+} -+ -+void -+lj_log_trace_entry(lua_State *L, unsigned traceno, const BCIns *pc) -+{ -+ lj_trace_event_record_t r; -+ -+ r.event = LJ_TRACE_EVENT_ENTER; -+ r.thread = L; -+ r.ins = pc; -+ r.traceno = traceno; -+ r.fn = lj_debug_top_frame_fn(L, pc); -+ -+ lj_trace_log_event(&r); -+} -+ -+static void -+lj_log_trace_exit_helper(lua_State *L, int vmstate, const BCIns *pc, int direct) -+{ -+ if (vmstate >= 0) { -+ lj_trace_event_record_t r; -+ -+ jit_State *J = L2J(L); -+ -+ r.event = LJ_TRACE_EVENT_EXIT; -+ r.thread = L; -+ r.ins = pc; -+ r.traceno = vmstate; -+ r.exitno = J->exitno; -+ r.directexit = direct; -+ r.fn = lj_debug_top_frame_fn(L, pc); -+ -+ lj_trace_log_event(&r); -+ } -+} -+ -+void -+lj_log_trace_normal_exit(lua_State *L, int vmstate, const BCIns *pc) -+{ -+ lj_log_trace_exit_helper(L, vmstate, pc, 0); -+} -+ -+void -+lj_log_trace_direct_exit(lua_State *L, int vmstate, const BCIns *pc) -+{ -+ lj_log_trace_exit_helper(L, vmstate, pc, 1); -+} -+ -+#endif /* LUA_USE_TRACE_LOGS */ -diff --git a/src/lj_debug.h b/src/lj_debug.h -index 5917c00b..82f53bda 100644 ---- a/src/lj_debug.h -+++ b/src/lj_debug.h -@@ -62,4 +62,15 @@ enum { - VARNAME__MAX - }; - -+#ifdef LUA_USE_TRACE_LOGS -+LJ_FUNC void LJ_FASTCALL lj_log_trace_direct_exit(lua_State *L, -+ int vmstate, const BCIns *pc); -+LJ_FUNC void LJ_FASTCALL lj_log_trace_normal_exit(lua_State *L, -+ int vmstate, const BCIns *pc); -+LJ_FUNC void LJ_FASTCALL lj_log_trace_entry(lua_State *L, -+ unsigned traceno, const BCIns *pc); -+LJ_FUNC void LJ_FASTCALL lj_log_trace_start_record(lua_State *L, unsigned traceno, -+ const BCIns *pc, GCfunc *fn); -+#endif -+ - #endif -diff --git a/src/lj_trace.c b/src/lj_trace.c -index 
d85b47f8..c2f0d8cf 100644 ---- a/src/lj_trace.c -+++ b/src/lj_trace.c -@@ -404,6 +404,9 @@ static void trace_start(jit_State *J) - { - lua_State *L; - TraceNo traceno; -+#ifdef LUA_USE_TRACE_LOGS -+ const BCIns *pc = J->pc; -+#endif - - if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ - if (J->parent == 0 && J->exitno == 0) { -@@ -462,6 +465,9 @@ static void trace_start(jit_State *J) - } - ); - lj_record_setup(J); -+#ifdef LUA_USE_TRACE_LOGS -+ lj_log_trace_start_record(L, (unsigned) J->cur.traceno, pc, J->fn); -+#endif - } - - /* Stop tracing. */ -@@ -890,6 +896,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) - } - } - } -+#ifdef LUA_USE_TRACE_LOGS -+ lj_log_trace_normal_exit(L, (int) T->traceno, pc); -+#endif - /* Return MULTRES or 0. */ - ERRNO_RESTORE - switch (bc_op(*pc)) { -diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc -index 211ae7b9..66377cd5 100644 ---- a/src/vm_x86.dasc -+++ b/src/vm_x86.dasc -@@ -2919,6 +2919,19 @@ static void build_subroutines(BuildCtx *ctx) - | mov r13, TMPa - | mov r12, TMPQ - |.endif -+#ifdef LUA_USE_TRACE_LOGS -+ | mov FCARG1, SAVE_L -+ | mov L:FCARG1->base, BASE -+ | mov RB, RD // Save RD -+ | mov TMP1, PC // Save PC -+ | mov CARG3d, PC // CARG3d == BASE -+ | mov FCARG2, dword [DISPATCH+DISPATCH_GL(vmstate)] -+ | call extern lj_log_trace_direct_exit@8 -+ | mov PC, TMP1 -+ | mov RD, RB -+ | mov RB, SAVE_L -+ | mov BASE, L:RB->base -+#endif - | test RD, RD; js >9 // Check for error from exit. - | mov L:RB, SAVE_L - | mov MULTRES, RD -@@ -5260,6 +5273,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - case BC_JLOOP: - |.if JIT - | ins_AD // RA = base (ignored), RD = traceno -+#ifdef LUA_USE_TRACE_LOGS -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Save BASE -+ | mov TMP1, RD // Save RD -+ | mov CARG3d, PC // CARG3d == BASE -+ | mov FCARG2, RD -+ | mov FCARG1, RB -+ | call extern lj_log_trace_entry@8 -+ | mov RD, TMP1 -+ | mov BASE, L:RB->base -+#endif - | mov RA, [DISPATCH+DISPATCH_J(trace)] - | mov TRACE:RD, [RA+RD*4] - | mov RDa, TRACE:RD->mcode --- -2.21.0 - - -From 00a5957d632f1715fdc88c1a3fe7cc355f5a13cb Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" agentzh@gmail.com -Date: Wed, 21 May 2014 16:05:13 -0700 -Subject: [PATCH 08/13] bugfix: fixed build regression on i386 introduced by - the LUA_USE_TRACE_LOGS feature. - ---- - src/vm_x86.dasc | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc -index 66377cd5..50210010 100644 ---- a/src/vm_x86.dasc -+++ b/src/vm_x86.dasc -@@ -2920,6 +2920,7 @@ static void build_subroutines(BuildCtx *ctx) - | mov r12, TMPQ - |.endif - #ifdef LUA_USE_TRACE_LOGS -+ |.if X64 - | mov FCARG1, SAVE_L - | mov L:FCARG1->base, BASE - | mov RB, RD // Save RD -@@ -2931,6 +2932,7 @@ static void build_subroutines(BuildCtx *ctx) - | mov RD, RB - | mov RB, SAVE_L - | mov BASE, L:RB->base -+ |.endif - #endif - | test RD, RD; js >9 // Check for error from exit. 
- | mov L:RB, SAVE_L -@@ -5274,6 +5276,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - |.if JIT - | ins_AD // RA = base (ignored), RD = traceno - #ifdef LUA_USE_TRACE_LOGS -+ |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Save BASE - | mov TMP1, RD // Save RD -@@ -5283,6 +5286,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) - | call extern lj_log_trace_entry@8 - | mov RD, TMP1 - | mov BASE, L:RB->base -+ |.endif - #endif - | mov RA, [DISPATCH+DISPATCH_J(trace)] - | mov TRACE:RD, [RA+RD*4] --- -2.21.0 - - -From 7950afe36eadad8b529f4aa90b303861619a2322 Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" agentzh@gmail.com -Date: Sat, 7 Jun 2014 13:41:24 -0700 -Subject: [PATCH 09/13] fixed compilation errors on Solaris when - -DLUA_USE_TRACE_LOGS is enabled. - ---- - src/lj_debug.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/lj_debug.c b/src/lj_debug.c -index 7f4f793a..b93b69d3 100644 ---- a/src/lj_debug.c -+++ b/src/lj_debug.c -@@ -760,7 +760,7 @@ lj_debug_top_frame_fn(lua_State *L, const BCIns *pc) - return frame_func(frame); - } - --void -+LJ_FUNC void LJ_FASTCALL - lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc, - GCfunc *fn) - { -@@ -775,7 +775,7 @@ lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc, - lj_trace_log_event(&r); - } - --void -+LJ_FUNC void LJ_FASTCALL - lj_log_trace_entry(lua_State *L, unsigned traceno, const BCIns *pc) - { - lj_trace_event_record_t r; -@@ -809,13 +809,13 @@ lj_log_trace_exit_helper(lua_State *L, int vmstate, const BCIns *pc, int direct) - } - } - --void -+LJ_FUNC void LJ_FASTCALL - lj_log_trace_normal_exit(lua_State *L, int vmstate, const BCIns *pc) - { - lj_log_trace_exit_helper(L, vmstate, pc, 0); - } - --void -+LJ_FUNC void LJ_FASTCALL - lj_log_trace_direct_exit(lua_State *L, int vmstate, const BCIns *pc) - { - lj_log_trace_exit_helper(L, vmstate, pc, 1); --- -2.21.0 - - -From bd304a366be2ffb10eec6aeba390595232958320 Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" agentzh@gmail.com -Date: Tue, 27 May 2014 12:37:13 -0700 -Subject: [PATCH 10/13] feature: jit.dump: output Lua source location after - every BC. - ---- - src/jit/dump.lua | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/jit/dump.lua b/src/jit/dump.lua -index 2bea652b..ef0dca61 100644 ---- a/src/jit/dump.lua -+++ b/src/jit/dump.lua -@@ -591,6 +591,9 @@ local function dump_record(tr, func, pc, depth, callee) - if pc >= 0 then - line = bcline(func, pc, recprefix) - if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end -+ if pc > 0 then -+ line = sub(line, 1, -2) .. " (" .. fmtfunc(func, pc) .. ")\n" -+ end - else - line = "0000 "..recprefix.." FUNCC \n" - callee = func --- -2.21.0 - - -From cce112ca4fdde7d1ca5963c50d0621fb2e526524 Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" yichun@openresty.com -Date: Fri, 5 Apr 2019 12:38:40 -0700 -Subject: [PATCH 11/13] feature: luajit -bl: dump the constant tables (KGC and - KN) for each lua proto object as well. 
- ---- - src/jit/bc.lua | 32 ++++++++++++++++++++++++++++++++ - 1 file changed, 32 insertions(+) - -diff --git a/src/jit/bc.lua b/src/jit/bc.lua -index 80f92689..9fee4cda 100644 ---- a/src/jit/bc.lua -+++ b/src/jit/bc.lua -@@ -141,6 +141,38 @@ local function bcdump(func, out, all, lineinfo) - end - end - out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined)) -+ -+ for n=-1,-1000000000,-1 do -+ local kc = funck(func, n) -+ if not kc then break end -+ -+ local typ = type(kc) -+ if typ == "string" then -+ kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub)) -+ out:write(format("KGC %d %s\n", -(n + 1), kc)) -+ elseif typ == "proto" then -+ local fi = funcinfo(kc) -+ if fi.ffid then -+ kc = vmdef.ffnames[fi.ffid] -+ else -+ kc = fi.loc -+ end -+ out:write(format("KGC %d %s\n", -(n + 1), kc)) -+ elseif typ == "table" then -+ out:write(format("KGC %d table\n", -(n + 1))) -+ else -+ -- error("unknown KGC type: " .. typ) -+ end -+ end -+ -+ for n=1,1000000000 do -+ local kc = funck(func, n) -+ if not kc then break end -+ if type(kc) == "number" then -+ out:write(format("KN %d %s\n", n, kc)) -+ end -+ end -+ - local target = bctargets(func) - for pc=1,1000000000 do - local s = bcline(func, pc, target[pc] and "=>", lineinfo) --- -2.21.0 - - -From 7d5f5be581ed392059601168a95068e026765aa0 Mon Sep 17 00:00:00 2001 -From: "Yichun Zhang (agentzh)" yichun@openresty.com -Date: Fri, 17 May 2019 14:49:48 -0700 -Subject: [PATCH 13/13] bugfix: thanks Julien Desgats for the report and Peter - Cawley for the patch. - -The test covering this bug was submitted to the openresty/luajit2-test-suite -repo as commit ce2c916d55. ---- - src/lj_tab.c | 81 ++++++++++++++++++++++++++++++++++------------------ - 1 file changed, 53 insertions(+), 28 deletions(-) - -diff --git a/src/lj_tab.c b/src/lj_tab.c -index c51666d3..ff216f3c 100644 ---- a/src/lj_tab.c -+++ b/src/lj_tab.c -@@ -474,6 +474,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) - lua_assert(freenode != &G(L)->nilnode); - collide = hashkey(t, &n->key); - if (collide != n) { /* Colliding node not the main node? */ -+ Node *nn; - while (noderef(collide->next) != n) /* Find predecessor. */ - collide = nextnode(collide); - setmref(collide->next, freenode); /* Relink chain. */ -@@ -483,39 +484,63 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) - freenode->next = n->next; - setmref(n->next, NULL); - setnilV(&n->val); -- /* Rechain pseudo-resurrected string keys with colliding hashes. */ -- while (nextnode(freenode)) { -- Node *nn = nextnode(freenode); -- if (tvisstr(&nn->key) && !tvisnil(&nn->val) && -- hashstr(t, strV(&nn->key)) == n) { -- freenode->next = nn->next; -- nn->next = n->next; -- setmref(n->next, nn); -- /* -- ** Rechaining a resurrected string key creates a new dilemma: -- ** Another string key may have originally been resurrected via -- ** _any_ of the previous nodes as a chain anchor. Including -- ** a node that had to be moved, which makes them unreachable. -- ** It's not feasible to check for all previous nodes, so rechain -- ** any string key that's currently in a non-main positions. -- */ -- while ((nn = nextnode(freenode))) { -- if (tvisstr(&nn->key) && !tvisnil(&nn->val)) { -- Node *mn = hashstr(t, strV(&nn->key)); -- if (mn != freenode) { -- freenode->next = nn->next; -- nn->next = mn->next; -- setmref(mn->next, nn); -+ /* -+ ** Nodes after n might have n as their main node, and need rechaining -+ ** back onto n. 
We make use of the following property of tables: for all -+ ** nodes m, at least one of the following four statements is true: -+ ** 1. tvisnil(&m->key) NB: tvisnil(&m->val) is a stronger statement -+ ** 2. tvisstr(&m->key) -+ ** 3. tvisstr(&main(m)->key) -+ ** 4. main(m) == main(main(m)) -+ ** Initially, we need to rechain any nn which has main(nn) == n. As -+ ** main(n) != n (because collide != n earlier), main(nn) == n requires -+ ** either statement 2 or statement 3 to be true about nn. -+ */ -+ if (!tvisstr(&n->key)) { -+ /* Statement 3 is not true, so only need to consider string keys. */ -+ while ((nn = nextnode(freenode))) { -+ if (tvisstr(&nn->key) && !tvisnil(&nn->val) && -+ hashstr(t, strV(&nn->key)) == n) { -+ goto rechain; -+ } -+ freenode = nn; -+ } -+ } else { -+ /* Statement 3 is true, so need to consider all types of key. */ -+ while ((nn = nextnode(freenode))) { -+ if (!tvisnil(&nn->val) && hashkey(t, &nn->key) == n) { -+ rechain: -+ freenode->next = nn->next; -+ nn->next = n->next; -+ setmref(n->next, nn); -+ /* -+ ** Rechaining one node onto n creates a new dilemma: we now need -+ ** to rechain any nn which has main(nn) == n OR has main(nn) equal -+ ** to any node which has already been rechained. Furthermore, at -+ ** least one of n and n->next will have a string key, so all types -+ ** of nn key need to be considered. Rather than testing whether -+ ** main(nn) definitely _is_ in the new chain, we test whether it -+ ** might _not_ be in the old chain, and if so re-link it into -+ ** the correct chain. -+ */ -+ while ((nn = nextnode(freenode))) { -+ if (!tvisnil(&nn->val)) { -+ Node *mn = hashkey(t, &nn->key); -+ if (mn != freenode && mn != nn) { -+ freenode->next = nn->next; -+ nn->next = mn->next; -+ setmref(mn->next, nn); -+ } else { -+ freenode = nn; -+ } - } else { - freenode = nn; - } -- } else { -- freenode = nn; - } -+ break; -+ } else { -+ freenode = nn; - } -- break; -- } else { -- freenode = nn; - } - } - } else { /* Otherwise use free node. */ --- -2.21.0 - diff --git a/luajit-s390x.patch b/luajit-s390x.patch deleted file mode 100644 index 2b10d06..0000000 --- a/luajit-s390x.patch +++ /dev/null @@ -1,44523 +0,0 @@ -From 0b8f9ea1080a6b2c4beab991ab7736fd0a7896a1 Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Thu, 10 Nov 2016 10:33:16 +0530 -Subject: [PATCH 001/247] Create lj_target_s390.h - -Adding file lj_target_s390.h -Few arm based instructions are changed with equivalent s390x instructions ---- - src/lj_target_s390.h | 287 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 287 insertions(+) - create mode 100644 src/lj_target_s390.h - -diff --git a/src/lj_target_s390.h b/src/lj_target_s390.h -new file mode 100644 -index 0000000..7da2063 ---- /dev/null -+++ b/src/lj_target_s390.h -@@ -0,0 +1,287 @@ -+/* -+** Definitions for S390 CPUs. -+** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h -+*/ -+ -+#ifndef _LJ_TARGET_S390_H -+#define _LJ_TARGET_S390_H -+ -+/* -- Registers IDs ------------------------------------------------------- */ -+ -+#define GPRDEF(_) \ -+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ -+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \ -+#if LJ_SOFTFP -+#define FPRDEF(_) -+#else -+#define FPRDEF(_) \ -+ _(F0) _(F2) _(F4) _(F6) -+#endif -+#define VRIDDEF(_) -+ -+#define RIDENUM(name) RID_##name, -+ -+enum { -+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ -+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). 
*/ -+ RID_MAX, -+ RID_TMP = RID_LR, -+ -+ /* Calling conventions. */ -+ RID_RET = RID_R0, -+ RID_RETLO = RID_R0, -+ RID_RETHI = RID_R1, -+#if LJ_SOFTFP -+ RID_FPRET = RID_R0, -+#else -+ RID_FPRET = RID_D0, -+#endif -+ -+ /* These definitions must match with the *.dasc file(s): */ -+ RID_BASE = RID_R9, /* Interpreter BASE. */ -+ RID_LPC = RID_R6, /* Interpreter PC. */ -+ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ -+ RID_LREG = RID_R8, /* Interpreter L. */ -+ -+ /* Register ranges [min, max) and number of registers. */ -+ RID_MIN_GPR = RID_R0, -+ RID_MAX_GPR = RID_PC+1, -+ RID_MIN_FPR = RID_MAX_GPR, -+#if LJ_SOFTFP -+ RID_MAX_FPR = RID_MIN_FPR, -+#else -+ RID_MAX_FPR = RID_D15+1, -+#endif -+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, -+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -+}; -+ -+#define RID_NUM_KREF RID_NUM_GPR -+#define RID_MIN_KREF RID_R0 -+ -+/* -- Register sets ------------------------------------------------------- */ -+ -+/* Make use of all registers, except sp, lr and pc. */ -+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) -+#define RSET_GPREVEN \ -+ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ -+ RID2RSET(RID_R8)|RID2RSET(RID_R10)) -+#define RSET_GPRODD \ -+ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ -+ RID2RSET(RID_R9)|RID2RSET(RID_R11)) -+#if LJ_SOFTFP -+#define RSET_FPR 0 -+#else -+#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) -+#endif -+#define RSET_ALL (RSET_GPR|RSET_FPR) -+#define RSET_INIT RSET_ALL -+ -+/* ABI-specific register sets. lr is an implicit scratch register. */ -+#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) -+#ifdef __APPLE__ -+#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) -+#else -+#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ -+#endif -+#if LJ_SOFTFP -+#define RSET_SCRATCH_FPR 0 -+#else -+#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) -+#endif -+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -+#define REGARG_FIRSTGPR RID_R0 -+#define REGARG_LASTGPR RID_R3 -+#define REGARG_NUMGPR 4 -+#if LJ_ABI_SOFTFP -+#define REGARG_FIRSTFPR 0 -+#define REGARG_LASTFPR 0 -+#define REGARG_NUMFPR 0 -+#else -+#define REGARG_FIRSTFPR RID_D0 -+#define REGARG_LASTFPR RID_D7 -+#define REGARG_NUMFPR 8 -+#endif -+ -+/* -- Spill slots --------------------------------------------------------- */ -+ -+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -+** -+** SPS_FIXED: Available fixed spill slots in interpreter frame. -+** This definition must match with the *.dasc file(s). -+** -+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -+*/ -+#define SPS_FIXED 2 -+#define SPS_FIRST 2 -+ -+#define SPOFS_TMP 0 -+ -+#define sps_scale(slot) (4 * (int32_t)(slot)) -+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) -+ -+/* -- Exit state ---------------------------------------------------------- */ -+ -+/* This definition must match with the *.dasc file(s). */ -+typedef struct { -+#if !LJ_SOFTFP -+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -+#endif -+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ -+ int32_t spill[256]; /* Spill slots. */ -+} ExitState; -+ -+/* PC after instruction that caused an exit. Used to find the trace number. */ -+#define EXITSTATE_PCREG RID_PC -+/* Highest exit + 1 indicates stack check. 
*/ -+#define EXITSTATE_CHECKEXIT 1 -+ -+#define EXITSTUB_SPACING 4 -+#define EXITSTUBS_PER_GROUP 32 -+ -+/* -- Instructions -------------------------------------------------------- */ -+ -+/* Instruction fields. */ -+#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28)) -+#define ARMF_N(r) ((r) << 16) -+#define ARMF_D(r) ((r) << 12) -+#define ARMF_S(r) ((r) << 8) -+#define ARMF_M(r) (r) -+#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7)) -+#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r)) -+ -+typedef enum S390Ins { -+ -+ // Unsupported in S390 -+ #ARMI_LDRSB = 0xe01000d0, -+ #ARMI_S = 0x000100000, -+ #ARMI_LDRD = 0xe00000d0, -+ #ARMI_ADC = 0xe0a00000, -+ #ARMI_SBC = 0xe0c00000, -+ #ARMI_STRB = 0xe4400000, -+ #ARMI_STRH = 0xe00000b0, -+ #ARMI_STRD = 0xe00000f0, -+ #ARMI_BL = 0xeb000000, -+ #ARMI_BLX = 0xfa000000, -+ #ARMI_BLXr = 0xe12fff30, -+ #ARMI_BIC = 0xe1c00000, -+ #ARMI_ORR = 0xe1800000, -+ #ARMI_LDRB = 0xe4500000, -+ #ARMI_MVN = 0xe1e00000, -+ #ARMI_LDRSH = 0xe01000f0, -+ #ARMI_NOP = 0xe1a00000, -+ #ARMI_PUSH = 0xe92d0000, -+ #ARMI_RSB = 0xe0600000, -+ #ARMI_RSC = 0xe0e00000, -+ #ARMI_TEQ = 0xe1300000, -+ #ARMI_CCAL = 0xe0000000, -+ #ARMI_K12 = 0x02000000, -+ #ARMI_KNEG = 0x00200000, -+ #ARMI_LS_W = 0x00200000, -+ #ARMI_LS_U = 0x00800000, -+ #ARMI_LS_P = 0x01000000, -+ #ARMI_LS_R = 0x02000000, -+ #ARMI_LSX_I = 0x00400000, -+ -+ -+ #ARMI_SUB = 0xe0400000, -+ #ARMI_ADD = 0xe0800000, -+ #ARMI_AND = 0xe0000000, -+ #ARMI_EOR = 0xe0200000, -+ #ARMI_MUL = 0xe0000090, -+ #ARMI_LDR = 0xe4100000, -+ #ARMI_CMP = 0xe1500000, -+ #ARMI_LDRH = 0xe01000b0, -+ #ARMI_B = 0xea000000, -+ #ARMI_MOV = 0xe1a00000, -+ #ARMI_STR = 0xe4000000, -+ #ARMI_TST = 0xe1100000, -+ #ARMI_SMULL = 0xe0c00090, -+ #ARMI_CMN = 0xe1700000, -+ S390I_SR = 0x1B000000, -+ S390I_AR = 0x1A000000, -+ S390I_NR = 0x14000000, -+ S390I_XR = 0x17000000, -+ S390I_MR = 0x1C000000, -+ S390I_LR = 0x18000000, -+ S390I_C = 0x59000000, -+ S390I_LH = 0x48000000, -+ S390I_BASR = 0x0D000000, -+ S390I_MVCL = 0x0e000000, -+ S390I_ST = 0x50000000, -+ S390I_TM = 0x91000000, -+ S390I_MP = 0xbd000090, -+ S390I_CLR = 0x15000000, -+ -+ /* ARMv6 */ -+ #ARMI_REV = 0xe6bf0f30, -+ #ARMI_SXTB = 0xe6af0070, -+ #ARMI_SXTH = 0xe6bf0070, -+ #ARMI_UXTB = 0xe6ef0070, -+ #ARMI_UXTH = 0xe6ff0070, -+ -+ /* ARMv6T2 */ -+ #ARMI_MOVW = 0xe3000000, -+ #ARMI_MOVT = 0xe3400000, -+ -+ /* VFP */ -+ ARMI_VMOV_D = 0xeeb00b40, -+ ARMI_VMOV_S = 0xeeb00a40, -+ ARMI_VMOVI_D = 0xeeb00b00, -+ -+ ARMI_VMOV_R_S = 0xee100a10, -+ ARMI_VMOV_S_R = 0xee000a10, -+ ARMI_VMOV_RR_D = 0xec500b10, -+ ARMI_VMOV_D_RR = 0xec400b10, -+ -+ ARMI_VADD_D = 0xee300b00, -+ ARMI_VSUB_D = 0xee300b40, -+ ARMI_VMUL_D = 0xee200b00, -+ ARMI_VMLA_D = 0xee000b00, -+ ARMI_VMLS_D = 0xee000b40, -+ ARMI_VNMLS_D = 0xee100b00, -+ ARMI_VDIV_D = 0xee800b00, -+ -+ ARMI_VABS_D = 0xeeb00bc0, -+ ARMI_VNEG_D = 0xeeb10b40, -+ ARMI_VSQRT_D = 0xeeb10bc0, -+ -+ ARMI_VCMP_D = 0xeeb40b40, -+ ARMI_VCMPZ_D = 0xeeb50b40, -+ -+ ARMI_VMRS = 0xeef1fa10, -+ -+ ARMI_VCVT_S32_F32 = 0xeebd0ac0, -+ ARMI_VCVT_S32_F64 = 0xeebd0bc0, -+ ARMI_VCVT_U32_F32 = 0xeebc0ac0, -+ ARMI_VCVT_U32_F64 = 0xeebc0bc0, -+ ARMI_VCVTR_S32_F32 = 0xeebd0a40, -+ ARMI_VCVTR_S32_F64 = 0xeebd0b40, -+ ARMI_VCVTR_U32_F32 = 0xeebc0a40, -+ ARMI_VCVTR_U32_F64 = 0xeebc0b40, -+ ARMI_VCVT_F32_S32 = 0xeeb80ac0, -+ ARMI_VCVT_F64_S32 = 0xeeb80bc0, -+ ARMI_VCVT_F32_U32 = 0xeeb80a40, -+ ARMI_VCVT_F64_U32 = 0xeeb80b40, -+ ARMI_VCVT_F32_F64 = 0xeeb70bc0, -+ ARMI_VCVT_F64_F32 = 0xeeb70ac0, -+ -+ ARMI_VLDR_S = 0xed100a00, -+ ARMI_VLDR_D = 0xed100b00, -+ ARMI_VSTR_S = 
0xed000a00, -+ ARMI_VSTR_D = 0xed000b00, -+} S390Ins; -+ -+typedef enum S390Shift { -+ S390SH_SLL, S390SH_SRL, S390SH_SRA -+ # Adjustment needed for ROR -+} S390Shift; -+ -+/* ARM condition codes. */ -+typedef enum ARMCC { -+ CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, -+ CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, -+ CC_HS = CC_CS, CC_LO = CC_CC -+} ARMCC; -+ -+#endif --- -2.20.1 - - -From db6a5d23dcab4e4cffd70e8d8284306ea2cd3891 Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Thu, 10 Nov 2016 10:35:35 +0530 -Subject: [PATCH 002/247] Update Makefile - -Added condition for s390 in Makefile ---- - src/Makefile | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/Makefile b/src/Makefile -index d22eb73..f388db1 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -245,6 +245,9 @@ else - ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) - TARGET_LJARCH= arm - else -+ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH))) -+ TARGET_LJARCH= s390 -+else - ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) - ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) - TARGET_ARCH= -D__AARCH64EB__=1 --- -2.20.1 - - -From 8623a84d2984441f4b58f001ee829abb7edb7b85 Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Thu, 10 Nov 2016 10:42:51 +0530 -Subject: [PATCH 003/247] Update lj_arch.h - -Added supporting lines for s390 -Lines added using arm lines as reference ---- - src/lj_arch.h | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index 31a1159..6421545 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -29,6 +29,7 @@ - #define LUAJIT_ARCH_mips32 6 - #define LUAJIT_ARCH_MIPS64 7 - #define LUAJIT_ARCH_mips64 7 -+#define LUAJIT_ARCH_S390 8 - - /* Target OS. */ - #define LUAJIT_OS_OTHER 0 -@@ -49,6 +50,8 @@ - #define LUAJIT_TARGET LUAJIT_ARCH_ARM - #elif defined(__aarch64__) - #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 -+#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) || defined(S390) -+#define LUAJIT_TARGET LUAJIT_ARCH_S390 - #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) - #define LUAJIT_TARGET LUAJIT_ARCH_PPC - #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) -@@ -241,6 +244,23 @@ - - #define LJ_ARCH_VERSION 80 - -+#elif LUAJIT_TARGET == LUAJIT_ARCH_S390 -+ -+ #define LJ_ARCH_NAME "s390" -+ #define LJ_ARCH_BITS 64 -+ #define LJ_ARCH_ENDIAN LUAJIT_BE -+ #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__ -+ #define LJ_ARCH_HASFPU 1 -+ #endif -+ #define LJ_ABI_EABI 1 -+ #define LJ_TARGET_S390 1 -+ #define LJ_TARGET_EHRETREG 0 -+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ -+ #define LJ_TARGET_MASKSHIFT 0 -+ #define LJ_TARGET_MASKROT 1 -+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ -+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -+ - #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC - - #ifndef LJ_ARCH_ENDIAN -@@ -410,6 +430,10 @@ - #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__) - #error "Need at least Clang 3.5 or newer" - #endif -+#elif LJ_TARGET_S390 -+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) -+#error "Need at least GCC 4.2 or newer" -+#endif - #else - #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8) - #error "Need at least GCC 4.8 or newer" --- -2.20.1 - - -From bfb48077af7cd8f8fcc863b2cd79de24d99420cf Mon Sep 17 00:00:00 2001 -From: niravthakkar thakkarniravb@gmail.com -Date: Thu, 10 Nov 2016 19:00:41 +0530 -Subject: [PATCH 004/247] Copy of dasm_arm64.lua file, with few changes - -Have changed few sections of file, other part is common across architectures ---- - dynasm/dasm_s390x.lua | 1177 +++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 1177 insertions(+) - create mode 100644 dynasm/dasm_s390x.lua - -diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua -new file mode 100644 -index 0000000..a0a50e1 ---- /dev/null -+++ b/dynasm/dasm_s390x.lua -@@ -0,0 +1,1177 @@ -+------------------------------------------------------------------------------ -+-- DynASM s390x module. -+-- -+-- Copyright (C) 2005-2016 Mike Pall. All rights reserved. -+-- See dynasm.lua for full copyright notice. -+------------------------------------------------------------------------------ -+ -+-- Module information: -+local _info = { -+ arch = "s390x", -+ description = "DynASM s390x module", -+ version = "1.4.0", -+ vernum = 10400, -+ release = "2015-10-18", -+ author = "Mike Pall", -+ license = "MIT", -+} -+ -+-- Exported glue functions for the arch-specific module. -+local _M = { _info = _info } -+ -+-- Cache library functions. -+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs -+local assert, setmetatable, rawget = assert, setmetatable, rawget -+local _s = string -+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char -+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub -+local concat, sort, insert = table.concat, table.sort, table.insert -+local bit = bit or require("bit") -+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift -+local ror, tohex = bit.ror, bit.tohex -+ -+-- Inherited tables and callbacks. -+local g_opt, g_arch -+local wline, werror, wfatal, wwarn -+ -+-- Action name list. -+-- CHECK: Keep this in sync with the C code! -+local action_names = { -+ "STOP", "SECTION", "ESC", "REL_EXT", -+ "ALIGN", "REL_LG", "LABEL_LG", -+ "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", -+} -+ -+-- Maximum number of section buffer positions for dasm_put(). -+-- CHECK: Keep this in sync with the C code! -+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. -+ -+-- Action name -> action number. -+local map_action = {} -+for n,name in ipairs(action_names) do -+ map_action[name] = n-1 -+end -+ -+-- Action list buffer. -+local actlist = {} -+ -+-- Argument list for next dasm_put(). Start with offset 0 into action list. -+local actargs = { 0 } -+ -+-- Current number of section buffer positions for dasm_put(). -+local secpos = 1 -+ -+------------------------------------------------------------------------------ -+ -+-- Dump action names and numbers. 
-+local function dumpactions(out) -+ out:write("DynASM encoding engine action codes:\n") -+ for n,name in ipairs(action_names) do -+ local num = map_action[name] -+ out:write(format(" %-10s %02X %d\n", name, num, num)) -+ end -+ out:write("\n") -+end -+ -+-- Write action list buffer as a huge static C array. -+local function writeactions(out, name) -+ local nn = #actlist -+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end -+ out:write("static const unsigned int ", name, "[", nn, "] = {\n") -+ for i = 1,nn-1 do -+ assert(out:write("0x", tohex(actlist[i]), ",\n")) -+ end -+ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) -+end -+ -+------------------------------------------------------------------------------ -+ -+-- Add word to action list. -+local function wputxw(n) -+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") -+ actlist[#actlist+1] = n -+end -+ -+-- Add action to list with optional arg. Advance buffer pos, too. -+local function waction(action, val, a, num) -+ local w = assert(map_action[action], "bad action name `"..action.."'") -+ wputxw(w * 0x10000 + (val or 0)) -+ if a then actargs[#actargs+1] = a end -+ if a or num then secpos = secpos + (num or 1) end -+end -+ -+-- Flush action list (intervening C code or buffer pos overflow). -+local function wflush(term) -+ if #actlist == actargs[1] then return end -- Nothing to flush. -+ if not term then waction("STOP") end -- Terminate action list. -+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) -+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). -+ secpos = 1 -- The actionlist offset occupies a buffer position, too. -+end -+ -+-- Put escaped word. -+local function wputw(n) -+ if n <= 0x000fffff then waction("ESC") end -+ wputxw(n) -+end -+ -+-- Reserve position for word. -+local function wpos() -+ local pos = #actlist+1 -+ actlist[pos] = "" -+ return pos -+end -+ -+-- Store word to reserved position. -+local function wputpos(pos, n) -+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") -+ if n <= 0x000fffff then -+ insert(actlist, pos+1, n) -+ n = map_action.ESC * 0x10000 -+ end -+ actlist[pos] = n -+end -+ -+------------------------------------------------------------------------------ -+ -+-- Global label name -> global label number. With auto assignment on 1st use. -+local next_global = 20 -+local map_global = setmetatable({}, { __index = function(t, name) -+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end -+ local n = next_global -+ if n > 2047 then werror("too many global labels") end -+ next_global = n + 1 -+ t[name] = n -+ return n -+end}) -+ -+-- Dump global labels. -+local function dumpglobals(out, lvl) -+ local t = {} -+ for name, n in pairs(map_global) do t[n] = name end -+ out:write("Global labels:\n") -+ for i=20,next_global-1 do -+ out:write(format(" %s\n", t[i])) -+ end -+ out:write("\n") -+end -+ -+-- Write global label enum. -+local function writeglobals(out, prefix) -+ local t = {} -+ for name, n in pairs(map_global) do t[n] = name end -+ out:write("enum {\n") -+ for i=20,next_global-1 do -+ out:write(" ", prefix, t[i], ",\n") -+ end -+ out:write(" ", prefix, "_MAX\n};\n") -+end -+ -+-- Write global label names. 
-+local function writeglobalnames(out, name) -+ local t = {} -+ for name, n in pairs(map_global) do t[n] = name end -+ out:write("static const char *const ", name, "[] = {\n") -+ for i=20,next_global-1 do -+ out:write(" "", t[i], "",\n") -+ end -+ out:write(" (const char *)0\n};\n") -+end -+ -+------------------------------------------------------------------------------ -+ -+-- Extern label name -> extern label number. With auto assignment on 1st use. -+local next_extern = 0 -+local map_extern_ = {} -+local map_extern = setmetatable({}, { __index = function(t, name) -+ -- No restrictions on the name for now. -+ local n = next_extern -+ if n > 2047 then werror("too many extern labels") end -+ next_extern = n + 1 -+ t[name] = n -+ map_extern_[n] = name -+ return n -+end}) -+ -+-- Dump extern labels. -+local function dumpexterns(out, lvl) -+ out:write("Extern labels:\n") -+ for i=0,next_extern-1 do -+ out:write(format(" %s\n", map_extern_[i])) -+ end -+ out:write("\n") -+end -+ -+-- Write extern label names. -+local function writeexternnames(out, name) -+ out:write("static const char *const ", name, "[] = {\n") -+ for i=0,next_extern-1 do -+ out:write(" "", map_extern_[i], "",\n") -+ end -+ out:write(" (const char *)0\n};\n") -+end -+ -+------------------------------------------------------------------------------ -+ -+-- Arch-specific maps. -+-- TODO: add s390x related register names -+-- Ext. register name -> int. name. -+--local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", } -+local map_archdef = {} -+ -+-- Int. register name -> ext. name. -+-- local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", } -+local map_reg_rev = {} -+ -+local map_type = {} -- Type name -> { ctype, reg } -+local ctypenum = 0 -- Type number (for Dt... macros). -+ -+-- Reverse defines for registers. 
-+function _M.revdef(s) -+ return map_reg_rev[s] or s -+end -+-- not sure of these -+local map_shift = { lsl = 0, lsr = 1, asr = 2, } -+ -+local map_extend = { -+ uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3, -+ sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7, -+} -+ -+local map_cond = { -+ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, -+ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, -+ hs = 2, lo = 3, -+} -+ -+------------------------------------------------------------------------------ -+ -+local parse_reg_type -+ -+ -+local function parse_gpr(expr) -+ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") -+ local tp = map_type[tname or expr] -+ if tp then -+ local reg = ovreg or tp.reg -+ if not reg then -+ werror("type `"..(tname or expr).."' needs a register override") -+ end -+ expr = reg -+ end -+ local r = match(expr, "^r([1-3]?[0-9])$") -+ if r then -+ r = tonumber(r) -+ if r <= 31 then return r, tp end -+ end -+ werror("bad register name `"..expr.."'") -+end -+ -+local function parse_fpr(expr) -+ local r = match(expr, "^f([1-3]?[0-9])$") -+ if r then -+ r = tonumber(r) -+ if r <= 31 then return r end -+ end -+ werror("bad register name `"..expr.."'") -+end -+ -+ -+ -+ -+ -+local function parse_reg_base(expr) -+ if expr == "sp" then return 0x3e0 end -+ local base, tp = parse_reg(expr) -+ if parse_reg_type ~= "x" then werror("bad register type") end -+ parse_reg_type = false -+ return shl(base, 5), tp -+end -+ -+local parse_ctx = {} -+ -+local loadenv = setfenv and function(s) -+ local code = loadstring(s, "") -+ if code then setfenv(code, parse_ctx) end -+ return code -+end or function(s) -+ return load(s, "", nil, parse_ctx) -+end -+ -+-- Try to parse simple arithmetic, too, since some basic ops are aliases. -+local function parse_number(n) -+ local x = tonumber(n) -+ if x then return x end -+ local code = loadenv("return "..n) -+ if code then -+ local ok, y = pcall(code) -+ if ok then return y end -+ end -+ return nil -+end -+ -+local function parse_imm(imm, bits, shift, scale, signed) -+ imm = match(imm, "^#(.*)$") -+ if not imm then werror("expected immediate operand") end -+ local n = parse_number(imm) -+ if n then -+ local m = sar(n, scale) -+ if shl(m, scale) == n then -+ if signed then -+ local s = sar(m, bits-1) -+ if s == 0 then return shl(m, shift) -+ elseif s == -1 then return shl(m + shl(1, bits), shift) end -+ else -+ if sar(m, bits) == 0 then return shl(m, shift) end -+ end -+ end -+ werror("out of range immediate `"..imm.."'") -+ else -+ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) -+ return 0 -+ end -+end -+ -+local function parse_imm12(imm) -+ imm = match(imm, "^#(.*)$") -+ if not imm then werror("expected immediate operand") end -+ local n = parse_number(imm) -+ if n then -+ if shr(n, 12) == 0 then -+ return shl(n, 10) -+ elseif band(n, 0xff000fff) == 0 then -+ return shr(n, 2) + 0x00400000 -+ end -+ werror("out of range immediate `"..imm.."'") -+ else -+ waction("IMM12", 0, imm) -+ return 0 -+ end -+end -+ -+local function parse_imm13(imm) -+ imm = match(imm, "^#(.*)$") -+ if not imm then werror("expected immediate operand") end -+ local n = parse_number(imm) -+ local r64 = parse_reg_type == "x" -+ if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then -+ local inv = false -+ if band(n, 1) == 1 then n = bit.bnot(n); inv = true end -+ local t = {} -+ for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end -+ local b = table.concat(t) -+ b = b..(r64 and (inv and "1" or "0"):rep(32) or b) -+ local p0, p1, p0a, p1a = 
b:match("^(0+)(1+)(0*)(1*)") -+ if p0 then -+ local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a -+ if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then -+ local s = band(-2*w, 0x3f) - 1 -+ if w == 64 then s = s + 0x1000 end -+ if inv then -+ return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10) -+ else -+ return shl(w-#p0, 16) + shl(s+#p1, 10) -+ end -+ end -+ end -+ werror("out of range immediate `"..imm.."'") -+ elseif r64 then -+ waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) -+ actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) -+ return 0 -+ else -+ waction("IMM13W", 0, imm) -+ return 0 -+ end -+end -+ -+local function parse_imm6(imm) -+ imm = match(imm, "^#(.*)$") -+ if not imm then werror("expected immediate operand") end -+ local n = parse_number(imm) -+ if n then -+ if n >= 0 and n <= 63 then -+ return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) -+ end -+ werror("out of range immediate `"..imm.."'") -+ else -+ waction("IMM6", 0, imm) -+ return 0 -+ end -+end -+ -+local function parse_imm_load(imm, scale) -+ local n = parse_number(imm) -+ if n then -+ local m = sar(n, scale) -+ if shl(m, scale) == n and m >= 0 and m < 0x1000 then -+ return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset. -+ elseif n >= -256 and n < 256 then -+ return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset. -+ end -+ werror("out of range immediate `"..imm.."'") -+ else -+ waction("IMML", 0, imm) -+ return 0 -+ end -+end -+ -+local function parse_fpimm(imm) -+ imm = match(imm, "^#(.*)$") -+ if not imm then werror("expected immediate operand") end -+ local n = parse_number(imm) -+ if n then -+ local m, e = math.frexp(n) -+ local s, e2 = 0, band(e-2, 7) -+ if m < 0 then m = -m; s = 0x00100000 end -+ m = m*32-16 -+ if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then -+ return s + shl(e2, 17) + shl(m, 13) -+ end -+ werror("out of range immediate `"..imm.."'") -+ else -+ werror("NYI fpimm action") -+ end -+end -+ -+local function parse_shift(expr) -+ local s, s2 = match(expr, "^(%S+)%s*(.*)$") -+ s = map_shift[s] -+ if not s then werror("expected shift operand") end -+ return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) -+end -+ -+local function parse_lslx16(expr) -+ local n = match(expr, "^lsl%s*#(%d+)$") -+ n = tonumber(n) -+ if not n then werror("expected shift operand") end -+ if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then -+ werror("bad shift amount") -+ end -+ return shl(n, 17) -+end -+ -+local function parse_extend(expr) -+ local s, s2 = match(expr, "^(%S+)%s*(.*)$") -+ if s == "lsl" then -+ s = parse_reg_type == "x" and 3 or 2 -+ else -+ s = map_extend[s] -+ end -+ if not s then werror("expected extend operand") end -+ return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13) -+end -+ -+local function parse_cond(expr, inv) -+ local c = map_cond[expr] -+ if not c then werror("expected condition operand") end -+ return shl(bit.bxor(c, inv), 12) -+end -+ -+local function parse_load(params, nparams, n, op) -+ if params[n+2] then werror("too many operands") end -+ local pn, p2 = params[n], params[n+1] -+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") -+ if not p1 then -+ if not p2 then -+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") -+ if reg and tailr ~= "" then -+ local base, tp = parse_reg_base(reg) -+ if tp then -+ waction("IMML", 0, format(tp.ctypefmt, tailr)) -+ return op + base -+ end -+ end -+ end -+ werror("expected address operand") -+ end -+ local scale = shr(op, 
30) -+ if p2 then -+ if wb == "!" then werror("bad use of '!'") end -+ op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 -+ elseif wb == "!" then -+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") -+ if not p1a then werror("bad use of '!'") end -+ op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00 -+ else -+ local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$") -+ op = op + parse_reg_base(p1a) -+ if p2a ~= "" then -+ local imm = match(p2a, "^,%s*#(.*)$") -+ if imm then -+ op = op + parse_imm_load(imm, scale) -+ else -+ local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") -+ op = op + shl(parse_reg(p2b), 16) + 0x00200800 -+ if parse_reg_type ~= "x" and parse_reg_type ~= "w" then -+ werror("bad index register type") -+ end -+ if p3b == "" then -+ if parse_reg_type ~= "x" then werror("bad index register type") end -+ op = op + 0x6000 -+ else -+ if p3s == "" or p3s == "#0" then -+ elseif p3s == "#"..scale then -+ op = op + 0x1000 -+ else -+ werror("bad scale") -+ end -+ if parse_reg_type == "x" then -+ if p3b == "lsl" and p3s ~= "" then op = op + 0x6000 -+ elseif p3b == "sxtx" then op = op + 0xe000 -+ else -+ werror("bad extend/shift specifier") -+ end -+ else -+ if p3b == "uxtw" then op = op + 0x4000 -+ elseif p3b == "sxtw" then op = op + 0xc000 -+ else -+ werror("bad extend/shift specifier") -+ end -+ end -+ end -+ end -+ else -+ if wb == "!" then werror("bad use of '!'") end -+ op = op + 0x01000000 -+ end -+ end -+ return op -+end -+ -+local function parse_load_pair(params, nparams, n, op) -+ if params[n+2] then werror("too many operands") end -+ local pn, p2 = params[n], params[n+1] -+ local scale = shr(op, 30) == 0 and 2 or 3 -+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") -+ if not p1 then -+ if not p2 then -+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") -+ if reg and tailr ~= "" then -+ local base, tp = parse_reg_base(reg) -+ if tp then -+ waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) -+ return op + base + 0x01000000 -+ end -+ end -+ end -+ werror("expected address operand") -+ end -+ if p2 then -+ if wb == "!" then werror("bad use of '!'") end -+ op = op + 0x00800000 -+ else -+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") -+ if p1a then p1, p2 = p1a, p2a else p2 = "#0" end -+ op = op + (wb == "!" and 0x01800000 or 0x01000000) -+ end -+ return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true) -+end -+ -+local function parse_label(label, def) -+ local prefix = sub(label, 1, 2) -+ -- =>label (pc label reference) -+ if prefix == "=>" then -+ return "PC", 0, sub(label, 3) -+ end -+ -- ->name (global label reference) -+ if prefix == "->" then -+ return "LG", map_global[sub(label, 3)] -+ end -+ if def then -+ -- [1-9] (local label definition) -+ if match(label, "^[1-9]$") then -+ return "LG", 10+tonumber(label) -+ end -+ else -+ -- [<>][1-9] (local label reference) -+ local dir, lnum = match(label, "^([<>])([1-9])$") -+ if dir then -- Fwd: 1-9, Bkwd: 11-19. 
-+ return "LG", lnum + (dir == ">" and 0 or 10) -+ end -+ -- extern label (extern label reference) -+ local extname = match(label, "^extern%s+(%S+)$") -+ if extname then -+ return "EXT", map_extern[extname] -+ end -+ end -+ werror("bad label `"..label.."'") -+end -+ -+local function branch_type(op) -+ if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL -+ elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or -+ band(op, 0x3b000000) == 0x18000000 then -+ return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal -+ elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ -+ elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR -+ elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP -+ else -+ assert(false, "unknown branch type") -+ end -+end -+ -+------------------------------------------------------------------------------ -+ -+local map_op, op_template -+ -+local function op_alias(opname, f) -+ return function(params, nparams) -+ if not params then return "-> "..opname:sub(1, -3) end -+ f(params, nparams) -+ op_template(params, map_op[opname], nparams) -+ end -+end -+ -+local function alias_bfx(p) -+ p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1" -+end -+ -+local function alias_bfiz(p) -+ parse_reg(p[1]) -+ if parse_reg_type == "w" then -+ p[3] = "#-("..p[3]:sub(2)..")%32" -+ p[4] = "#("..p[4]:sub(2)..")-1" -+ else -+ p[3] = "#-("..p[3]:sub(2)..")%64" -+ p[4] = "#("..p[4]:sub(2)..")-1" -+ end -+end -+ -+local alias_lslimm = op_alias("ubfm_4", function(p) -+ parse_reg(p[1]) -+ local sh = p[3]:sub(2) -+ if parse_reg_type == "w" then -+ p[3] = "#-("..sh..")%32" -+ p[4] = "#31-("..sh..")" -+ else -+ p[3] = "#-("..sh..")%64" -+ p[4] = "#63-("..sh..")" -+ end -+end) -+ -+-- Template strings for ARM instructions. -+map_op = { -+ -- Basic data processing instructions. 
-+ add_2 = "00000000005a0000RX-a|00000000001aRR|00000000b9f80000RRF-a|00000000e35a0000RXY-a|00000000e3080000RXY-a", -+ -+-- and has several possible ways, need to find one, currently added two type of -+ and_2 = "0000000000540000RX-a|00000000140000RR|00000000b9f4RRF-a|00000000e3540000RXY-a|00000000b9800000RRE| 00000000b9e40000RRF-a", -+ and_c = "0000000000d40000SS-a", -+ and_i = "0000000000940000SI|00000000eb540000SIY", -+ -+and_2 = "0000000000540000RX-a|0000000000140000RR|00000000b9f40000RRF-a|00000000e3540000RXY-a", -+ and_3 = "00000000e3800000RXY-a|00000000b9800000RRE|00000000b9e40000RRF-a", -+ and_c = "0000000000d40000SS-a", -+ and_i = "0000000000940000SI", -+ and_i4 = "00000000eb540000SIY" -+ and_i3 = "000000000a540000RI-a|000000000a550000RI-a|000000000c0a0000RIL-a|000000000a560000RI-a|000000000a570000RI-a|000000000c0bRIL-a" -+ --branch related instrcutions -+ bal = "0000000000450000RX-a", -+ balr = "0000000000050000RR", -+ bas = "00000000004d0000RX-a", -+ basr = "00000000000d0000RR", -+ bassm = "00000000000c0000RR", -+ bsm = "00000000000b0000RR", -+ bc = "0000000000470000Rx-b", -+ bcr = "00000000000070000RR", -+ bct = "0000000000460000RX-a", -+ bctr = "0000000000060000RR", -+ bctg = "00000000e3460000RXY-a", -+ bctgr = "00000000b9460000RRE", -+ bxh = "0000000000860000RS-a", -+ bxhg = "00000000eb440000RSY-a", -+ bxle = "0000000000870000RS-a", -+ bxleg = "00000000eb450000RSY-a", -+ bras = "000000000a750000RI-b", -+ brasl = "000000000c050000RIL-b", -+ brc = "000000000a740000RI-c", -+ brcl = "000000000c040000RIL-c", -+ brct = "000000000a760000RI-b", -+ brctg = "000000000a770000RI-b", -+ brctg = "00000000occ60000RIL-b", -+ brxh = "0000000000840000RSI", -+ brxhg = "00000000ec440000RIE-e", -+ brxle = "0000000000850000RSI", -+ brxlg = "00000000ec450000RIE-e", -+ -+ ----subtraction (basic operation) -+ sub = "00000000005b0000RX-a" -+ sr = "00000000001b0000RR" -+ srk = "00000000b9f90000RRF-a" -+ sy = "00000000e35b0000RXY-a" -+ sg = "00000000e3090000RXY-a" -+ sgr = "00000000b9090000RRE" -+ sgrk = "00000000b9e90000RRF-a" -+ sgf = "00000000e3190000RXY-a" -+ sgfr = "00000000b9190000RRE" -+ sh = "00000000004b0000RX-a" -+ shy = "00000000e37b0000RXY-a" -+ shhhr = "00000000b9c90000RRF-a" -+ shhlr = "00000000b9d90000RX-a" -+ sl = "00000000005f0000RX-a" -+ slr = "00000000001f0000RR" -+ slrk = "00000000b9f80000RR" -+ sly = "00000000e35f0000RXY-a", -+ slg = "00000000e30b0000RXY-a", -+ slgr = "00000000b9080000RRE", -+ slgrk = "00000000b9eb0000RRF-a", -+ slgf = "00000000e3180000RXY-a", -+ slgfr = "00000000b91b0000RRE", -+ slhhhr = "00000000b9cb0000RRF-a", -+ slhhlr = "00000000b9db0000RRF-a", -+ slfi = "000000000c250000RIL-a", -+ slgfi = "000000000c240000RIL-a", -+ slb = "00000000e3990000RXY-a", -+ slbr = "00000000b9990000RRE" , -+ slbg = "00000000e3890000RXY-a", -+ slbgr = "00000000b9890000RXY-a", -+ -+ cmp_2 = "0000000000590000RX-a|0000000000190000RR|00000000e3590000RXY-a", -+ cmp_3 = "00000000e3200000RXY-a|00000000b9200000RRE|00000000e3300000RXY-a| 00000000b9300000RRE", -+ -+ div_2 = "00000000005d0000RX-a|00000000001d0000RR|00000000e3970000RXY-a|00000000b9970000RRE", -+ div_3 ="00000000e3870000RXY-a|00000000b9870000RRE", -+ div_sing ="00000000e30d0000RXY-a|00000000b90d0000RRE|00000000e31d0000RXY-a|00000000b91d0000RRE", -+ -+ eor_2 = "0000000000570000RX-a|0000000000170000RR|00000000b9f70000RRF-a|00000000e3570000RXY-a", -+ eor_3 = "00000000e3820000RXY-a|00000000b9820000RRE|00000000b9e70000RRF-a| -+ eor_c = "0000000000d70000SS-a", -+ eor_i = "0000000000970000SI| 
00000000eb570000|000000000c060000a|000000000c070000RIL-a", -+ -+ -- load instruction to be added and the following instructions need to be changed (are not s390x related) -+ -+ neg_2 = "4b0003e0DMg", -+ neg_3 = "4b0003e0DMSg", -+ negs_2 = "6b0003e0DMg", -+ negs_3 = "6b0003e0DMSg", -+ adc_3 = "1a000000DNMg", -+ adcs_3 = "3a000000DNMg", -+ sbc_3 = "5a000000DNMg", -+ sbcs_3 = "7a000000DNMg", -+ ngc_2 = "5a0003e0DMg", -+ ngcs_2 = "7a0003e0DMg", -+ and_3 = "0a000000DNMg|12000000pDNig", -+ and_4 = "0a000000DNMSg", -+ orr_3 = "2a000000DNMg|32000000pDNig", -+ orr_4 = "2a000000DNMSg", -+ eor_3 = "4a000000DNMg|52000000pDNig", -+ eor_4 = "4a000000DNMSg", -+ ands_3 = "6a000000DNMg|72000000DNig", -+ ands_4 = "6a000000DNMSg", -+ tst_2 = "6a00001fNMg|7200001fNig", -+ tst_3 = "6a00001fNMSg", -+ bic_3 = "0a200000DNMg", -+ bic_4 = "0a200000DNMSg", -+ orn_3 = "2a200000DNMg", -+ orn_4 = "2a200000DNMSg", -+ eon_3 = "4a200000DNMg", -+ eon_4 = "4a200000DNMSg", -+ bics_3 = "6a200000DNMg", -+ bics_4 = "6a200000DNMSg", -+ movn_2 = "12800000DWg", -+ movn_3 = "12800000DWRg", -+ movz_2 = "52800000DWg", -+ movz_3 = "52800000DWRg", -+ movk_2 = "72800000DWg", -+ movk_3 = "72800000DWRg", -+ -- TODO: this doesn't cover all valid immediates for mov reg, #imm. -+ mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg", -+ mov_3 = "2a0003e0DMSg", -+ mvn_2 = "2a2003e0DMg", -+ mvn_3 = "2a2003e0DMSg", -+ adr_2 = "10000000DBx", -+ adrp_2 = "90000000DBx", -+ csel_4 = "1a800000DNMCg", -+ csinc_4 = "1a800400DNMCg", -+ csinv_4 = "5a800000DNMCg", -+ csneg_4 = "5a800400DNMCg", -+ cset_2 = "1a9f07e0Dcg", -+ csetm_2 = "5a9f03e0Dcg", -+ cinc_3 = "1a800400DNmcg", -+ cinv_3 = "5a800000DNmcg", -+ cneg_3 = "5a800400DNmcg", -+ ccmn_4 = "3a400000NMVCg|3a400800N5VCg", -+ ccmp_4 = "7a400000NMVCg|7a400800N5VCg", -+ madd_4 = "1b000000DNMAg", -+ msub_4 = "1b008000DNMAg", -+ mul_3 = "1b007c00DNMg", -+ mneg_3 = "1b00fc00DNMg", -+ smaddl_4 = "9b200000DxNMwAx", -+ smsubl_4 = "9b208000DxNMwAx", -+ smull_3 = "9b207c00DxNMw", -+ smnegl_3 = "9b20fc00DxNMw", -+ smulh_3 = "9b407c00DNMx", -+ umaddl_4 = "9ba00000DxNMwAx", -+ umsubl_4 = "9ba08000DxNMwAx", -+ umull_3 = "9ba07c00DxNMw", -+ umnegl_3 = "9ba0fc00DxNMw", -+ umulh_3 = "9bc07c00DNMx", -+ udiv_3 = "1ac00800DNMg", -+ sdiv_3 = "1ac00c00DNMg", -+ -- Bit operations. -+ sbfm_4 = "13000000DN12w|93400000DN12x", -+ bfm_4 = "33000000DN12w|b3400000DN12x", -+ ubfm_4 = "53000000DN12w|d3400000DN12x", -+ extr_4 = "13800000DNM2w|93c00000DNM2x", -+ sxtb_2 = "13001c00DNw|93401c00DNx", -+ sxth_2 = "13003c00DNw|93403c00DNx", -+ sxtw_2 = "93407c00DxNw", -+ uxtb_2 = "53001c00DNw", -+ uxth_2 = "53003c00DNw", -+ sbfx_4 = op_alias("sbfm_4", alias_bfx), -+ bfxil_4 = op_alias("bfm_4", alias_bfx), -+ ubfx_4 = op_alias("ubfm_4", alias_bfx), -+ sbfiz_4 = op_alias("sbfm_4", alias_bfiz), -+ bfi_4 = op_alias("bfm_4", alias_bfiz), -+ ubfiz_4 = op_alias("ubfm_4", alias_bfiz), -+ lsl_3 = function(params, nparams) -+ if params and params[3]:byte() == 35 then -+ return alias_lslimm(params, nparams) -+ else -+ return op_template(params, "1ac02000DNMg", nparams) -+ end -+ end, -+ lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x", -+ asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x", -+ ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x", -+ clz_2 = "5ac01000DNg", -+ cls_2 = "5ac01400DNg", -+ rbit_2 = "5ac00000DNg", -+ rev_2 = "5ac00800DNw|dac00c00DNx", -+ rev16_2 = "5ac00400DNg", -+ rev32_2 = "dac00800DNx", -+ -- Loads and stores. 
-+ ["strb_*"] = "38000000DwL", -+ ["ldrb_*"] = "38400000DwL", -+ ["ldrsb_*"] = "38c00000DwL|38800000DxL", -+ ["strh_*"] = "78000000DwL", -+ ["ldrh_*"] = "78400000DwL", -+ ["ldrsh_*"] = "78c00000DwL|78800000DxL", -+ ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL", -+ ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL", -+ ["ldrsw_*"] = "98000000DxB|b8800000DxL", -+ -- NOTE: ldur etc. are handled by ldr et al. -+ ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", -+ ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", -+ ["ldpsw_*"] = "68400000DAxP", -+ -- Branches. -+ b_1 = "14000000B", -+ bl_1 = "94000000B", -+ blr_1 = "d63f0000Nx", -+ br_1 = "d61f0000Nx", -+ ret_0 = "d65f03c0", -+ ret_1 = "d65f0000Nx", -+ -- b.cond is added below. -+ cbz_2 = "34000000DBg", -+ cbnz_2 = "35000000DBg", -+ tbz_3 = "36000000DTBw|36000000DTBx", -+ tbnz_3 = "37000000DTBw|37000000DTBx", -+ -- Miscellaneous instructions. -+ -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr -+ -- TODO: sys, sysl, ic, dc, at, tlbi -+ -- TODO: hint, yield, wfe, wfi, sev, sevl -+ -- TODO: clrex, dsb, dmb, isb -+ nop_0 = "d503201f", -+ brk_0 = "d4200000", -+ brk_1 = "d4200000W", -+ -- Floating point instructions. -+ fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf", -+ fabs_2 = "1e20c000DNf", -+ fneg_2 = "1e214000DNf", -+ fsqrt_2 = "1e21c000DNf", -+ fcvt_2 = "1e22c000DdNs|1e624000DsNd", -+ -- TODO: half-precision and fixed-point conversions. -+ fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd", -+ fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd", -+ fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd", -+ fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd", -+ fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd", -+ fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd", -+ fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd", -+ fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd", -+ fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd", -+ fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd", -+ scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx", -+ ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx", -+ frintn_2 = "1e244000DNf", -+ frintp_2 = "1e24c000DNf", -+ frintm_2 = "1e254000DNf", -+ frintz_2 = "1e25c000DNf", -+ frinta_2 = "1e264000DNf", -+ frintx_2 = "1e274000DNf", -+ frinti_2 = "1e27c000DNf", -+ fadd_3 = "1e202800DNMf", -+ fsub_3 = "1e203800DNMf", -+ fmul_3 = "1e200800DNMf", -+ fnmul_3 = "1e208800DNMf", -+ fdiv_3 = "1e201800DNMf", -+ fmadd_4 = "1f000000DNMAf", -+ fmsub_4 = "1f008000DNMAf", -+ fnmadd_4 = "1f200000DNMAf", -+ fnmsub_4 = "1f208000DNMAf", -+ fmax_3 = "1e204800DNMf", -+ fmaxnm_3 = "1e206800DNMf", -+ fmin_3 = "1e205800DNMf", -+ fminnm_3 = "1e207800DNMf", -+ fcmp_2 = "1e202000NMf|1e202008NZf", -+ fcmpe_2 = "1e202010NMf|1e202018NZf", -+ fccmp_4 = "1e200400NMVCf", -+ fccmpe_4 = "1e200410NMVCf", -+ fcsel_4 = "1e200c00DNMCf", -+ -- TODO: crc32*, aes*, sha*, pmull -+ -- TODO: SIMD instructions. -+} -+for cond,c in pairs(map_cond) do -+ map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" -+end -+------------------------------------------------------------------------------ -+-- Handle opcodes defined with template strings. 
-+local function parse_template(params, template, nparams, pos) -+ local op = tonumber(sub(template, 1, 12), 16) -- 13-16 ignored since those are trailing zeros added after the instruction -+ -- 00000000005a0000 converts to 90 -+ local n,rs = 1,26 -+ -+ parse_reg_type = false -+ -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on) -+ for p in gmatch(sub(template, 17), ".") do -+ local q = params[n] -+ if p == "R" then -+ op = op + parse_reg(q); n = n + 1 -+ elseif p == "N" then -+ op = op + shl(parse_reg(q), 5); n = n + 1 -+ elseif p == "M" then -+ op = op + shl(parse_reg(q), 16); n = n + 1 -+ elseif p == "A" then -+ op = op + shl(parse_reg(q), 10); n = n + 1 -+ elseif p == "m" then -+ op = op + shl(parse_reg(params[n-1]), 16) -+ elseif p == "p" then -+ if q == "sp" then params[n] = "@x31" end -+ elseif p == "g" then -+ if parse_reg_type == "x" then -+ op = op + 0x80000000 -+ elseif parse_reg_type ~= "w" then -+ werror("bad register type") -+ end -+ parse_reg_type = false -+ elseif p == "f" then -+ if parse_reg_type == "d" then -+ op = op + 0x00400000 -+ elseif parse_reg_type ~= "s" then -+ werror("bad register type") -+ end -+ parse_reg_type = false -+ elseif p == "x" or p == "w" or p == "d" or p == "s" then -+ if parse_reg_type ~= p then -+ werror("register size mismatch") -+ end -+ parse_reg_type = false -+ elseif p == "L" then -+ op = parse_load(params, nparams, n, op) -+ elseif p == "P" then -+ op = parse_load_pair(params, nparams, n, op) -+ elseif p == "B" then -+ local mode, v, s = parse_label(q, false); n = n + 1 -+ local m = branch_type(op) -+ waction("REL_"..mode, v+m, s, 1) -+ elseif p == "I" then -+ op = op + parse_imm12(q); n = n + 1 -+ elseif p == "i" then -+ op = op + parse_imm13(q); n = n + 1 -+ elseif p == "W" then -+ op = op + parse_imm(q, 16, 5, 0, false); n = n + 1 -+ elseif p == "T" then -+ op = op + parse_imm6(q); n = n + 1 -+ elseif p == "1" then -+ op = op + parse_imm(q, 6, 16, 0, false); n = n + 1 -+ elseif p == "2" then -+ op = op + parse_imm(q, 6, 10, 0, false); n = n + 1 -+ elseif p == "5" then -+ op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 -+ elseif p == "V" then -+ op = op + parse_imm(q, 4, 0, 0, false); n = n + 1 -+ elseif p == "F" then -+ op = op + parse_fpimm(q); n = n + 1 -+ elseif p == "Z" then -+ if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end -+ n = n + 1 -+ elseif p == "S" then -+ op = op + parse_shift(q); n = n + 1 -+ elseif p == "X" then -+ op = op + parse_extend(q); n = n + 1 -+ elseif p == "R" then -+ op = op + parse_lslx16(q); n = n + 1 -+ elseif p == "C" then -+ op = op + parse_cond(q, 0); n = n + 1 -+ elseif p == "c" then -+ op = op + parse_cond(q, 1); n = n + 1 -+ else -+ assert(false) -+ end -+ end -+ wputpos(pos, op) -+end -+function op_template(params, template, nparams) -+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end -+ -- Limit number of section buffer positions used by a single dasm_put(). -+ -- A single opcode needs a maximum of 3 positions. 
-+ if secpos+3 > maxsecpos then wflush() end -+ local pos = wpos() -+ local lpos, apos, spos = #actlist, #actargs, secpos -+ local ok, err -+ for t in gmatch(template, "[^|]+") do -+ ok, err = pcall(parse_template, params, t, nparams, pos) -+ if ok then return end -+ secpos = spos -+ actlist[lpos+1] = nil -+ actlist[lpos+2] = nil -+ actlist[lpos+3] = nil -+ actargs[apos+1] = nil -+ actargs[apos+2] = nil -+ actargs[apos+3] = nil -+ end -+ error(err, 0) -+end -+map_op[".template__"] = op_template -+------------------------------------------------------------------------------ -+-- Pseudo-opcode to mark the position where the action list is to be emitted. -+map_op[".actionlist_1"] = function(params) -+ if not params then return "cvar" end -+ local name = params[1] -- No syntax check. You get to keep the pieces. -+ wline(function(out) writeactions(out, name) end) -+end -+-- Pseudo-opcode to mark the position where the global enum is to be emitted. -+map_op[".globals_1"] = function(params) -+ if not params then return "prefix" end -+ local prefix = params[1] -- No syntax check. You get to keep the pieces. -+ wline(function(out) writeglobals(out, prefix) end) -+end -+-- Pseudo-opcode to mark the position where the global names are to be emitted. -+map_op[".globalnames_1"] = function(params) -+ if not params then return "cvar" end -+ local name = params[1] -- No syntax check. You get to keep the pieces. -+ wline(function(out) writeglobalnames(out, name) end) -+end -+-- Pseudo-opcode to mark the position where the extern names are to be emitted. -+map_op[".externnames_1"] = function(params) -+ if not params then return "cvar" end -+ local name = params[1] -- No syntax check. You get to keep the pieces. -+ wline(function(out) writeexternnames(out, name) end) -+end -+------------------------------------------------------------------------------ -+-- Label pseudo-opcode (converted from trailing colon form). -+map_op[".label_1"] = function(params) -+ if not params then return "[1-9] | ->global | =>pcexpr" end -+ if secpos+1 > maxsecpos then wflush() end -+ local mode, n, s = parse_label(params[1], true) -+ if mode == "EXT" then werror("bad label definition") end -+ waction("LABEL_"..mode, n, s, 1) -+end -+------------------------------------------------------------------------------ -+-- Pseudo-opcodes for data storage. -+map_op[".long_*"] = function(params) -+ if not params then return "imm..." end -+ for _,p in ipairs(params) do -+ local n = tonumber(p) -+ if not n then werror("bad immediate `"..p.."'") end -+ if n < 0 then n = n + 2^32 end -+ wputw(n) -+ if secpos+2 > maxsecpos then wflush() end -+ end -+end -+-- Alignment pseudo-opcode. -+map_op[".align_1"] = function(params) -+ if not params then return "numpow2" end -+ if secpos+1 > maxsecpos then wflush() end -+ local align = tonumber(params[1]) -+ if align then -+ local x = align -+ -- Must be a power of 2 in the range (2 ... 256). -+ for i=1,8 do -+ x = x / 2 -+ if x == 1 then -+ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. -+ return -+ end -+ end -+ end -+ werror("bad alignment") -+end -+------------------------------------------------------------------------------ -+-- Pseudo-opcode for (primitive) type definitions (map to C types). 
-+map_op[".type_3"] = function(params, nparams) -+ if not params then -+ return nparams == 2 and "name, ctype" or "name, ctype, reg" -+ end -+ local name, ctype, reg = params[1], params[2], params[3] -+ if not match(name, "^[%a_][%w_]*$") then -+ werror("bad type name `"..name.."'") -+ end -+ local tp = map_type[name] -+ if tp then -+ werror("duplicate type `"..name.."'") -+ end -+ -- Add #type to defines. A bit unclean to put it in map_archdef. -+ map_archdef["#"..name] = "sizeof("..ctype..")" -+ -- Add new type and emit shortcut define. -+ local num = ctypenum + 1 -+ map_type[name] = { -+ ctype = ctype, -+ ctypefmt = format("Dt%X(%%s)", num), -+ reg = reg, -+ } -+ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) -+ ctypenum = num -+end -+map_op[".type_2"] = map_op[".type_3"] -+-- Dump type definitions. -+local function dumptypes(out, lvl) -+ local t = {} -+ for name in pairs(map_type) do t[#t+1] = name end -+ sort(t) -+ out:write("Type definitions:\n") -+ for _,name in ipairs(t) do -+ local tp = map_type[name] -+ local reg = tp.reg or "" -+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) -+ end -+ out:write("\n") -+end -+------------------------------------------------------------------------------ -+-- Set the current section. -+function _M.section(num) -+ waction("SECTION", num) -+ wflush(true) -- SECTION is a terminal action. -+end -+------------------------------------------------------------------------------ -+-- Dump architecture description. -+function _M.dumparch(out) -+ out:write(format("DynASM %s version %s, released %s\n\n", -+ _info.arch, _info.version, _info.release)) -+ dumpactions(out) -+end -+-- Dump all user defined elements. -+function _M.dumpdef(out, lvl) -+ dumptypes(out, lvl) -+ dumpglobals(out, lvl) -+ dumpexterns(out, lvl) -+end -+------------------------------------------------------------------------------ -+-- Pass callbacks from/to the DynASM core. -+function _M.passcb(wl, we, wf, ww) -+ wline, werror, wfatal, wwarn = wl, we, wf, ww -+ return wflush -+end -+-- Setup the arch-specific module. -+function _M.setup(arch, opt) -+ g_arch, g_opt = arch, opt -+end -+-- Merge the core maps and the arch-specific maps. -+function _M.mergemaps(map_coreop, map_def) -+ setmetatable(map_op, { __index = map_coreop }) -+ setmetatable(map_def, { __index = map_archdef }) -+ return map_op, map_def -+end -+return _M -+------------------------------------------------------------------------------ --- -2.20.1 - - -From b8253103f7e5646ad487a7f491a0ca4fc6365abb Mon Sep 17 00:00:00 2001 -From: niravthakkar thakkarniravb@gmail.com -Date: Thu, 10 Nov 2016 19:00:51 +0530 -Subject: [PATCH 005/247] Created s390x header file - -Currently copy of ppc.h, which is same as arm64.h, and added the architecture definition ---- - dynasm/dasm_s390x.h | 418 ++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 418 insertions(+) - create mode 100644 dynasm/dasm_s390x.h - -diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h -new file mode 100644 -index 0000000..577920a ---- /dev/null -+++ b/dynasm/dasm_s390x.h -@@ -0,0 +1,418 @@ -+/* -+** DynASM s390x encoding engine. -+** Copyright (C) 2005-2016 Mike Pall. All rights reserved. -+** Released under the MIT license. See dynasm.lua for full copyright notice. -+*/ -+ -+#include <stddef.h> -+#include <stdarg.h> -+#include <string.h> -+#include <stdlib.h> -+ -+#define DASM_ARCH "s390" -+ -+#ifndef DASM_EXTERN -+#define DASM_EXTERN(a,b,c,d) 0 -+#endif -+ -+/* Action definitions. 
*/ -+enum { -+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, -+ /* The following actions need a buffer position. */ -+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, -+ /* The following actions also have an argument. */ -+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH, -+ DASM__MAX -+}; -+ -+/* Maximum number of section buffer positions for a single dasm_put() call. */ -+#define DASM_MAXSECPOS 25 -+ -+/* DynASM encoder status codes. Action list offset or number are or'ed in. */ -+#define DASM_S_OK 0x00000000 -+#define DASM_S_NOMEM 0x01000000 -+#define DASM_S_PHASE 0x02000000 -+#define DASM_S_MATCH_SEC 0x03000000 -+#define DASM_S_RANGE_I 0x11000000 -+#define DASM_S_RANGE_SEC 0x12000000 -+#define DASM_S_RANGE_LG 0x13000000 -+#define DASM_S_RANGE_PC 0x14000000 -+#define DASM_S_RANGE_REL 0x15000000 -+#define DASM_S_UNDEF_LG 0x21000000 -+#define DASM_S_UNDEF_PC 0x22000000 -+ -+/* Macros to convert positions (8 bit section + 24 bit index). */ -+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) -+#define DASM_POS2BIAS(pos) ((pos)&0xff000000) -+#define DASM_SEC2POS(sec) ((sec)<<24) -+#define DASM_POS2SEC(pos) ((pos)>>24) -+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) -+ -+/* Action list type. */ -+typedef const unsigned int *dasm_ActList; -+ -+/* Per-section structure. */ -+typedef struct dasm_Section { -+ int *rbuf; /* Biased buffer pointer (negative section bias). */ -+ int *buf; /* True buffer pointer. */ -+ size_t bsize; /* Buffer size in bytes. */ -+ int pos; /* Biased buffer position. */ -+ int epos; /* End of biased buffer position - max single put. */ -+ int ofs; /* Byte offset into section. */ -+} dasm_Section; -+ -+/* Core structure holding the DynASM encoding state. */ -+struct dasm_State { -+ size_t psize; /* Allocated size of this structure. */ -+ dasm_ActList actionlist; /* Current actionlist pointer. */ -+ int *lglabels; /* Local/global chain/pos ptrs. */ -+ size_t lgsize; -+ int *pclabels; /* PC label chains/pos ptrs. */ -+ size_t pcsize; -+ void **globals; /* Array of globals (bias -10). */ -+ dasm_Section *section; /* Pointer to active section. */ -+ size_t codesize; /* Total size of all code sections. */ -+ int maxsection; /* 0 <= sectionidx < maxsection. */ -+ int status; /* Status code. */ -+ dasm_Section sections[1]; /* All sections. Alloc-extended. */ -+}; -+ -+/* The size of the core structure depends on the max. number of sections. */ -+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) -+ -+ -+/* Initialize DynASM state. */ -+void dasm_init(Dst_DECL, int maxsection) -+{ -+ dasm_State *D; -+ size_t psz = 0; -+ int i; -+ Dst_REF = NULL; -+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); -+ D = Dst_REF; -+ D->psize = psz; -+ D->lglabels = NULL; -+ D->lgsize = 0; -+ D->pclabels = NULL; -+ D->pcsize = 0; -+ D->globals = NULL; -+ D->maxsection = maxsection; -+ for (i = 0; i < maxsection; i++) { -+ D->sections[i].buf = NULL; /* Need this for pass3. */ -+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); -+ D->sections[i].bsize = 0; -+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ -+ } -+} -+ -+/* Free DynASM state. 
*/ -+void dasm_free(Dst_DECL) -+{ -+ dasm_State *D = Dst_REF; -+ int i; -+ for (i = 0; i < D->maxsection; i++) -+ if (D->sections[i].buf) -+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); -+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); -+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); -+ DASM_M_FREE(Dst, D, D->psize); -+} -+ -+/* Setup global label array. Must be called before dasm_setup(). */ -+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) -+{ -+ dasm_State *D = Dst_REF; -+ D->globals = gl - 10; /* Negative bias to compensate for locals. */ -+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); -+} -+ -+/* Grow PC label array. Can be called after dasm_setup(), too. */ -+void dasm_growpc(Dst_DECL, unsigned int maxpc) -+{ -+ dasm_State *D = Dst_REF; -+ size_t osz = D->pcsize; -+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); -+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); -+} -+ -+/* Setup encoder. */ -+void dasm_setup(Dst_DECL, const void *actionlist) -+{ -+ dasm_State *D = Dst_REF; -+ int i; -+ D->actionlist = (dasm_ActList)actionlist; -+ D->status = DASM_S_OK; -+ D->section = &D->sections[0]; -+ memset((void *)D->lglabels, 0, D->lgsize); -+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); -+ for (i = 0; i < D->maxsection; i++) { -+ D->sections[i].pos = DASM_SEC2POS(i); -+ D->sections[i].ofs = 0; -+ } -+} -+ -+ -+#ifdef DASM_CHECKS -+#define CK(x, st) \ -+ do { if (!(x)) { \ -+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) -+#define CKPL(kind, st) \ -+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ -+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) -+#else -+#define CK(x, st) ((void)0) -+#define CKPL(kind, st) ((void)0) -+#endif -+ -+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ -+void dasm_put(Dst_DECL, int start, ...) -+{ -+ va_list ap; -+ dasm_State *D = Dst_REF; -+ dasm_ActList p = D->actionlist + start; -+ dasm_Section *sec = D->section; -+ int pos = sec->pos, ofs = sec->ofs; -+ int *b; -+ -+ if (pos >= sec->epos) { -+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize, -+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); -+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); -+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); -+ } -+ -+ b = sec->rbuf; -+ b[pos++] = start; -+ -+ va_start(ap, start); -+ while (1) { -+ unsigned int ins = *p++; -+ unsigned int action = (ins >> 16); -+ if (action >= DASM__MAX) { -+ ofs += 4; -+ } else { -+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; -+ switch (action) { -+ case DASM_STOP: goto stop; -+ case DASM_SECTION: -+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); -+ D->section = &D->sections[n]; goto stop; -+ case DASM_ESC: p++; ofs += 4; break; -+ case DASM_REL_EXT: break; -+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; -+ case DASM_REL_LG: -+ n = (ins & 2047) - 10; pl = D->lglabels + n; -+ /* Bkwd rel or global. */ -+ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } -+ pl += 10; n = *pl; -+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ -+ goto linkrel; -+ case DASM_REL_PC: -+ pl = D->pclabels + n; CKPL(pc, PC); -+ putrel: -+ n = *pl; -+ if (n < 0) { /* Label exists. Get label pos and store it. */ -+ b[pos] = -n; -+ } else { -+ linkrel: -+ b[pos] = n; /* Else link to rel chain, anchored at label. 
*/ -+ *pl = pos; -+ } -+ pos++; -+ break; -+ case DASM_LABEL_LG: -+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; -+ case DASM_LABEL_PC: -+ pl = D->pclabels + n; CKPL(pc, PC); -+ putlabel: -+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ -+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; -+ } -+ *pl = -pos; /* Label exists now. */ -+ b[pos++] = ofs; /* Store pass1 offset estimate. */ -+ break; -+ case DASM_IMM: -+#ifdef DASM_CHECKS -+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); -+#endif -+ n >>= ((ins>>10)&31); -+#ifdef DASM_CHECKS -+ if (ins & 0x8000) -+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); -+ else -+ CK((n>>((ins>>5)&31)) == 0, RANGE_I); -+#endif -+ b[pos++] = n; -+ break; -+ case DASM_IMMSH: -+ CK((n >> 6) == 0, RANGE_I); -+ b[pos++] = n; -+ break; -+ } -+ } -+ } -+stop: -+ va_end(ap); -+ sec->pos = pos; -+ sec->ofs = ofs; -+} -+#undef CK -+ -+/* Pass 2: Link sections, shrink aligns, fix label offsets. */ -+int dasm_link(Dst_DECL, size_t *szp) -+{ -+ dasm_State *D = Dst_REF; -+ int secnum; -+ int ofs = 0; -+ -+#ifdef DASM_CHECKS -+ *szp = 0; -+ if (D->status != DASM_S_OK) return D->status; -+ { -+ int pc; -+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) -+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; -+ } -+#endif -+ -+ { /* Handle globals not defined in this translation unit. */ -+ int idx; -+ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { -+ int n = D->lglabels[idx]; -+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */ -+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } -+ } -+ } -+ -+ /* Combine all code sections. No support for data sections (yet). */ -+ for (secnum = 0; secnum < D->maxsection; secnum++) { -+ dasm_Section *sec = D->sections + secnum; -+ int *b = sec->rbuf; -+ int pos = DASM_SEC2POS(secnum); -+ int lastpos = sec->pos; -+ -+ while (pos != lastpos) { -+ dasm_ActList p = D->actionlist + b[pos++]; -+ while (1) { -+ unsigned int ins = *p++; -+ unsigned int action = (ins >> 16); -+ switch (action) { -+ case DASM_STOP: case DASM_SECTION: goto stop; -+ case DASM_ESC: p++; break; -+ case DASM_REL_EXT: break; -+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; -+ case DASM_REL_LG: case DASM_REL_PC: pos++; break; -+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; -+ case DASM_IMM: case DASM_IMMSH: pos++; break; -+ } -+ } -+ stop: (void)0; -+ } -+ ofs += sec->ofs; /* Next section starts right after current section. */ -+ } -+ -+ D->codesize = ofs; /* Total size of all code sections */ -+ *szp = ofs; -+ return DASM_S_OK; -+} -+ -+#ifdef DASM_CHECKS -+#define CK(x, st) \ -+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) -+#else -+#define CK(x, st) ((void)0) -+#endif -+ -+/* Pass 3: Encode sections. */ -+int dasm_encode(Dst_DECL, void *buffer) -+{ -+ dasm_State *D = Dst_REF; -+ char *base = (char *)buffer; -+ unsigned int *cp = (unsigned int *)buffer; -+ int secnum; -+ -+ /* Encode all code sections. No support for data sections (yet). */ -+ for (secnum = 0; secnum < D->maxsection; secnum++) { -+ dasm_Section *sec = D->sections + secnum; -+ int *b = sec->buf; -+ int *endb = sec->rbuf + sec->pos; -+ -+ while (b != endb) { -+ dasm_ActList p = D->actionlist + *b++; -+ while (1) { -+ unsigned int ins = *p++; -+ unsigned int action = (ins >> 16); -+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; -+ switch (action) { -+ case DASM_STOP: case DASM_SECTION: goto stop; -+ case DASM_ESC: *cp++ = *p++; break; -+ case DASM_REL_EXT: -+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4; -+ goto patchrel; -+ case DASM_ALIGN: -+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; -+ break; -+ case DASM_REL_LG: -+ CK(n >= 0, UNDEF_LG); -+ case DASM_REL_PC: -+ CK(n >= 0, UNDEF_PC); -+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); -+ patchrel: -+ CK((n & 3) == 0 && -+ (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >> -+ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL); -+ cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); -+ break; -+ case DASM_LABEL_LG: -+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); -+ break; -+ case DASM_LABEL_PC: break; -+ case DASM_IMM: -+ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); -+ break; -+ case DASM_IMMSH: -+ cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32); -+ break; -+ default: *cp++ = ins; break; -+ } -+ } -+ stop: (void)0; -+ } -+ } -+ -+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */ -+ return DASM_S_PHASE; -+ return DASM_S_OK; -+} -+#undef CK -+ -+/* Get PC label offset. */ -+int dasm_getpclabel(Dst_DECL, unsigned int pc) -+{ -+ dasm_State *D = Dst_REF; -+ if (pc*sizeof(int) < D->pcsize) { -+ int pos = D->pclabels[pc]; -+ if (pos < 0) return *DASM_POS2PTR(D, -pos); -+ if (pos > 0) return -1; /* Undefined. */ -+ } -+ return -2; /* Unused or out of range. */ -+} -+ -+#ifdef DASM_CHECKS -+/* Optional sanity checker to call between isolated encoding steps. */ -+int dasm_checkstep(Dst_DECL, int secmatch) -+{ -+ dasm_State *D = Dst_REF; -+ if (D->status == DASM_S_OK) { -+ int i; -+ for (i = 1; i <= 9; i++) { -+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } -+ D->lglabels[i] = 0; -+ } -+ } -+ if (D->status == DASM_S_OK && secmatch >= 0 && -+ D->section != &D->sections[secmatch]) -+ D->status = DASM_S_MATCH_SEC|(D->section-D->sections); -+ return D->status; -+} -+#endif --- -2.20.1 - - -From fa87b1a8e1ed070d0e2d980e840d1ad4abab91c9 Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Fri, 11 Nov 2016 12:04:51 +0530 -Subject: [PATCH 006/247] Update lj_arch.h - -changed S390 to S390x ---- - src/lj_arch.h | 20 +++++--------------- - 1 file changed, 5 insertions(+), 15 deletions(-) - -diff --git a/src/lj_arch.h b/src/lj_arch.h -index 6421545..c781eb1 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h -@@ -29,7 +29,7 @@ - #define LUAJIT_ARCH_mips32 6 - #define LUAJIT_ARCH_MIPS64 7 - #define LUAJIT_ARCH_mips64 7 --#define LUAJIT_ARCH_S390 8 -+#define LUAJIT_ARCH_S390x 8 - - /* Target OS. 
*/ - #define LUAJIT_OS_OTHER 0 -@@ -50,8 +50,8 @@ - #define LUAJIT_TARGET LUAJIT_ARCH_ARM - #elif defined(__aarch64__) - #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 --#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) || defined(S390) --#define LUAJIT_TARGET LUAJIT_ARCH_S390 -+#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x) || defined(S390x) -+#define LUAJIT_TARGET LUAJIT_ARCH_S390x - #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) - #define LUAJIT_TARGET LUAJIT_ARCH_PPC - #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) -@@ -246,20 +246,10 @@ - - #elif LUAJIT_TARGET == LUAJIT_ARCH_S390 - -- #define LJ_ARCH_NAME "s390" -+ #define LJ_ARCH_NAME "s390x" - #define LJ_ARCH_BITS 64 - #define LJ_ARCH_ENDIAN LUAJIT_BE -- #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__ -- #define LJ_ARCH_HASFPU 1 -- #endif -- #define LJ_ABI_EABI 1 - #define LJ_TARGET_S390 1 -- #define LJ_TARGET_EHRETREG 0 -- #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ -- #define LJ_TARGET_MASKSHIFT 0 -- #define LJ_TARGET_MASKROT 1 -- #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ -- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - - #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC - -@@ -430,7 +420,7 @@ - #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__) - #error "Need at least Clang 3.5 or newer" - #endif --#elif LJ_TARGET_S390 -+#elif LJ_TARGET_S390x - #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) - #error "Need at least GCC 4.2 or newer" - #endif --- -2.20.1 - - -From c585381130dd3c553986f36d7527e06f71b58f7c Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Fri, 11 Nov 2016 12:08:47 +0530 -Subject: [PATCH 007/247] Update Makefile - -changed S390 to S390x ---- - src/Makefile | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/Makefile b/src/Makefile -index f388db1..2bf15d2 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -245,8 +245,8 @@ else - ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) - TARGET_LJARCH= arm - else --ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH))) -- TARGET_LJARCH= s390 -+ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH))) -+ TARGET_LJARCH= s390x - else - ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) - ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) --- -2.20.1 - - -From 342ac2f0a831bb12277eee09f1b6e2158bfa248e Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Fri, 11 Nov 2016 12:09:32 +0530 -Subject: [PATCH 008/247] Rename lj_target_s390.h to lj_target_s390x.h - -changed file name ---- - src/lj_target_s390.h | 287 ------------------------------------------ - src/lj_target_s390x.h | 287 ++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 287 insertions(+), 287 deletions(-) - delete mode 100644 src/lj_target_s390.h - create mode 100644 src/lj_target_s390x.h - -diff --git a/src/lj_target_s390.h b/src/lj_target_s390.h -deleted file mode 100644 -index 7da2063..0000000 ---- a/src/lj_target_s390.h -+++ /dev/null -@@ -1,287 +0,0 @@ --/* --** Definitions for S390 CPUs. --** Copyright (C) 2005-2016 Mike Pall. 
See Copyright Notice in luajit.h --*/ -- --#ifndef _LJ_TARGET_S390_H --#define _LJ_TARGET_S390_H -- --/* -- Registers IDs ------------------------------------------------------- */ -- --#define GPRDEF(_) \ -- _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ -- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \ --#if LJ_SOFTFP --#define FPRDEF(_) --#else --#define FPRDEF(_) \ -- _(F0) _(F2) _(F4) _(F6) --#endif --#define VRIDDEF(_) -- --#define RIDENUM(name) RID_##name, -- --enum { -- GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ -- FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ -- RID_MAX, -- RID_TMP = RID_LR, -- -- /* Calling conventions. */ -- RID_RET = RID_R0, -- RID_RETLO = RID_R0, -- RID_RETHI = RID_R1, --#if LJ_SOFTFP -- RID_FPRET = RID_R0, --#else -- RID_FPRET = RID_D0, --#endif -- -- /* These definitions must match with the *.dasc file(s): */ -- RID_BASE = RID_R9, /* Interpreter BASE. */ -- RID_LPC = RID_R6, /* Interpreter PC. */ -- RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ -- RID_LREG = RID_R8, /* Interpreter L. */ -- -- /* Register ranges [min, max) and number of registers. */ -- RID_MIN_GPR = RID_R0, -- RID_MAX_GPR = RID_PC+1, -- RID_MIN_FPR = RID_MAX_GPR, --#if LJ_SOFTFP -- RID_MAX_FPR = RID_MIN_FPR, --#else -- RID_MAX_FPR = RID_D15+1, --#endif -- RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, -- RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR --}; -- --#define RID_NUM_KREF RID_NUM_GPR --#define RID_MIN_KREF RID_R0 -- --/* -- Register sets ------------------------------------------------------- */ -- --/* Make use of all registers, except sp, lr and pc. */ --#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) --#define RSET_GPREVEN \ -- (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ -- RID2RSET(RID_R8)|RID2RSET(RID_R10)) --#define RSET_GPRODD \ -- (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ -- RID2RSET(RID_R9)|RID2RSET(RID_R11)) --#if LJ_SOFTFP --#define RSET_FPR 0 --#else --#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) --#endif --#define RSET_ALL (RSET_GPR|RSET_FPR) --#define RSET_INIT RSET_ALL -- --/* ABI-specific register sets. lr is an implicit scratch register. */ --#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) --#ifdef __APPLE__ --#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) --#else --#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ --#endif --#if LJ_SOFTFP --#define RSET_SCRATCH_FPR 0 --#else --#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) --#endif --#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) --#define REGARG_FIRSTGPR RID_R0 --#define REGARG_LASTGPR RID_R3 --#define REGARG_NUMGPR 4 --#if LJ_ABI_SOFTFP --#define REGARG_FIRSTFPR 0 --#define REGARG_LASTFPR 0 --#define REGARG_NUMFPR 0 --#else --#define REGARG_FIRSTFPR RID_D0 --#define REGARG_LASTFPR RID_D7 --#define REGARG_NUMFPR 8 --#endif -- --/* -- Spill slots --------------------------------------------------------- */ -- --/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. --** --** SPS_FIXED: Available fixed spill slots in interpreter frame. --** This definition must match with the *.dasc file(s). --** --** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. 
--*/ --#define SPS_FIXED 2 --#define SPS_FIRST 2 -- --#define SPOFS_TMP 0 -- --#define sps_scale(slot) (4 * (int32_t)(slot)) --#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) -- --/* -- Exit state ---------------------------------------------------------- */ -- --/* This definition must match with the *.dasc file(s). */ --typedef struct { --#if !LJ_SOFTFP -- lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ --#endif -- int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ -- int32_t spill[256]; /* Spill slots. */ --} ExitState; -- --/* PC after instruction that caused an exit. Used to find the trace number. */ --#define EXITSTATE_PCREG RID_PC --/* Highest exit + 1 indicates stack check. */ --#define EXITSTATE_CHECKEXIT 1 -- --#define EXITSTUB_SPACING 4 --#define EXITSTUBS_PER_GROUP 32 -- --/* -- Instructions -------------------------------------------------------- */ -- --/* Instruction fields. */ --#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28)) --#define ARMF_N(r) ((r) << 16) --#define ARMF_D(r) ((r) << 12) --#define ARMF_S(r) ((r) << 8) --#define ARMF_M(r) (r) --#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7)) --#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r)) -- --typedef enum S390Ins { -- -- // Unsupported in S390 -- #ARMI_LDRSB = 0xe01000d0, -- #ARMI_S = 0x000100000, -- #ARMI_LDRD = 0xe00000d0, -- #ARMI_ADC = 0xe0a00000, -- #ARMI_SBC = 0xe0c00000, -- #ARMI_STRB = 0xe4400000, -- #ARMI_STRH = 0xe00000b0, -- #ARMI_STRD = 0xe00000f0, -- #ARMI_BL = 0xeb000000, -- #ARMI_BLX = 0xfa000000, -- #ARMI_BLXr = 0xe12fff30, -- #ARMI_BIC = 0xe1c00000, -- #ARMI_ORR = 0xe1800000, -- #ARMI_LDRB = 0xe4500000, -- #ARMI_MVN = 0xe1e00000, -- #ARMI_LDRSH = 0xe01000f0, -- #ARMI_NOP = 0xe1a00000, -- #ARMI_PUSH = 0xe92d0000, -- #ARMI_RSB = 0xe0600000, -- #ARMI_RSC = 0xe0e00000, -- #ARMI_TEQ = 0xe1300000, -- #ARMI_CCAL = 0xe0000000, -- #ARMI_K12 = 0x02000000, -- #ARMI_KNEG = 0x00200000, -- #ARMI_LS_W = 0x00200000, -- #ARMI_LS_U = 0x00800000, -- #ARMI_LS_P = 0x01000000, -- #ARMI_LS_R = 0x02000000, -- #ARMI_LSX_I = 0x00400000, -- -- -- #ARMI_SUB = 0xe0400000, -- #ARMI_ADD = 0xe0800000, -- #ARMI_AND = 0xe0000000, -- #ARMI_EOR = 0xe0200000, -- #ARMI_MUL = 0xe0000090, -- #ARMI_LDR = 0xe4100000, -- #ARMI_CMP = 0xe1500000, -- #ARMI_LDRH = 0xe01000b0, -- #ARMI_B = 0xea000000, -- #ARMI_MOV = 0xe1a00000, -- #ARMI_STR = 0xe4000000, -- #ARMI_TST = 0xe1100000, -- #ARMI_SMULL = 0xe0c00090, -- #ARMI_CMN = 0xe1700000, -- S390I_SR = 0x1B000000, -- S390I_AR = 0x1A000000, -- S390I_NR = 0x14000000, -- S390I_XR = 0x17000000, -- S390I_MR = 0x1C000000, -- S390I_LR = 0x18000000, -- S390I_C = 0x59000000, -- S390I_LH = 0x48000000, -- S390I_BASR = 0x0D000000, -- S390I_MVCL = 0x0e000000, -- S390I_ST = 0x50000000, -- S390I_TM = 0x91000000, -- S390I_MP = 0xbd000090, -- S390I_CLR = 0x15000000, -- -- /* ARMv6 */ -- #ARMI_REV = 0xe6bf0f30, -- #ARMI_SXTB = 0xe6af0070, -- #ARMI_SXTH = 0xe6bf0070, -- #ARMI_UXTB = 0xe6ef0070, -- #ARMI_UXTH = 0xe6ff0070, -- -- /* ARMv6T2 */ -- #ARMI_MOVW = 0xe3000000, -- #ARMI_MOVT = 0xe3400000, -- -- /* VFP */ -- ARMI_VMOV_D = 0xeeb00b40, -- ARMI_VMOV_S = 0xeeb00a40, -- ARMI_VMOVI_D = 0xeeb00b00, -- -- ARMI_VMOV_R_S = 0xee100a10, -- ARMI_VMOV_S_R = 0xee000a10, -- ARMI_VMOV_RR_D = 0xec500b10, -- ARMI_VMOV_D_RR = 0xec400b10, -- -- ARMI_VADD_D = 0xee300b00, -- ARMI_VSUB_D = 0xee300b40, -- ARMI_VMUL_D = 0xee200b00, -- ARMI_VMLA_D = 0xee000b00, -- ARMI_VMLS_D = 0xee000b40, -- ARMI_VNMLS_D = 0xee100b00, -- ARMI_VDIV_D = 0xee800b00, -- -- ARMI_VABS_D = 0xeeb00bc0, -- 
ARMI_VNEG_D = 0xeeb10b40, -- ARMI_VSQRT_D = 0xeeb10bc0, -- -- ARMI_VCMP_D = 0xeeb40b40, -- ARMI_VCMPZ_D = 0xeeb50b40, -- -- ARMI_VMRS = 0xeef1fa10, -- -- ARMI_VCVT_S32_F32 = 0xeebd0ac0, -- ARMI_VCVT_S32_F64 = 0xeebd0bc0, -- ARMI_VCVT_U32_F32 = 0xeebc0ac0, -- ARMI_VCVT_U32_F64 = 0xeebc0bc0, -- ARMI_VCVTR_S32_F32 = 0xeebd0a40, -- ARMI_VCVTR_S32_F64 = 0xeebd0b40, -- ARMI_VCVTR_U32_F32 = 0xeebc0a40, -- ARMI_VCVTR_U32_F64 = 0xeebc0b40, -- ARMI_VCVT_F32_S32 = 0xeeb80ac0, -- ARMI_VCVT_F64_S32 = 0xeeb80bc0, -- ARMI_VCVT_F32_U32 = 0xeeb80a40, -- ARMI_VCVT_F64_U32 = 0xeeb80b40, -- ARMI_VCVT_F32_F64 = 0xeeb70bc0, -- ARMI_VCVT_F64_F32 = 0xeeb70ac0, -- -- ARMI_VLDR_S = 0xed100a00, -- ARMI_VLDR_D = 0xed100b00, -- ARMI_VSTR_S = 0xed000a00, -- ARMI_VSTR_D = 0xed000b00, --} S390Ins; -- --typedef enum S390Shift { -- S390SH_SLL, S390SH_SRL, S390SH_SRA -- # Adjustment needed for ROR --} S390Shift; -- --/* ARM condition codes. */ --typedef enum ARMCC { -- CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, -- CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, -- CC_HS = CC_CS, CC_LO = CC_CC --} ARMCC; -- --#endif -diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h -new file mode 100644 -index 0000000..7da2063 ---- /dev/null -+++ b/src/lj_target_s390x.h -@@ -0,0 +1,287 @@ -+/* -+** Definitions for S390 CPUs. -+** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h -+*/ -+ -+#ifndef _LJ_TARGET_S390_H -+#define _LJ_TARGET_S390_H -+ -+/* -- Registers IDs ------------------------------------------------------- */ -+ -+#define GPRDEF(_) \ -+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ -+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \ -+#if LJ_SOFTFP -+#define FPRDEF(_) -+#else -+#define FPRDEF(_) \ -+ _(F0) _(F2) _(F4) _(F6) -+#endif -+#define VRIDDEF(_) -+ -+#define RIDENUM(name) RID_##name, -+ -+enum { -+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ -+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ -+ RID_MAX, -+ RID_TMP = RID_LR, -+ -+ /* Calling conventions. */ -+ RID_RET = RID_R0, -+ RID_RETLO = RID_R0, -+ RID_RETHI = RID_R1, -+#if LJ_SOFTFP -+ RID_FPRET = RID_R0, -+#else -+ RID_FPRET = RID_D0, -+#endif -+ -+ /* These definitions must match with the *.dasc file(s): */ -+ RID_BASE = RID_R9, /* Interpreter BASE. */ -+ RID_LPC = RID_R6, /* Interpreter PC. */ -+ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ -+ RID_LREG = RID_R8, /* Interpreter L. */ -+ -+ /* Register ranges [min, max) and number of registers. */ -+ RID_MIN_GPR = RID_R0, -+ RID_MAX_GPR = RID_PC+1, -+ RID_MIN_FPR = RID_MAX_GPR, -+#if LJ_SOFTFP -+ RID_MAX_FPR = RID_MIN_FPR, -+#else -+ RID_MAX_FPR = RID_D15+1, -+#endif -+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, -+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -+}; -+ -+#define RID_NUM_KREF RID_NUM_GPR -+#define RID_MIN_KREF RID_R0 -+ -+/* -- Register sets ------------------------------------------------------- */ -+ -+/* Make use of all registers, except sp, lr and pc. */ -+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) -+#define RSET_GPREVEN \ -+ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ -+ RID2RSET(RID_R8)|RID2RSET(RID_R10)) -+#define RSET_GPRODD \ -+ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ -+ RID2RSET(RID_R9)|RID2RSET(RID_R11)) -+#if LJ_SOFTFP -+#define RSET_FPR 0 -+#else -+#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) -+#endif -+#define RSET_ALL (RSET_GPR|RSET_FPR) -+#define RSET_INIT RSET_ALL -+ -+/* ABI-specific register sets. 
lr is an implicit scratch register. */ -+#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) -+#ifdef __APPLE__ -+#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) -+#else -+#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ -+#endif -+#if LJ_SOFTFP -+#define RSET_SCRATCH_FPR 0 -+#else -+#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) -+#endif -+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -+#define REGARG_FIRSTGPR RID_R0 -+#define REGARG_LASTGPR RID_R3 -+#define REGARG_NUMGPR 4 -+#if LJ_ABI_SOFTFP -+#define REGARG_FIRSTFPR 0 -+#define REGARG_LASTFPR 0 -+#define REGARG_NUMFPR 0 -+#else -+#define REGARG_FIRSTFPR RID_D0 -+#define REGARG_LASTFPR RID_D7 -+#define REGARG_NUMFPR 8 -+#endif -+ -+/* -- Spill slots --------------------------------------------------------- */ -+ -+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -+** -+** SPS_FIXED: Available fixed spill slots in interpreter frame. -+** This definition must match with the *.dasc file(s). -+** -+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -+*/ -+#define SPS_FIXED 2 -+#define SPS_FIRST 2 -+ -+#define SPOFS_TMP 0 -+ -+#define sps_scale(slot) (4 * (int32_t)(slot)) -+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) -+ -+/* -- Exit state ---------------------------------------------------------- */ -+ -+/* This definition must match with the *.dasc file(s). */ -+typedef struct { -+#if !LJ_SOFTFP -+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -+#endif -+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ -+ int32_t spill[256]; /* Spill slots. */ -+} ExitState; -+ -+/* PC after instruction that caused an exit. Used to find the trace number. */ -+#define EXITSTATE_PCREG RID_PC -+/* Highest exit + 1 indicates stack check. */ -+#define EXITSTATE_CHECKEXIT 1 -+ -+#define EXITSTUB_SPACING 4 -+#define EXITSTUBS_PER_GROUP 32 -+ -+/* -- Instructions -------------------------------------------------------- */ -+ -+/* Instruction fields. 
*/ -+#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28)) -+#define ARMF_N(r) ((r) << 16) -+#define ARMF_D(r) ((r) << 12) -+#define ARMF_S(r) ((r) << 8) -+#define ARMF_M(r) (r) -+#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7)) -+#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r)) -+ -+typedef enum S390Ins { -+ -+ // Unsupported in S390 -+ #ARMI_LDRSB = 0xe01000d0, -+ #ARMI_S = 0x000100000, -+ #ARMI_LDRD = 0xe00000d0, -+ #ARMI_ADC = 0xe0a00000, -+ #ARMI_SBC = 0xe0c00000, -+ #ARMI_STRB = 0xe4400000, -+ #ARMI_STRH = 0xe00000b0, -+ #ARMI_STRD = 0xe00000f0, -+ #ARMI_BL = 0xeb000000, -+ #ARMI_BLX = 0xfa000000, -+ #ARMI_BLXr = 0xe12fff30, -+ #ARMI_BIC = 0xe1c00000, -+ #ARMI_ORR = 0xe1800000, -+ #ARMI_LDRB = 0xe4500000, -+ #ARMI_MVN = 0xe1e00000, -+ #ARMI_LDRSH = 0xe01000f0, -+ #ARMI_NOP = 0xe1a00000, -+ #ARMI_PUSH = 0xe92d0000, -+ #ARMI_RSB = 0xe0600000, -+ #ARMI_RSC = 0xe0e00000, -+ #ARMI_TEQ = 0xe1300000, -+ #ARMI_CCAL = 0xe0000000, -+ #ARMI_K12 = 0x02000000, -+ #ARMI_KNEG = 0x00200000, -+ #ARMI_LS_W = 0x00200000, -+ #ARMI_LS_U = 0x00800000, -+ #ARMI_LS_P = 0x01000000, -+ #ARMI_LS_R = 0x02000000, -+ #ARMI_LSX_I = 0x00400000, -+ -+ -+ #ARMI_SUB = 0xe0400000, -+ #ARMI_ADD = 0xe0800000, -+ #ARMI_AND = 0xe0000000, -+ #ARMI_EOR = 0xe0200000, -+ #ARMI_MUL = 0xe0000090, -+ #ARMI_LDR = 0xe4100000, -+ #ARMI_CMP = 0xe1500000, -+ #ARMI_LDRH = 0xe01000b0, -+ #ARMI_B = 0xea000000, -+ #ARMI_MOV = 0xe1a00000, -+ #ARMI_STR = 0xe4000000, -+ #ARMI_TST = 0xe1100000, -+ #ARMI_SMULL = 0xe0c00090, -+ #ARMI_CMN = 0xe1700000, -+ S390I_SR = 0x1B000000, -+ S390I_AR = 0x1A000000, -+ S390I_NR = 0x14000000, -+ S390I_XR = 0x17000000, -+ S390I_MR = 0x1C000000, -+ S390I_LR = 0x18000000, -+ S390I_C = 0x59000000, -+ S390I_LH = 0x48000000, -+ S390I_BASR = 0x0D000000, -+ S390I_MVCL = 0x0e000000, -+ S390I_ST = 0x50000000, -+ S390I_TM = 0x91000000, -+ S390I_MP = 0xbd000090, -+ S390I_CLR = 0x15000000, -+ -+ /* ARMv6 */ -+ #ARMI_REV = 0xe6bf0f30, -+ #ARMI_SXTB = 0xe6af0070, -+ #ARMI_SXTH = 0xe6bf0070, -+ #ARMI_UXTB = 0xe6ef0070, -+ #ARMI_UXTH = 0xe6ff0070, -+ -+ /* ARMv6T2 */ -+ #ARMI_MOVW = 0xe3000000, -+ #ARMI_MOVT = 0xe3400000, -+ -+ /* VFP */ -+ ARMI_VMOV_D = 0xeeb00b40, -+ ARMI_VMOV_S = 0xeeb00a40, -+ ARMI_VMOVI_D = 0xeeb00b00, -+ -+ ARMI_VMOV_R_S = 0xee100a10, -+ ARMI_VMOV_S_R = 0xee000a10, -+ ARMI_VMOV_RR_D = 0xec500b10, -+ ARMI_VMOV_D_RR = 0xec400b10, -+ -+ ARMI_VADD_D = 0xee300b00, -+ ARMI_VSUB_D = 0xee300b40, -+ ARMI_VMUL_D = 0xee200b00, -+ ARMI_VMLA_D = 0xee000b00, -+ ARMI_VMLS_D = 0xee000b40, -+ ARMI_VNMLS_D = 0xee100b00, -+ ARMI_VDIV_D = 0xee800b00, -+ -+ ARMI_VABS_D = 0xeeb00bc0, -+ ARMI_VNEG_D = 0xeeb10b40, -+ ARMI_VSQRT_D = 0xeeb10bc0, -+ -+ ARMI_VCMP_D = 0xeeb40b40, -+ ARMI_VCMPZ_D = 0xeeb50b40, -+ -+ ARMI_VMRS = 0xeef1fa10, -+ -+ ARMI_VCVT_S32_F32 = 0xeebd0ac0, -+ ARMI_VCVT_S32_F64 = 0xeebd0bc0, -+ ARMI_VCVT_U32_F32 = 0xeebc0ac0, -+ ARMI_VCVT_U32_F64 = 0xeebc0bc0, -+ ARMI_VCVTR_S32_F32 = 0xeebd0a40, -+ ARMI_VCVTR_S32_F64 = 0xeebd0b40, -+ ARMI_VCVTR_U32_F32 = 0xeebc0a40, -+ ARMI_VCVTR_U32_F64 = 0xeebc0b40, -+ ARMI_VCVT_F32_S32 = 0xeeb80ac0, -+ ARMI_VCVT_F64_S32 = 0xeeb80bc0, -+ ARMI_VCVT_F32_U32 = 0xeeb80a40, -+ ARMI_VCVT_F64_U32 = 0xeeb80b40, -+ ARMI_VCVT_F32_F64 = 0xeeb70bc0, -+ ARMI_VCVT_F64_F32 = 0xeeb70ac0, -+ -+ ARMI_VLDR_S = 0xed100a00, -+ ARMI_VLDR_D = 0xed100b00, -+ ARMI_VSTR_S = 0xed000a00, -+ ARMI_VSTR_D = 0xed000b00, -+} S390Ins; -+ -+typedef enum S390Shift { -+ S390SH_SLL, S390SH_SRL, S390SH_SRA -+ # Adjustment needed for ROR -+} S390Shift; -+ -+/* ARM condition codes. 
*/ -+typedef enum ARMCC { -+ CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, -+ CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, -+ CC_HS = CC_CS, CC_LO = CC_CC -+} ARMCC; -+ -+#endif --- -2.20.1 - - -From a8fb2fa7613b5a5f314ae5c1d44fb53b9a89073e Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Fri, 11 Nov 2016 12:13:30 +0530 -Subject: [PATCH 009/247] Update lj_target_s390x.h - -removed un replaced arm instructions -changed S390 to S390x ---- - src/lj_target_s390x.h | 129 ++++-------------------------------------- - 1 file changed, 12 insertions(+), 117 deletions(-) - -diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h -index 7da2063..27bb349 100644 ---- a/src/lj_target_s390x.h -+++ b/src/lj_target_s390x.h -@@ -10,12 +10,15 @@ - - #define GPRDEF(_) \ - _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ -- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \ -+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _R(15) \ - #if LJ_SOFTFP - #define FPRDEF(_) - #else - #define FPRDEF(_) \ -- _(F0) _(F2) _(F4) _(F6) -+ _(F0) _(F1) _(F2) _(F3) \ -+ _(F4) _(F5) _(F6) _(F7) \ -+ _(F8) _(F9) _(F10) _(F11) \ -+ _(F12) _(F13) _(F14) _(F15) - #endif - #define VRIDDEF(_) - -@@ -150,54 +153,7 @@ typedef struct { - #define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7)) - #define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r)) - --typedef enum S390Ins { -- -- // Unsupported in S390 -- #ARMI_LDRSB = 0xe01000d0, -- #ARMI_S = 0x000100000, -- #ARMI_LDRD = 0xe00000d0, -- #ARMI_ADC = 0xe0a00000, -- #ARMI_SBC = 0xe0c00000, -- #ARMI_STRB = 0xe4400000, -- #ARMI_STRH = 0xe00000b0, -- #ARMI_STRD = 0xe00000f0, -- #ARMI_BL = 0xeb000000, -- #ARMI_BLX = 0xfa000000, -- #ARMI_BLXr = 0xe12fff30, -- #ARMI_BIC = 0xe1c00000, -- #ARMI_ORR = 0xe1800000, -- #ARMI_LDRB = 0xe4500000, -- #ARMI_MVN = 0xe1e00000, -- #ARMI_LDRSH = 0xe01000f0, -- #ARMI_NOP = 0xe1a00000, -- #ARMI_PUSH = 0xe92d0000, -- #ARMI_RSB = 0xe0600000, -- #ARMI_RSC = 0xe0e00000, -- #ARMI_TEQ = 0xe1300000, -- #ARMI_CCAL = 0xe0000000, -- #ARMI_K12 = 0x02000000, -- #ARMI_KNEG = 0x00200000, -- #ARMI_LS_W = 0x00200000, -- #ARMI_LS_U = 0x00800000, -- #ARMI_LS_P = 0x01000000, -- #ARMI_LS_R = 0x02000000, -- #ARMI_LSX_I = 0x00400000, -- -- -- #ARMI_SUB = 0xe0400000, -- #ARMI_ADD = 0xe0800000, -- #ARMI_AND = 0xe0000000, -- #ARMI_EOR = 0xe0200000, -- #ARMI_MUL = 0xe0000090, -- #ARMI_LDR = 0xe4100000, -- #ARMI_CMP = 0xe1500000, -- #ARMI_LDRH = 0xe01000b0, -- #ARMI_B = 0xea000000, -- #ARMI_MOV = 0xe1a00000, -- #ARMI_STR = 0xe4000000, -- #ARMI_TST = 0xe1100000, -- #ARMI_SMULL = 0xe0c00090, -- #ARMI_CMN = 0xe1700000, -+typedef enum S390xIns { - S390I_SR = 0x1B000000, - S390I_AR = 0x1A000000, - S390I_NR = 0x14000000, -@@ -212,76 +168,15 @@ typedef enum S390Ins { - S390I_TM = 0x91000000, - S390I_MP = 0xbd000090, - S390I_CLR = 0x15000000, -+} S390xIns; - -- /* ARMv6 */ -- #ARMI_REV = 0xe6bf0f30, -- #ARMI_SXTB = 0xe6af0070, -- #ARMI_SXTH = 0xe6bf0070, -- #ARMI_UXTB = 0xe6ef0070, -- #ARMI_UXTH = 0xe6ff0070, -- -- /* ARMv6T2 */ -- #ARMI_MOVW = 0xe3000000, -- #ARMI_MOVT = 0xe3400000, -- -- /* VFP */ -- ARMI_VMOV_D = 0xeeb00b40, -- ARMI_VMOV_S = 0xeeb00a40, -- ARMI_VMOVI_D = 0xeeb00b00, -- -- ARMI_VMOV_R_S = 0xee100a10, -- ARMI_VMOV_S_R = 0xee000a10, -- ARMI_VMOV_RR_D = 0xec500b10, -- ARMI_VMOV_D_RR = 0xec400b10, -- -- ARMI_VADD_D = 0xee300b00, -- ARMI_VSUB_D = 0xee300b40, -- ARMI_VMUL_D = 0xee200b00, -- ARMI_VMLA_D = 0xee000b00, -- ARMI_VMLS_D = 0xee000b40, -- ARMI_VNMLS_D = 0xee100b00, -- ARMI_VDIV_D = 0xee800b00, -- -- ARMI_VABS_D = 0xeeb00bc0, -- 
ARMI_VNEG_D = 0xeeb10b40, -- ARMI_VSQRT_D = 0xeeb10bc0, -- -- ARMI_VCMP_D = 0xeeb40b40, -- ARMI_VCMPZ_D = 0xeeb50b40, -- -- ARMI_VMRS = 0xeef1fa10, -- -- ARMI_VCVT_S32_F32 = 0xeebd0ac0, -- ARMI_VCVT_S32_F64 = 0xeebd0bc0, -- ARMI_VCVT_U32_F32 = 0xeebc0ac0, -- ARMI_VCVT_U32_F64 = 0xeebc0bc0, -- ARMI_VCVTR_S32_F32 = 0xeebd0a40, -- ARMI_VCVTR_S32_F64 = 0xeebd0b40, -- ARMI_VCVTR_U32_F32 = 0xeebc0a40, -- ARMI_VCVTR_U32_F64 = 0xeebc0b40, -- ARMI_VCVT_F32_S32 = 0xeeb80ac0, -- ARMI_VCVT_F64_S32 = 0xeeb80bc0, -- ARMI_VCVT_F32_U32 = 0xeeb80a40, -- ARMI_VCVT_F64_U32 = 0xeeb80b40, -- ARMI_VCVT_F32_F64 = 0xeeb70bc0, -- ARMI_VCVT_F64_F32 = 0xeeb70ac0, -- -- ARMI_VLDR_S = 0xed100a00, -- ARMI_VLDR_D = 0xed100b00, -- ARMI_VSTR_S = 0xed000a00, -- ARMI_VSTR_D = 0xed000b00, --} S390Ins; -- --typedef enum S390Shift { -+typedef enum S390xShift { - S390SH_SLL, S390SH_SRL, S390SH_SRA -- # Adjustment needed for ROR --} S390Shift; -+} S390xShift; - - /* ARM condition codes. */ --typedef enum ARMCC { -- CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, -- CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, -- CC_HS = CC_CS, CC_LO = CC_CC --} ARMCC; -+typedef enum S390xCC { -+ -+} S390xCC; - - #endif --- -2.20.1 - - -From d60e4da56523dd9fb69317ef4bdecf85cc3f47a2 Mon Sep 17 00:00:00 2001 -From: ketank-new ketan22584@gmail.com -Date: Tue, 15 Nov 2016 10:42:11 +0530 -Subject: [PATCH 010/247] Create vm_s390x.dasc - -created vm_s390x.dasc file -its a copy of vm_x86.dasc -working on to change this specific to s390x ---- - src/vm_s390x.dasc | 5779 +++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 5779 insertions(+) - create mode 100644 src/vm_s390x.dasc - -diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc -new file mode 100644 -index 0000000..d7d618d ---- /dev/null -+++ b/src/vm_s390x.dasc -@@ -0,0 +1,5779 @@ -+|// Low-level VM code for x86 CPUs. -+|// Bytecode interpreter, fast functions and helper functions. -+|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h -+| -+|.if P64 -+|.arch x64 -+|.else -+|.arch x86 -+|.endif -+|.section code_op, code_sub -+| -+|.actionlist build_actionlist -+|.globals GLOB_ -+|.globalnames globnames -+|.externnames extnames -+| -+|//----------------------------------------------------------------------- -+| -+|.if P64 -+|.define X64, 1 -+|.if WIN -+|.define X64WIN, 1 -+|.endif -+|.endif -+| -+|// Fixed register assignments for the interpreter. -+|// This is very fragile and has many dependencies. Caveat emptor. -+|.define BASE, edx // Not C callee-save, refetched anyway. -+|.if not X64 -+|.define KBASE, edi // Must be C callee-save. -+|.define KBASEa, KBASE -+|.define PC, esi // Must be C callee-save. -+|.define PCa, PC -+|.define DISPATCH, ebx // Must be C callee-save. -+|.elif X64WIN -+|.define KBASE, edi // Must be C callee-save. -+|.define KBASEa, rdi -+|.define PC, esi // Must be C callee-save. -+|.define PCa, rsi -+|.define DISPATCH, ebx // Must be C callee-save. -+|.else -+|.define KBASE, r15d // Must be C callee-save. -+|.define KBASEa, r15 -+|.define PC, ebx // Must be C callee-save. -+|.define PCa, rbx -+|.define DISPATCH, r14d // Must be C callee-save. -+|.endif -+| -+|.define RA, ecx -+|.define RAH, ch -+|.define RAL, cl -+|.define RB, ebp // Must be ebp (C callee-save). -+|.define RC, eax // Must be eax. 
-+|.define RCW, ax -+|.define RCH, ah -+|.define RCL, al -+|.define OP, RB -+|.define RD, RC -+|.define RDW, RCW -+|.define RDL, RCL -+|.if X64 -+|.define RAa, rcx -+|.define RBa, rbp -+|.define RCa, rax -+|.define RDa, rax -+|.else -+|.define RAa, RA -+|.define RBa, RB -+|.define RCa, RC -+|.define RDa, RD -+|.endif -+| -+|.if not X64 -+|.define FCARG1, ecx // x86 fastcall arguments. -+|.define FCARG2, edx -+|.elif X64WIN -+|.define CARG1, rcx // x64/WIN64 C call arguments. -+|.define CARG2, rdx -+|.define CARG3, r8 -+|.define CARG4, r9 -+|.define CARG1d, ecx -+|.define CARG2d, edx -+|.define CARG3d, r8d -+|.define CARG4d, r9d -+|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall. -+|.define FCARG2, CARG2d -+|.else -+|.define CARG1, rdi // x64/POSIX C call arguments. -+|.define CARG2, rsi -+|.define CARG3, rdx -+|.define CARG4, rcx -+|.define CARG5, r8 -+|.define CARG6, r9 -+|.define CARG1d, edi -+|.define CARG2d, esi -+|.define CARG3d, edx -+|.define CARG4d, ecx -+|.define CARG5d, r8d -+|.define CARG6d, r9d -+|.define FCARG1, CARG1d // Simulate x86 fastcall. -+|.define FCARG2, CARG2d -+|.endif -+| -+|// Type definitions. Some of these are only used for documentation. -+|.type L, lua_State -+|.type GL, global_State -+|.type TVALUE, TValue -+|.type GCOBJ, GCobj -+|.type STR, GCstr -+|.type TAB, GCtab -+|.type LFUNC, GCfuncL -+|.type CFUNC, GCfuncC -+|.type PROTO, GCproto -+|.type UPVAL, GCupval -+|.type NODE, Node -+|.type NARGS, int -+|.type TRACE, GCtrace -+|.type SBUF, SBuf -+| -+|// Stack layout while in interpreter. Must match with lj_frame.h. -+|//----------------------------------------------------------------------- -+|.if not X64 // x86 stack layout. -+| -+|.if WIN -+| -+|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). -+|.macro saveregs_ -+| push edi; push esi; push ebx -+| push extern lj_err_unwind_win -+| fs; push dword [0] -+| fs; mov [0], esp -+| sub esp, CFRAME_SPACE -+|.endmacro -+|.macro restoreregs -+| add esp, CFRAME_SPACE -+| fs; pop dword [0] -+| pop edi // Short for esp += 4. -+| pop ebx; pop esi; pop edi; pop ebp -+|.endmacro -+| -+|.else -+| -+|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). -+|.macro saveregs_ -+| push edi; push esi; push ebx -+| sub esp, CFRAME_SPACE -+|.endmacro -+|.macro restoreregs -+| add esp, CFRAME_SPACE -+| pop ebx; pop esi; pop edi; pop ebp -+|.endmacro -+| -+|.endif -+| -+|.macro saveregs -+| push ebp; saveregs_ -+|.endmacro -+| -+|.if WIN -+|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. -+|.define SAVE_NRES, aword [esp+aword*18] -+|.define SAVE_CFRAME, aword [esp+aword*17] -+|.define SAVE_L, aword [esp+aword*16] -+|//----- 16 byte aligned, ^^^ arguments from C caller -+|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. -+|.define SAVE_R4, aword [esp+aword*14] -+|.define SAVE_R3, aword [esp+aword*13] -+|.define SAVE_R2, aword [esp+aword*12] -+|//----- 16 byte aligned -+|.define SAVE_R1, aword [esp+aword*11] -+|.define SEH_FUNC, aword [esp+aword*10] -+|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. -+|.define UNUSED2, aword [esp+aword*8] -+|//----- 16 byte aligned -+|.define UNUSED1, aword [esp+aword*7] -+|.define SAVE_PC, aword [esp+aword*6] -+|.define TMP2, aword [esp+aword*5] -+|.define TMP1, aword [esp+aword*4] -+|//----- 16 byte aligned -+|.define ARG4, aword [esp+aword*3] -+|.define ARG3, aword [esp+aword*2] -+|.define ARG2, aword [esp+aword*1] -+|.define ARG1, aword [esp] //<-- esp while in interpreter. 
-+|//----- 16 byte aligned, ^^^ arguments for C callee -+|.else -+|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. -+|.define SAVE_NRES, aword [esp+aword*14] -+|.define SAVE_CFRAME, aword [esp+aword*13] -+|.define SAVE_L, aword [esp+aword*12] -+|//----- 16 byte aligned, ^^^ arguments from C caller -+|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. -+|.define SAVE_R4, aword [esp+aword*10] -+|.define SAVE_R3, aword [esp+aword*9] -+|.define SAVE_R2, aword [esp+aword*8] -+|//----- 16 byte aligned -+|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. -+|.define SAVE_PC, aword [esp+aword*6] -+|.define TMP2, aword [esp+aword*5] -+|.define TMP1, aword [esp+aword*4] -+|//----- 16 byte aligned -+|.define ARG4, aword [esp+aword*3] -+|.define ARG3, aword [esp+aword*2] -+|.define ARG2, aword [esp+aword*1] -+|.define ARG1, aword [esp] //<-- esp while in interpreter. -+|//----- 16 byte aligned, ^^^ arguments for C callee -+|.endif -+| -+|// FPARGx overlaps ARGx and ARG(x+1) on x86. -+|.define FPARG3, qword [esp+qword*1] -+|.define FPARG1, qword [esp] -+|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ). -+|.define TMPQ, qword [esp+aword*4] -+|.define TMP3, ARG4 -+|.define ARG5, TMP1 -+|.define TMPa, TMP1 -+|.define MULTRES, TMP2 -+| -+|// Arguments for vm_call and vm_pcall. -+|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME! -+| -+|// Arguments for vm_cpcall. -+|.define INARG_CP_CALL, SAVE_ERRF -+|.define INARG_CP_UD, SAVE_NRES -+|.define INARG_CP_FUNC, SAVE_CFRAME -+| -+|//----------------------------------------------------------------------- -+|.elif X64WIN // x64/Windows stack layout -+| -+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). -+|.macro saveregs_ -+| push rdi; push rsi; push rbx -+| sub rsp, CFRAME_SPACE -+|.endmacro -+|.macro saveregs -+| push rbp; saveregs_ -+|.endmacro -+|.macro restoreregs -+| add rsp, CFRAME_SPACE -+| pop rbx; pop rsi; pop rdi; pop rbp -+|.endmacro -+| -+|.define SAVE_CFRAME, aword [rsp+aword*13] -+|.define SAVE_PC, dword [rsp+dword*25] -+|.define SAVE_L, dword [rsp+dword*24] -+|.define SAVE_ERRF, dword [rsp+dword*23] -+|.define SAVE_NRES, dword [rsp+dword*22] -+|.define TMP2, dword [rsp+dword*21] -+|.define TMP1, dword [rsp+dword*20] -+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter -+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -+|.define SAVE_R4, aword [rsp+aword*8] -+|.define SAVE_R3, aword [rsp+aword*7] -+|.define SAVE_R2, aword [rsp+aword*6] -+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -+|.define ARG5, aword [rsp+aword*4] -+|.define CSAVE_4, aword [rsp+aword*3] -+|.define CSAVE_3, aword [rsp+aword*2] -+|.define CSAVE_2, aword [rsp+aword*1] -+|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. -+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee -+| -+|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -+|.define TMPQ, qword [rsp+aword*10] -+|.define MULTRES, TMP2 -+|.define TMPa, ARG5 -+|.define ARG5d, dword [rsp+aword*4] -+|.define TMP3, ARG5d -+| -+|//----------------------------------------------------------------------- -+|.else // x64/POSIX stack layout -+| -+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 
-+|.macro saveregs_ -+| push rbx; push r15; push r14 -+|.if NO_UNWIND -+| push r13; push r12 -+|.endif -+| sub rsp, CFRAME_SPACE -+|.endmacro -+|.macro saveregs -+| push rbp; saveregs_ -+|.endmacro -+|.macro restoreregs -+| add rsp, CFRAME_SPACE -+|.if NO_UNWIND -+| pop r12; pop r13 -+|.endif -+| pop r14; pop r15; pop rbx; pop rbp -+|.endmacro -+| -+|//----- 16 byte aligned, -+|.if NO_UNWIND -+|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. -+|.define SAVE_R4, aword [rsp+aword*10] -+|.define SAVE_R3, aword [rsp+aword*9] -+|.define SAVE_R2, aword [rsp+aword*8] -+|.define SAVE_R1, aword [rsp+aword*7] -+|.define SAVE_RU2, aword [rsp+aword*6] -+|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. -+|.else -+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -+|.define SAVE_R4, aword [rsp+aword*8] -+|.define SAVE_R3, aword [rsp+aword*7] -+|.define SAVE_R2, aword [rsp+aword*6] -+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -+|.endif -+|.define SAVE_CFRAME, aword [rsp+aword*4] -+|.define SAVE_PC, dword [rsp+dword*7] -+|.define SAVE_L, dword [rsp+dword*6] -+|.define SAVE_ERRF, dword [rsp+dword*5] -+|.define SAVE_NRES, dword [rsp+dword*4] -+|.define TMPa, aword [rsp+aword*1] -+|.define TMP2, dword [rsp+dword*1] -+|.define TMP1, dword [rsp] //<-- rsp while in interpreter. -+|//----- 16 byte aligned -+| -+|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -+|.define TMPQ, qword [rsp] -+|.define TMP3, dword [rsp+aword*1] -+|.define MULTRES, TMP2 -+| -+|.endif -+| -+|//----------------------------------------------------------------------- -+| -+|// Instruction headers. -+|.macro ins_A; .endmacro -+|.macro ins_AD; .endmacro -+|.macro ins_AJ; .endmacro -+|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro -+|.macro ins_AB_; movzx RB, RCH; .endmacro -+|.macro ins_A_C; movzx RC, RCL; .endmacro -+|.macro ins_AND; not RDa; .endmacro -+| -+|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). -+|.macro ins_NEXT -+| mov RC, [PC] -+| movzx RA, RCH -+| movzx OP, RCL -+| add PC, 4 -+| shr RC, 16 -+|.if X64 -+| jmp aword [DISPATCH+OP*8] -+|.else -+| jmp aword [DISPATCH+OP*4] -+|.endif -+|.endmacro -+| -+|// Instruction footer. -+|.if 1 -+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -+| .define ins_next, ins_NEXT -+| .define ins_next_, ins_NEXT -+|.else -+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -+| // Affects only certain kinds of benchmarks (and only with -j off). -+| // Around 10%-30% slower on Core2, a lot more slower on P4. -+| .macro ins_next -+| jmp ->ins_next -+| .endmacro -+| .macro ins_next_ -+| ->ins_next: -+| ins_NEXT -+| .endmacro -+|.endif -+| -+|// Call decode and dispatch. -+|.macro ins_callt -+| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC -+| mov PC, LFUNC:RB->pc -+| mov RA, [PC] -+| movzx OP, RAL -+| movzx RA, RAH -+| add PC, 4 -+|.if X64 -+| jmp aword [DISPATCH+OP*8] -+|.else -+| jmp aword [DISPATCH+OP*4] -+|.endif -+|.endmacro -+| -+|.macro ins_call -+| // BASE = new base, RB = LFUNC, RD = nargs+1 -+| mov [BASE-4], PC -+| ins_callt -+|.endmacro -+| -+|//----------------------------------------------------------------------- -+| -+|// Macros to test operand types. 
-+|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro -+|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro -+|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro -+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro -+|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro -+| -+|// These operands must be used with movzx. -+|.define PC_OP, byte [PC-4] -+|.define PC_RA, byte [PC-3] -+|.define PC_RB, byte [PC-1] -+|.define PC_RC, byte [PC-2] -+|.define PC_RD, word [PC-2] -+| -+|.macro branchPC, reg -+| lea PC, [PC+reg*4-BCBIAS_J*4] -+|.endmacro -+| -+|// Assumes DISPATCH is relative to GL. -+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -+| -+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -+| -+|// Decrement hashed hotcount and trigger trace recorder if zero. -+|.macro hotloop, reg -+| mov reg, PC -+| shr reg, 1 -+| and reg, HOTCOUNT_PCMASK -+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP -+| jb ->vm_hotloop -+|.endmacro -+| -+|.macro hotcall, reg -+| mov reg, PC -+| shr reg, 1 -+| and reg, HOTCOUNT_PCMASK -+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL -+| jb ->vm_hotcall -+|.endmacro -+| -+|// Set current VM state. -+|.macro set_vmstate, st -+| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st -+|.endmacro -+| -+|// x87 compares. -+|.macro fcomparepp // Compare and pop st0 >< st1. -+| fucomip st1 -+| fpop -+|.endmacro -+| -+|.macro fpop1; fstp st1; .endmacro -+| -+|// Synthesize SSE FP constants. -+|.macro sseconst_abs, reg, tmp // Synthesize abs mask. -+|.if X64 -+| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp -+|.else -+| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 -+|.endif -+|.endmacro -+| -+|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. -+|.if X64 -+| mov64 tmp, U64x(val,00000000); movd reg, tmp -+|.else -+| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 -+|.endif -+|.endmacro -+| -+|.macro sseconst_sign, reg, tmp // Synthesize sign mask. -+| sseconst_hi reg, tmp, 80000000 -+|.endmacro -+|.macro sseconst_1, reg, tmp // Synthesize 1.0. -+| sseconst_hi reg, tmp, 3ff00000 -+|.endmacro -+|.macro sseconst_m1, reg, tmp // Synthesize -1.0. -+| sseconst_hi reg, tmp, bff00000 -+|.endmacro -+|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. -+| sseconst_hi reg, tmp, 43300000 -+|.endmacro -+|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. -+| sseconst_hi reg, tmp, 43380000 -+|.endmacro -+| -+|// Move table write barrier back. Overwrites reg. -+|.macro barrierback, tab, reg -+| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) -+| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] -+| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab -+| mov tab->gclist, reg -+|.endmacro -+| -+|//----------------------------------------------------------------------- -+ -+/* Generate subroutines used by opcodes and other parts of the VM. */ -+/* The .code_sub section should be last to help static branch prediction. 
*/ -+static void build_subroutines(BuildCtx *ctx) -+{ -+ |.code_sub -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Return handling ---------------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_returnp: -+ | test PC, FRAME_P -+ | jz ->cont_dispatch -+ | -+ | // Return from pcall or xpcall fast func. -+ | and PC, -8 -+ | sub BASE, PC // Restore caller base. -+ | lea RAa, [RA+PC-8] // Rebase RA and prepend one result. -+ | mov PC, [BASE-4] // Fetch PC of previous frame. -+ | // Prepending may overwrite the pcall frame, so do it at the end. -+ | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. -+ | -+ |->vm_returnc: -+ | add RD, 1 // RD = nresults+1 -+ | jz ->vm_unwind_yield -+ | mov MULTRES, RD -+ | test PC, FRAME_TYPE -+ | jz ->BC_RET_Z // Handle regular return to Lua. -+ | -+ |->vm_return: -+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return -+ | xor PC, FRAME_C -+ | test PC, FRAME_TYPE -+ | jnz ->vm_returnp -+ | -+ | // Return to C. -+ | set_vmstate C -+ | and PC, -8 -+ | sub PC, BASE -+ | neg PC // Previous base = BASE - delta. -+ | -+ | sub RD, 1 -+ | jz >2 -+ |1: // Move results down. -+ |.if X64 -+ | mov RBa, [BASE+RA] -+ | mov [BASE-8], RBa -+ |.else -+ | mov RB, [BASE+RA] -+ | mov [BASE-8], RB -+ | mov RB, [BASE+RA+4] -+ | mov [BASE-4], RB -+ |.endif -+ | add BASE, 8 -+ | sub RD, 1 -+ | jnz <1 -+ |2: -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, PC -+ |3: -+ | mov RD, MULTRES -+ | mov RA, SAVE_NRES // RA = wanted nresults+1 -+ |4: -+ | cmp RA, RD -+ | jne >6 // More/less results wanted? -+ |5: -+ | sub BASE, 8 -+ | mov L:RB->top, BASE -+ | -+ |->vm_leave_cp: -+ | mov RAa, SAVE_CFRAME // Restore previous C frame. -+ | mov L:RB->cframe, RAa -+ | xor eax, eax // Ok return status for vm_pcall. -+ | -+ |->vm_leave_unw: -+ | restoreregs -+ | ret -+ | -+ |6: -+ | jb >7 // Less results wanted? -+ | // More results wanted. Check stack size and fill up results with nil. -+ | cmp BASE, L:RB->maxstack -+ | ja >8 -+ | mov dword [BASE-4], LJ_TNIL -+ | add BASE, 8 -+ | add RD, 1 -+ | jmp <4 -+ | -+ |7: // Less results wanted. -+ | test RA, RA -+ | jz <5 // But check for LUA_MULTRET+1. -+ | sub RA, RD // Negative result! -+ | lea BASE, [BASE+RA*8] // Correct top. -+ | jmp <5 -+ | -+ |8: // Corner case: need to grow stack for filling up results. -+ | // This can happen if: -+ | // - A C function grows the stack (a lot). -+ | // - The GC shrinks the stack in between. -+ | // - A return back from a lua_call() with (high) nresults adjustment. -+ | mov L:RB->top, BASE // Save current top held in BASE (yes). -+ | mov MULTRES, RD // Need to fill only remainder with nil. -+ | mov FCARG2, RA -+ | mov FCARG1, L:RB -+ | call extern lj_state_growstack@8 // (lua_State *L, int n) -+ | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. -+ | jmp <3 -+ | -+ |->vm_unwind_yield: -+ | mov al, LUA_YIELD -+ | jmp ->vm_unwind_c_eh -+ | -+ |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall. -+ | // (void *cframe, int errcode) -+ |.if X64 -+ | mov eax, CARG2d // Error return status for vm_pcall. -+ | mov rsp, CARG1 -+ |.else -+ | mov eax, FCARG2 // Error return status for vm_pcall. -+ | mov esp, FCARG1 -+ |.if WIN -+ | lea FCARG1, SEH_NEXT -+ | fs; mov [0], FCARG1 -+ |.endif -+ |.endif -+ |->vm_unwind_c_eh: // Landing pad for external unwinder. 
-+ | mov L:RB, SAVE_L -+ | mov GL:RB, L:RB->glref -+ | mov dword GL:RB->vmstate, ~LJ_VMST_C -+ | jmp ->vm_leave_unw -+ | -+ |->vm_unwind_rethrow: -+ |.if X64 and not X64WIN -+ | mov FCARG1, SAVE_L -+ | mov FCARG2, eax -+ | restoreregs -+ | jmp extern lj_err_throw@8 // (lua_State *L, int errcode) -+ |.endif -+ | -+ |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall. -+ | // (void *cframe) -+ |.if X64 -+ | and CARG1, CFRAME_RAWMASK -+ | mov rsp, CARG1 -+ |.else -+ | and FCARG1, CFRAME_RAWMASK -+ | mov esp, FCARG1 -+ |.if WIN -+ | lea FCARG1, SEH_NEXT -+ | fs; mov [0], FCARG1 -+ |.endif -+ |.endif -+ |->vm_unwind_ff_eh: // Landing pad for external unwinder. -+ | mov L:RB, SAVE_L -+ | mov RAa, -8 // Results start at BASE+RA = BASE-8. -+ | mov RD, 1+1 // Really 1+2 results, incr. later. -+ | mov BASE, L:RB->base -+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. -+ | add DISPATCH, GG_G2DISP -+ | mov PC, [BASE-4] // Fetch PC of previous frame. -+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. -+ | set_vmstate INTERP -+ | jmp ->vm_returnc // Increments RD/MULTRES and returns. -+ | -+ |.if WIN and not X64 -+ |->vm_rtlunwind@16: // Thin layer around RtlUnwind. -+ | // (void *cframe, void *excptrec, void *unwinder, int errcode) -+ | mov [esp], FCARG1 // Return value for RtlUnwind. -+ | push FCARG2 // Exception record for RtlUnwind. -+ | push 0 // Ignored by RtlUnwind. -+ | push dword [FCARG1+CFRAME_OFS_SEH] -+ | call extern RtlUnwind@16 // Violates ABI (clobbers too much). -+ | mov FCARG1, eax -+ | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). -+ | ret // Jump to unwinder. -+ |.endif -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Grow stack for calls ----------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_growstack_c: // Grow stack for C function. -+ | mov FCARG2, LUA_MINSTACK -+ | jmp >2 -+ | -+ |->vm_growstack_v: // Grow stack for vararg Lua function. -+ | sub RD, 8 -+ | jmp >1 -+ | -+ |->vm_growstack_f: // Grow stack for fixarg Lua function. -+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC -+ | lea RD, [BASE+NARGS:RD*8-8] -+ |1: -+ | movzx RA, byte [PC-4+PC2PROTO(framesize)] -+ | add PC, 4 // Must point after first instruction. -+ | mov L:RB->base, BASE -+ | mov L:RB->top, RD -+ | mov SAVE_PC, PC -+ | mov FCARG2, RA -+ |2: -+ | // RB = L, L->base = new base, L->top = top -+ | mov FCARG1, L:RB -+ | call extern lj_state_growstack@8 // (lua_State *L, int n) -+ | mov BASE, L:RB->base -+ | mov RD, L:RB->top -+ | mov LFUNC:RB, [BASE-8] -+ | sub RD, BASE -+ | shr RD, 3 -+ | add NARGS:RD, 1 -+ | // BASE = new base, RB = LFUNC, RD = nargs+1 -+ | ins_callt // Just retry the call. -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Entry points into the assembler VM --------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_resume: // Setup C frame and resume thread. -+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) -+ | saveregs -+ |.if X64 -+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA. -+ | mov SAVE_L, CARG1d -+ | mov RA, CARG2d -+ |.else -+ | mov L:RB, SAVE_L -+ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! -+ |.endif -+ | mov PC, FRAME_CP -+ | xor RD, RD -+ | lea KBASEa, [esp+CFRAME_RESUME] -+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 
-+ | add DISPATCH, GG_G2DISP -+ | mov SAVE_PC, RD // Any value outside of bytecode is ok. -+ | mov SAVE_CFRAME, RDa -+ |.if X64 -+ | mov SAVE_NRES, RD -+ | mov SAVE_ERRF, RD -+ |.endif -+ | mov L:RB->cframe, KBASEa -+ | cmp byte L:RB->status, RDL -+ | je >2 // Initial resume (like a call). -+ | -+ | // Resume after yield (like a return). -+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB -+ | set_vmstate INTERP -+ | mov byte L:RB->status, RDL -+ | mov BASE, L:RB->base -+ | mov RD, L:RB->top -+ | sub RD, RA -+ | shr RD, 3 -+ | add RD, 1 // RD = nresults+1 -+ | sub RA, BASE // RA = resultofs -+ | mov PC, [BASE-4] -+ | mov MULTRES, RD -+ | test PC, FRAME_TYPE -+ | jz ->BC_RET_Z -+ | jmp ->vm_return -+ | -+ |->vm_pcall: // Setup protected C frame and enter VM. -+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) -+ | saveregs -+ | mov PC, FRAME_CP -+ |.if X64 -+ | mov SAVE_ERRF, CARG4d -+ |.endif -+ | jmp >1 -+ | -+ |->vm_call: // Setup C frame and enter VM. -+ | // (lua_State *L, TValue *base, int nres1) -+ | saveregs -+ | mov PC, FRAME_C -+ | -+ |1: // Entry point for vm_pcall above (PC = ftype). -+ |.if X64 -+ | mov SAVE_NRES, CARG3d -+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA. -+ | mov SAVE_L, CARG1d -+ | mov RA, CARG2d -+ |.else -+ | mov L:RB, SAVE_L -+ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! -+ |.endif -+ | -+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. -+ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. -+ | mov SAVE_CFRAME, KBASEa -+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. -+ | add DISPATCH, GG_G2DISP -+ |.if X64 -+ | mov L:RB->cframe, rsp -+ |.else -+ | mov L:RB->cframe, esp -+ |.endif -+ | -+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). -+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB -+ | set_vmstate INTERP -+ | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). -+ | add PC, RA -+ | sub PC, BASE // PC = frame delta + frame type -+ | -+ | mov RD, L:RB->top -+ | sub RD, RA -+ | shr NARGS:RD, 3 -+ | add NARGS:RD, 1 // RD = nargs+1 -+ | -+ |->vm_call_dispatch: -+ | mov LFUNC:RB, [RA-8] -+ | cmp dword [RA-4], LJ_TFUNC -+ | jne ->vmeta_call // Ensure KBASE defined and != BASE. -+ | -+ |->vm_call_dispatch_f: -+ | mov BASE, RA -+ | ins_call -+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC -+ | -+ |->vm_cpcall: // Setup protected C frame, call C. -+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) -+ | saveregs -+ |.if X64 -+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA. -+ | mov SAVE_L, CARG1d -+ |.else -+ | mov L:RB, SAVE_L -+ | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap! -+ | mov RC, INARG_CP_UD // Get args before they are overwritten. -+ | mov RA, INARG_CP_FUNC -+ | mov BASE, INARG_CP_CALL -+ |.endif -+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. -+ | -+ | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). -+ | sub KBASE, L:RB->top -+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. -+ | mov SAVE_ERRF, 0 // No error function. -+ | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. -+ | add DISPATCH, GG_G2DISP -+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). -+ | -+ |.if X64 -+ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 
-+ | mov SAVE_CFRAME, KBASEa -+ | mov L:RB->cframe, rsp -+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB -+ | -+ | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) -+ |.else -+ | mov ARG3, RC // Have to copy args downwards. -+ | mov ARG2, RA -+ | mov ARG1, L:RB -+ | -+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. -+ | mov SAVE_CFRAME, KBASE -+ | mov L:RB->cframe, esp -+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB -+ | -+ | call BASE // (lua_State *L, lua_CFunction func, void *ud) -+ |.endif -+ | // TValue * (new base) or NULL returned in eax (RC). -+ | test RC, RC -+ | jz ->vm_leave_cp // No base? Just remove C frame. -+ | mov RA, RC -+ | mov PC, FRAME_CP -+ | jmp <2 // Else continue with the call. -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Metamethod handling ------------------------------------------------ -+ |//----------------------------------------------------------------------- -+ | -+ |//-- Continuation dispatch ---------------------------------------------- -+ | -+ |->cont_dispatch: -+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) -+ | add RA, BASE -+ | and PC, -8 -+ | mov RB, BASE -+ | sub BASE, PC // Restore caller BASE. -+ | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. -+ | mov RC, RA // ... in [RC] -+ | mov PC, [RB-12] // Restore PC from [cont|PC]. -+ |.if X64 -+ | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. -+ |.if FFI -+ | cmp RA, 1 -+ | jbe >1 -+ |.endif -+ | lea KBASEa, qword [=>0] -+ | add RAa, KBASEa -+ |.else -+ | mov RA, dword [RB-16] -+ |.if FFI -+ | cmp RA, 1 -+ | jbe >1 -+ |.endif -+ |.endif -+ | mov LFUNC:KBASE, [BASE-8] -+ | mov KBASE, LFUNC:KBASE->pc -+ | mov KBASE, [KBASE+PC2PROTO(k)] -+ | // BASE = base, RC = result, RB = meta base -+ | jmp RAa // Jump to continuation. -+ | -+ |.if FFI -+ |1: -+ | je ->cont_ffi_callback // cont = 1: return from FFI callback. -+ | // cont = 0: Tail call from C function. -+ | sub RB, BASE -+ | shr RB, 3 -+ | lea RD, [RB-1] -+ | jmp ->vm_call_tail -+ |.endif -+ | -+ |->cont_cat: // BASE = base, RC = result, RB = mbase -+ | movzx RA, PC_RB -+ | sub RB, 16 -+ | lea RA, [BASE+RA*8] -+ | sub RA, RB -+ | je ->cont_ra -+ | neg RA -+ | shr RA, 3 -+ |.if X64WIN -+ | mov CARG3d, RA -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE -+ | mov RCa, [RC] -+ | mov [RB], RCa -+ | mov CARG2d, RB -+ |.elif X64 -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE -+ | mov CARG3d, RA -+ | mov RAa, [RC] -+ | mov [RB], RAa -+ | mov CARG2d, RB -+ |.else -+ | mov ARG3, RA -+ | mov RA, [RC+4] -+ | mov RC, [RC] -+ | mov [RB+4], RA -+ | mov [RB], RC -+ | mov ARG2, RB -+ |.endif -+ | jmp ->BC_CAT_Z -+ | -+ |//-- Table indexing metamethods ----------------------------------------- -+ | -+ |->vmeta_tgets: -+ | mov TMP1, RC // RC = GCstr * -+ | mov TMP2, LJ_TSTR -+ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. -+ | cmp PC_OP, BC_GGET -+ | jne >1 -+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. -+ | mov [RA], TAB:RB // RB = GCtab * -+ | mov dword [RA+4], LJ_TTAB -+ | mov RB, RA -+ | jmp >2 -+ | -+ |->vmeta_tgetb: -+ | movzx RC, PC_RC -+ |.if DUALNUM -+ | mov TMP2, LJ_TISNUM -+ | mov TMP1, RC -+ |.else -+ | cvtsi2sd xmm0, RC -+ | movsd TMPQ, xmm0 -+ |.endif -+ | lea RCa, TMPQ // Store temp. TValue in TMPQ. -+ | jmp >1 -+ | -+ |->vmeta_tgetv: -+ | movzx RC, PC_RC // Reload TValue *k from RC. -+ | lea RC, [BASE+RC*8] -+ |1: -+ | movzx RB, PC_RB // Reload TValue *t from RB. 
-+ | lea RB, [BASE+RB*8] -+ |2: -+ |.if X64 -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. -+ | mov CARG2d, RB -+ | mov CARG3, RCa // May be 64 bit ptr to stack. -+ | mov L:RB, L:CARG1d -+ |.else -+ | mov ARG2, RB -+ | mov L:RB, SAVE_L -+ | mov ARG3, RC -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) -+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC). -+ | mov BASE, L:RB->base -+ | test RC, RC -+ | jz >3 -+ |->cont_ra: // BASE = base, RC = result -+ | movzx RA, PC_RA -+ |.if X64 -+ | mov RBa, [RC] -+ | mov [BASE+RA*8], RBa -+ |.else -+ | mov RB, [RC+4] -+ | mov RC, [RC] -+ | mov [BASE+RA*8+4], RB -+ | mov [BASE+RA*8], RC -+ |.endif -+ | ins_next -+ | -+ |3: // Call __index metamethod. -+ | // BASE = base, L->top = new base, stack = cont/func/t/k -+ | mov RA, L:RB->top -+ | mov [RA-12], PC // [cont|PC] -+ | lea PC, [RA+FRAME_CONT] -+ | sub PC, BASE -+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. -+ | mov NARGS:RD, 2+1 // 2 args for func(t, k). -+ | jmp ->vm_call_dispatch_f -+ | -+ |->vmeta_tgetr: -+ | mov FCARG1, TAB:RB -+ | mov RB, BASE // Save BASE. -+ | mov FCARG2, RC // Caveat: FCARG2 == BASE -+ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) -+ | // cTValue * or NULL returned in eax (RC). -+ | movzx RA, PC_RA -+ | mov BASE, RB // Restore BASE. -+ | test RC, RC -+ | jnz ->BC_TGETR_Z -+ | mov dword [BASE+RA*8+4], LJ_TNIL -+ | jmp ->BC_TGETR2_Z -+ | -+ |//----------------------------------------------------------------------- -+ | -+ |->vmeta_tsets: -+ | mov TMP1, RC // RC = GCstr * -+ | mov TMP2, LJ_TSTR -+ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. -+ | cmp PC_OP, BC_GSET -+ | jne >1 -+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. -+ | mov [RA], TAB:RB // RB = GCtab * -+ | mov dword [RA+4], LJ_TTAB -+ | mov RB, RA -+ | jmp >2 -+ | -+ |->vmeta_tsetb: -+ | movzx RC, PC_RC -+ |.if DUALNUM -+ | mov TMP2, LJ_TISNUM -+ | mov TMP1, RC -+ |.else -+ | cvtsi2sd xmm0, RC -+ | movsd TMPQ, xmm0 -+ |.endif -+ | lea RCa, TMPQ // Store temp. TValue in TMPQ. -+ | jmp >1 -+ | -+ |->vmeta_tsetv: -+ | movzx RC, PC_RC // Reload TValue *k from RC. -+ | lea RC, [BASE+RC*8] -+ |1: -+ | movzx RB, PC_RB // Reload TValue *t from RB. -+ | lea RB, [BASE+RB*8] -+ |2: -+ |.if X64 -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. -+ | mov CARG2d, RB -+ | mov CARG3, RCa // May be 64 bit ptr to stack. -+ | mov L:RB, L:CARG1d -+ |.else -+ | mov ARG2, RB -+ | mov L:RB, SAVE_L -+ | mov ARG3, RC -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) -+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC). -+ | mov BASE, L:RB->base -+ | test RC, RC -+ | jz >3 -+ | // NOBARRIER: lj_meta_tset ensures the table is not black. -+ | movzx RA, PC_RA -+ |.if X64 -+ | mov RBa, [BASE+RA*8] -+ | mov [RC], RBa -+ |.else -+ | mov RB, [BASE+RA*8+4] -+ | mov RA, [BASE+RA*8] -+ | mov [RC+4], RB -+ | mov [RC], RA -+ |.endif -+ |->cont_nop: // BASE = base, (RC = result) -+ | ins_next -+ | -+ |3: // Call __newindex metamethod. -+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) -+ | mov RA, L:RB->top -+ | mov [RA-12], PC // [cont|PC] -+ | movzx RC, PC_RA -+ | // Copy value to third argument. 
-+ |.if X64 -+ | mov RBa, [BASE+RC*8] -+ | mov [RA+16], RBa -+ |.else -+ | mov RB, [BASE+RC*8+4] -+ | mov RC, [BASE+RC*8] -+ | mov [RA+20], RB -+ | mov [RA+16], RC -+ |.endif -+ | lea PC, [RA+FRAME_CONT] -+ | sub PC, BASE -+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. -+ | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). -+ | jmp ->vm_call_dispatch_f -+ | -+ |->vmeta_tsetr: -+ |.if X64WIN -+ | mov L:CARG1d, SAVE_L -+ | mov CARG3d, RC -+ | mov L:CARG1d->base, BASE -+ | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. -+ |.elif X64 -+ | mov L:CARG1d, SAVE_L -+ | mov CARG2d, TAB:RB -+ | mov L:CARG1d->base, BASE -+ | mov RB, BASE // Save BASE. -+ | mov CARG3d, RC // Caveat: CARG3d == BASE. -+ |.else -+ | mov L:RA, SAVE_L -+ | mov ARG2, TAB:RB -+ | mov RB, BASE // Save BASE. -+ | mov ARG3, RC -+ | mov ARG1, L:RA -+ | mov L:RA->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) -+ | // TValue * returned in eax (RC). -+ | movzx RA, PC_RA -+ | mov BASE, RB // Restore BASE. -+ | jmp ->BC_TSETR_Z -+ | -+ |//-- Comparison metamethods --------------------------------------------- -+ | -+ |->vmeta_comp: -+ |.if X64 -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE. -+ |.if X64WIN -+ | lea CARG3d, [BASE+RD*8] -+ | lea CARG2d, [BASE+RA*8] -+ |.else -+ | lea CARG2d, [BASE+RA*8] -+ | lea CARG3d, [BASE+RD*8] -+ |.endif -+ | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA. -+ | movzx CARG4d, PC_OP -+ |.else -+ | movzx RB, PC_OP -+ | lea RD, [BASE+RD*8] -+ | lea RA, [BASE+RA*8] -+ | mov ARG4, RB -+ | mov L:RB, SAVE_L -+ | mov ARG3, RD -+ | mov ARG2, RA -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) -+ | // 0/1 or TValue * (metamethod) returned in eax (RC). -+ |3: -+ | mov BASE, L:RB->base -+ | cmp RC, 1 -+ | ja ->vmeta_binop -+ |4: -+ | lea PC, [PC+4] -+ | jb >6 -+ |5: -+ | movzx RD, PC_RD -+ | branchPC RD -+ |6: -+ | ins_next -+ | -+ |->cont_condt: // BASE = base, RC = result -+ | add PC, 4 -+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. -+ | jb <5 -+ | jmp <6 -+ | -+ |->cont_condf: // BASE = base, RC = result -+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false. -+ | jmp <4 -+ | -+ |->vmeta_equal: -+ | sub PC, 4 -+ |.if X64WIN -+ | mov CARG3d, RD -+ | mov CARG4d, RB -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Caveat: CARG2d == BASE. -+ | mov CARG2d, RA -+ | mov CARG1d, L:RB // Caveat: CARG1d == RA. -+ |.elif X64 -+ | mov CARG2d, RA -+ | mov CARG4d, RB // Caveat: CARG4d == RA. -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Caveat: CARG3d == BASE. -+ | mov CARG3d, RD -+ | mov CARG1d, L:RB -+ |.else -+ | mov ARG4, RB -+ | mov L:RB, SAVE_L -+ | mov ARG3, RD -+ | mov ARG2, RA -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) -+ | // 0/1 or TValue * (metamethod) returned in eax (RC). -+ | jmp <3 -+ | -+ |->vmeta_equal_cd: -+ |.if FFI -+ | sub PC, 4 -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov FCARG1, L:RB -+ | mov FCARG2, dword [PC-4] -+ | mov SAVE_PC, PC -+ | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) -+ | // 0/1 or TValue * (metamethod) returned in eax (RC). -+ | jmp <3 -+ |.endif -+ | -+ |->vmeta_istype: -+ |.if X64 -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 
-+ | mov CARG2d, RA -+ | movzx CARG3d, PC_RD -+ | mov L:CARG1d, L:RB -+ |.else -+ | movzx RD, PC_RD -+ | mov ARG2, RA -+ | mov L:RB, SAVE_L -+ | mov ARG3, RD -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) -+ | mov BASE, L:RB->base -+ | jmp <6 -+ | -+ |//-- Arithmetic metamethods --------------------------------------------- -+ | -+ |->vmeta_arith_vno: -+ |.if DUALNUM -+ | movzx RB, PC_RB -+ |.endif -+ |->vmeta_arith_vn: -+ | lea RC, [KBASE+RC*8] -+ | jmp >1 -+ | -+ |->vmeta_arith_nvo: -+ |.if DUALNUM -+ | movzx RC, PC_RC -+ |.endif -+ |->vmeta_arith_nv: -+ | lea RC, [KBASE+RC*8] -+ | lea RB, [BASE+RB*8] -+ | xchg RB, RC -+ | jmp >2 -+ | -+ |->vmeta_unm: -+ | lea RC, [BASE+RD*8] -+ | mov RB, RC -+ | jmp >2 -+ | -+ |->vmeta_arith_vvo: -+ |.if DUALNUM -+ | movzx RB, PC_RB -+ |.endif -+ |->vmeta_arith_vv: -+ | lea RC, [BASE+RC*8] -+ |1: -+ | lea RB, [BASE+RB*8] -+ |2: -+ | lea RA, [BASE+RA*8] -+ |.if X64WIN -+ | mov CARG3d, RB -+ | mov CARG4d, RC -+ | movzx RC, PC_OP -+ | mov ARG5d, RC -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Caveat: CARG2d == BASE. -+ | mov CARG2d, RA -+ | mov CARG1d, L:RB // Caveat: CARG1d == RA. -+ |.elif X64 -+ | movzx CARG5d, PC_OP -+ | mov CARG2d, RA -+ | mov CARG4d, RC // Caveat: CARG4d == RA. -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE. -+ | mov CARG3d, RB -+ | mov L:RB, L:CARG1d -+ |.else -+ | mov ARG3, RB -+ | mov L:RB, SAVE_L -+ | mov ARG4, RC -+ | movzx RC, PC_OP -+ | mov ARG2, RA -+ | mov ARG5, RC -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) -+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC). -+ | mov BASE, L:RB->base -+ | test RC, RC -+ | jz ->cont_nop -+ | -+ | // Call metamethod for binary op. -+ |->vmeta_binop: -+ | // BASE = base, RC = new base, stack = cont/func/o1/o2 -+ | mov RA, RC -+ | sub RC, BASE -+ | mov [RA-12], PC // [cont|PC] -+ | lea PC, [RC+FRAME_CONT] -+ | mov NARGS:RD, 2+1 // 2 args for func(o1, o2). -+ | jmp ->vm_call_dispatch -+ | -+ |->vmeta_len: -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE -+ | mov L:FCARG1, L:RB -+ | mov SAVE_PC, PC -+ | call extern lj_meta_len@8 // (lua_State *L, TValue *o) -+ | // NULL (retry) or TValue * (metamethod) returned in eax (RC). -+ | mov BASE, L:RB->base -+#if LJ_52 -+ | test RC, RC -+ | jne ->vmeta_binop // Binop call for compatibility. -+ | movzx RD, PC_RD -+ | mov TAB:FCARG1, [BASE+RD*8] -+ | jmp ->BC_LEN_Z -+#else -+ | jmp ->vmeta_binop // Binop call for compatibility. -+#endif -+ | -+ |//-- Call metamethod ---------------------------------------------------- -+ | -+ |->vmeta_call_ra: -+ | lea RA, [BASE+RA*8+8] -+ |->vmeta_call: // Resolve and call __call metamethod. -+ | // BASE = old base, RA = new base, RC = nargs+1, PC = return -+ | mov TMP2, RA // Save RA, RC for us. -+ | mov TMP1, NARGS:RD -+ | sub RA, 8 -+ |.if X64 -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. -+ | mov CARG2d, RA -+ | lea CARG3d, [RA+NARGS:RD*8] -+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA. -+ |.else -+ | lea RC, [RA+NARGS:RD*8] -+ | mov L:RB, SAVE_L -+ | mov ARG2, RA -+ | mov ARG3, RC -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE // This is the callers base! 
-+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) -+ | mov BASE, L:RB->base -+ | mov RA, TMP2 -+ | mov NARGS:RD, TMP1 -+ | mov LFUNC:RB, [RA-8] -+ | add NARGS:RD, 1 -+ | // This is fragile. L->base must not move, KBASE must always be defined. -+ | cmp KBASE, BASE // Continue with CALLT if flag set. -+ | je ->BC_CALLT_Z -+ | mov BASE, RA -+ | ins_call // Otherwise call resolved metamethod. -+ | -+ |//-- Argument coercion for 'for' statement ------------------------------ -+ | -+ |->vmeta_for: -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov FCARG2, RA // Caveat: FCARG2 == BASE -+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA -+ | mov SAVE_PC, PC -+ | call extern lj_meta_for@8 // (lua_State *L, TValue *base) -+ | mov BASE, L:RB->base -+ | mov RC, [PC-4] -+ | movzx RA, RCH -+ | movzx OP, RCL -+ | shr RC, 16 -+ |.if X64 -+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. -+ |.else -+ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI. -+ |.endif -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Fast functions ----------------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |.macro .ffunc, name -+ |->ff_ .. name: -+ |.endmacro -+ | -+ |.macro .ffunc_1, name -+ |->ff_ .. name: -+ | cmp NARGS:RD, 1+1; jb ->fff_fallback -+ |.endmacro -+ | -+ |.macro .ffunc_2, name -+ |->ff_ .. name: -+ | cmp NARGS:RD, 2+1; jb ->fff_fallback -+ |.endmacro -+ | -+ |.macro .ffunc_nsse, name, op -+ | .ffunc_1 name -+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback -+ | op xmm0, qword [BASE] -+ |.endmacro -+ | -+ |.macro .ffunc_nsse, name -+ | .ffunc_nsse name, movsd -+ |.endmacro -+ | -+ |.macro .ffunc_nnsse, name -+ | .ffunc_2 name -+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback -+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback -+ | movsd xmm0, qword [BASE] -+ | movsd xmm1, qword [BASE+8] -+ |.endmacro -+ | -+ |.macro .ffunc_nnr, name -+ | .ffunc_2 name -+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback -+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback -+ | fld qword [BASE+8] -+ | fld qword [BASE] -+ |.endmacro -+ | -+ |// Inlined GC threshold check. Caveat: uses label 1. 
-+ |.macro ffgccheck -+ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] -+ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] -+ | jb >1 -+ | call ->fff_gcstep -+ |1: -+ |.endmacro -+ | -+ |//-- Base library: checks ----------------------------------------------- -+ | -+ |.ffunc_1 assert -+ | mov RB, [BASE+4] -+ | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback -+ | mov PC, [BASE-4] -+ | mov MULTRES, RD -+ | mov [BASE-4], RB -+ | mov RB, [BASE] -+ | mov [BASE-8], RB -+ | sub RD, 2 -+ | jz >2 -+ | mov RA, BASE -+ |1: -+ | add RA, 8 -+ |.if X64 -+ | mov RBa, [RA] -+ | mov [RA-8], RBa -+ |.else -+ | mov RB, [RA+4] -+ | mov [RA-4], RB -+ | mov RB, [RA] -+ | mov [RA-8], RB -+ |.endif -+ | sub RD, 1 -+ | jnz <1 -+ |2: -+ | mov RD, MULTRES -+ | jmp ->fff_res_ -+ | -+ |.ffunc_1 type -+ | mov RB, [BASE+4] -+ |.if X64 -+ | mov RA, RB -+ | sar RA, 15 -+ | cmp RA, -2 -+ | je >3 -+ |.endif -+ | mov RC, ~LJ_TNUMX -+ | not RB -+ | cmp RC, RB -+ | cmova RC, RB -+ |2: -+ | mov CFUNC:RB, [BASE-8] -+ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] -+ | mov PC, [BASE-4] -+ | mov dword [BASE-4], LJ_TSTR -+ | mov [BASE-8], STR:RC -+ | jmp ->fff_res1 -+ |.if X64 -+ |3: -+ | mov RC, ~LJ_TLIGHTUD -+ | jmp <2 -+ |.endif -+ | -+ |//-- Base library: getters and setters --------------------------------- -+ | -+ |.ffunc_1 getmetatable -+ | mov RB, [BASE+4] -+ | mov PC, [BASE-4] -+ | cmp RB, LJ_TTAB; jne >6 -+ |1: // Field metatable must be at same offset for GCtab and GCudata! -+ | mov TAB:RB, [BASE] -+ | mov TAB:RB, TAB:RB->metatable -+ |2: -+ | test TAB:RB, TAB:RB -+ | mov dword [BASE-4], LJ_TNIL -+ | jz ->fff_res1 -+ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)] -+ | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. -+ | mov [BASE-8], TAB:RB -+ | mov RA, TAB:RB->hmask -+ | and RA, STR:RC->hash -+ | imul RA, #NODE -+ | add NODE:RA, TAB:RB->node -+ |3: // Rearranged logic, because we expect _not_ to find the key. -+ | cmp dword NODE:RA->key.it, LJ_TSTR -+ | jne >4 -+ | cmp dword NODE:RA->key.gcr, STR:RC -+ | je >5 -+ |4: -+ | mov NODE:RA, NODE:RA->next -+ | test NODE:RA, NODE:RA -+ | jnz <3 -+ | jmp ->fff_res1 // Not found, keep default result. -+ |5: -+ | mov RB, [RA+4] -+ | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. -+ | mov RC, [RA] -+ | mov [BASE-4], RB // Return value of mt.__metatable. -+ | mov [BASE-8], RC -+ | jmp ->fff_res1 -+ | -+ |6: -+ | cmp RB, LJ_TUDATA; je <1 -+ |.if X64 -+ | cmp RB, LJ_TNUMX; ja >8 -+ | cmp RB, LJ_TISNUM; jbe >7 -+ | mov RB, LJ_TLIGHTUD -+ | jmp >8 -+ |7: -+ |.else -+ | cmp RB, LJ_TISNUM; ja >8 -+ |.endif -+ | mov RB, LJ_TNUMX -+ |8: -+ | not RB -+ | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] -+ | jmp <2 -+ | -+ |.ffunc_2 setmetatable -+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -+ | // Fast path: no mt for table yet and not clearing the mt. -+ | mov TAB:RB, [BASE] -+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -+ | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback -+ | mov TAB:RC, [BASE+8] -+ | mov TAB:RB->metatable, TAB:RC -+ | mov PC, [BASE-4] -+ | mov dword [BASE-4], LJ_TTAB // Return original table. -+ | mov [BASE-8], TAB:RB -+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) -+ | jz >1 -+ | // Possible write barrier. Table is black, but skip iswhite(mt) check. -+ | barrierback TAB:RB, RC -+ |1: -+ | jmp ->fff_res1 -+ | -+ |.ffunc_2 rawget -+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -+ |.if X64WIN -+ | mov RB, BASE // Save BASE. 
-+ | lea CARG3d, [BASE+8] -+ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. -+ | mov CARG1d, SAVE_L -+ |.elif X64 -+ | mov RB, BASE // Save BASE. -+ | mov CARG2d, [BASE] -+ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. -+ | mov CARG1d, SAVE_L -+ |.else -+ | mov TAB:RD, [BASE] -+ | mov L:RB, SAVE_L -+ | mov ARG2, TAB:RD -+ | mov ARG1, L:RB -+ | mov RB, BASE // Save BASE. -+ | add BASE, 8 -+ | mov ARG3, BASE -+ |.endif -+ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) -+ | // cTValue * returned in eax (RD). -+ | mov BASE, RB // Restore BASE. -+ | // Copy table slot. -+ |.if X64 -+ | mov RBa, [RD] -+ | mov PC, [BASE-4] -+ | mov [BASE-8], RBa -+ |.else -+ | mov RB, [RD] -+ | mov RD, [RD+4] -+ | mov PC, [BASE-4] -+ | mov [BASE-8], RB -+ | mov [BASE-4], RD -+ |.endif -+ | jmp ->fff_res1 -+ | -+ |//-- Base library: conversions ------------------------------------------ -+ | -+ |.ffunc tonumber -+ | // Only handles the number case inline (without a base argument). -+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. -+ | cmp dword [BASE+4], LJ_TISNUM -+ |.if DUALNUM -+ | jne >1 -+ | mov RB, dword [BASE]; jmp ->fff_resi -+ |1: -+ | ja ->fff_fallback -+ |.else -+ | jae ->fff_fallback -+ |.endif -+ | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 -+ | -+ |.ffunc_1 tostring -+ | // Only handles the string or number case inline. -+ | mov PC, [BASE-4] -+ | cmp dword [BASE+4], LJ_TSTR; jne >3 -+ | // A __tostring method in the string base metatable is ignored. -+ | mov STR:RD, [BASE] -+ |2: -+ | mov dword [BASE-4], LJ_TSTR -+ | mov [BASE-8], STR:RD -+ | jmp ->fff_res1 -+ |3: // Handle numbers inline, unless a number base metatable is present. -+ | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback -+ | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 -+ | jne ->fff_fallback -+ | ffgccheck // Caveat: uses label 1. -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Add frame since C call can throw. -+ | mov SAVE_PC, PC // Redundant (but a defined value). -+ |.if X64 and not X64WIN -+ | mov FCARG2, BASE // Otherwise: FCARG2 == BASE -+ |.endif -+ | mov L:FCARG1, L:RB -+ |.if DUALNUM -+ | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) -+ |.else -+ | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) -+ |.endif -+ | // GCstr returned in eax (RD). -+ | mov BASE, L:RB->base -+ | jmp <2 -+ | -+ |//-- Base library: iterators ------------------------------------------- -+ | -+ |.ffunc_1 next -+ | je >2 // Missing 2nd arg? -+ |1: -+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Add frame since C call can throw. -+ | mov L:RB->top, BASE // Dummy frame length is ok. -+ | mov PC, [BASE-4] -+ |.if X64WIN -+ | lea CARG3d, [BASE+8] -+ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. -+ | mov CARG1d, L:RB -+ |.elif X64 -+ | mov CARG2d, [BASE] -+ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. -+ | mov CARG1d, L:RB -+ |.else -+ | mov TAB:RD, [BASE] -+ | mov ARG2, TAB:RD -+ | mov ARG1, L:RB -+ | add BASE, 8 -+ | mov ARG3, BASE -+ |.endif -+ | mov SAVE_PC, PC // Needed for ITERN fallback. -+ | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) -+ | // Flag returned in eax (RD). -+ | mov BASE, L:RB->base -+ | test RD, RD; jz >3 // End of traversal? -+ | // Copy key and value to results. 
-+ |.if X64 -+ | mov RBa, [BASE+8] -+ | mov RDa, [BASE+16] -+ | mov [BASE-8], RBa -+ | mov [BASE], RDa -+ |.else -+ | mov RB, [BASE+8] -+ | mov RD, [BASE+12] -+ | mov [BASE-8], RB -+ | mov [BASE-4], RD -+ | mov RB, [BASE+16] -+ | mov RD, [BASE+20] -+ | mov [BASE], RB -+ | mov [BASE+4], RD -+ |.endif -+ |->fff_res2: -+ | mov RD, 1+2 -+ | jmp ->fff_res -+ |2: // Set missing 2nd arg to nil. -+ | mov dword [BASE+12], LJ_TNIL -+ | jmp <1 -+ |3: // End of traversal: return nil. -+ | mov dword [BASE-4], LJ_TNIL -+ | jmp ->fff_res1 -+ | -+ |.ffunc_1 pairs -+ | mov TAB:RB, [BASE] -+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -+#if LJ_52 -+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -+#endif -+ | mov CFUNC:RB, [BASE-8] -+ | mov CFUNC:RD, CFUNC:RB->upvalue[0] -+ | mov PC, [BASE-4] -+ | mov dword [BASE-4], LJ_TFUNC -+ | mov [BASE-8], CFUNC:RD -+ | mov dword [BASE+12], LJ_TNIL -+ | mov RD, 1+3 -+ | jmp ->fff_res -+ | -+ |.ffunc_2 ipairs_aux -+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -+ | cmp dword [BASE+12], LJ_TISNUM -+ |.if DUALNUM -+ | jne ->fff_fallback -+ |.else -+ | jae ->fff_fallback -+ |.endif -+ | mov PC, [BASE-4] -+ |.if DUALNUM -+ | mov RD, dword [BASE+8] -+ | add RD, 1 -+ | mov dword [BASE-4], LJ_TISNUM -+ | mov dword [BASE-8], RD -+ |.else -+ | movsd xmm0, qword [BASE+8] -+ | sseconst_1 xmm1, RBa -+ | addsd xmm0, xmm1 -+ | cvttsd2si RD, xmm0 -+ | movsd qword [BASE-8], xmm0 -+ |.endif -+ | mov TAB:RB, [BASE] -+ | cmp RD, TAB:RB->asize; jae >2 // Not in array part? -+ | shl RD, 3 -+ | add RD, TAB:RB->array -+ |1: -+ | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 -+ | // Copy array slot. -+ |.if X64 -+ | mov RBa, [RD] -+ | mov [BASE], RBa -+ |.else -+ | mov RB, [RD] -+ | mov RD, [RD+4] -+ | mov [BASE], RB -+ | mov [BASE+4], RD -+ |.endif -+ | jmp ->fff_res2 -+ |2: // Check for empty hash part first. Otherwise call C function. -+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0 -+ | mov FCARG1, TAB:RB -+ | mov RB, BASE // Save BASE. -+ | mov FCARG2, RD // Caveat: FCARG2 == BASE -+ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) -+ | // cTValue * or NULL returned in eax (RD). -+ | mov BASE, RB -+ | test RD, RD -+ | jnz <1 -+ |->fff_res0: -+ | mov RD, 1+0 -+ | jmp ->fff_res -+ | -+ |.ffunc_1 ipairs -+ | mov TAB:RB, [BASE] -+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -+#if LJ_52 -+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -+#endif -+ | mov CFUNC:RB, [BASE-8] -+ | mov CFUNC:RD, CFUNC:RB->upvalue[0] -+ | mov PC, [BASE-4] -+ | mov dword [BASE-4], LJ_TFUNC -+ | mov [BASE-8], CFUNC:RD -+ |.if DUALNUM -+ | mov dword [BASE+12], LJ_TISNUM -+ | mov dword [BASE+8], 0 -+ |.else -+ | xorps xmm0, xmm0 -+ | movsd qword [BASE+8], xmm0 -+ |.endif -+ | mov RD, 1+3 -+ | jmp ->fff_res -+ | -+ |//-- Base library: catch errors ---------------------------------------- -+ | -+ |.ffunc_1 pcall -+ | lea RA, [BASE+8] -+ | sub NARGS:RD, 1 -+ | mov PC, 8+FRAME_PCALL -+ |1: -+ | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)] -+ | shr RB, HOOK_ACTIVE_SHIFT -+ | and RB, 1 -+ | add PC, RB // Remember active hook before pcall. -+ | jmp ->vm_call_dispatch -+ | -+ |.ffunc_2 xpcall -+ | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback -+ | mov RB, [BASE+4] // Swap function and traceback. 
-+ | mov [BASE+12], RB -+ | mov dword [BASE+4], LJ_TFUNC -+ | mov LFUNC:RB, [BASE] -+ | mov PC, [BASE+8] -+ | mov [BASE+8], LFUNC:RB -+ | mov [BASE], PC -+ | lea RA, [BASE+16] -+ | sub NARGS:RD, 2 -+ | mov PC, 16+FRAME_PCALL -+ | jmp <1 -+ | -+ |//-- Coroutine library -------------------------------------------------- -+ | -+ |.macro coroutine_resume_wrap, resume -+ |.if resume -+ |.ffunc_1 coroutine_resume -+ | mov L:RB, [BASE] -+ |.else -+ |.ffunc coroutine_wrap_aux -+ | mov CFUNC:RB, [BASE-8] -+ | mov L:RB, CFUNC:RB->upvalue[0].gcr -+ |.endif -+ | mov PC, [BASE-4] -+ | mov SAVE_PC, PC -+ |.if X64 -+ | mov TMP1, L:RB -+ |.else -+ | mov ARG1, L:RB -+ |.endif -+ |.if resume -+ | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback -+ |.endif -+ | cmp aword L:RB->cframe, 0; jne ->fff_fallback -+ | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback -+ | mov RA, L:RB->top -+ | je >1 // Status != LUA_YIELD (i.e. 0)? -+ | cmp RA, L:RB->base // Check for presence of initial func. -+ | je ->fff_fallback -+ |1: -+ |.if resume -+ | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). -+ |.else -+ | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). -+ |.endif -+ | cmp PC, L:RB->maxstack; ja ->fff_fallback -+ | mov L:RB->top, PC -+ | -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ |.if resume -+ | add BASE, 8 // Keep resumed thread in stack for GC. -+ |.endif -+ | mov L:RB->top, BASE -+ |.if resume -+ | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. -+ |.else -+ | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. -+ |.endif -+ | sub RBa, PCa // Relative to PC. -+ | -+ | cmp PC, RA -+ | je >3 -+ |2: // Move args to coroutine. -+ |.if X64 -+ | mov RCa, [PC+RB] -+ | mov [PC-8], RCa -+ |.else -+ | mov RC, [PC+RB+4] -+ | mov [PC-4], RC -+ | mov RC, [PC+RB] -+ | mov [PC-8], RC -+ |.endif -+ | sub PC, 8 -+ | cmp PC, RA -+ | jne <2 -+ |3: -+ |.if X64 -+ | mov CARG2d, RA -+ | mov CARG1d, TMP1 -+ |.else -+ | mov ARG2, RA -+ | xor RA, RA -+ | mov ARG4, RA -+ | mov ARG3, RA -+ |.endif -+ | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) -+ | -+ | mov L:RB, SAVE_L -+ |.if X64 -+ | mov L:PC, TMP1 -+ |.else -+ | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. -+ |.endif -+ | mov BASE, L:RB->base -+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB -+ | set_vmstate INTERP -+ | -+ | cmp eax, LUA_YIELD -+ | ja >8 -+ |4: -+ | mov RA, L:PC->base -+ | mov KBASE, L:PC->top -+ | mov L:PC->top, RA // Clear coroutine stack. -+ | mov PC, KBASE -+ | sub PC, RA -+ | je >6 // No results? -+ | lea RD, [BASE+PC] -+ | shr PC, 3 -+ | cmp RD, L:RB->maxstack -+ | ja >9 // Need to grow stack? -+ | -+ | mov RB, BASE -+ | sub RBa, RAa -+ |5: // Move results from coroutine. -+ |.if X64 -+ | mov RDa, [RA] -+ | mov [RA+RB], RDa -+ |.else -+ | mov RD, [RA] -+ | mov [RA+RB], RD -+ | mov RD, [RA+4] -+ | mov [RA+RB+4], RD -+ |.endif -+ | add RA, 8 -+ | cmp RA, KBASE -+ | jne <5 -+ |6: -+ |.if resume -+ | lea RD, [PC+2] // nresults+1 = 1 + true + results. -+ | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. -+ |.else -+ | lea RD, [PC+1] // nresults+1 = 1 + results. -+ |.endif -+ |7: -+ | mov PC, SAVE_PC -+ | mov MULTRES, RD -+ |.if resume -+ | mov RAa, -8 -+ |.else -+ | xor RA, RA -+ |.endif -+ | test PC, FRAME_TYPE -+ | jz ->BC_RET_Z -+ | jmp ->vm_return -+ | -+ |8: // Coroutine returned with error (at co->top-1). -+ |.if resume -+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. 
-+ | mov RA, L:PC->top -+ | sub RA, 8 -+ | mov L:PC->top, RA // Clear error from coroutine stack. -+ | // Copy error message. -+ |.if X64 -+ | mov RDa, [RA] -+ | mov [BASE], RDa -+ |.else -+ | mov RD, [RA] -+ | mov [BASE], RD -+ | mov RD, [RA+4] -+ | mov [BASE+4], RD -+ |.endif -+ | mov RD, 1+2 // nresults+1 = 1 + false + error. -+ | jmp <7 -+ |.else -+ | mov FCARG2, L:PC -+ | mov FCARG1, L:RB -+ | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co) -+ | // Error function does not return. -+ |.endif -+ | -+ |9: // Handle stack expansion on return from yield. -+ |.if X64 -+ | mov L:RA, TMP1 -+ |.else -+ | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. -+ |.endif -+ | mov L:RA->top, KBASE // Undo coroutine stack clearing. -+ | mov FCARG2, PC -+ | mov FCARG1, L:RB -+ | call extern lj_state_growstack@8 // (lua_State *L, int n) -+ |.if X64 -+ | mov L:PC, TMP1 -+ |.else -+ | mov L:PC, ARG1 -+ |.endif -+ | mov BASE, L:RB->base -+ | jmp <4 // Retry the stack move. -+ |.endmacro -+ | -+ | coroutine_resume_wrap 1 // coroutine.resume -+ | coroutine_resume_wrap 0 // coroutine.wrap -+ | -+ |.ffunc coroutine_yield -+ | mov L:RB, SAVE_L -+ | test aword L:RB->cframe, CFRAME_RESUME -+ | jz ->fff_fallback -+ | mov L:RB->base, BASE -+ | lea RD, [BASE+NARGS:RD*8-8] -+ | mov L:RB->top, RD -+ | xor RD, RD -+ | mov aword L:RB->cframe, RDa -+ | mov al, LUA_YIELD -+ | mov byte L:RB->status, al -+ | jmp ->vm_leave_unw -+ | -+ |//-- Math library ------------------------------------------------------- -+ | -+ |.if not DUALNUM -+ |->fff_resi: // Dummy. -+ |.endif -+ | -+ |->fff_resn: -+ | mov PC, [BASE-4] -+ | fstp qword [BASE-8] -+ | jmp ->fff_res1 -+ | -+ | .ffunc_1 math_abs -+ |.if DUALNUM -+ | cmp dword [BASE+4], LJ_TISNUM; jne >2 -+ | mov RB, dword [BASE] -+ | cmp RB, 0; jns ->fff_resi -+ | neg RB; js >1 -+ |->fff_resbit: -+ |->fff_resi: -+ | mov PC, [BASE-4] -+ | mov dword [BASE-4], LJ_TISNUM -+ | mov dword [BASE-8], RB -+ | jmp ->fff_res1 -+ |1: -+ | mov PC, [BASE-4] -+ | mov dword [BASE-4], 0x41e00000 // 2^31. -+ | mov dword [BASE-8], 0 -+ | jmp ->fff_res1 -+ |2: -+ | ja ->fff_fallback -+ |.else -+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback -+ |.endif -+ | movsd xmm0, qword [BASE] -+ | sseconst_abs xmm1, RDa -+ | andps xmm0, xmm1 -+ |->fff_resxmm0: -+ | mov PC, [BASE-4] -+ | movsd qword [BASE-8], xmm0 -+ | // fallthrough -+ | -+ |->fff_res1: -+ | mov RD, 1+1 -+ |->fff_res: -+ | mov MULTRES, RD -+ |->fff_res_: -+ | test PC, FRAME_TYPE -+ | jnz >7 -+ |5: -+ | cmp PC_RB, RDL // More results expected? -+ | ja >6 -+ | // Adjust BASE. KBASE is assumed to be set for the calling frame. -+ | movzx RA, PC_RA -+ | not RAa // Note: ~RA = -(RA+1) -+ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 -+ | ins_next -+ | -+ |6: // Fill up results with nil. -+ | mov dword [BASE+RD*8-12], LJ_TNIL -+ | add RD, 1 -+ | jmp <5 -+ | -+ |7: // Non-standard return case. -+ | mov RAa, -8 // Results start at BASE+RA = BASE-8. -+ | jmp ->vm_return -+ | -+ |.if X64 -+ |.define fff_resfp, fff_resxmm0 -+ |.else -+ |.define fff_resfp, fff_resn -+ |.endif -+ | -+ |.macro math_round, func -+ | .ffunc math_ .. func -+ |.if DUALNUM -+ | cmp dword [BASE+4], LJ_TISNUM; jne >1 -+ | mov RB, dword [BASE]; jmp ->fff_resi -+ |1: -+ | ja ->fff_fallback -+ |.else -+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback -+ |.endif -+ | movsd xmm0, qword [BASE] -+ | call ->vm_ .. func .. 
_sse -+ |.if DUALNUM -+ | cvttsd2si RB, xmm0 -+ | cmp RB, 0x80000000 -+ | jne ->fff_resi -+ | cvtsi2sd xmm1, RB -+ | ucomisd xmm0, xmm1 -+ | jp ->fff_resxmm0 -+ | je ->fff_resi -+ |.endif -+ | jmp ->fff_resxmm0 -+ |.endmacro -+ | -+ | math_round floor -+ | math_round ceil -+ | -+ |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 -+ | -+ |.ffunc math_log -+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. -+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback -+ | movsd xmm0, qword [BASE] -+ |.if not X64 -+ | movsd FPARG1, xmm0 -+ |.endif -+ | mov RB, BASE -+ | call extern log -+ | mov BASE, RB -+ | jmp ->fff_resfp -+ | -+ |.macro math_extern, func -+ | .ffunc_nsse math_ .. func -+ |.if not X64 -+ | movsd FPARG1, xmm0 -+ |.endif -+ | mov RB, BASE -+ | call extern func -+ | mov BASE, RB -+ | jmp ->fff_resfp -+ |.endmacro -+ | -+ |.macro math_extern2, func -+ | .ffunc_nnsse math_ .. func -+ |.if not X64 -+ | movsd FPARG1, xmm0 -+ | movsd FPARG3, xmm1 -+ |.endif -+ | mov RB, BASE -+ | call extern func -+ | mov BASE, RB -+ | jmp ->fff_resfp -+ |.endmacro -+ | -+ | math_extern log10 -+ | math_extern exp -+ | math_extern sin -+ | math_extern cos -+ | math_extern tan -+ | math_extern asin -+ | math_extern acos -+ | math_extern atan -+ | math_extern sinh -+ | math_extern cosh -+ | math_extern tanh -+ | math_extern2 pow -+ | math_extern2 atan2 -+ | math_extern2 fmod -+ | -+ |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn -+ | -+ |.ffunc_1 math_frexp -+ | mov RB, [BASE+4] -+ | cmp RB, LJ_TISNUM; jae ->fff_fallback -+ | mov PC, [BASE-4] -+ | mov RC, [BASE] -+ | mov [BASE-4], RB; mov [BASE-8], RC -+ | shl RB, 1; cmp RB, 0xffe00000; jae >3 -+ | or RC, RB; jz >3 -+ | mov RC, 1022 -+ | cmp RB, 0x00200000; jb >4 -+ |1: -+ | shr RB, 21; sub RB, RC // Extract and unbias exponent. -+ | cvtsi2sd xmm0, RB -+ | mov RB, [BASE-4] -+ | and RB, 0x800fffff // Mask off exponent. -+ | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. -+ | mov [BASE-4], RB -+ |2: -+ | movsd qword [BASE], xmm0 -+ | mov RD, 1+2 -+ | jmp ->fff_res -+ |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. -+ | xorps xmm0, xmm0; jmp <2 -+ |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. -+ | movsd xmm0, qword [BASE] -+ | sseconst_hi xmm1, RBa, 43500000 // 2^54. -+ | mulsd xmm0, xmm1 -+ | movsd qword [BASE-8], xmm0 -+ | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 -+ | -+ |.ffunc_nsse math_modf -+ | mov RB, [BASE+4] -+ | mov PC, [BASE-4] -+ | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? -+ | movaps xmm4, xmm0 -+ | call ->vm_trunc_sse -+ | subsd xmm4, xmm0 -+ |1: -+ | movsd qword [BASE-8], xmm0 -+ | movsd qword [BASE], xmm4 -+ | mov RC, [BASE-4]; mov RB, [BASE+4] -+ | xor RC, RB; js >3 // Need to adjust sign? -+ |2: -+ | mov RD, 1+2 -+ | jmp ->fff_res -+ |3: -+ | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. -+ | jmp <2 -+ |4: -+ | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. -+ | -+ |.macro math_minmax, name, cmovop, sseop -+ | .ffunc name -+ | mov RA, 2 -+ | cmp dword [BASE+4], LJ_TISNUM -+ |.if DUALNUM -+ | jne >4 -+ | mov RB, dword [BASE] -+ |1: // Handle integers. -+ | cmp RA, RD; jae ->fff_resi -+ | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 -+ | cmp RB, dword [BASE+RA*8-8] -+ | cmovop RB, dword [BASE+RA*8-8] -+ | add RA, 1 -+ | jmp <1 -+ |3: -+ | ja ->fff_fallback -+ | // Convert intermediate result to number and continue below. 
-+ | cvtsi2sd xmm0, RB -+ | jmp >6 -+ |4: -+ | ja ->fff_fallback -+ |.else -+ | jae ->fff_fallback -+ |.endif -+ | -+ | movsd xmm0, qword [BASE] -+ |5: // Handle numbers or integers. -+ | cmp RA, RD; jae ->fff_resxmm0 -+ | cmp dword [BASE+RA*8-4], LJ_TISNUM -+ |.if DUALNUM -+ | jb >6 -+ | ja ->fff_fallback -+ | cvtsi2sd xmm1, dword [BASE+RA*8-8] -+ | jmp >7 -+ |.else -+ | jae ->fff_fallback -+ |.endif -+ |6: -+ | movsd xmm1, qword [BASE+RA*8-8] -+ |7: -+ | sseop xmm0, xmm1 -+ | add RA, 1 -+ | jmp <5 -+ |.endmacro -+ | -+ | math_minmax math_min, cmovg, minsd -+ | math_minmax math_max, cmovl, maxsd -+ | -+ |//-- String library ----------------------------------------------------- -+ | -+ |.ffunc string_byte // Only handle the 1-arg case here. -+ | cmp NARGS:RD, 1+1; jne ->fff_fallback -+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback -+ | mov STR:RB, [BASE] -+ | mov PC, [BASE-4] -+ | cmp dword STR:RB->len, 1 -+ | jb ->fff_res0 // Return no results for empty string. -+ | movzx RB, byte STR:RB[1] -+ |.if DUALNUM -+ | jmp ->fff_resi -+ |.else -+ | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 -+ |.endif -+ | -+ |.ffunc string_char // Only handle the 1-arg case here. -+ | ffgccheck -+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. -+ | cmp dword [BASE+4], LJ_TISNUM -+ |.if DUALNUM -+ | jne ->fff_fallback -+ | mov RB, dword [BASE] -+ | cmp RB, 255; ja ->fff_fallback -+ | mov TMP2, RB -+ |.else -+ | jae ->fff_fallback -+ | cvttsd2si RB, qword [BASE] -+ | cmp RB, 255; ja ->fff_fallback -+ | mov TMP2, RB -+ |.endif -+ |.if X64 -+ | mov TMP3, 1 -+ |.else -+ | mov ARG3, 1 -+ |.endif -+ | lea RDa, TMP2 // Points to stack. Little-endian. -+ |->fff_newstr: -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ |.if X64 -+ | mov CARG3d, TMP3 // Zero-extended to size_t. -+ | mov CARG2, RDa // May be 64 bit ptr to stack. -+ | mov CARG1d, L:RB -+ |.else -+ | mov ARG2, RD -+ | mov ARG1, L:RB -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_str_new // (lua_State *L, char *str, size_t l) -+ |->fff_resstr: -+ | // GCstr * returned in eax (RD). -+ | mov BASE, L:RB->base -+ | mov PC, [BASE-4] -+ | mov dword [BASE-4], LJ_TSTR -+ | mov [BASE-8], STR:RD -+ | jmp ->fff_res1 -+ | -+ |.ffunc string_sub -+ | ffgccheck -+ | mov TMP2, -1 -+ | cmp NARGS:RD, 1+2; jb ->fff_fallback -+ | jna >1 -+ | cmp dword [BASE+20], LJ_TISNUM -+ |.if DUALNUM -+ | jne ->fff_fallback -+ | mov RB, dword [BASE+16] -+ | mov TMP2, RB -+ |.else -+ | jae ->fff_fallback -+ | cvttsd2si RB, qword [BASE+16] -+ | mov TMP2, RB -+ |.endif -+ |1: -+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback -+ | cmp dword [BASE+12], LJ_TISNUM -+ |.if DUALNUM -+ | jne ->fff_fallback -+ |.else -+ | jae ->fff_fallback -+ |.endif -+ | mov STR:RB, [BASE] -+ | mov TMP3, STR:RB -+ | mov RB, STR:RB->len -+ |.if DUALNUM -+ | mov RA, dword [BASE+8] -+ |.else -+ | cvttsd2si RA, qword [BASE+8] -+ |.endif -+ | mov RC, TMP2 -+ | cmp RB, RC // len < end? (unsigned compare) -+ | jb >5 -+ |2: -+ | test RA, RA // start <= 0? -+ | jle >7 -+ |3: -+ | mov STR:RB, TMP3 -+ | sub RC, RA // start > end? -+ | jl ->fff_emptystr -+ | lea RB, [STR:RB+RA+#STR-1] -+ | add RC, 1 -+ |4: -+ |.if X64 -+ | mov TMP3, RC -+ |.else -+ | mov ARG3, RC -+ |.endif -+ | mov RD, RB -+ | jmp ->fff_newstr -+ | -+ |5: // Negative end or overflow. -+ | jl >6 -+ | lea RC, [RC+RB+1] // end = end+(len+1) -+ | jmp <2 -+ |6: // Overflow. -+ | mov RC, RB // end = len -+ | jmp <2 -+ | -+ |7: // Negative start or underflow. -+ | je >8 -+ | add RA, RB // start = start+(len+1) -+ | add RA, 1 -+ | jg <3 // start > 0? 
-+ |8: // Underflow. -+ | mov RA, 1 // start = 1 -+ | jmp <3 -+ | -+ |->fff_emptystr: // Range underflow. -+ | xor RC, RC // Zero length. Any ptr in RB is ok. -+ | jmp <4 -+ | -+ |.macro ffstring_op, name -+ | .ffunc_1 string_ .. name -+ | ffgccheck -+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback -+ | mov L:RB, SAVE_L -+ | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] -+ | mov L:RB->base, BASE -+ | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE -+ | mov RC, SBUF:FCARG1->b -+ | mov SBUF:FCARG1->L, L:RB -+ | mov SBUF:FCARG1->p, RC -+ | mov SAVE_PC, PC -+ | call extern lj_buf_putstr_ .. name .. @8 -+ | mov FCARG1, eax -+ | call extern lj_buf_tostr@4 -+ | jmp ->fff_resstr -+ |.endmacro -+ | -+ |ffstring_op reverse -+ |ffstring_op lower -+ |ffstring_op upper -+ | -+ |//-- Bit library -------------------------------------------------------- -+ | -+ |.macro .ffunc_bit, name, kind, fdef -+ | fdef name -+ |.if kind == 2 -+ | sseconst_tobit xmm1, RBa -+ |.endif -+ | cmp dword [BASE+4], LJ_TISNUM -+ |.if DUALNUM -+ | jne >1 -+ | mov RB, dword [BASE] -+ |.if kind > 0 -+ | jmp >2 -+ |.else -+ | jmp ->fff_resbit -+ |.endif -+ |1: -+ | ja ->fff_fallback -+ |.else -+ | jae ->fff_fallback -+ |.endif -+ | movsd xmm0, qword [BASE] -+ |.if kind < 2 -+ | sseconst_tobit xmm1, RBa -+ |.endif -+ | addsd xmm0, xmm1 -+ | movd RB, xmm0 -+ |2: -+ |.endmacro -+ | -+ |.macro .ffunc_bit, name, kind -+ | .ffunc_bit name, kind, .ffunc_1 -+ |.endmacro -+ | -+ |.ffunc_bit bit_tobit, 0 -+ | jmp ->fff_resbit -+ | -+ |.macro .ffunc_bit_op, name, ins -+ | .ffunc_bit name, 2 -+ | mov TMP2, NARGS:RD // Save for fallback. -+ | lea RD, [BASE+NARGS:RD*8-16] -+ |1: -+ | cmp RD, BASE -+ | jbe ->fff_resbit -+ | cmp dword [RD+4], LJ_TISNUM -+ |.if DUALNUM -+ | jne >2 -+ | ins RB, dword [RD] -+ | sub RD, 8 -+ | jmp <1 -+ |2: -+ | ja ->fff_fallback_bit_op -+ |.else -+ | jae ->fff_fallback_bit_op -+ |.endif -+ | movsd xmm0, qword [RD] -+ | addsd xmm0, xmm1 -+ | movd RA, xmm0 -+ | ins RB, RA -+ | sub RD, 8 -+ | jmp <1 -+ |.endmacro -+ | -+ |.ffunc_bit_op bit_band, and -+ |.ffunc_bit_op bit_bor, or -+ |.ffunc_bit_op bit_bxor, xor -+ | -+ |.ffunc_bit bit_bswap, 1 -+ | bswap RB -+ | jmp ->fff_resbit -+ | -+ |.ffunc_bit bit_bnot, 1 -+ | not RB -+ |.if DUALNUM -+ | jmp ->fff_resbit -+ |.else -+ |->fff_resbit: -+ | cvtsi2sd xmm0, RB -+ | jmp ->fff_resxmm0 -+ |.endif -+ | -+ |->fff_fallback_bit_op: -+ | mov NARGS:RD, TMP2 // Restore for fallback -+ | jmp ->fff_fallback -+ | -+ |.macro .ffunc_bit_sh, name, ins -+ |.if DUALNUM -+ | .ffunc_bit name, 1, .ffunc_2 -+ | // Note: no inline conversion from number for 2nd argument! -+ | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback -+ | mov RA, dword [BASE+8] -+ |.else -+ | .ffunc_nnsse name -+ | sseconst_tobit xmm2, RBa -+ | addsd xmm0, xmm2 -+ | addsd xmm1, xmm2 -+ | movd RB, xmm0 -+ | movd RA, xmm1 -+ |.endif -+ | ins RB, cl // Assumes RA is ecx. -+ | jmp ->fff_resbit -+ |.endmacro -+ | -+ |.ffunc_bit_sh bit_lshift, shl -+ |.ffunc_bit_sh bit_rshift, shr -+ |.ffunc_bit_sh bit_arshift, sar -+ |.ffunc_bit_sh bit_rol, rol -+ |.ffunc_bit_sh bit_ror, ror -+ | -+ |//----------------------------------------------------------------------- -+ | -+ |->fff_fallback_2: -+ | mov NARGS:RD, 1+2 // Other args are ignored, anyway. -+ | jmp ->fff_fallback -+ |->fff_fallback_1: -+ | mov NARGS:RD, 1+1 // Other args are ignored, anyway. -+ |->fff_fallback: // Call fast function fallback handler. -+ | // BASE = new base, RD = nargs+1 -+ | mov L:RB, SAVE_L -+ | mov PC, [BASE-4] // Fallback may overwrite PC. 
-+ | mov SAVE_PC, PC // Redundant (but a defined value). -+ | mov L:RB->base, BASE -+ | lea RD, [BASE+NARGS:RD*8-8] -+ | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. -+ | mov L:RB->top, RD -+ | mov CFUNC:RD, [BASE-8] -+ | cmp RA, L:RB->maxstack -+ | ja >5 // Need to grow stack. -+ |.if X64 -+ | mov CARG1d, L:RB -+ |.else -+ | mov ARG1, L:RB -+ |.endif -+ | call aword CFUNC:RD->f // (lua_State *L) -+ | mov BASE, L:RB->base -+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. -+ | test RD, RD; jg ->fff_res // Returned nresults+1? -+ |1: -+ | mov RA, L:RB->top -+ | sub RA, BASE -+ | shr RA, 3 -+ | test RD, RD -+ | lea NARGS:RD, [RA+1] -+ | mov LFUNC:RB, [BASE-8] -+ | jne ->vm_call_tail // Returned -1? -+ | ins_callt // Returned 0: retry fast path. -+ | -+ |// Reconstruct previous base for vmeta_call during tailcall. -+ |->vm_call_tail: -+ | mov RA, BASE -+ | test PC, FRAME_TYPE -+ | jnz >3 -+ | movzx RB, PC_RA -+ | not RBa // Note: ~RB = -(RB+1) -+ | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8 -+ | jmp ->vm_call_dispatch // Resolve again for tailcall. -+ |3: -+ | mov RB, PC -+ | and RB, -8 -+ | sub BASE, RB -+ | jmp ->vm_call_dispatch // Resolve again for tailcall. -+ | -+ |5: // Grow stack for fallback handler. -+ | mov FCARG2, LUA_MINSTACK -+ | mov FCARG1, L:RB -+ | call extern lj_state_growstack@8 // (lua_State *L, int n) -+ | mov BASE, L:RB->base -+ | xor RD, RD // Simulate a return 0. -+ | jmp <1 // Dumb retry (goes through ff first). -+ | -+ |->fff_gcstep: // Call GC step function. -+ | // BASE = new base, RD = nargs+1 -+ | pop RBa // Must keep stack at same level. -+ | mov TMPa, RBa // Save return address -+ | mov L:RB, SAVE_L -+ | mov SAVE_PC, PC // Redundant (but a defined value). -+ | mov L:RB->base, BASE -+ | lea RD, [BASE+NARGS:RD*8-8] -+ | mov FCARG1, L:RB -+ | mov L:RB->top, RD -+ | call extern lj_gc_step@4 // (lua_State *L) -+ | mov BASE, L:RB->base -+ | mov RD, L:RB->top -+ | sub RD, BASE -+ | shr RD, 3 -+ | add NARGS:RD, 1 -+ | mov RBa, TMPa -+ | push RBa // Restore return address. -+ | ret -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Special dispatch targets ------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->vm_record: // Dispatch target for recording phase. -+ |.if JIT -+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] -+ | test RDL, HOOK_VMEVENT // No recording while in vmevent. -+ | jnz >5 -+ | // Decrement the hookcount for consistency, but always do the call. -+ | test RDL, HOOK_ACTIVE -+ | jnz >1 -+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT -+ | jz >1 -+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)] -+ | jmp >1 -+ |.endif -+ | -+ |->vm_rethook: // Dispatch target for return hooks. -+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] -+ | test RDL, HOOK_ACTIVE // Hook already active? -+ | jnz >5 -+ | jmp >1 -+ | -+ |->vm_inshook: // Dispatch target for instr/line hooks. -+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] -+ | test RDL, HOOK_ACTIVE // Hook already active? -+ | jnz >5 -+ | -+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT -+ | jz >5 -+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)] -+ | jz >1 -+ | test RDL, LUA_MASKLINE -+ | jz >5 -+ |1: -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov FCARG2, PC // Caveat: FCARG2 == BASE -+ | mov FCARG1, L:RB -+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 
-+ | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) -+ |3: -+ | mov BASE, L:RB->base -+ |4: -+ | movzx RA, PC_RA -+ |5: -+ | movzx OP, PC_OP -+ | movzx RD, PC_RD -+ |.if X64 -+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. -+ |.else -+ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins. -+ |.endif -+ | -+ |->cont_hook: // Continue from hook yield. -+ | add PC, 4 -+ | mov RA, [RB-24] -+ | mov MULTRES, RA // Restore MULTRES for *M ins. -+ | jmp <4 -+ | -+ |->vm_hotloop: // Hot loop counter underflow. -+ |.if JIT -+ | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). -+ | mov RB, LFUNC:RB->pc -+ | movzx RD, byte [RB+PC2PROTO(framesize)] -+ | lea RD, [BASE+RD*8] -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov L:RB->top, RD -+ | mov FCARG2, PC -+ | lea FCARG1, [DISPATCH+GG_DISP2J] -+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa -+ | mov SAVE_PC, PC -+ | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) -+ | jmp <3 -+ |.endif -+ | -+ |->vm_callhook: // Dispatch target for call hooks. -+ | mov SAVE_PC, PC -+ |.if JIT -+ | jmp >1 -+ |.endif -+ | -+ |->vm_hotcall: // Hot call counter underflow. -+ |.if JIT -+ | mov SAVE_PC, PC -+ | or PC, 1 // Marker for hot call. -+ |1: -+ |.endif -+ | lea RD, [BASE+NARGS:RD*8-8] -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov L:RB->top, RD -+ | mov FCARG2, PC -+ | mov FCARG1, L:RB -+ | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) -+ | // ASMFunction returned in eax/rax (RDa). -+ | mov SAVE_PC, 0 // Invalidate for subsequent line hook. -+ |.if JIT -+ | and PC, -2 -+ |.endif -+ | mov BASE, L:RB->base -+ | mov RAa, RDa -+ | mov RD, L:RB->top -+ | sub RD, BASE -+ | mov RBa, RAa -+ | movzx RA, PC_RA -+ | shr RD, 3 -+ | add NARGS:RD, 1 -+ | jmp RBa -+ | -+ |->cont_stitch: // Trace stitching. -+ |.if JIT -+ | // BASE = base, RC = result, RB = mbase -+ | mov TRACE:RA, [RB-24] // Save previous trace. -+ | mov TMP1, TRACE:RA -+ | mov TMP3, DISPATCH // Need one more register. -+ | mov DISPATCH, MULTRES -+ | movzx RA, PC_RA -+ | lea RA, [BASE+RA*8] // Call base. -+ | sub DISPATCH, 1 -+ | jz >2 -+ |1: // Move results down. -+ |.if X64 -+ | mov RBa, [RC] -+ | mov [RA], RBa -+ |.else -+ | mov RB, [RC] -+ | mov [RA], RB -+ | mov RB, [RC+4] -+ | mov [RA+4], RB -+ |.endif -+ | add RC, 8 -+ | add RA, 8 -+ | sub DISPATCH, 1 -+ | jnz <1 -+ |2: -+ | movzx RC, PC_RA -+ | movzx RB, PC_RB -+ | add RC, RB -+ | lea RC, [BASE+RC*8-8] -+ |3: -+ | cmp RC, RA -+ | ja >9 // More results wanted? -+ | -+ | mov DISPATCH, TMP3 -+ | mov TRACE:RD, TMP1 // Get previous trace. -+ | movzx RB, word TRACE:RD->traceno -+ | movzx RD, word TRACE:RD->link -+ | cmp RD, RB -+ | je ->cont_nop // Blacklisted. -+ | test RD, RD -+ | jne =>BC_JLOOP // Jump to stitched trace. -+ | -+ | // Stitch a new trace to the previous trace. -+ | mov [DISPATCH+DISPATCH_J(exitno)], RB -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov FCARG2, PC -+ | lea FCARG1, [DISPATCH+GG_DISP2J] -+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa -+ | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) -+ | mov BASE, L:RB->base -+ | jmp ->cont_nop -+ | -+ |9: // Fill up results with nil. -+ | mov dword [RA+4], LJ_TNIL -+ | add RA, 8 -+ | jmp <3 -+ |.endif -+ | -+ |->vm_profhook: // Dispatch target for profiler hook. 
-+#if LJ_HASPROFILE -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov FCARG2, PC // Caveat: FCARG2 == BASE -+ | mov FCARG1, L:RB -+ | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) -+ | mov BASE, L:RB->base -+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. -+ | sub PC, 4 -+ | jmp ->cont_nop -+#endif -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Trace exit handler ------------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |// Called from an exit stub with the exit number on the stack. -+ |// The 16 bit exit number is stored with two (sign-extended) push imm8. -+ |->vm_exit_handler: -+ |.if JIT -+ |.if X64 -+ | push r13; push r12 -+ | push r11; push r10; push r9; push r8 -+ | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp -+ | push rbx; push rdx; push rcx; push rax -+ | movzx RC, byte [rbp-8] // Reconstruct exit number. -+ | mov RCH, byte [rbp-16] -+ | mov [rbp-8], r15; mov [rbp-16], r14 -+ |.else -+ | push ebp; lea ebp, [esp+12]; push ebp -+ | push ebx; push edx; push ecx; push eax -+ | movzx RC, byte [ebp-4] // Reconstruct exit number. -+ | mov RCH, byte [ebp-8] -+ | mov [ebp-4], edi; mov [ebp-8], esi -+ |.endif -+ | // Caveat: DISPATCH is ebx. -+ | mov DISPATCH, [ebp] -+ | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. -+ | set_vmstate EXIT -+ | mov [DISPATCH+DISPATCH_J(exitno)], RC -+ | mov [DISPATCH+DISPATCH_J(parent)], RA -+ |.if X64 -+ |.if X64WIN -+ | sub rsp, 16*8+4*8 // Room for SSE regs + save area. -+ |.else -+ | sub rsp, 16*8 // Room for SSE regs. -+ |.endif -+ | add rbp, -128 -+ | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 -+ | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 -+ | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 -+ | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 -+ | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 -+ | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 -+ | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 -+ | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 -+ |.else -+ | sub esp, 8*8+16 // Room for SSE regs + args. -+ | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 -+ | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 -+ | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 -+ | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 -+ |.endif -+ | // Caveat: RB is ebp. -+ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] -+ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] -+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa -+ | mov L:RB->base, BASE -+ |.if X64WIN -+ | lea CARG2, [rsp+4*8] -+ |.elif X64 -+ | mov CARG2, rsp -+ |.else -+ | lea FCARG2, [esp+16] -+ |.endif -+ | lea FCARG1, [DISPATCH+GG_DISP2J] -+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 -+ | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) -+ | // MULTRES or negated error code returned in eax (RD). -+ | mov RAa, L:RB->cframe -+ | and RAa, CFRAME_RAWMASK -+ |.if X64WIN -+ | // Reposition stack later. -+ |.elif X64 -+ | mov rsp, RAa // Reposition stack to C frame. -+ |.else -+ | mov esp, RAa // Reposition stack to C frame. -+ |.endif -+ | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). -+ | mov BASE, L:RB->base -+ | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC. 
-+ |.if X64 -+ | jmp >1 -+ |.endif -+ |.endif -+ |->vm_exit_interp: -+ | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. -+ |.if JIT -+ |.if X64 -+ | // Restore additional callee-save registers only used in compiled code. -+ |.if X64WIN -+ | lea RAa, [rsp+9*16+4*8] -+ |1: -+ | movdqa xmm15, [RAa-9*16] -+ | movdqa xmm14, [RAa-8*16] -+ | movdqa xmm13, [RAa-7*16] -+ | movdqa xmm12, [RAa-6*16] -+ | movdqa xmm11, [RAa-5*16] -+ | movdqa xmm10, [RAa-4*16] -+ | movdqa xmm9, [RAa-3*16] -+ | movdqa xmm8, [RAa-2*16] -+ | movdqa xmm7, [RAa-1*16] -+ | mov rsp, RAa // Reposition stack to C frame. -+ | movdqa xmm6, [RAa] -+ | mov r15, CSAVE_3 -+ | mov r14, CSAVE_4 -+ |.else -+ | add rsp, 16 // Reposition stack to C frame. -+ |1: -+ |.endif -+ | mov r13, TMPa -+ | mov r12, TMPQ -+ |.endif -+ | test RD, RD; js >9 // Check for error from exit. -+ | mov L:RB, SAVE_L -+ | mov MULTRES, RD -+ | mov LFUNC:KBASE, [BASE-8] -+ | mov KBASE, LFUNC:KBASE->pc -+ | mov KBASE, [KBASE+PC2PROTO(k)] -+ | mov L:RB->base, BASE -+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 -+ | set_vmstate INTERP -+ | // Modified copy of ins_next which handles function header dispatch, too. -+ | mov RC, [PC] -+ | movzx RA, RCH -+ | movzx OP, RCL -+ | add PC, 4 -+ | shr RC, 16 -+ | cmp OP, BC_FUNCF // Function header? -+ | jb >3 -+ | cmp OP, BC_FUNCC+2 // Fast function? -+ | jae >4 -+ |2: -+ | mov RC, MULTRES // RC/RD holds nres+1. -+ |3: -+ |.if X64 -+ | jmp aword [DISPATCH+OP*8] -+ |.else -+ | jmp aword [DISPATCH+OP*4] -+ |.endif -+ | -+ |4: // Check frame below fast function. -+ | mov RC, [BASE-4] -+ | test RC, FRAME_TYPE -+ | jnz <2 // Trace stitching continuation? -+ | // Otherwise set KBASE for Lua function below fast function. -+ | movzx RC, byte [RC-3] -+ | not RCa -+ | mov LFUNC:KBASE, [BASE+RC*8-8] -+ | mov KBASE, LFUNC:KBASE->pc -+ | mov KBASE, [KBASE+PC2PROTO(k)] -+ | jmp <2 -+ | -+ |9: // Rethrow error from the right C frame. -+ | neg RD -+ | mov FCARG1, L:RB -+ | mov FCARG2, RD -+ | call extern lj_err_throw@8 // (lua_State *L, int errcode) -+ |.endif -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Math helper functions ---------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |// FP value rounding. Called by math.floor/math.ceil fast functions -+ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. -+ |.macro vm_round, name, mode, cond -+ |->name: -+ |.if not X64 and cond -+ | movsd xmm0, qword [esp+4] -+ | call ->name .. _sse -+ | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. -+ | fld qword [esp+4] -+ | ret -+ |.endif -+ | -+ |->name .. _sse: -+ | sseconst_abs xmm2, RDa -+ | sseconst_2p52 xmm3, RDa -+ | movaps xmm1, xmm0 -+ | andpd xmm1, xmm2 // |x| -+ | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. -+ | jbe >1 -+ | andnpd xmm2, xmm0 // Isolate sign bit. -+ |.if mode == 2 // trunc(x)? -+ | movaps xmm0, xmm1 -+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 -+ | subsd xmm1, xmm3 -+ | sseconst_1 xmm3, RDa -+ | cmpsd xmm0, xmm1, 1 // |x| < result? -+ | andpd xmm0, xmm3 -+ | subsd xmm1, xmm0 // If yes, subtract -1. -+ | orpd xmm1, xmm2 // Merge sign bit back in. -+ |.else -+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 -+ | subsd xmm1, xmm3 -+ | orpd xmm1, xmm2 // Merge sign bit back in. -+ | .if mode == 1 // ceil(x)? -+ | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0. -+ | cmpsd xmm0, xmm1, 6 // x > result? -+ | .else // floor(x)? 
-+ | sseconst_1 xmm2, RDa -+ | cmpsd xmm0, xmm1, 1 // x < result? -+ | .endif -+ | andpd xmm0, xmm2 -+ | subsd xmm1, xmm0 // If yes, subtract +-1. -+ |.endif -+ | movaps xmm0, xmm1 -+ |1: -+ | ret -+ |.endmacro -+ | -+ | vm_round vm_floor, 0, 1 -+ | vm_round vm_ceil, 1, JIT -+ | vm_round vm_trunc, 2, JIT -+ | -+ |// FP modulo x%y. Called by BC_MOD* and vm_arith. -+ |->vm_mod: -+ |// Args in xmm0/xmm1, return value in xmm0. -+ |// Caveat: xmm0-xmm5 and RC (eax) modified! -+ | movaps xmm5, xmm0 -+ | divsd xmm0, xmm1 -+ | sseconst_abs xmm2, RDa -+ | sseconst_2p52 xmm3, RDa -+ | movaps xmm4, xmm0 -+ | andpd xmm4, xmm2 // |x/y| -+ | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. -+ | jbe >1 -+ | andnpd xmm2, xmm0 // Isolate sign bit. -+ | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 -+ | subsd xmm4, xmm3 -+ | orpd xmm4, xmm2 // Merge sign bit back in. -+ | sseconst_1 xmm2, RDa -+ | cmpsd xmm0, xmm4, 1 // x/y < result? -+ | andpd xmm0, xmm2 -+ | subsd xmm4, xmm0 // If yes, subtract 1.0. -+ | movaps xmm0, xmm5 -+ | mulsd xmm1, xmm4 -+ | subsd xmm0, xmm1 -+ | ret -+ |1: -+ | mulsd xmm1, xmm0 -+ | movaps xmm0, xmm5 -+ | subsd xmm0, xmm1 -+ | ret -+ | -+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. -+ |->vm_powi_sse: -+ | cmp eax, 1; jle >6 // i<=1? -+ | // Now 1 < (unsigned)i <= 0x80000000. -+ |1: // Handle leading zeros. -+ | test eax, 1; jnz >2 -+ | mulsd xmm0, xmm0 -+ | shr eax, 1 -+ | jmp <1 -+ |2: -+ | shr eax, 1; jz >5 -+ | movaps xmm1, xmm0 -+ |3: // Handle trailing bits. -+ | mulsd xmm0, xmm0 -+ | shr eax, 1; jz >4 -+ | jnc <3 -+ | mulsd xmm1, xmm0 -+ | jmp <3 -+ |4: -+ | mulsd xmm0, xmm1 -+ |5: -+ | ret -+ |6: -+ | je <5 // x^1 ==> x -+ | jb >7 // x^0 ==> 1 -+ | neg eax -+ | call <1 -+ | sseconst_1 xmm1, RDa -+ | divsd xmm1, xmm0 -+ | movaps xmm0, xmm1 -+ | ret -+ |7: -+ | sseconst_1 xmm0, RDa -+ | ret -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Miscellaneous functions -------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) -+ |->vm_cpuid: -+ |.if X64 -+ | mov eax, CARG1d -+ | .if X64WIN; push rsi; mov rsi, CARG2; .endif -+ | push rbx -+ | xor ecx, ecx -+ | cpuid -+ | mov [rsi], eax -+ | mov [rsi+4], ebx -+ | mov [rsi+8], ecx -+ | mov [rsi+12], edx -+ | pop rbx -+ | .if X64WIN; pop rsi; .endif -+ | ret -+ |.else -+ | pushfd -+ | pop edx -+ | mov ecx, edx -+ | xor edx, 0x00200000 // Toggle ID bit in flags. -+ | push edx -+ | popfd -+ | pushfd -+ | pop edx -+ | xor eax, eax // Zero means no features supported. -+ | cmp ecx, edx -+ | jz >1 // No ID toggle means no CPUID support. -+ | mov eax, [esp+4] // Argument 1 is function number. -+ | push edi -+ | push ebx -+ | xor ecx, ecx -+ | cpuid -+ | mov edi, [esp+16] // Argument 2 is result area. 
-+ | mov [edi], eax -+ | mov [edi+4], ebx -+ | mov [edi+8], ecx -+ | mov [edi+12], edx -+ | pop ebx -+ | pop edi -+ |1: -+ | ret -+ |.endif -+ | -+ |//----------------------------------------------------------------------- -+ |//-- Assertions --------------------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |->assert_bad_for_arg_type: -+#ifdef LUA_USE_ASSERT -+ | int3 -+#endif -+ | int3 -+ | -+ |//----------------------------------------------------------------------- -+ |//-- FFI helper functions ----------------------------------------------- -+ |//----------------------------------------------------------------------- -+ | -+ |// Handler for callback functions. Callback slot number in ah/al. -+ |->vm_ffi_callback: -+ |.if FFI -+ |.type CTSTATE, CTState, PC -+ |.if not X64 -+ | sub esp, 16 // Leave room for SAVE_ERRF etc. -+ |.endif -+ | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. -+ | lea DISPATCH, [ebp+GG_G2DISP] -+ | mov CTSTATE, GL:ebp->ctype_state -+ | movzx eax, ax -+ | mov CTSTATE->cb.slot, eax -+ |.if X64 -+ | mov CTSTATE->cb.gpr[0], CARG1 -+ | mov CTSTATE->cb.gpr[1], CARG2 -+ | mov CTSTATE->cb.gpr[2], CARG3 -+ | mov CTSTATE->cb.gpr[3], CARG4 -+ | movsd qword CTSTATE->cb.fpr[0], xmm0 -+ | movsd qword CTSTATE->cb.fpr[1], xmm1 -+ | movsd qword CTSTATE->cb.fpr[2], xmm2 -+ | movsd qword CTSTATE->cb.fpr[3], xmm3 -+ |.if X64WIN -+ | lea rax, [rsp+CFRAME_SIZE+4*8] -+ |.else -+ | lea rax, [rsp+CFRAME_SIZE] -+ | mov CTSTATE->cb.gpr[4], CARG5 -+ | mov CTSTATE->cb.gpr[5], CARG6 -+ | movsd qword CTSTATE->cb.fpr[4], xmm4 -+ | movsd qword CTSTATE->cb.fpr[5], xmm5 -+ | movsd qword CTSTATE->cb.fpr[6], xmm6 -+ | movsd qword CTSTATE->cb.fpr[7], xmm7 -+ |.endif -+ | mov CTSTATE->cb.stack, rax -+ | mov CARG2, rsp -+ |.else -+ | lea eax, [esp+CFRAME_SIZE+16] -+ | mov CTSTATE->cb.gpr[0], FCARG1 -+ | mov CTSTATE->cb.gpr[1], FCARG2 -+ | mov CTSTATE->cb.stack, eax -+ | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp. -+ | mov FCARG2, [esp+CFRAME_SIZE+8] -+ | mov SAVE_RET, FCARG1 -+ | mov SAVE_R4, FCARG2 -+ | mov FCARG2, esp -+ |.endif -+ | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. -+ | mov FCARG1, CTSTATE -+ | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) -+ | // lua_State * returned in eax (RD). -+ | set_vmstate INTERP -+ | mov BASE, L:RD->base -+ | mov RD, L:RD->top -+ | sub RD, BASE -+ | mov LFUNC:RB, [BASE-8] -+ | shr RD, 3 -+ | add RD, 1 -+ | ins_callt -+ |.endif -+ | -+ |->cont_ffi_callback: // Return from FFI callback. -+ |.if FFI -+ | mov L:RA, SAVE_L -+ | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] -+ | mov aword CTSTATE->L, L:RAa -+ | mov L:RA->base, BASE -+ | mov L:RA->top, RB -+ | mov FCARG1, CTSTATE -+ | mov FCARG2, RC -+ | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o) -+ |.if X64 -+ | mov rax, CTSTATE->cb.gpr[0] -+ | movsd xmm0, qword CTSTATE->cb.fpr[0] -+ | jmp ->vm_leave_unw -+ |.else -+ | mov L:RB, SAVE_L -+ | mov eax, CTSTATE->cb.gpr[0] -+ | mov edx, CTSTATE->cb.gpr[1] -+ | cmp dword CTSTATE->cb.gpr[2], 1 -+ | jb >7 -+ | je >6 -+ | fld qword CTSTATE->cb.fpr[0].d -+ | jmp >7 -+ |6: -+ | fld dword CTSTATE->cb.fpr[0].f -+ |7: -+ | mov ecx, L:RB->top -+ | movzx ecx, word [ecx+6] // Get stack adjustment and copy up. -+ | mov SAVE_L, ecx // Must be one slot above SAVE_RET -+ | restoreregs -+ | pop ecx // Move return addr from SAVE_RET. -+ | add esp, [esp] // Adjust stack. 
-+ | add esp, 16 -+ | push ecx -+ | ret -+ |.endif -+ |.endif -+ | -+ |->vm_ffi_call@4: // Call C function via FFI. -+ | // Caveat: needs special frame unwinding, see below. -+ |.if FFI -+ |.if X64 -+ | .type CCSTATE, CCallState, rbx -+ | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 -+ |.else -+ | .type CCSTATE, CCallState, ebx -+ | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 -+ |.endif -+ | -+ | // Readjust stack. -+ |.if X64 -+ | mov eax, CCSTATE->spadj -+ | sub rsp, rax -+ |.else -+ | sub esp, CCSTATE->spadj -+ |.if WIN -+ | mov CCSTATE->spadj, esp -+ |.endif -+ |.endif -+ | -+ | // Copy stack slots. -+ | movzx ecx, byte CCSTATE->nsp -+ | sub ecx, 1 -+ | js >2 -+ |1: -+ |.if X64 -+ | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] -+ | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax -+ |.else -+ | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] -+ | mov [esp+ecx*4], eax -+ |.endif -+ | sub ecx, 1 -+ | jns <1 -+ |2: -+ | -+ |.if X64 -+ | movzx eax, byte CCSTATE->nfpr -+ | mov CARG1, CCSTATE->gpr[0] -+ | mov CARG2, CCSTATE->gpr[1] -+ | mov CARG3, CCSTATE->gpr[2] -+ | mov CARG4, CCSTATE->gpr[3] -+ |.if not X64WIN -+ | mov CARG5, CCSTATE->gpr[4] -+ | mov CARG6, CCSTATE->gpr[5] -+ |.endif -+ | test eax, eax; jz >5 -+ | movaps xmm0, CCSTATE->fpr[0] -+ | movaps xmm1, CCSTATE->fpr[1] -+ | movaps xmm2, CCSTATE->fpr[2] -+ | movaps xmm3, CCSTATE->fpr[3] -+ |.if not X64WIN -+ | cmp eax, 4; jbe >5 -+ | movaps xmm4, CCSTATE->fpr[4] -+ | movaps xmm5, CCSTATE->fpr[5] -+ | movaps xmm6, CCSTATE->fpr[6] -+ | movaps xmm7, CCSTATE->fpr[7] -+ |.endif -+ |5: -+ |.else -+ | mov FCARG1, CCSTATE->gpr[0] -+ | mov FCARG2, CCSTATE->gpr[1] -+ |.endif -+ | -+ | call aword CCSTATE->func -+ | -+ |.if X64 -+ | mov CCSTATE->gpr[0], rax -+ | movaps CCSTATE->fpr[0], xmm0 -+ |.if not X64WIN -+ | mov CCSTATE->gpr[1], rdx -+ | movaps CCSTATE->fpr[1], xmm1 -+ |.endif -+ |.else -+ | mov CCSTATE->gpr[0], eax -+ | mov CCSTATE->gpr[1], edx -+ | cmp byte CCSTATE->resx87, 1 -+ | jb >7 -+ | je >6 -+ | fstp qword CCSTATE->fpr[0].d[0] -+ | jmp >7 -+ |6: -+ | fstp dword CCSTATE->fpr[0].f[0] -+ |7: -+ |.if WIN -+ | sub CCSTATE->spadj, esp -+ |.endif -+ |.endif -+ | -+ |.if X64 -+ | mov rbx, [rbp-8]; leave; ret -+ |.else -+ | mov ebx, [ebp-4]; leave; ret -+ |.endif -+ |.endif -+ |// Note: vm_ffi_call must be the last function in this object file! -+ | -+ |//----------------------------------------------------------------------- -+} -+ -+/* Generate the code for a single instruction. */ -+static void build_ins(BuildCtx *ctx, BCOp op, int defop) -+{ -+ int vk = 0; -+ |// Note: aligning all instructions does not pay off. -+ |=>defop: -+ -+ switch (op) { -+ -+ /* -- Comparison ops ---------------------------------------------------- */ -+ -+ /* Remember: all ops branch for a true comparison, fall through otherwise. */ -+ -+ |.macro jmp_comp, lt, ge, le, gt, target -+ ||switch (op) { -+ ||case BC_ISLT: -+ | lt target -+ ||break; -+ ||case BC_ISGE: -+ | ge target -+ ||break; -+ ||case BC_ISLE: -+ | le target -+ ||break; -+ ||case BC_ISGT: -+ | gt target -+ ||break; -+ ||default: break; /* Shut up GCC. */ -+ ||} -+ |.endmacro -+ -+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: -+ | // RA = src1, RD = src2, JMP with RD = target -+ | ins_AD -+ |.if DUALNUM -+ | checkint RA, >7 -+ | checkint RD, >8 -+ | mov RB, dword [BASE+RA*8] -+ | add PC, 4 -+ | cmp RB, dword [BASE+RD*8] -+ | jmp_comp jge, jl, jg, jle, >9 -+ |6: -+ | movzx RD, PC_RD -+ | branchPC RD -+ |9: -+ | ins_next -+ | -+ |7: // RA is not an integer. 
-+ | ja ->vmeta_comp -+ | // RA is a number. -+ | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp -+ | // RA is a number, RD is an integer. -+ | cvtsi2sd xmm0, dword [BASE+RD*8] -+ | jmp >2 -+ | -+ |8: // RA is an integer, RD is not an integer. -+ | ja ->vmeta_comp -+ | // RA is an integer, RD is a number. -+ | cvtsi2sd xmm1, dword [BASE+RA*8] -+ | movsd xmm0, qword [BASE+RD*8] -+ | add PC, 4 -+ | ucomisd xmm0, xmm1 -+ | jmp_comp jbe, ja, jb, jae, <9 -+ | jmp <6 -+ |.else -+ | checknum RA, ->vmeta_comp -+ | checknum RD, ->vmeta_comp -+ |.endif -+ |1: -+ | movsd xmm0, qword [BASE+RD*8] -+ |2: -+ | add PC, 4 -+ | ucomisd xmm0, qword [BASE+RA*8] -+ |3: -+ | // Unordered: all of ZF CF PF set, ordered: PF clear. -+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. -+ |.if DUALNUM -+ | jmp_comp jbe, ja, jb, jae, <9 -+ | jmp <6 -+ |.else -+ | jmp_comp jbe, ja, jb, jae, >1 -+ | movzx RD, PC_RD -+ | branchPC RD -+ |1: -+ | ins_next -+ |.endif -+ break; -+ -+ case BC_ISEQV: case BC_ISNEV: -+ vk = op == BC_ISEQV; -+ | ins_AD // RA = src1, RD = src2, JMP with RD = target -+ | mov RB, [BASE+RD*8+4] -+ | add PC, 4 -+ |.if DUALNUM -+ | cmp RB, LJ_TISNUM; jne >7 -+ | checkint RA, >8 -+ | mov RB, dword [BASE+RD*8] -+ | cmp RB, dword [BASE+RA*8] -+ if (vk) { -+ | jne >9 -+ } else { -+ | je >9 -+ } -+ | movzx RD, PC_RD -+ | branchPC RD -+ |9: -+ | ins_next -+ | -+ |7: // RD is not an integer. -+ | ja >5 -+ | // RD is a number. -+ | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 -+ | // RD is a number, RA is an integer. -+ | cvtsi2sd xmm0, dword [BASE+RA*8] -+ | jmp >2 -+ | -+ |8: // RD is an integer, RA is not an integer. -+ | ja >5 -+ | // RD is an integer, RA is a number. -+ | cvtsi2sd xmm0, dword [BASE+RD*8] -+ | ucomisd xmm0, qword [BASE+RA*8] -+ | jmp >4 -+ | -+ |.else -+ | cmp RB, LJ_TISNUM; jae >5 -+ | checknum RA, >5 -+ |.endif -+ |1: -+ | movsd xmm0, qword [BASE+RA*8] -+ |2: -+ | ucomisd xmm0, qword [BASE+RD*8] -+ |4: -+ iseqne_fp: -+ if (vk) { -+ | jp >2 // Unordered means not equal. -+ | jne >2 -+ } else { -+ | jp >2 // Unordered means not equal. -+ | je >1 -+ } -+ iseqne_end: -+ if (vk) { -+ |1: // EQ: Branch to the target. -+ | movzx RD, PC_RD -+ | branchPC RD -+ |2: // NE: Fallthrough to next instruction. -+ |.if not FFI -+ |3: -+ |.endif -+ } else { -+ |.if not FFI -+ |3: -+ |.endif -+ |2: // NE: Branch to the target. -+ | movzx RD, PC_RD -+ | branchPC RD -+ |1: // EQ: Fallthrough to next instruction. -+ } -+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || -+ op == BC_ISEQN || op == BC_ISNEN)) { -+ | jmp <9 -+ } else { -+ | ins_next -+ } -+ | -+ if (op == BC_ISEQV || op == BC_ISNEV) { -+ |5: // Either or both types are not numbers. -+ |.if FFI -+ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd -+ | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd -+ |.endif -+ | checktp RA, RB // Compare types. -+ | jne <2 // Not the same type? -+ | cmp RB, LJ_TISPRI -+ | jae <1 // Same type and primitive type? -+ | -+ | // Same types and not a primitive type. Compare GCobj or pvalue. -+ | mov RA, [BASE+RA*8] -+ | mov RD, [BASE+RD*8] -+ | cmp RA, RD -+ | je <1 // Same GCobjs or pvalues? -+ | cmp RB, LJ_TISTABUD -+ | ja <2 // Different objects and not table/ud? -+ |.if X64 -+ | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata. -+ | jb <2 -+ |.endif -+ | -+ | // Different tables or userdatas. Need to check __eq metamethod. -+ | // Field metatable must be at same offset for GCtab and GCudata! -+ | mov TAB:RB, TAB:RA->metatable -+ | test TAB:RB, TAB:RB -+ | jz <2 // No metatable? 
-+ | test byte TAB:RB->nomm, 1<<MM_eq -+ | jnz <2 // Or 'no __eq' flag set? -+ if (vk) { -+ | xor RB, RB // ne = 0 -+ } else { -+ | mov RB, 1 // ne = 1 -+ } -+ | jmp ->vmeta_equal // Handle __eq metamethod. -+ } else { -+ |.if FFI -+ |3: -+ | cmp RB, LJ_TCDATA -+ if (LJ_DUALNUM && vk) { -+ | jne <9 -+ } else { -+ | jne <2 -+ } -+ | jmp ->vmeta_equal_cd -+ |.endif -+ } -+ break; -+ case BC_ISEQS: case BC_ISNES: -+ vk = op == BC_ISEQS; -+ | ins_AND // RA = src, RD = str const, JMP with RD = target -+ | mov RB, [BASE+RA*8+4] -+ | add PC, 4 -+ | cmp RB, LJ_TSTR; jne >3 -+ | mov RA, [BASE+RA*8] -+ | cmp RA, [KBASE+RD*4] -+ iseqne_test: -+ if (vk) { -+ | jne >2 -+ } else { -+ | je >1 -+ } -+ goto iseqne_end; -+ case BC_ISEQN: case BC_ISNEN: -+ vk = op == BC_ISEQN; -+ | ins_AD // RA = src, RD = num const, JMP with RD = target -+ | mov RB, [BASE+RA*8+4] -+ | add PC, 4 -+ |.if DUALNUM -+ | cmp RB, LJ_TISNUM; jne >7 -+ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 -+ | mov RB, dword [KBASE+RD*8] -+ | cmp RB, dword [BASE+RA*8] -+ if (vk) { -+ | jne >9 -+ } else { -+ | je >9 -+ } -+ | movzx RD, PC_RD -+ | branchPC RD -+ |9: -+ | ins_next -+ | -+ |7: // RA is not an integer. -+ | ja >3 -+ | // RA is a number. -+ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 -+ | // RA is a number, RD is an integer. -+ | cvtsi2sd xmm0, dword [KBASE+RD*8] -+ | jmp >2 -+ | -+ |8: // RA is an integer, RD is a number. -+ | cvtsi2sd xmm0, dword [BASE+RA*8] -+ | ucomisd xmm0, qword [KBASE+RD*8] -+ | jmp >4 -+ |.else -+ | cmp RB, LJ_TISNUM; jae >3 -+ |.endif -+ |1: -+ | movsd xmm0, qword [KBASE+RD*8] -+ |2: -+ | ucomisd xmm0, qword [BASE+RA*8] -+ |4: -+ goto iseqne_fp; -+ case BC_ISEQP: case BC_ISNEP: -+ vk = op == BC_ISEQP; -+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target -+ | mov RB, [BASE+RA*8+4] -+ | add PC, 4 -+ | cmp RB, RD -+ if (!LJ_HASFFI) goto iseqne_test; -+ if (vk) { -+ | jne >3 -+ | movzx RD, PC_RD -+ | branchPC RD -+ |2: -+ | ins_next -+ |3: -+ | cmp RB, LJ_TCDATA; jne <2 -+ | jmp ->vmeta_equal_cd -+ } else { -+ | je >2 -+ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd -+ | movzx RD, PC_RD -+ | branchPC RD -+ |2: -+ | ins_next -+ } -+ break; -+ -+ /* -- Unary test and copy ops ------------------------------------------- */ -+ -+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: -+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target -+ | mov RB, [BASE+RD*8+4] -+ | add PC, 4 -+ | cmp RB, LJ_TISTRUECOND -+ if (op == BC_IST || op == BC_ISTC) { -+ | jae >1 -+ } else { -+ | jb >1 -+ } -+ if (op == BC_ISTC || op == BC_ISFC) { -+ | mov [BASE+RA*8+4], RB -+ | mov RB, [BASE+RD*8] -+ | mov [BASE+RA*8], RB -+ } -+ | movzx RD, PC_RD -+ | branchPC RD -+ |1: // Fallthrough to the next instruction. 
-+ | ins_next -+ break; -+ -+ case BC_ISTYPE: -+ | ins_AD // RA = src, RD = -type -+ | add RD, [BASE+RA*8+4] -+ | jne ->vmeta_istype -+ | ins_next -+ break; -+ case BC_ISNUM: -+ | ins_AD // RA = src, RD = -(TISNUM-1) -+ | checknum RA, ->vmeta_istype -+ | ins_next -+ break; -+ -+ /* -- Unary ops --------------------------------------------------------- */ -+ -+ case BC_MOV: -+ | ins_AD // RA = dst, RD = src -+ |.if X64 -+ | mov RBa, [BASE+RD*8] -+ | mov [BASE+RA*8], RBa -+ |.else -+ | mov RB, [BASE+RD*8+4] -+ | mov RD, [BASE+RD*8] -+ | mov [BASE+RA*8+4], RB -+ | mov [BASE+RA*8], RD -+ |.endif -+ | ins_next_ -+ break; -+ case BC_NOT: -+ | ins_AD // RA = dst, RD = src -+ | xor RB, RB -+ | checktp RD, LJ_TISTRUECOND -+ | adc RB, LJ_TTRUE -+ | mov [BASE+RA*8+4], RB -+ | ins_next -+ break; -+ case BC_UNM: -+ | ins_AD // RA = dst, RD = src -+ |.if DUALNUM -+ | checkint RD, >5 -+ | mov RB, [BASE+RD*8] -+ | neg RB -+ | jo >4 -+ | mov dword [BASE+RA*8+4], LJ_TISNUM -+ | mov dword [BASE+RA*8], RB -+ |9: -+ | ins_next -+ |4: -+ | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. -+ | mov dword [BASE+RA*8], 0 -+ | jmp <9 -+ |5: -+ | ja ->vmeta_unm -+ |.else -+ | checknum RD, ->vmeta_unm -+ |.endif -+ | movsd xmm0, qword [BASE+RD*8] -+ | sseconst_sign xmm1, RDa -+ | xorps xmm0, xmm1 -+ | movsd qword [BASE+RA*8], xmm0 -+ |.if DUALNUM -+ | jmp <9 -+ |.else -+ | ins_next -+ |.endif -+ break; -+ case BC_LEN: -+ | ins_AD // RA = dst, RD = src -+ | checkstr RD, >2 -+ | mov STR:RD, [BASE+RD*8] -+ |.if DUALNUM -+ | mov RD, dword STR:RD->len -+ |1: -+ | mov dword [BASE+RA*8+4], LJ_TISNUM -+ | mov dword [BASE+RA*8], RD -+ |.else -+ | xorps xmm0, xmm0 -+ | cvtsi2sd xmm0, dword STR:RD->len -+ |1: -+ | movsd qword [BASE+RA*8], xmm0 -+ |.endif -+ | ins_next -+ |2: -+ | checktab RD, ->vmeta_len -+ | mov TAB:FCARG1, [BASE+RD*8] -+#if LJ_52 -+ | mov TAB:RB, TAB:FCARG1->metatable -+ | cmp TAB:RB, 0 -+ | jnz >9 -+ |3: -+#endif -+ |->BC_LEN_Z: -+ | mov RB, BASE // Save BASE. -+ | call extern lj_tab_len@4 // (GCtab *t) -+ | // Length of table returned in eax (RD). -+ |.if DUALNUM -+ | // Nothing to do. -+ |.else -+ | cvtsi2sd xmm0, RD -+ |.endif -+ | mov BASE, RB // Restore BASE. -+ | movzx RA, PC_RA -+ | jmp <1 -+#if LJ_52 -+ |9: // Check for __len. -+ | test byte TAB:RB->nomm, 1<<MM_len -+ | jnz <3 -+ | jmp ->vmeta_len // 'no __len' flag NOT set: check. 
-+#endif -+ break; -+ -+ /* -- Binary ops -------------------------------------------------------- */ -+ -+ |.macro ins_arithpre, sseins, ssereg -+ | ins_ABC -+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); -+ ||switch (vk) { -+ ||case 0: -+ | checknum RB, ->vmeta_arith_vn -+ | .if DUALNUM -+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn -+ | .endif -+ | movsd xmm0, qword [BASE+RB*8] -+ | sseins ssereg, qword [KBASE+RC*8] -+ || break; -+ ||case 1: -+ | checknum RB, ->vmeta_arith_nv -+ | .if DUALNUM -+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv -+ | .endif -+ | movsd xmm0, qword [KBASE+RC*8] -+ | sseins ssereg, qword [BASE+RB*8] -+ || break; -+ ||default: -+ | checknum RB, ->vmeta_arith_vv -+ | checknum RC, ->vmeta_arith_vv -+ | movsd xmm0, qword [BASE+RB*8] -+ | sseins ssereg, qword [BASE+RC*8] -+ || break; -+ ||} -+ |.endmacro -+ | -+ |.macro ins_arithdn, intins -+ | ins_ABC -+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); -+ ||switch (vk) { -+ ||case 0: -+ | checkint RB, ->vmeta_arith_vn -+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn -+ | mov RB, [BASE+RB*8] -+ | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno -+ || break; -+ ||case 1: -+ | checkint RB, ->vmeta_arith_nv -+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv -+ | mov RC, [KBASE+RC*8] -+ | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo -+ || break; -+ ||default: -+ | checkint RB, ->vmeta_arith_vv -+ | checkint RC, ->vmeta_arith_vv -+ | mov RB, [BASE+RB*8] -+ | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo -+ || break; -+ ||} -+ | mov dword [BASE+RA*8+4], LJ_TISNUM -+ ||if (vk == 1) { -+ | mov dword [BASE+RA*8], RC -+ ||} else { -+ | mov dword [BASE+RA*8], RB -+ ||} -+ | ins_next -+ |.endmacro -+ | -+ |.macro ins_arithpost -+ | movsd qword [BASE+RA*8], xmm0 -+ |.endmacro -+ | -+ |.macro ins_arith, sseins -+ | ins_arithpre sseins, xmm0 -+ | ins_arithpost -+ | ins_next -+ |.endmacro -+ | -+ |.macro ins_arith, intins, sseins -+ |.if DUALNUM -+ | ins_arithdn intins -+ |.else -+ | ins_arith, sseins -+ |.endif -+ |.endmacro -+ -+ | // RA = dst, RB = src1 or num const, RC = src2 or num const -+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: -+ | ins_arith add, addsd -+ break; -+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: -+ | ins_arith sub, subsd -+ break; -+ case BC_MULVN: case BC_MULNV: case BC_MULVV: -+ | ins_arith imul, mulsd -+ break; -+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: -+ | ins_arith divsd -+ break; -+ case BC_MODVN: -+ | ins_arithpre movsd, xmm1 -+ |->BC_MODVN_Z: -+ | call ->vm_mod -+ | ins_arithpost -+ | ins_next -+ break; -+ case BC_MODNV: case BC_MODVV: -+ | ins_arithpre movsd, xmm1 -+ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 
-+ break; -+ case BC_POW: -+ | ins_arithpre movsd, xmm1 -+ | mov RB, BASE -+ |.if not X64 -+ | movsd FPARG1, xmm0 -+ | movsd FPARG3, xmm1 -+ |.endif -+ | call extern pow -+ | movzx RA, PC_RA -+ | mov BASE, RB -+ |.if X64 -+ | ins_arithpost -+ |.else -+ | fstp qword [BASE+RA*8] -+ |.endif -+ | ins_next -+ break; -+ -+ case BC_CAT: -+ | ins_ABC // RA = dst, RB = src_start, RC = src_end -+ |.if X64 -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE -+ | lea CARG2d, [BASE+RC*8] -+ | mov CARG3d, RC -+ | sub CARG3d, RB -+ |->BC_CAT_Z: -+ | mov L:RB, L:CARG1d -+ |.else -+ | lea RA, [BASE+RC*8] -+ | sub RC, RB -+ | mov ARG2, RA -+ | mov ARG3, RC -+ |->BC_CAT_Z: -+ | mov L:RB, SAVE_L -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) -+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC). -+ | mov BASE, L:RB->base -+ | test RC, RC -+ | jnz ->vmeta_binop -+ | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. -+ | movzx RA, PC_RA -+ |.if X64 -+ | mov RCa, [BASE+RB*8] -+ | mov [BASE+RA*8], RCa -+ |.else -+ | mov RC, [BASE+RB*8+4] -+ | mov RB, [BASE+RB*8] -+ | mov [BASE+RA*8+4], RC -+ | mov [BASE+RA*8], RB -+ |.endif -+ | ins_next -+ break; -+ -+ /* -- Constant ops ------------------------------------------------------ */ -+ -+ case BC_KSTR: -+ | ins_AND // RA = dst, RD = str const (~) -+ | mov RD, [KBASE+RD*4] -+ | mov dword [BASE+RA*8+4], LJ_TSTR -+ | mov [BASE+RA*8], RD -+ | ins_next -+ break; -+ case BC_KCDATA: -+ |.if FFI -+ | ins_AND // RA = dst, RD = cdata const (~) -+ | mov RD, [KBASE+RD*4] -+ | mov dword [BASE+RA*8+4], LJ_TCDATA -+ | mov [BASE+RA*8], RD -+ | ins_next -+ |.endif -+ break; -+ case BC_KSHORT: -+ | ins_AD // RA = dst, RD = signed int16 literal -+ |.if DUALNUM -+ | movsx RD, RDW -+ | mov dword [BASE+RA*8+4], LJ_TISNUM -+ | mov dword [BASE+RA*8], RD -+ |.else -+ | movsx RD, RDW // Sign-extend literal. -+ | cvtsi2sd xmm0, RD -+ | movsd qword [BASE+RA*8], xmm0 -+ |.endif -+ | ins_next -+ break; -+ case BC_KNUM: -+ | ins_AD // RA = dst, RD = num const -+ | movsd xmm0, qword [KBASE+RD*8] -+ | movsd qword [BASE+RA*8], xmm0 -+ | ins_next -+ break; -+ case BC_KPRI: -+ | ins_AND // RA = dst, RD = primitive type (~) -+ | mov [BASE+RA*8+4], RD -+ | ins_next -+ break; -+ case BC_KNIL: -+ | ins_AD // RA = dst_start, RD = dst_end -+ | lea RA, [BASE+RA*8+12] -+ | lea RD, [BASE+RD*8+4] -+ | mov RB, LJ_TNIL -+ | mov [RA-8], RB // Sets minimum 2 slots. -+ |1: -+ | mov [RA], RB -+ | add RA, 8 -+ | cmp RA, RD -+ | jbe <1 -+ | ins_next -+ break; -+ -+ /* -- Upvalue and function ops ------------------------------------------ */ -+ -+ case BC_UGET: -+ | ins_AD // RA = dst, RD = upvalue # -+ | mov LFUNC:RB, [BASE-8] -+ | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] -+ | mov RB, UPVAL:RB->v -+ |.if X64 -+ | mov RDa, [RB] -+ | mov [BASE+RA*8], RDa -+ |.else -+ | mov RD, [RB+4] -+ | mov RB, [RB] -+ | mov [BASE+RA*8+4], RD -+ | mov [BASE+RA*8], RB -+ |.endif -+ | ins_next -+ break; -+ case BC_USETV: -+#define TV2MARKOFS \ -+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) -+ | ins_AD // RA = upvalue #, RD = src -+ | mov LFUNC:RB, [BASE-8] -+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] -+ | cmp byte UPVAL:RB->closed, 0 -+ | mov RB, UPVAL:RB->v -+ | mov RA, [BASE+RD*8] -+ | mov RD, [BASE+RD*8+4] -+ | mov [RB], RA -+ | mov [RB+4], RD -+ | jz >1 -+ | // Check barrier for closed upvalue. 
-+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) -+ | jnz >2 -+ |1: -+ | ins_next -+ | -+ |2: // Upvalue is black. Check if new value is collectable and white. -+ | sub RD, LJ_TISGCV -+ | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) -+ | jbe <1 -+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) -+ | jz <1 -+ | // Crossed a write barrier. Move the barrier forward. -+ |.if X64 and not X64WIN -+ | mov FCARG2, RB -+ | mov RB, BASE // Save BASE. -+ |.else -+ | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). -+ |.endif -+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G] -+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) -+ | mov BASE, RB // Restore BASE. -+ | jmp <1 -+ break; -+#undef TV2MARKOFS -+ case BC_USETS: -+ | ins_AND // RA = upvalue #, RD = str const (~) -+ | mov LFUNC:RB, [BASE-8] -+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] -+ | mov GCOBJ:RA, [KBASE+RD*4] -+ | mov RD, UPVAL:RB->v -+ | mov [RD], GCOBJ:RA -+ | mov dword [RD+4], LJ_TSTR -+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) -+ | jnz >2 -+ |1: -+ | ins_next -+ | -+ |2: // Check if string is white and ensure upvalue is closed. -+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) -+ | jz <1 -+ | cmp byte UPVAL:RB->closed, 0 -+ | jz <1 -+ | // Crossed a write barrier. Move the barrier forward. -+ | mov RB, BASE // Save BASE (FCARG2 == BASE). -+ | mov FCARG2, RD -+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G] -+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) -+ | mov BASE, RB // Restore BASE. -+ | jmp <1 -+ break; -+ case BC_USETN: -+ | ins_AD // RA = upvalue #, RD = num const -+ | mov LFUNC:RB, [BASE-8] -+ | movsd xmm0, qword [KBASE+RD*8] -+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] -+ | mov RA, UPVAL:RB->v -+ | movsd qword [RA], xmm0 -+ | ins_next -+ break; -+ case BC_USETP: -+ | ins_AND // RA = upvalue #, RD = primitive type (~) -+ | mov LFUNC:RB, [BASE-8] -+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] -+ | mov RA, UPVAL:RB->v -+ | mov [RA+4], RD -+ | ins_next -+ break; -+ case BC_UCLO: -+ | ins_AD // RA = level, RD = target -+ | branchPC RD // Do this first to free RD. -+ | mov L:RB, SAVE_L -+ | cmp dword L:RB->openupval, 0 -+ | je >1 -+ | mov L:RB->base, BASE -+ | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE -+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA -+ | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level) -+ | mov BASE, L:RB->base -+ |1: -+ | ins_next -+ break; -+ -+ case BC_FNEW: -+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) -+ |.if X64 -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. -+ | mov CARG3d, [BASE-8] -+ | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *. -+ | mov CARG1d, L:RB -+ |.else -+ | mov LFUNC:RA, [BASE-8] -+ | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. -+ | mov L:RB, SAVE_L -+ | mov ARG3, LFUNC:RA -+ | mov ARG2, PROTO:RD -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | // (lua_State *L, GCproto *pt, GCfuncL *parent) -+ | call extern lj_func_newL_gc -+ | // GCfuncL * returned in eax (RC). 
-+ | mov BASE, L:RB->base -+ | movzx RA, PC_RA -+ | mov [BASE+RA*8], LFUNC:RC -+ | mov dword [BASE+RA*8+4], LJ_TFUNC -+ | ins_next -+ break; -+ -+ /* -- Table ops --------------------------------------------------------- */ -+ -+ case BC_TNEW: -+ | ins_AD // RA = dst, RD = hbits|asize -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] -+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] -+ | mov SAVE_PC, PC -+ | jae >5 -+ |1: -+ |.if X64 -+ | mov CARG3d, RD -+ | and RD, 0x7ff -+ | shr CARG3d, 11 -+ |.else -+ | mov RA, RD -+ | and RD, 0x7ff -+ | shr RA, 11 -+ | mov ARG3, RA -+ |.endif -+ | cmp RD, 0x7ff -+ | je >3 -+ |2: -+ |.if X64 -+ | mov L:CARG1d, L:RB -+ | mov CARG2d, RD -+ |.else -+ | mov ARG1, L:RB -+ | mov ARG2, RD -+ |.endif -+ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) -+ | // Table * returned in eax (RC). -+ | mov BASE, L:RB->base -+ | movzx RA, PC_RA -+ | mov [BASE+RA*8], TAB:RC -+ | mov dword [BASE+RA*8+4], LJ_TTAB -+ | ins_next -+ |3: // Turn 0x7ff into 0x801. -+ | mov RD, 0x801 -+ | jmp <2 -+ |5: -+ | mov L:FCARG1, L:RB -+ | call extern lj_gc_step_fixtop@4 // (lua_State *L) -+ | movzx RD, PC_RD -+ | jmp <1 -+ break; -+ case BC_TDUP: -+ | ins_AND // RA = dst, RD = table const (~) (holding template table) -+ | mov L:RB, SAVE_L -+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] -+ | mov SAVE_PC, PC -+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] -+ | mov L:RB->base, BASE -+ | jae >3 -+ |2: -+ | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE -+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA -+ | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) -+ | // Table * returned in eax (RC). -+ | mov BASE, L:RB->base -+ | movzx RA, PC_RA -+ | mov [BASE+RA*8], TAB:RC -+ | mov dword [BASE+RA*8+4], LJ_TTAB -+ | ins_next -+ |3: -+ | mov L:FCARG1, L:RB -+ | call extern lj_gc_step_fixtop@4 // (lua_State *L) -+ | movzx RD, PC_RD // Need to reload RD. -+ | not RDa -+ | jmp <2 -+ break; -+ -+ case BC_GGET: -+ | ins_AND // RA = dst, RD = str const (~) -+ | mov LFUNC:RB, [BASE-8] -+ | mov TAB:RB, LFUNC:RB->env -+ | mov STR:RC, [KBASE+RD*4] -+ | jmp ->BC_TGETS_Z -+ break; -+ case BC_GSET: -+ | ins_AND // RA = src, RD = str const (~) -+ | mov LFUNC:RB, [BASE-8] -+ | mov TAB:RB, LFUNC:RB->env -+ | mov STR:RC, [KBASE+RD*4] -+ | jmp ->BC_TSETS_Z -+ break; -+ -+ case BC_TGETV: -+ | ins_ABC // RA = dst, RB = table, RC = key -+ | checktab RB, ->vmeta_tgetv -+ | mov TAB:RB, [BASE+RB*8] -+ | -+ | // Integer key? -+ |.if DUALNUM -+ | checkint RC, >5 -+ | mov RC, dword [BASE+RC*8] -+ |.else -+ | // Convert number to int and back and compare. -+ | checknum RC, >5 -+ | movsd xmm0, qword [BASE+RC*8] -+ | cvttsd2si RC, xmm0 -+ | cvtsi2sd xmm1, RC -+ | ucomisd xmm0, xmm1 -+ | jne ->vmeta_tgetv // Generic numeric key? Use fallback. -+ |.endif -+ | cmp RC, TAB:RB->asize // Takes care of unordered, too. -+ | jae ->vmeta_tgetv // Not in array part? Use fallback. -+ | shl RC, 3 -+ | add RC, TAB:RB->array -+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. -+ | je >2 -+ | // Get array slot. -+ |.if X64 -+ | mov RBa, [RC] -+ | mov [BASE+RA*8], RBa -+ |.else -+ | mov RB, [RC] -+ | mov RC, [RC+4] -+ | mov [BASE+RA*8], RB -+ | mov [BASE+RA*8+4], RC -+ |.endif -+ |1: -+ | ins_next -+ | -+ |2: // Check for __index if table value is nil. -+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. 
-+ | jz >3 -+ | mov TAB:RA, TAB:RB->metatable -+ | test byte TAB:RA->nomm, 1<<MM_index -+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. -+ | movzx RA, PC_RA // Restore RA. -+ |3: -+ | mov dword [BASE+RA*8+4], LJ_TNIL -+ | jmp <1 -+ | -+ |5: // String key? -+ | checkstr RC, ->vmeta_tgetv -+ | mov STR:RC, [BASE+RC*8] -+ | jmp ->BC_TGETS_Z -+ break; -+ case BC_TGETS: -+ | ins_ABC // RA = dst, RB = table, RC = str const (~) -+ | not RCa -+ | mov STR:RC, [KBASE+RC*4] -+ | checktab RB, ->vmeta_tgets -+ | mov TAB:RB, [BASE+RB*8] -+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. -+ | mov RA, TAB:RB->hmask -+ | and RA, STR:RC->hash -+ | imul RA, #NODE -+ | add NODE:RA, TAB:RB->node -+ |1: -+ | cmp dword NODE:RA->key.it, LJ_TSTR -+ | jne >4 -+ | cmp dword NODE:RA->key.gcr, STR:RC -+ | jne >4 -+ | // Ok, key found. Assumes: offsetof(Node, val) == 0 -+ | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath. -+ | je >5 // Key found, but nil value? -+ | movzx RC, PC_RA -+ | // Get node value. -+ |.if X64 -+ | mov RBa, [RA] -+ | mov [BASE+RC*8], RBa -+ |.else -+ | mov RB, [RA] -+ | mov RA, [RA+4] -+ | mov [BASE+RC*8], RB -+ | mov [BASE+RC*8+4], RA -+ |.endif -+ |2: -+ | ins_next -+ | -+ |3: -+ | movzx RC, PC_RA -+ | mov dword [BASE+RC*8+4], LJ_TNIL -+ | jmp <2 -+ | -+ |4: // Follow hash chain. -+ | mov NODE:RA, NODE:RA->next -+ | test NODE:RA, NODE:RA -+ | jnz <1 -+ | // End of hash chain: key not found, nil result. -+ | -+ |5: // Check for __index if table value is nil. -+ | mov TAB:RA, TAB:RB->metatable -+ | test TAB:RA, TAB:RA -+ | jz <3 // No metatable: done. -+ | test byte TAB:RA->nomm, 1<<MM_index -+ | jnz <3 // 'no __index' flag set: done. -+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC. -+ break; -+ case BC_TGETB: -+ | ins_ABC // RA = dst, RB = table, RC = byte literal -+ | checktab RB, ->vmeta_tgetb -+ | mov TAB:RB, [BASE+RB*8] -+ | cmp RC, TAB:RB->asize -+ | jae ->vmeta_tgetb -+ | shl RC, 3 -+ | add RC, TAB:RB->array -+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. -+ | je >2 -+ | // Get array slot. -+ |.if X64 -+ | mov RBa, [RC] -+ | mov [BASE+RA*8], RBa -+ |.else -+ | mov RB, [RC] -+ | mov RC, [RC+4] -+ | mov [BASE+RA*8], RB -+ | mov [BASE+RA*8+4], RC -+ |.endif -+ |1: -+ | ins_next -+ | -+ |2: // Check for __index if table value is nil. -+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. -+ | jz >3 -+ | mov TAB:RA, TAB:RB->metatable -+ | test byte TAB:RA->nomm, 1<<MM_index -+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. -+ | movzx RA, PC_RA // Restore RA. -+ |3: -+ | mov dword [BASE+RA*8+4], LJ_TNIL -+ | jmp <1 -+ break; -+ case BC_TGETR: -+ | ins_ABC // RA = dst, RB = table, RC = key -+ | mov TAB:RB, [BASE+RB*8] -+ |.if DUALNUM -+ | mov RC, dword [BASE+RC*8] -+ |.else -+ | cvttsd2si RC, qword [BASE+RC*8] -+ |.endif -+ | cmp RC, TAB:RB->asize -+ | jae ->vmeta_tgetr // Not in array part? Use fallback. -+ | shl RC, 3 -+ | add RC, TAB:RB->array -+ | // Get array slot. -+ |->BC_TGETR_Z: -+ |.if X64 -+ | mov RBa, [RC] -+ | mov [BASE+RA*8], RBa -+ |.else -+ | mov RB, [RC] -+ | mov RC, [RC+4] -+ | mov [BASE+RA*8], RB -+ | mov [BASE+RA*8+4], RC -+ |.endif -+ |->BC_TGETR2_Z: -+ | ins_next -+ break; -+ -+ case BC_TSETV: -+ | ins_ABC // RA = src, RB = table, RC = key -+ | checktab RB, ->vmeta_tsetv -+ | mov TAB:RB, [BASE+RB*8] -+ | -+ | // Integer key? -+ |.if DUALNUM -+ | checkint RC, >5 -+ | mov RC, dword [BASE+RC*8] -+ |.else -+ | // Convert number to int and back and compare. 
-+ | checknum RC, >5 -+ | movsd xmm0, qword [BASE+RC*8] -+ | cvttsd2si RC, xmm0 -+ | cvtsi2sd xmm1, RC -+ | ucomisd xmm0, xmm1 -+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback. -+ |.endif -+ | cmp RC, TAB:RB->asize // Takes care of unordered, too. -+ | jae ->vmeta_tsetv -+ | shl RC, 3 -+ | add RC, TAB:RB->array -+ | cmp dword [RC+4], LJ_TNIL -+ | je >3 // Previous value is nil? -+ |1: -+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) -+ | jnz >7 -+ |2: // Set array slot. -+ |.if X64 -+ | mov RBa, [BASE+RA*8] -+ | mov [RC], RBa -+ |.else -+ | mov RB, [BASE+RA*8+4] -+ | mov RA, [BASE+RA*8] -+ | mov [RC+4], RB -+ | mov [RC], RA -+ |.endif -+ | ins_next -+ | -+ |3: // Check for __newindex if previous value is nil. -+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. -+ | jz <1 -+ | mov TAB:RA, TAB:RB->metatable -+ | test byte TAB:RA->nomm, 1<<MM_newindex -+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. -+ | movzx RA, PC_RA // Restore RA. -+ | jmp <1 -+ | -+ |5: // String key? -+ | checkstr RC, ->vmeta_tsetv -+ | mov STR:RC, [BASE+RC*8] -+ | jmp ->BC_TSETS_Z -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:RB, RA -+ | movzx RA, PC_RA // Restore RA. -+ | jmp <2 -+ break; -+ case BC_TSETS: -+ | ins_ABC // RA = src, RB = table, RC = str const (~) -+ | not RCa -+ | mov STR:RC, [KBASE+RC*4] -+ | checktab RB, ->vmeta_tsets -+ | mov TAB:RB, [BASE+RB*8] -+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. -+ | mov RA, TAB:RB->hmask -+ | and RA, STR:RC->hash -+ | imul RA, #NODE -+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. -+ | add NODE:RA, TAB:RB->node -+ |1: -+ | cmp dword NODE:RA->key.it, LJ_TSTR -+ | jne >5 -+ | cmp dword NODE:RA->key.gcr, STR:RC -+ | jne >5 -+ | // Ok, key found. Assumes: offsetof(Node, val) == 0 -+ | cmp dword [RA+4], LJ_TNIL -+ | je >4 // Previous value is nil? -+ |2: -+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) -+ | jnz >7 -+ |3: // Set node value. -+ | movzx RC, PC_RA -+ |.if X64 -+ | mov RBa, [BASE+RC*8] -+ | mov [RA], RBa -+ |.else -+ | mov RB, [BASE+RC*8+4] -+ | mov RC, [BASE+RC*8] -+ | mov [RA+4], RB -+ | mov [RA], RC -+ |.endif -+ | ins_next -+ | -+ |4: // Check for __newindex if previous value is nil. -+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. -+ | jz <2 -+ | mov TMP1, RA // Save RA. -+ | mov TAB:RA, TAB:RB->metatable -+ | test byte TAB:RA->nomm, 1<<MM_newindex -+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. -+ | mov RA, TMP1 // Restore RA. -+ | jmp <2 -+ | -+ |5: // Follow hash chain. -+ | mov NODE:RA, NODE:RA->next -+ | test NODE:RA, NODE:RA -+ | jnz <1 -+ | // End of hash chain: key not found, add a new one. -+ | -+ | // But check for __newindex first. -+ | mov TAB:RA, TAB:RB->metatable -+ | test TAB:RA, TAB:RA -+ | jz >6 // No metatable: continue. -+ | test byte TAB:RA->nomm, 1<<MM_newindex -+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. -+ |6: -+ | mov TMP1, STR:RC -+ | mov TMP2, LJ_TSTR -+ | mov TMP3, TAB:RB // Save TAB:RB for us. -+ |.if X64 -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE -+ | lea CARG3, TMP1 -+ | mov CARG2d, TAB:RB -+ | mov L:RB, L:CARG1d -+ |.else -+ | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. 
-+ | mov ARG2, TAB:RB -+ | mov L:RB, SAVE_L -+ | mov ARG3, RC -+ | mov ARG1, L:RB -+ | mov L:RB->base, BASE -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) -+ | // Handles write barrier for the new key. TValue * returned in eax (RC). -+ | mov BASE, L:RB->base -+ | mov TAB:RB, TMP3 // Need TAB:RB for barrier. -+ | mov RA, eax -+ | jmp <2 // Must check write barrier for value. -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:RB, RC // Destroys STR:RC. -+ | jmp <3 -+ break; -+ case BC_TSETB: -+ | ins_ABC // RA = src, RB = table, RC = byte literal -+ | checktab RB, ->vmeta_tsetb -+ | mov TAB:RB, [BASE+RB*8] -+ | cmp RC, TAB:RB->asize -+ | jae ->vmeta_tsetb -+ | shl RC, 3 -+ | add RC, TAB:RB->array -+ | cmp dword [RC+4], LJ_TNIL -+ | je >3 // Previous value is nil? -+ |1: -+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) -+ | jnz >7 -+ |2: // Set array slot. -+ |.if X64 -+ | mov RAa, [BASE+RA*8] -+ | mov [RC], RAa -+ |.else -+ | mov RB, [BASE+RA*8+4] -+ | mov RA, [BASE+RA*8] -+ | mov [RC+4], RB -+ | mov [RC], RA -+ |.endif -+ | ins_next -+ | -+ |3: // Check for __newindex if previous value is nil. -+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. -+ | jz <1 -+ | mov TAB:RA, TAB:RB->metatable -+ | test byte TAB:RA->nomm, 1<<MM_newindex -+ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. -+ | movzx RA, PC_RA // Restore RA. -+ | jmp <1 -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:RB, RA -+ | movzx RA, PC_RA // Restore RA. -+ | jmp <2 -+ break; -+ case BC_TSETR: -+ | ins_ABC // RA = src, RB = table, RC = key -+ | mov TAB:RB, [BASE+RB*8] -+ |.if DUALNUM -+ | mov RC, dword [BASE+RC*8] -+ |.else -+ | cvttsd2si RC, qword [BASE+RC*8] -+ |.endif -+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) -+ | jnz >7 -+ |2: -+ | cmp RC, TAB:RB->asize -+ | jae ->vmeta_tsetr -+ | shl RC, 3 -+ | add RC, TAB:RB->array -+ | // Set array slot. -+ |->BC_TSETR_Z: -+ |.if X64 -+ | mov RBa, [BASE+RA*8] -+ | mov [RC], RBa -+ |.else -+ | mov RB, [BASE+RA*8+4] -+ | mov RA, [BASE+RA*8] -+ | mov [RC+4], RB -+ | mov [RC], RA -+ |.endif -+ | ins_next -+ | -+ |7: // Possible table write barrier for the value. Skip valiswhite check. -+ | barrierback TAB:RB, RA -+ | movzx RA, PC_RA // Restore RA. -+ | jmp <2 -+ break; -+ -+ case BC_TSETM: -+ | ins_AD // RA = base (table at base-1), RD = num const (start index) -+ | mov TMP1, KBASE // Need one more free register. -+ | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word. -+ |1: -+ | lea RA, [BASE+RA*8] -+ | mov TAB:RB, [RA-8] // Guaranteed to be a table. -+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) -+ | jnz >7 -+ |2: -+ | mov RD, MULTRES -+ | sub RD, 1 -+ | jz >4 // Nothing to copy? -+ | add RD, KBASE // Compute needed size. -+ | cmp RD, TAB:RB->asize -+ | ja >5 // Doesn't fit into array part? -+ | sub RD, KBASE -+ | shl KBASE, 3 -+ | add KBASE, TAB:RB->array -+ |3: // Copy result slots to table. -+ |.if X64 -+ | mov RBa, [RA] -+ | add RA, 8 -+ | mov [KBASE], RBa -+ |.else -+ | mov RB, [RA] -+ | mov [KBASE], RB -+ | mov RB, [RA+4] -+ | add RA, 8 -+ | mov [KBASE+4], RB -+ |.endif -+ | add KBASE, 8 -+ | sub RD, 1 -+ | jnz <3 -+ |4: -+ | mov KBASE, TMP1 -+ | ins_next -+ | -+ |5: // Need to resize array part. -+ |.if X64 -+ | mov L:CARG1d, SAVE_L -+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 
-+ | mov CARG2d, TAB:RB -+ | mov CARG3d, RD -+ | mov L:RB, L:CARG1d -+ |.else -+ | mov ARG2, TAB:RB -+ | mov L:RB, SAVE_L -+ | mov L:RB->base, BASE -+ | mov ARG3, RD -+ | mov ARG1, L:RB -+ |.endif -+ | mov SAVE_PC, PC -+ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) -+ | mov BASE, L:RB->base -+ | movzx RA, PC_RA // Restore RA. -+ | jmp <1 // Retry. -+ | -+ |7: // Possible table write barrier for any value. Skip valiswhite check. -+ | barrierback TAB:RB, RD -+ | jmp <2 -+ break; -+ -+ /* -- Calls and vararg handling ----------------------------------------- */ -+ -+ case BC_CALL: case BC_CALLM: -+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs -+ if (op == BC_CALLM) { -+ | add NARGS:RD, MULTRES -+ } -+ | cmp dword [BASE+RA*8+4], LJ_TFUNC -+ | mov LFUNC:RB, [BASE+RA*8] -+ | jne ->vmeta_call_ra -+ | lea BASE, [BASE+RA*8+8] -+ | ins_call -+ break; -+ -+ case BC_CALLMT: -+ | ins_AD // RA = base, RD = extra_nargs -+ | add NARGS:RD, MULTRES -+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. -+ break; -+ case BC_CALLT: -+ | ins_AD // RA = base, RD = nargs+1 -+ | lea RA, [BASE+RA*8+8] -+ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. -+ | mov LFUNC:RB, [RA-8] -+ | cmp dword [RA-4], LJ_TFUNC -+ | jne ->vmeta_call -+ |->BC_CALLT_Z: -+ | mov PC, [BASE-4] -+ | test PC, FRAME_TYPE -+ | jnz >7 -+ |1: -+ | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. -+ | mov MULTRES, NARGS:RD -+ | sub NARGS:RD, 1 -+ | jz >3 -+ |2: // Move args down. -+ |.if X64 -+ | mov RBa, [RA] -+ | add RA, 8 -+ | mov [KBASE], RBa -+ |.else -+ | mov RB, [RA] -+ | mov [KBASE], RB -+ | mov RB, [RA+4] -+ | add RA, 8 -+ | mov [KBASE+4], RB -+ |.endif -+ | add KBASE, 8 -+ | sub NARGS:RD, 1 -+ | jnz <2 -+ | -+ | mov LFUNC:RB, [BASE-8] -+ |3: -+ | mov NARGS:RD, MULTRES -+ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? -+ | ja >5 -+ |4: -+ | ins_callt -+ | -+ |5: // Tailcall to a fast function. -+ | test PC, FRAME_TYPE // Lua frame below? -+ | jnz <4 -+ | movzx RA, PC_RA -+ | not RAa -+ | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE. -+ | mov KBASE, LFUNC:KBASE->pc -+ | mov KBASE, [KBASE+PC2PROTO(k)] -+ | jmp <4 -+ | -+ |7: // Tailcall from a vararg function. -+ | sub PC, FRAME_VARG -+ | test PC, FRAME_TYPEP -+ | jnz >8 // Vararg frame below? -+ | sub BASE, PC // Need to relocate BASE/KBASE down. -+ | mov KBASE, BASE -+ | mov PC, [BASE-4] -+ | jmp <1 -+ |8: -+ | add PC, FRAME_VARG -+ | jmp <1 -+ break; -+ -+ case BC_ITERC: -+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) -+ | lea RA, [BASE+RA*8+8] // fb = base+1 -+ |.if X64 -+ | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3]. -+ | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2]. -+ | mov [RA], RBa -+ | mov [RA+8], RCa -+ |.else -+ | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. -+ | mov RC, [RA-20] -+ | mov [RA], RB -+ | mov [RA+4], RC -+ | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. -+ | mov RC, [RA-12] -+ | mov [RA+8], RB -+ | mov [RA+12], RC -+ |.endif -+ | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] -+ | mov RC, [RA-28] -+ | mov [RA-8], LFUNC:RB -+ | mov [RA-4], RC -+ | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. -+ | mov NARGS:RD, 2+1 -+ | jne ->vmeta_call -+ | mov BASE, RA -+ | ins_call -+ break; -+ -+ case BC_ITERN: -+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) -+ |.if JIT -+ | // NYI: add hotloop, record BC_ITERN. -+ |.endif -+ | mov TMP1, KBASE // Need two more free registers. 
-+ | mov TMP2, DISPATCH -+ | mov TAB:RB, [BASE+RA*8-16] -+ | mov RC, [BASE+RA*8-8] // Get index from control var. -+ | mov DISPATCH, TAB:RB->asize -+ | add PC, 4 -+ | mov KBASE, TAB:RB->array -+ |1: // Traverse array part. -+ | cmp RC, DISPATCH; jae >5 // Index points after array part? -+ | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 -+ |.if DUALNUM -+ | mov dword [BASE+RA*8+4], LJ_TISNUM -+ | mov dword [BASE+RA*8], RC -+ |.else -+ | cvtsi2sd xmm0, RC -+ |.endif -+ | // Copy array slot to returned value. -+ |.if X64 -+ | mov RBa, [KBASE+RC*8] -+ | mov [BASE+RA*8+8], RBa -+ |.else -+ | mov RB, [KBASE+RC*8+4] -+ | mov [BASE+RA*8+12], RB -+ | mov RB, [KBASE+RC*8] -+ | mov [BASE+RA*8+8], RB -+ |.endif -+ | add RC, 1 -+ | // Return array index as a numeric key. -+ |.if DUALNUM -+ | // See above. -+ |.else -+ | movsd qword [BASE+RA*8], xmm0 -+ |.endif -+ | mov [BASE+RA*8-8], RC // Update control var. -+ |2: -+ | movzx RD, PC_RD // Get target from ITERL. -+ | branchPC RD -+ |3: -+ | mov DISPATCH, TMP2 -+ | mov KBASE, TMP1 -+ | ins_next -+ | -+ |4: // Skip holes in array part. -+ | add RC, 1 -+ | jmp <1 -+ | -+ |5: // Traverse hash part. -+ | sub RC, DISPATCH -+ |6: -+ | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. -+ | imul KBASE, RC, #NODE -+ | add NODE:KBASE, TAB:RB->node -+ | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7 -+ | lea DISPATCH, [RC+DISPATCH+1] -+ | // Copy key and value from hash slot. -+ |.if X64 -+ | mov RBa, NODE:KBASE->key -+ | mov RCa, NODE:KBASE->val -+ | mov [BASE+RA*8], RBa -+ | mov [BASE+RA*8+8], RCa -+ |.else -+ | mov RB, NODE:KBASE->key.gcr -+ | mov RC, NODE:KBASE->key.it -+ | mov [BASE+RA*8], RB -+ | mov [BASE+RA*8+4], RC -+ | mov RB, NODE:KBASE->val.gcr -+ | mov RC, NODE:KBASE->val.it -+ | mov [BASE+RA*8+8], RB -+ | mov [BASE+RA*8+12], RC -+ |.endif -+ | mov [BASE+RA*8-8], DISPATCH -+ | jmp <2 -+ | -+ |7: // Skip holes in hash part. -+ | add RC, 1 -+ | jmp <6 -+ break; -+ -+ case BC_ISNEXT: -+ | ins_AD // RA = base, RD = target (points to ITERN) -+ | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 -+ | mov CFUNC:RB, [BASE+RA*8-24] -+ | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 -+ | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 -+ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 -+ | branchPC RD -+ | mov dword [BASE+RA*8-8], 0 // Initialize control var. -+ | mov dword [BASE+RA*8-4], 0xfffe7fff -+ |1: -+ | ins_next -+ |5: // Despecialize bytecode if any of the checks fail. -+ | mov PC_OP, BC_JMP -+ | branchPC RD -+ | mov byte [PC], BC_ITERC -+ | jmp <1 -+ break; -+ -+ case BC_VARG: -+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams -+ | mov TMP1, KBASE // Need one more free register. -+ | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] -+ | lea RA, [BASE+RA*8] -+ | sub KBASE, [BASE-4] -+ | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. -+ | test RB, RB -+ | jz >5 // Copy all varargs? -+ | lea RB, [RA+RB*8-8] -+ | cmp KBASE, BASE // No vararg slots? -+ | jnb >2 -+ |1: // Copy vararg slots to destination slots. -+ |.if X64 -+ | mov RCa, [KBASE-8] -+ | add KBASE, 8 -+ | mov [RA], RCa -+ |.else -+ | mov RC, [KBASE-8] -+ | mov [RA], RC -+ | mov RC, [KBASE-4] -+ | add KBASE, 8 -+ | mov [RA+4], RC -+ |.endif -+ | add RA, 8 -+ | cmp RA, RB // All destination slots filled? -+ | jnb >3 -+ | cmp KBASE, BASE // No more vararg slots? -+ | jb <1 -+ |2: // Fill up remainder with nil. -+ | mov dword [RA+4], LJ_TNIL -+ | add RA, 8 -+ | cmp RA, RB -+ | jb <2 -+ |3: -+ | mov KBASE, TMP1 -+ | ins_next -+ | -+ |5: // Copy all varargs. 
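BC_ITERN above walks the array part first and then the hash nodes, skipping nil slots, with a single integer control variable that covers both parts (hash positions are stored as asize+index+1). A simplified standalone C model of that traversal order; the Tab/Node types and the 'used' flag are made up for illustration and do not reflect LuaJIT's real table layout.

#include <stdint.h>

typedef struct Node { int used; int key; int val; } Node;
typedef struct Tab {
  uint32_t asize, hmask;  /* array size, hash mask (hash size - 1) */
  int *array;             /* array part; 0 plays the role of nil here */
  Node *node;             /* hash part */
} Tab;

/* Advances *ctl and returns 1 while there is another non-nil entry. */
static int iter_next(const Tab *t, uint32_t *ctl, int *key, int *val)
{
  uint32_t i = *ctl;
  for (; i < t->asize; i++) {                /* 1: traverse the array part */
    if (t->array[i] != 0) {
      *key = (int)i; *val = t->array[i];
      *ctl = i + 1;                          /* next position in the array */
      return 1;
    }
  }
  for (i -= t->asize; i <= t->hmask; i++) {  /* 5/6: traverse the hash part */
    if (t->node[i].used) {
      *key = t->node[i].key; *val = t->node[i].val;
      *ctl = t->asize + i + 1;               /* control var = asize+index+1 */
      return 1;
    }
  }
  return 0;                                  /* end of iteration */
}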
-+ | mov MULTRES, 1 // MULTRES = 0+1 -+ | mov RC, BASE -+ | sub RC, KBASE -+ | jbe <3 // No vararg slots? -+ | mov RB, RC -+ | shr RB, 3 -+ | add RB, 1 -+ | mov MULTRES, RB // MULTRES = #varargs+1 -+ | mov L:RB, SAVE_L -+ | add RC, RA -+ | cmp RC, L:RB->maxstack -+ | ja >7 // Need to grow stack? -+ |6: // Copy all vararg slots. -+ |.if X64 -+ | mov RCa, [KBASE-8] -+ | add KBASE, 8 -+ | mov [RA], RCa -+ |.else -+ | mov RC, [KBASE-8] -+ | mov [RA], RC -+ | mov RC, [KBASE-4] -+ | add KBASE, 8 -+ | mov [RA+4], RC -+ |.endif -+ | add RA, 8 -+ | cmp KBASE, BASE // No more vararg slots? -+ | jb <6 -+ | jmp <3 -+ | -+ |7: // Grow stack for varargs. -+ | mov L:RB->base, BASE -+ | mov L:RB->top, RA -+ | mov SAVE_PC, PC -+ | sub KBASE, BASE // Need delta, because BASE may change. -+ | mov FCARG2, MULTRES -+ | sub FCARG2, 1 -+ | mov FCARG1, L:RB -+ | call extern lj_state_growstack@8 // (lua_State *L, int n) -+ | mov BASE, L:RB->base -+ | mov RA, L:RB->top -+ | add KBASE, BASE -+ | jmp <6 -+ break; -+ -+ /* -- Returns ----------------------------------------------------------- */ -+ -+ case BC_RETM: -+ | ins_AD // RA = results, RD = extra_nresults -+ | add RD, MULTRES // MULTRES >=1, so RD >=1. -+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. -+ break; -+ -+ case BC_RET: case BC_RET0: case BC_RET1: -+ | ins_AD // RA = results, RD = nresults+1 -+ if (op != BC_RET0) { -+ | shl RA, 3 -+ } -+ |1: -+ | mov PC, [BASE-4] -+ | mov MULTRES, RD // Save nresults+1. -+ | test PC, FRAME_TYPE // Check frame type marker. -+ | jnz >7 // Not returning to a fixarg Lua func? -+ switch (op) { -+ case BC_RET: -+ |->BC_RET_Z: -+ | mov KBASE, BASE // Use KBASE for result move. -+ | sub RD, 1 -+ | jz >3 -+ |2: // Move results down. -+ |.if X64 -+ | mov RBa, [KBASE+RA] -+ | mov [KBASE-8], RBa -+ |.else -+ | mov RB, [KBASE+RA] -+ | mov [KBASE-8], RB -+ | mov RB, [KBASE+RA+4] -+ | mov [KBASE-4], RB -+ |.endif -+ | add KBASE, 8 -+ | sub RD, 1 -+ | jnz <2 -+ |3: -+ | mov RD, MULTRES // Note: MULTRES may be >255. -+ | movzx RB, PC_RB // So cannot compare with RDL! -+ |5: -+ | cmp RB, RD // More results expected? -+ | ja >6 -+ break; -+ case BC_RET1: -+ |.if X64 -+ | mov RBa, [BASE+RA] -+ | mov [BASE-8], RBa -+ |.else -+ | mov RB, [BASE+RA+4] -+ | mov [BASE-4], RB -+ | mov RB, [BASE+RA] -+ | mov [BASE-8], RB -+ |.endif -+ /* fallthrough */ -+ case BC_RET0: -+ |5: -+ | cmp PC_RB, RDL // More results expected? -+ | ja >6 -+ default: -+ break; -+ } -+ | movzx RA, PC_RA -+ | not RAa // Note: ~RA = -(RA+1) -+ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 -+ | mov LFUNC:KBASE, [BASE-8] -+ | mov KBASE, LFUNC:KBASE->pc -+ | mov KBASE, [KBASE+PC2PROTO(k)] -+ | ins_next -+ | -+ |6: // Fill up results with nil. -+ if (op == BC_RET) { -+ | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. -+ | add KBASE, 8 -+ } else { -+ | mov dword [BASE+RD*8-12], LJ_TNIL -+ } -+ | add RD, 1 -+ | jmp <5 -+ | -+ |7: // Non-standard return case. -+ | lea RB, [PC-FRAME_VARG] -+ | test RB, FRAME_TYPEP -+ | jnz ->vm_return -+ | // Return from vararg function: relocate BASE down and RA up. 
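The RET/RET0/RET1 paths above copy the returned values down over the callee frame and then pad with nils when the caller expects more results than were produced. A minimal C sketch of that copy-and-pad step; the types and the NIL placeholder are toy stand-ins, not LuaJIT's representation.

#include <stddef.h>

#define NIL (-1)  /* toy stand-in for LJ_TNIL */

/* dst points at the slot the VM starts overwriting (one below the callee
 * frame base); results points at the first returned value. */
static void return_results(int *dst, const int *results,
                           size_t nres, size_t expected)
{
  size_t i;
  for (i = 0; i < nres; i++)      /* 2/3: move results down over the frame */
    dst[i] = results[i];
  for (; i < expected; i++)       /* 6: fill up missing results with nil   */
    dst[i] = NIL;
}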
-+ | sub BASE, RB -+ if (op != BC_RET0) { -+ | add RA, RB -+ } -+ | jmp <1 -+ break; -+ -+ /* -- Loops and branches ------------------------------------------------ */ -+ -+ |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] -+ |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] -+ |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] -+ |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] -+ -+ case BC_FORL: -+ |.if JIT -+ | hotloop RB -+ |.endif -+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. -+ break; -+ -+ case BC_JFORI: -+ case BC_JFORL: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_FORI: -+ case BC_IFORL: -+ vk = (op == BC_IFORL || op == BC_JFORL); -+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop) -+ | lea RA, [BASE+RA*8] -+ if (LJ_DUALNUM) { -+ | cmp FOR_TIDX, LJ_TISNUM; jne >9 -+ if (!vk) { -+ | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for -+ | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for -+ | mov RB, dword FOR_IDX -+ | cmp dword FOR_STEP, 0; jl >5 -+ } else { -+#ifdef LUA_USE_ASSERT -+ | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type -+ | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type -+#endif -+ | mov RB, dword FOR_STEP -+ | test RB, RB; js >5 -+ | add RB, dword FOR_IDX; jo >1 -+ | mov dword FOR_IDX, RB -+ } -+ | cmp RB, dword FOR_STOP -+ | mov FOR_TEXT, LJ_TISNUM -+ | mov dword FOR_EXT, RB -+ if (op == BC_FORI) { -+ | jle >7 -+ |1: -+ |6: -+ | branchPC RD -+ } else if (op == BC_JFORI) { -+ | branchPC RD -+ | movzx RD, PC_RD -+ | jle =>BC_JLOOP -+ |1: -+ |6: -+ } else if (op == BC_IFORL) { -+ | jg >7 -+ |6: -+ | branchPC RD -+ |1: -+ } else { -+ | jle =>BC_JLOOP -+ |1: -+ |6: -+ } -+ |7: -+ | ins_next -+ | -+ |5: // Invert check for negative step. -+ if (vk) { -+ | add RB, dword FOR_IDX; jo <1 -+ | mov dword FOR_IDX, RB -+ } -+ | cmp RB, dword FOR_STOP -+ | mov FOR_TEXT, LJ_TISNUM -+ | mov dword FOR_EXT, RB -+ if (op == BC_FORI) { -+ | jge <7 -+ } else if (op == BC_JFORI) { -+ | branchPC RD -+ | movzx RD, PC_RD -+ | jge =>BC_JLOOP -+ } else if (op == BC_IFORL) { -+ | jl <7 -+ } else { -+ | jge =>BC_JLOOP -+ } -+ | jmp <6 -+ |9: // Fallback to FP variant. -+ } else if (!vk) { -+ | cmp FOR_TIDX, LJ_TISNUM -+ } -+ if (!vk) { -+ | jae ->vmeta_for -+ | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for -+ } else { -+#ifdef LUA_USE_ASSERT -+ | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type -+ | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type -+#endif -+ } -+ | mov RB, FOR_TSTEP // Load type/hiword of for step. -+ if (!vk) { -+ | cmp RB, LJ_TISNUM; jae ->vmeta_for -+ } -+ | movsd xmm0, qword FOR_IDX -+ | movsd xmm1, qword FOR_STOP -+ if (vk) { -+ | addsd xmm0, qword FOR_STEP -+ | movsd qword FOR_IDX, xmm0 -+ | test RB, RB; js >3 -+ } else { -+ | jl >3 -+ } -+ | ucomisd xmm1, xmm0 -+ |1: -+ | movsd qword FOR_EXT, xmm0 -+ if (op == BC_FORI) { -+ |.if DUALNUM -+ | jnb <7 -+ |.else -+ | jnb >2 -+ | branchPC RD -+ |.endif -+ } else if (op == BC_JFORI) { -+ | branchPC RD -+ | movzx RD, PC_RD -+ | jnb =>BC_JLOOP -+ } else if (op == BC_IFORL) { -+ |.if DUALNUM -+ | jb <7 -+ |.else -+ | jb >2 -+ | branchPC RD -+ |.endif -+ } else { -+ | jnb =>BC_JLOOP -+ } -+ |.if DUALNUM -+ | jmp <6 -+ |.else -+ |2: -+ | ins_next -+ |.endif -+ | -+ |3: // Invert comparison if step is negative. -+ | ucomisd xmm0, xmm1 -+ | jmp <1 -+ break; -+ -+ case BC_ITERL: -+ |.if JIT -+ | hotloop RB -+ |.endif -+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. 
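FORI/IFORL above add the step to the index, leave the loop on signed overflow (the jo branch), and invert the stop comparison when the step is negative (the check under label 5). A small self-contained C model of one integer loop step under those assumptions:

#include <stdint.h>

/* Returns 1 if the loop body should run again with the updated index. */
static int for_loop_continues(int32_t *idx, int32_t stop, int32_t step)
{
  int64_t next = (int64_t)*idx + step;
  if (next > INT32_MAX || next < INT32_MIN)
    return 0;                        /* overflow: fall out of the loop */
  *idx = (int32_t)next;
  return step >= 0 ? *idx <= stop    /* positive step: run while idx <= stop */
                   : *idx >= stop;   /* negative step: run while idx >= stop */
}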
-+ break; -+ -+ case BC_JITERL: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_IITERL: -+ | ins_AJ // RA = base, RD = target -+ | lea RA, [BASE+RA*8] -+ | mov RB, [RA+4] -+ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. -+ if (op == BC_JITERL) { -+ | mov [RA-4], RB -+ | mov RB, [RA] -+ | mov [RA-8], RB -+ | jmp =>BC_JLOOP -+ } else { -+ | branchPC RD // Otherwise save control var + branch. -+ | mov RD, [RA] -+ | mov [RA-4], RB -+ | mov [RA-8], RD -+ } -+ |1: -+ | ins_next -+ break; -+ -+ case BC_LOOP: -+ | ins_A // RA = base, RD = target (loop extent) -+ | // Note: RA/RD is only used by trace recorder to determine scope/extent -+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. -+ |.if JIT -+ | hotloop RB -+ |.endif -+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. -+ break; -+ -+ case BC_ILOOP: -+ | ins_A // RA = base, RD = target (loop extent) -+ | ins_next -+ break; -+ -+ case BC_JLOOP: -+ |.if JIT -+ | ins_AD // RA = base (ignored), RD = traceno -+ | mov RA, [DISPATCH+DISPATCH_J(trace)] -+ | mov TRACE:RD, [RA+RD*4] -+ | mov RDa, TRACE:RD->mcode -+ | mov L:RB, SAVE_L -+ | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE -+ | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB -+ | // Save additional callee-save registers only used in compiled code. -+ |.if X64WIN -+ | mov TMPQ, r12 -+ | mov TMPa, r13 -+ | mov CSAVE_4, r14 -+ | mov CSAVE_3, r15 -+ | mov RAa, rsp -+ | sub rsp, 9*16+4*8 -+ | movdqa [RAa], xmm6 -+ | movdqa [RAa-1*16], xmm7 -+ | movdqa [RAa-2*16], xmm8 -+ | movdqa [RAa-3*16], xmm9 -+ | movdqa [RAa-4*16], xmm10 -+ | movdqa [RAa-5*16], xmm11 -+ | movdqa [RAa-6*16], xmm12 -+ | movdqa [RAa-7*16], xmm13 -+ | movdqa [RAa-8*16], xmm14 -+ | movdqa [RAa-9*16], xmm15 -+ |.elif X64 -+ | mov TMPQ, r12 -+ | mov TMPa, r13 -+ | sub rsp, 16 -+ |.endif -+ | jmp RDa -+ |.endif -+ break; -+ -+ case BC_JMP: -+ | ins_AJ // RA = unused, RD = target -+ | branchPC RD -+ | ins_next -+ break; -+ -+ /* -- Function headers -------------------------------------------------- */ -+ -+ /* -+ ** Reminder: A function may be called with func/args above L->maxstack, -+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, -+ ** too. This means all FUNC* ops (including fast functions) must check -+ ** for stack overflow _before_ adding more slots! -+ */ -+ -+ case BC_FUNCF: -+ |.if JIT -+ | hotcall RB -+ |.endif -+ case BC_FUNCV: /* NYI: compiled vararg functions. */ -+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. -+ break; -+ -+ case BC_JFUNCF: -+#if !LJ_HASJIT -+ break; -+#endif -+ case BC_IFUNCF: -+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 -+ | mov KBASE, [PC-4+PC2PROTO(k)] -+ | mov L:RB, SAVE_L -+ | lea RA, [BASE+RA*8] // Top of frame. -+ | cmp RA, L:RB->maxstack -+ | ja ->vm_growstack_f -+ | movzx RA, byte [PC-4+PC2PROTO(numparams)] -+ | cmp NARGS:RD, RA // Check for missing parameters. -+ | jbe >3 -+ |2: -+ if (op == BC_JFUNCF) { -+ | movzx RD, PC_RD -+ | jmp =>BC_JLOOP -+ } else { -+ | ins_next -+ } -+ | -+ |3: // Clear missing parameters. -+ | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL -+ | add NARGS:RD, 1 -+ | cmp NARGS:RD, RA -+ | jbe <3 -+ | jmp <2 -+ break; -+ -+ case BC_JFUNCV: -+#if !LJ_HASJIT -+ break; -+#endif -+ | int3 // NYI: compiled vararg functions -+ break; /* NYI: compiled vararg functions. 
*/ -+ -+ case BC_IFUNCV: -+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 -+ | lea RB, [NARGS:RD*8+FRAME_VARG] -+ | lea RD, [BASE+NARGS:RD*8] -+ | mov LFUNC:KBASE, [BASE-8] -+ | mov [RD-4], RB // Store delta + FRAME_VARG. -+ | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. -+ | mov L:RB, SAVE_L -+ | lea RA, [RD+RA*8] -+ | cmp RA, L:RB->maxstack -+ | ja ->vm_growstack_v // Need to grow stack. -+ | mov RA, BASE -+ | mov BASE, RD -+ | movzx RB, byte [PC-4+PC2PROTO(numparams)] -+ | test RB, RB -+ | jz >2 -+ |1: // Copy fixarg slots up to new frame. -+ | add RA, 8 -+ | cmp RA, BASE -+ | jnb >3 // Less args than parameters? -+ | mov KBASE, [RA-8] -+ | mov [RD], KBASE -+ | mov KBASE, [RA-4] -+ | mov [RD+4], KBASE -+ | add RD, 8 -+ | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). -+ | sub RB, 1 -+ | jnz <1 -+ |2: -+ if (op == BC_JFUNCV) { -+ | movzx RD, PC_RD -+ | jmp =>BC_JLOOP -+ } else { -+ | mov KBASE, [PC-4+PC2PROTO(k)] -+ | ins_next -+ } -+ | -+ |3: // Clear missing parameters. -+ | mov dword [RD+4], LJ_TNIL -+ | add RD, 8 -+ | sub RB, 1 -+ | jnz <3 -+ | jmp <2 -+ break; -+ -+ case BC_FUNCC: -+ case BC_FUNCCW: -+ | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 -+ | mov CFUNC:RB, [BASE-8] -+ | mov KBASEa, CFUNC:RB->f -+ | mov L:RB, SAVE_L -+ | lea RD, [BASE+NARGS:RD*8-8] -+ | mov L:RB->base, BASE -+ | lea RA, [RD+8*LUA_MINSTACK] -+ | cmp RA, L:RB->maxstack -+ | mov L:RB->top, RD -+ if (op == BC_FUNCC) { -+ |.if X64 -+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA. -+ |.else -+ | mov ARG1, L:RB -+ |.endif -+ } else { -+ |.if X64 -+ | mov CARG2, KBASEa -+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA. -+ |.else -+ | mov ARG2, KBASEa -+ | mov ARG1, L:RB -+ |.endif -+ } -+ | ja ->vm_growstack_c // Need to grow stack. -+ | set_vmstate C -+ if (op == BC_FUNCC) { -+ | call KBASEa // (lua_State *L) -+ } else { -+ | // (lua_State *L, lua_CFunction f) -+ | call aword [DISPATCH+DISPATCH_GL(wrapf)] -+ } -+ | // nresults returned in eax (RD). -+ | mov BASE, L:RB->base -+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB -+ | set_vmstate INTERP -+ | lea RA, [BASE+RD*8] -+ | neg RA -+ | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 -+ | mov PC, [BASE-4] // Fetch PC of caller. -+ | jmp ->vm_returnc -+ break; -+ -+ /* ---------------------------------------------------------------------- */ -+ -+ default: -+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); -+ exit(2); -+ break; -+ } -+} -+ -+static int build_backend(BuildCtx *ctx) -+{ -+ int op; -+ dasm_growpc(Dst, BC__MAX); -+ build_subroutines(ctx); -+ |.code_op -+ for (op = 0; op < BC__MAX; op++) -+ build_ins(ctx, (BCOp)op, op); -+ return BC__MAX; -+} -+ -+/* Emit pseudo frame-info for all assembler functions. 
*/ -+static void emit_asm_debug(BuildCtx *ctx) -+{ -+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); -+#if LJ_64 -+#define SZPTR "8" -+#define BSZPTR "3" -+#define REG_SP "0x7" -+#define REG_RA "0x10" -+#else -+#define SZPTR "4" -+#define BSZPTR "2" -+#define REG_SP "0x4" -+#define REG_RA "0x8" -+#endif -+ switch (ctx->mode) { -+ case BUILD_elfasm: -+ fprintf(ctx->fp, "\t.section .debug_frame,"",@progbits\n"); -+ fprintf(ctx->fp, -+ ".Lframe0:\n" -+ "\t.long .LECIE0-.LSCIE0\n" -+ ".LSCIE0:\n" -+ "\t.long 0xffffffff\n" -+ "\t.byte 0x1\n" -+ "\t.string ""\n" -+ "\t.uleb128 0x1\n" -+ "\t.sleb128 -" SZPTR "\n" -+ "\t.byte " REG_RA "\n" -+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" -+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" -+ "\t.align " SZPTR "\n" -+ ".LECIE0:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE0:\n" -+ "\t.long .LEFDE0-.LASFDE0\n" -+ ".LASFDE0:\n" -+ "\t.long .Lframe0\n" -+#if LJ_64 -+ "\t.quad .Lbegin\n" -+ "\t.quad %d\n" -+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ -+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ -+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ -+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ -+#if LJ_NO_UNWIND -+ "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ -+ "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ -+#endif -+#else -+ "\t.long .Lbegin\n" -+ "\t.long %d\n" -+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ -+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ -+ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ -+ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ -+ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ -+#endif -+ "\t.align " SZPTR "\n" -+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); -+#if LJ_HASFFI -+ fprintf(ctx->fp, -+ ".LSFDE1:\n" -+ "\t.long .LEFDE1-.LASFDE1\n" -+ ".LASFDE1:\n" -+ "\t.long .Lframe0\n" -+#if LJ_64 -+ "\t.quad lj_vm_ffi_call\n" -+ "\t.quad %d\n" -+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ -+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ -+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ -+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -+#else -+ "\t.long lj_vm_ffi_call\n" -+ "\t.long %d\n" -+ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ -+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ -+ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ -+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ -+#endif -+ "\t.align " SZPTR "\n" -+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -+#endif -+#if !LJ_NO_UNWIND -+#if (defined(__sun__) && defined(__svr4__)) -+#if LJ_64 -+ fprintf(ctx->fp, "\t.section .eh_frame,"a",@unwind\n"); -+#else -+ fprintf(ctx->fp, "\t.section .eh_frame,"aw",@progbits\n"); -+#endif -+#else -+ fprintf(ctx->fp, "\t.section .eh_frame,"a",@progbits\n"); -+#endif -+ fprintf(ctx->fp, -+ ".Lframe1:\n" -+ "\t.long .LECIE1-.LSCIE1\n" -+ ".LSCIE1:\n" -+ "\t.long 0\n" -+ "\t.byte 0x1\n" -+ "\t.string "zPR"\n" -+ "\t.uleb128 0x1\n" -+ "\t.sleb128 -" SZPTR "\n" -+ "\t.byte " REG_RA "\n" -+ "\t.uleb128 6\n" /* augmentation length */ -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.long lj_err_unwind_dwarf-.\n" -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" -+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" -+ "\t.align " SZPTR "\n" -+ ".LECIE1:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE2:\n" -+ "\t.long .LEFDE2-.LASFDE2\n" -+ ".LASFDE2:\n" -+ "\t.long .LASFDE2-.Lframe1\n" -+ "\t.long .Lbegin-.\n" -+ "\t.long %d\n" -+ 
"\t.uleb128 0\n" /* augmentation length */ -+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ -+#if LJ_64 -+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ -+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ -+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ -+#else -+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ -+ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ -+ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ -+ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ -+#endif -+ "\t.align " SZPTR "\n" -+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); -+#if LJ_HASFFI -+ fprintf(ctx->fp, -+ ".Lframe2:\n" -+ "\t.long .LECIE2-.LSCIE2\n" -+ ".LSCIE2:\n" -+ "\t.long 0\n" -+ "\t.byte 0x1\n" -+ "\t.string "zR"\n" -+ "\t.uleb128 0x1\n" -+ "\t.sleb128 -" SZPTR "\n" -+ "\t.byte " REG_RA "\n" -+ "\t.uleb128 1\n" /* augmentation length */ -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" -+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" -+ "\t.align " SZPTR "\n" -+ ".LECIE2:\n\n"); -+ fprintf(ctx->fp, -+ ".LSFDE3:\n" -+ "\t.long .LEFDE3-.LASFDE3\n" -+ ".LASFDE3:\n" -+ "\t.long .LASFDE3-.Lframe2\n" -+ "\t.long lj_vm_ffi_call-.\n" -+ "\t.long %d\n" -+ "\t.uleb128 0\n" /* augmentation length */ -+#if LJ_64 -+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ -+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ -+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ -+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -+#else -+ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ -+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ -+ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ -+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ -+#endif -+ "\t.align " SZPTR "\n" -+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -+#endif -+#endif -+ break; -+#if !LJ_NO_UNWIND -+ /* Mental note: never let Apple design an assembler. -+ ** Or a linker. Or a plastic case. But I digress. -+ */ -+ case BUILD_machasm: { -+#if LJ_HASFFI -+ int fcsize = 0; -+#endif -+ int i; -+ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); -+ fprintf(ctx->fp, -+ "EH_frame1:\n" -+ "\t.set L$set$x,LECIEX-LSCIEX\n" -+ "\t.long L$set$x\n" -+ "LSCIEX:\n" -+ "\t.long 0\n" -+ "\t.byte 0x1\n" -+ "\t.ascii "zPR\0"\n" -+ "\t.byte 0x1\n" -+ "\t.byte 128-" SZPTR "\n" -+ "\t.byte " REG_RA "\n" -+ "\t.byte 6\n" /* augmentation length */ -+ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ -+#if LJ_64 -+ "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" -+#else -+ "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n" -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. 
*/ -+#endif -+ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" -+ "\t.align " BSZPTR "\n" -+ "LECIEX:\n\n"); -+ for (i = 0; i < ctx->nsym; i++) { -+ const char *name = ctx->sym[i].name; -+ int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; -+ if (size == 0) continue; -+#if LJ_HASFFI -+ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } -+#endif -+ fprintf(ctx->fp, -+ "%s.eh:\n" -+ "LSFDE%d:\n" -+ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" -+ "\t.long L$set$%d\n" -+ "LASFDE%d:\n" -+ "\t.long LASFDE%d-EH_frame1\n" -+ "\t.long %s-.\n" -+ "\t.long %d\n" -+ "\t.byte 0\n" /* augmentation length */ -+ "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ -+#if LJ_64 -+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ -+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ -+ "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ -+ "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ -+#else -+ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ -+ "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */ -+ "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */ -+ "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */ -+#endif -+ "\t.align " BSZPTR "\n" -+ "LEFDE%d:\n\n", -+ name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); -+ } -+#if LJ_HASFFI -+ if (fcsize) { -+ fprintf(ctx->fp, -+ "EH_frame2:\n" -+ "\t.set L$set$y,LECIEY-LSCIEY\n" -+ "\t.long L$set$y\n" -+ "LSCIEY:\n" -+ "\t.long 0\n" -+ "\t.byte 0x1\n" -+ "\t.ascii "zR\0"\n" -+ "\t.byte 0x1\n" -+ "\t.byte 128-" SZPTR "\n" -+ "\t.byte " REG_RA "\n" -+ "\t.byte 1\n" /* augmentation length */ -+#if LJ_64 -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" -+#else -+ "\t.byte 0x1b\n" /* pcrel|sdata4 */ -+ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */ -+#endif -+ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" -+ "\t.align " BSZPTR "\n" -+ "LECIEY:\n\n"); -+ fprintf(ctx->fp, -+ "_lj_vm_ffi_call.eh:\n" -+ "LSFDEY:\n" -+ "\t.set L$set$yy,LEFDEY-LASFDEY\n" -+ "\t.long L$set$yy\n" -+ "LASFDEY:\n" -+ "\t.long LASFDEY-EH_frame2\n" -+ "\t.long _lj_vm_ffi_call-.\n" -+ "\t.long %d\n" -+ "\t.byte 0\n" /* augmentation length */ -+#if LJ_64 -+ "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ -+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ -+ "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ -+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ -+#else -+ "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */ -+ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ -+ "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */ -+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */ -+#endif -+ "\t.align " BSZPTR "\n" -+ "LEFDEY:\n\n", fcsize); -+ } -+#endif -+#if !LJ_64 -+ fprintf(ctx->fp, -+ "\t.non_lazy_symbol_pointer\n" -+ "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" -+ ".indirect_symbol _lj_err_unwind_dwarf\n" -+ ".long 0\n\n"); -+ fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); -+ { -+ const char *const *xn; -+ for (xn = ctx->extnames; *xn; xn++) -+ if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) -+ fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii "\364\364\364\364\364"\n", *xn, *xn); -+ } -+#endif -+ fprintf(ctx->fp, ".subsections_via_symbols\n"); -+ } -+ break; -+#endif -+ default: /* Difficult for other modes. */ -+ break; -+ } -+} --- -2.20.1 - - -From 60d18a8d74c593fa689880a228c5e8c13fc33c9e Mon Sep 17 00:00:00 2001 -From: Michael Munday munday@ca.ibm.com -Date: Tue, 15 Nov 2016 13:50:15 -0500 -Subject: [PATCH 011/247] Fix some s390x declarations. 
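The 0x1b and 0x9b bytes in the .eh_frame CIEs emitted above are DWARF pointer encodings: pcrel|sdata4 for code pointers, plus the indirect bit for the Mach-O personality reference. A tiny standalone snippet that decodes such an encoding byte; the constants follow the usual DWARF EH definitions and are not defined anywhere in this patch.

#include <stdint.h>
#include <stdio.h>

enum {
  DW_EH_PE_sdata4   = 0x0b,  /* signed 4-byte field                  */
  DW_EH_PE_pcrel    = 0x10,  /* relative to the address of the field */
  DW_EH_PE_indirect = 0x80   /* field holds the address of the value */
};

int main(void)
{
  uint8_t enc = 0x9b;  /* the personality encoding used in the Mach-O CIE */
  printf("sdata4=%d pcrel=%d indirect=%d\n",
         (enc & 0x0f) == DW_EH_PE_sdata4,
         (enc & 0x70) == DW_EH_PE_pcrel,
         (enc & DW_EH_PE_indirect) != 0);
  return 0;
}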
-
-s/S390x/S390X/
----
- Makefile | 2 +-
- src/Makefile | 3 ++-
- src/lj_arch.h | 29 +++++++++++++++++++----------
- 3 files changed, 22 insertions(+), 12 deletions(-)
-
-diff --git a/Makefile b/Makefile
-index 923bf72..fc8ed61 100644
---- a/Makefile
-+++ b/Makefile
-@@ -88,7 +88,7 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
- FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
- dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
- dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
-- dis_mips64.lua dis_mips64el.lua vmdef.lua
-+ dis_mips64.lua dis_mips64el.lua dis_s390x.lua vmdef.lua
-
- ifeq (,$(findstring Windows,$(OS)))
- HOST_SYS:= $(shell uname -s)
-diff --git a/src/Makefile b/src/Makefile
-index 2bf15d2..d0f160a 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -245,7 +245,7 @@ else
- ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm
- else
--ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH)))
-+ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= s390x
- else
- ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
-@@ -279,6 +279,7 @@ endif
- endif
- endif
- endif
-+endif
-
- ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
- TARGET_SYS= PS3
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index c781eb1..32c706f 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -29,7 +29,8 @@
- #define LUAJIT_ARCH_mips32 6
- #define LUAJIT_ARCH_MIPS64 7
- #define LUAJIT_ARCH_mips64 7
--#define LUAJIT_ARCH_S390x 8
-+#define LUAJIT_ARCH_S390X 8
-+#define LUAJIT_ARCH_s390x 8
-
- /* Target OS. */
- #define LUAJIT_OS_OTHER 0
-@@ -50,8 +51,8 @@
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM
- #elif defined(__aarch64__)
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
--#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x) || defined(S390x)
--#define LUAJIT_TARGET LUAJIT_ARCH_S390x
-+#elif defined(__s390x__) || defined(__s390x)
-+#define LUAJIT_TARGET LUAJIT_ARCH_S390X
- #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
- #define LUAJIT_TARGET LUAJIT_ARCH_PPC
- #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
-@@ -244,13 +245,6 @@
-
- #define LJ_ARCH_VERSION 80
-
--#elif LUAJIT_TARGET == LUAJIT_ARCH_S390
arch-excludes@lists.fedoraproject.org