The package rpms/luajit.git has added or updated architecture-specific content in its
spec file (ExclusiveArch/ExcludeArch or %ifarch/%ifnarch) in commit(s):
https://src.fedoraproject.org/cgit/rpms/luajit.git/commit/?id=3bc1e4725fb....
Change:
+ExclusiveArch: %{arm} %{ix86} x86_64 %{mips} aarch64
Thanks.
Full change:
============
commit ee6c7ab93f1b70e0297868d285935cae3aea43b9
Author: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
Date: Mon Nov 29 18:35:24 2021 +0530
Update dates in changelog for F35
diff --git a/luajit.spec b/luajit.spec
index 49540fc..4d1d67a 100644
--- a/luajit.spec
+++ b/luajit.spec
@@ -94,10 +94,10 @@ make check || true
%{_libdir}/pkgconfig/%{name}.pc
%changelog
-* Tue Oct 26 2021 Siddhesh Poyarekar <siddhesh(a)gotplt.org> - 2.1.0-0.22beta3
+* Mon Nov 29 2021 Siddhesh Poyarekar <siddhesh(a)gotplt.org> - 2.1.0-0.22beta3
- Bring back the earlier code to do ln -sf.
-* Tue Oct 12 2021 Andreas Schneider <asn(a)redhat.com> - 2.1.0-0.21beta3
+* Mon Nov 29 2021 Andreas Schneider <asn(a)redhat.com> - 2.1.0-0.21beta3
- Rebase onto https://github.com/LuaJIT/LuaJIT/tree/v2.1
- Dropped support for ppc64le
- Dropped support for s390x
commit 74fc89321e0bd683e944f505dde072fd73d8a2ce
Author: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
Date: Tue Oct 26 21:38:28 2021 +0530
Bring back the earlier code to do ln -sf
diff --git a/luajit.spec b/luajit.spec
index 48300de..49540fc 100644
--- a/luajit.spec
+++ b/luajit.spec
@@ -4,7 +4,7 @@ Name: luajit
Version: 2.1.0
%global apiver %(v=%{version}; echo ${v%.${v#[0-9].[0-9].}})
%global srcver %{version}%{?rctag:-%{rctag}}
-Release: 0.21%{?rctag:%{rctag}}%{?dist}
+Release: 0.22%{?rctag:%{rctag}}%{?dist}
Summary: Just-In-Time Compiler for Lua
License: MIT
URL: http://luajit.org/
@@ -60,14 +60,17 @@ make amalg Q= E=@: PREFIX=%{_prefix} TARGET_STRIP=: \
%make_install PREFIX=%{_prefix} \
MULTILIB=%{_lib}
-ln -sf luajit-2.1.0-beta3 %{buildroot}%{_bindir}/luajit
-
rm -rf _tmp_html ; mkdir _tmp_html
cp -a doc _tmp_html/html
# Remove static .a
find %{buildroot} -type f -name *.a -delete -print
+%if %{defined rctag}
+# Development versions are not doing such symlink
+ln -s %{name}-%{srcver} %{buildroot}%{_bindir}/%{name}
+%endif
+
%ldconfig_scriptlets
%check
@@ -91,6 +94,9 @@ make check || true
%{_libdir}/pkgconfig/%{name}.pc
%changelog
+* Tue Oct 26 2021 Siddhesh Poyarekar <siddhesh(a)gotplt.org> - 2.1.0-0.22beta3
+- Bring back the earlier code to do ln -sf.
+
* Tue Oct 12 2021 Andreas Schneider <asn(a)redhat.com> - 2.1.0-0.21beta3
- Rebase onto https://github.com/LuaJIT/LuaJIT/tree/v2.1
- Dropped support for ppc64le
commit 3bc1e4725fb68d9b8b5a528673b143d437084948
Author: Andreas Schneider <asn(a)cryptomilk.org>
Date: Tue Oct 12 22:14:42 2021 +0200
Apply patches from https://github.com/LuaJIT/LuaJIT/
diff --git a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch
b/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch
deleted file mode 100644
index 16aca3b..0000000
--- a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-commit 31afda31814ec02f82ffb0519bee496c87eeaa89
-Merge: 8271c64 1c89933
-Author: Mike Pall <mike>
-Date: Tue May 9 21:01:23 2017 +0200
-
- Merge branch 'master' into v2.1
-
-commit 1c89933f129dde76944336c6bfd05297b8d67730
-Author: Mike Pall <mike>
-Date: Tue May 9 20:59:37 2017 +0200
-
- Fix LJ_MAX_JSLOTS assertion in rec_check_slots().
-
- Thanks to Yichun Zhang.
-
-diff --git a/src/lj_record.c b/src/lj_record.c
-index 9d0469c..c2d0274 100644
---- a/src/lj_record.c
-+++ b/src/lj_record.c
-@@ -87,9 +87,9 @@ static void rec_check_slots(jit_State *J)
- BCReg s, nslots = J->baseslot + J->maxslot;
- int32_t depth = 0;
- cTValue *base = J->L->base - J->baseslot;
-- lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot <
LJ_MAX_JSLOTS);
-+ lua_assert(J->baseslot >= 1+LJ_FR2);
- lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] &
TREF_FRAME));
-- lua_assert(nslots < LJ_MAX_JSLOTS);
-+ lua_assert(nslots <= LJ_MAX_JSLOTS);
- for (s = 0; s < nslots; s++) {
- TRef tr = J->slot[s];
- if (tr) {
diff --git a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch
b/0002-Add-missing-LJ_MAX_JSLOTS-check.patch
deleted file mode 100644
index 70ccfd5..0000000
--- a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-commit 6259c0b909a8c00fabe3c7e6bd81150ee08cbf9f
-Merge: 31afda3 630ff31
-Author: Mike Pall <mike>
-Date: Wed May 17 17:38:53 2017 +0200
-
- Merge branch 'master' into v2.1
-
-commit 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c
-Author: Mike Pall <mike>
-Date: Wed May 17 17:37:35 2017 +0200
-
- Add missing LJ_MAX_JSLOTS check.
-
- Thanks to Yichun Zhang.
-
-From 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 17 May 2017 17:37:35 +0200
-Subject: [PATCH 02/72] Add missing LJ_MAX_JSLOTS check.
-
-Thanks to Yichun Zhang.
----
- src/lj_record.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/lj_record.c b/src/lj_record.c
-index cecacd2..bc4e8a6 100644
---- a/src/lj_record.c
-+++ b/src/lj_record.c
-@@ -633,6 +633,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
- J->framedepth++;
- J->base += func+1+LJ_FR2;
- J->baseslot += func+1+LJ_FR2;
-+ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
-+ lj_trace_err(J, LJ_TRERR_STACKOV);
- }
-
- /* Record tail call. */
---
-2.20.1
diff --git a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch
b/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch
deleted file mode 100644
index 9d8300f..0000000
--- a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-From 7381b620358c2561e8690149f1d25828fdad6675 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 7 Jun 2017 19:16:22 +0200
-Subject: [PATCH 03/72] MIPS: Use precise search for exit jump patching.
-
-Contributed by Djordje Kovacevic and Stefan Pejic.
----
- src/lj_asm_mips.h | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index 03270cc..d0a1ca5 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -1933,7 +1933,11 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno,
MCode *target)
- MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
- for (p++; p < pe; p++) {
- if (*p == exitload) { /* Look for load of exit number. */
-- if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */
-+ /* Look for exitstub branch. Yes, this covers all used branch variants. */
-+ if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
-+ ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
-+ (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
-+ (p[-1] & 0xffe00000u) == MIPSI_BC1F)) {
- ptrdiff_t delta = target - p;
- if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
- patchbranch:
---
-2.20.1
-
diff --git a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch
b/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch
deleted file mode 100644
index 4da6b4d..0000000
--- a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From c7c3c4da432ddb543d4b0a9abbb245f11b26afd0 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 7 Jun 2017 19:36:46 +0200
-Subject: [PATCH 04/72] MIPS: Fix handling of spare long-range jump slots.
-
-Contributed by Djordje Kovacevic and Stefan Pejic.
----
- src/lj_asm_mips.h | 9 +++++----
- src/lj_jit.h | 6 ++++++
- src/lj_mcode.c | 6 ------
- 3 files changed, 11 insertions(+), 10 deletions(-)
-
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index d0a1ca5..7631190 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -65,10 +65,9 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
- static void asm_sparejump_setup(ASMState *as)
- {
- MCode *mxp = as->mcbot;
-- /* Assumes sizeof(MCLink) == 8. */
-- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) {
-+ if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) {
- lua_assert(MIPSI_NOP == 0);
-- memset(mxp+2, 0, MIPS_SPAREJUMP*8);
-+ memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
- mxp += MIPS_SPAREJUMP*2;
- lua_assert(mxp < as->mctop);
- lj_mcode_sync(as->mcbot, mxp);
-@@ -1947,7 +1946,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno,
MCode *target)
- if (!cstart) cstart = p-1;
- } else { /* Branch out of range. Use spare jump slot in mcarea. */
- int i;
-- for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) {
-+ for (i = (int)(sizeof(MCLink)/sizeof(MCode));
-+ i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2);
-+ i += 2) {
- if (mcarea[i] == tjump) {
- delta = mcarea+i - p;
- goto patchbranch;
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index a2e8fd9..3f38d28 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -155,6 +155,12 @@ typedef uint8_t MCode;
- typedef uint32_t MCode;
- #endif
-
-+/* Linked list of MCode areas. */
-+typedef struct MCLink {
-+ MCode *next; /* Next area. */
-+ size_t size; /* Size of current area. */
-+} MCLink;
-+
- /* Stack snapshot header. */
- typedef struct SnapShot {
- uint16_t mapofs; /* Offset into snapshot map. */
-diff --git a/src/lj_mcode.c b/src/lj_mcode.c
-index f0a1f69..5ea89f6 100644
---- a/src/lj_mcode.c
-+++ b/src/lj_mcode.c
-@@ -272,12 +272,6 @@ static void *mcode_alloc(jit_State *J, size_t sz)
-
- /* -- MCode area management ----------------------------------------------- */
-
--/* Linked list of MCode areas. */
--typedef struct MCLink {
-- MCode *next; /* Next area. */
-- size_t size; /* Size of current area. */
--} MCLink;
--
- /* Allocate a new MCode area. */
- static void mcode_allocarea(jit_State *J)
- {
---
-2.20.1
-
diff --git a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch
b/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch
deleted file mode 100644
index dda4ae2..0000000
--- a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch
+++ /dev/null
@@ -1,982 +0,0 @@
-From a057a07ab702e225e21848d4f918886c5b0ac06b Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 7 Jun 2017 23:56:54 +0200
-Subject: [PATCH 05/72] MIPS64: Add soft-float support to JIT compiler backend.
-
-Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-Sponsored by Cisco Systems, Inc.
----
- src/lj_arch.h | 4 +-
- src/lj_asm.c | 8 +-
- src/lj_asm_mips.h | 217 +++++++++++++++++++++++++++++++++++++--------
- src/lj_crecord.c | 4 +-
- src/lj_emit_mips.h | 2 +
- src/lj_ffrecord.c | 2 +-
- src/lj_ircall.h | 43 ++++++---
- src/lj_iropt.h | 2 +-
- src/lj_jit.h | 4 +-
- src/lj_obj.h | 3 +
- src/lj_opt_split.c | 2 +-
- src/lj_snap.c | 21 +++--
- src/vm_mips64.dasc | 49 ++++++++++
- 13 files changed, 286 insertions(+), 75 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index c8d7138..b770564 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -337,9 +337,6 @@
- #define LJ_ARCH_BITS 32
- #define LJ_TARGET_MIPS32 1
- #else
--#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU
--#define LJ_ARCH_NOJIT 1 /* NYI */
--#endif
- #define LJ_ARCH_BITS 64
- #define LJ_TARGET_MIPS64 1
- #define LJ_TARGET_GC64 1
-@@ -512,6 +509,7 @@
- #define LJ_ABI_SOFTFP 0
- #endif
- #define LJ_SOFTFP (!LJ_ARCH_HASFPU)
-+#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
-
- #if LJ_ARCH_ENDIAN == LUAJIT_BE
- #define LJ_LE 0
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index c2cf5a9..bed2268 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -338,7 +338,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
- ra_modified(as, r);
- ir->r = RID_INIT; /* Do not keep any hint. */
- RA_DBGX((as, "remat $i $r", ir, r));
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (ir->o == IR_KNUM) {
- emit_loadk64(as, r, ir);
- } else
-@@ -1305,7 +1305,7 @@ static void asm_call(ASMState *as, IRIns *ir)
- asm_gencall(as, ci, args);
- }
-
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
- {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-@@ -1652,10 +1652,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
- case IR_MUL: asm_mul(as, ir); break;
- case IR_MOD: asm_mod(as, ir); break;
- case IR_NEG: asm_neg(as, ir); break;
--#if LJ_SOFTFP
-+#if LJ_SOFTFP32
- case IR_DIV: case IR_POW: case IR_ABS:
- case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-- lua_assert(0); /* Unused for LJ_SOFTFP. */
-+ lua_assert(0); /* Unused for LJ_SOFTFP32. */
- break;
- #else
- case IR_DIV: asm_div(as, ir); break;
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index 05af3d0..1406a87 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -290,7 +290,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef
*args)
- {
- ra_leftov(as, gpr, ref);
- gpr++;
--#if LJ_64
-+#if LJ_64 && !LJ_SOFTFP
- fpr++;
- #endif
- }
-@@ -301,7 +301,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef
*args)
- emit_spstore(as, ir, r, ofs);
- ofs += irt_isnum(ir->t) ? 8 : 4;
- #else
-- emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR)
&& !irt_is64(ir->t)) ? 4 : 0));
-+ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) &&
!irt_is64(ir->t)) ? 4 : 0));
- ofs += 8;
- #endif
- }
-@@ -312,7 +312,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef
*args)
- #endif
- if (gpr <= REGARG_LASTGPR) {
- gpr++;
--#if LJ_64
-+#if LJ_64 && !LJ_SOFTFP
- fpr++;
- #endif
- } else {
-@@ -461,12 +461,36 @@ static void asm_tobit(ASMState *as, IRIns *ir)
- emit_tg(as, MIPSI_MFC1, dest, tmp);
- emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
- }
-+#elif LJ_64 /* && LJ_SOFTFP */
-+static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
-+{
-+ /* The modified regs must match with the *.dasc implementation. */
-+ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
-+ RID2RSET(RID_R1)|RID2RSET(RID_R12);
-+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
-+ ra_evictset(as, drop);
-+ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
-+ ra_destreg(as, ir, RID_RET);
-+ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO);
-+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0);
-+ if (r == RID_NONE)
-+ ra_leftov(as, REGARG_FIRSTGPR, ir->op1);
-+ else if (r != REGARG_FIRSTGPR)
-+ emit_move(as, REGARG_FIRSTGPR, r);
-+}
-+
-+static void asm_tobit(ASMState *as, IRIns *ir)
-+{
-+ Reg dest = ra_dest(as, ir, RSET_GPR);
-+ emit_dta(as, MIPSI_SLL, dest, dest, 0);
-+ asm_callid(as, ir, IRCALL_lj_vm_tobit);
-+}
- #endif
-
- static void asm_conv(ASMState *as, IRIns *ir)
- {
- IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- int stfp = (st == IRT_NUM || st == IRT_FLOAT);
- #endif
- #if LJ_64
-@@ -477,12 +501,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
- lua_assert(!(irt_isint64(ir->t) ||
- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
- #endif
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP32
- /* FP conversions are handled by SPLIT. */
- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
- /* Can't check for same types: SPLIT uses CONV
int.int + BXOR for sfp NEG. */
- #else
- lua_assert(irt_type(ir->t) != st);
-+#if !LJ_SOFTFP
- if (irt_isfp(ir->t)) {
- Reg dest = ra_dest(as, ir, RSET_FPR);
- if (stfp) { /* FP to FP conversion. */
-@@ -608,6 +633,42 @@ static void asm_conv(ASMState *as, IRIns *ir)
- }
- }
- } else
-+#else
-+ if (irt_isfp(ir->t)) {
-+#if LJ_64 && LJ_HASFFI
-+ if (stfp) { /* FP to FP conversion. */
-+ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d :
-+ IRCALL_softfp_d2f);
-+ } else { /* Integer to FP conversion. */
-+ IRCallID cid = ((IRT_IS64 >> st) & 1) ?
-+ (irt_isnum(ir->t) ?
-+ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) :
-+ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) :
-+ (irt_isnum(ir->t) ?
-+ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) :
-+ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f));
-+ asm_callid(as, ir, cid);
-+ }
-+#else
-+ asm_callid(as, ir, IRCALL_softfp_i2d);
-+#endif
-+ } else if (stfp) { /* FP to integer conversion. */
-+ if (irt_isguard(ir->t)) {
-+ /* Checked conversions are only supported from number to int. */
-+ lua_assert(irt_isint(ir->t) && st == IRT_NUM);
-+ asm_tointg(as, ir, RID_NONE);
-+ } else {
-+ IRCallID cid = irt_is64(ir->t) ?
-+ ((st == IRT_NUM) ?
-+ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
-+ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
-+ ((st == IRT_NUM) ?
-+ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
-+ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
-+ asm_callid(as, ir, cid);
-+ }
-+ } else
-+#endif
- #endif
- {
- Reg dest = ra_dest(as, ir, RSET_GPR);
-@@ -665,7 +726,7 @@ static void asm_strto(ASMState *as, IRIns *ir)
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
- IRRef args[2];
- int32_t ofs = 0;
--#if LJ_SOFTFP
-+#if LJ_SOFTFP32
- ra_evictset(as, RSET_SCRATCH);
- if (ra_used(ir)) {
- if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
-@@ -806,7 +867,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- MCLabel l_end, l_loop, l_next;
-
- rset_clear(allow, tab);
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP32
- if (!isk) {
- key = ra_alloc1(as, refkey, allow);
- rset_clear(allow, key);
-@@ -826,7 +887,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- }
- }
- #else
-- if (irt_isnum(kt)) {
-+ if (!LJ_SOFTFP && irt_isnum(kt)) {
- key = ra_alloc1(as, refkey, RSET_FPR);
- tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
- } else if (!irt_ispri(kt)) {
-@@ -882,6 +943,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
- emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
-+ } else if (LJ_SOFTFP && irt_isnum(kt)) {
-+ emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
-+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
- } else if (irt_isaddr(kt)) {
- Reg refk = tmp2;
- if (isk) {
-@@ -960,7 +1024,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
- if (irt_isnum(kt)) {
- emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
-- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
-+ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0);
- emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
- #if !LJ_SOFTFP
- emit_tg(as, MIPSI_DMFC1, tmp1, key);
-@@ -1123,7 +1187,7 @@ static MIPSIns asm_fxloadins(IRIns *ir)
- case IRT_U8: return MIPSI_LBU;
- case IRT_I16: return MIPSI_LH;
- case IRT_U16: return MIPSI_LHU;
-- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1;
- case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
- default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
- }
-@@ -1134,7 +1198,7 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
- switch (irt_type(ir->t)) {
- case IRT_I8: case IRT_U8: return MIPSI_SB;
- case IRT_I16: case IRT_U16: return MIPSI_SH;
-- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1;
- case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
- default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
- }
-@@ -1199,7 +1263,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
-
- static void asm_ahuvload(ASMState *as, IRIns *ir)
- {
-- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
-+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
- Reg dest = RID_NONE, type = RID_TMP, idx;
- RegSet allow = RSET_GPR;
- int32_t ofs = 0;
-@@ -1212,7 +1276,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
- }
- }
- if (ra_used(ir)) {
-- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
- irt_isint(ir->t) || irt_isaddr(ir->t));
- dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
-@@ -1261,10 +1325,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
- int32_t ofs = 0;
- if (ir->r == RID_SINK)
- return;
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-- src = ra_alloc1(as, ir->op2, RSET_FPR);
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-- emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
-+ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs);
- } else {
- #if LJ_32
- if (!irt_ispri(ir->t)) {
-@@ -1312,7 +1376,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
- IRType1 t = ir->t;
- #if LJ_32
- int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
-- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
-+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
- if (hiop)
- t.irt = IRT_NUM;
- #else
-@@ -1320,7 +1384,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
- #endif
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP32
- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
- if (hiop && ra_used(ir+1)) {
- type = ra_dest(as, ir+1, allow);
-@@ -1328,29 +1392,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
- }
- #else
- if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) &&
irt_isint(t)) {
-- dest = ra_scratch(as, RSET_FPR);
-+ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR);
- asm_tointg(as, ir, dest);
- t.irt = IRT_NUM; /* Continue with a regular number type check. */
- } else
- #endif
- if (ra_used(ir)) {
-- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
- irt_isint(ir->t) || irt_isaddr(ir->t));
- dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
- base = ra_alloc1(as, REF_BASE, allow);
- rset_clear(allow, base);
-- if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
-+ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) {
- if (irt_isint(t)) {
-- Reg tmp = ra_scratch(as, RSET_FPR);
-+ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
-+#if LJ_SOFTFP
-+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
-+ ra_destreg(as, ir, RID_RET);
-+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0);
-+ if (tmp != REGARG_FIRSTGPR)
-+ emit_move(as, REGARG_FIRSTGPR, tmp);
-+#else
- emit_tg(as, MIPSI_MFC1, dest, tmp);
- emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
-+#endif
- dest = tmp;
- t.irt = IRT_NUM; /* Check for original type. */
- } else {
- Reg tmp = ra_scratch(as, RSET_GPR);
-+#if LJ_SOFTFP
-+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
-+ ra_destreg(as, ir, RID_RET);
-+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0);
-+ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0);
-+#else
- emit_fg(as, MIPSI_CVT_D_W, dest, dest);
- emit_tg(as, MIPSI_MTC1, tmp, dest);
-+#endif
- dest = tmp;
- t.irt = IRT_INT; /* Check for original type. */
- }
-@@ -1399,7 +1478,7 @@ dotypecheck:
- if (irt_isnum(t)) {
- asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
- emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
-- if (ra_hasreg(dest))
-+ if (!LJ_SOFTFP && ra_hasreg(dest))
- emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
- } else {
- asm_guard(as, MIPSI_BNE, RID_TMP,
-@@ -1409,7 +1488,7 @@ dotypecheck:
- }
- emit_tsi(as, MIPSI_LD, type, base, ofs);
- } else if (ra_hasreg(dest)) {
-- if (irt_isnum(t))
-+ if (!LJ_SOFTFP && irt_isnum(t))
- emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
- else
- emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
-@@ -1548,26 +1627,40 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
- Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
- emit_fg(as, mi, dest, left);
- }
-+#endif
-
-+#if !LJ_SOFTFP32
- static void asm_fpmath(ASMState *as, IRIns *ir)
- {
- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
- return;
-+#if !LJ_SOFTFP
- if (ir->op2 <= IRFPM_TRUNC)
- asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
- else if (ir->op2 == IRFPM_SQRT)
- asm_fpunary(as, ir, MIPSI_SQRT_D);
- else
-+#endif
- asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
- }
- #endif
-
-+#if !LJ_SOFTFP
-+#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D)
-+#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D)
-+#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D)
-+#elif LJ_64 /* && LJ_SOFTFP */
-+#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add)
-+#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub)
-+#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul)
-+#endif
-+
- static void asm_add(ASMState *as, IRIns *ir)
- {
- IRType1 t = ir->t;
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (irt_isnum(t)) {
-- asm_fparith(as, ir, MIPSI_ADD_D);
-+ asm_fpadd(as, ir);
- } else
- #endif
- {
-@@ -1589,9 +1682,9 @@ static void asm_add(ASMState *as, IRIns *ir)
-
- static void asm_sub(ASMState *as, IRIns *ir)
- {
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (irt_isnum(ir->t)) {
-- asm_fparith(as, ir, MIPSI_SUB_D);
-+ asm_fpsub(as, ir);
- } else
- #endif
- {
-@@ -1605,9 +1698,9 @@ static void asm_sub(ASMState *as, IRIns *ir)
-
- static void asm_mul(ASMState *as, IRIns *ir)
- {
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (irt_isnum(ir->t)) {
-- asm_fparith(as, ir, MIPSI_MUL_D);
-+ asm_fpmul(as, ir);
- } else
- #endif
- {
-@@ -1634,7 +1727,7 @@ static void asm_mod(ASMState *as, IRIns *ir)
- asm_callid(as, ir, IRCALL_lj_vm_modi);
- }
-
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- static void asm_pow(ASMState *as, IRIns *ir)
- {
- #if LJ_64 && LJ_HASFFI
-@@ -1654,7 +1747,11 @@ static void asm_div(ASMState *as, IRIns *ir)
- IRCALL_lj_carith_divu64);
- else
- #endif
-+#if !LJ_SOFTFP
- asm_fparith(as, ir, MIPSI_DIV_D);
-+#else
-+ asm_callid(as, ir, IRCALL_softfp_div);
-+#endif
- }
- #endif
-
-@@ -1664,6 +1761,13 @@ static void asm_neg(ASMState *as, IRIns *ir)
- if (irt_isnum(ir->t)) {
- asm_fpunary(as, ir, MIPSI_NEG_D);
- } else
-+#elif LJ_64 /* && LJ_SOFTFP */
-+ if (irt_isnum(ir->t)) {
-+ Reg dest = ra_dest(as, ir, RSET_GPR);
-+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-+ emit_dst(as, MIPSI_XOR, dest, left,
-+ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest)));
-+ } else
- #endif
- {
- Reg dest = ra_dest(as, ir, RSET_GPR);
-@@ -1673,7 +1777,17 @@ static void asm_neg(ASMState *as, IRIns *ir)
- }
- }
-
-+#if !LJ_SOFTFP
- #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
-+#elif LJ_64 /* && LJ_SOFTFP */
-+static void asm_abs(ASMState *as, IRIns *ir)
-+{
-+ Reg dest = ra_dest(as, ir, RSET_GPR);
-+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
-+ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0);
-+}
-+#endif
-+
- #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
- #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
-
-@@ -1918,15 +2032,21 @@ static void asm_bror(ASMState *as, IRIns *ir)
- }
- }
-
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP
- static void asm_sfpmin_max(ASMState *as, IRIns *ir)
- {
- CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin :
IRCALL_lj_vm_sfmax];
-+#if LJ_64
-+ IRRef args[2];
-+ args[0] = ir->op1;
-+ args[1] = ir->op2;
-+#else
- IRRef args[4];
- args[0^LJ_BE] = ir->op1;
- args[1^LJ_BE] = (ir+1)->op1;
- args[2^LJ_BE] = ir->op2;
- args[3^LJ_BE] = (ir+1)->op2;
-+#endif
- asm_setupresult(as, ir, &ci);
- emit_call(as, (void *)ci.func, 0);
- ci.func = NULL;
-@@ -1936,7 +2056,10 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir)
-
- static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
- {
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ asm_sfpmin_max(as, ir);
-+#else
- Reg dest = ra_dest(as, ir, RSET_FPR);
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
- right = (left >> 8); left &= 255;
-@@ -1947,6 +2070,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
- if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
- }
- emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
-+#endif
- } else {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg right, left = ra_alloc2(as, ir, RSET_GPR);
-@@ -1967,18 +2091,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
-
- /* -- Comparisons --------------------------------------------------------- */
-
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP
- /* SFP comparisons. */
- static void asm_sfpcomp(ASMState *as, IRIns *ir)
- {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
- RegSet drop = RSET_SCRATCH;
- Reg r;
-+#if LJ_64
-+ IRRef args[2];
-+ args[0] = ir->op1;
-+ args[1] = ir->op2;
-+#else
- IRRef args[4];
- args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
- args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
-+#endif
-
-- for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
-+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) {
- if (!rset_test(as->freeset, r) &&
- regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
- rset_clear(drop, r);
-@@ -2032,11 +2162,15 @@ static void asm_comp(ASMState *as, IRIns *ir)
- {
- /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
- IROp op = ir->o;
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ asm_sfpcomp(as, ir);
-+#else
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
- right = (left >> 8); left &= 255;
- asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
- emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
-+#endif
- } else {
- Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
- if (op == IR_ABC) op = IR_UGT;
-@@ -2068,9 +2202,13 @@ static void asm_equal(ASMState *as, IRIns *ir)
- Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
- RSET_FPR : RSET_GPR);
- right = (left >> 8); left &= 255;
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ asm_sfpcomp(as, ir);
-+#else
- asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
- emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
-+#endif
- } else {
- asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
- }
-@@ -2263,7 +2401,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if ((sn & SNAP_NORESTORE))
- continue;
- if (irt_isnum(ir->t)) {
--#if LJ_SOFTFP
-+#if LJ_SOFTFP32
- Reg tmp;
- RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
-@@ -2272,6 +2410,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
- tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
- emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
-+#elif LJ_SOFTFP /* && LJ_64 */
-+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
-+ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs);
- #else
- Reg src = ra_alloc1(as, ref, RSET_FPR);
- emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
-diff --git a/src/lj_crecord.c b/src/lj_crecord.c
-index e32ae23..fd59e28 100644
---- a/src/lj_crecord.c
-+++ b/src/lj_crecord.c
-@@ -212,7 +212,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp,
- ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0);
- ml[i].trofs = trofs;
- i++;
-- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1;
-+ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1;
- if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */
- rwin = 0;
- for ( ; j < i; j++) {
-@@ -1130,7 +1130,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
- else
- tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
- }
-- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4)
{
-+ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size >
4) {
- lj_needsplit(J);
- }
- #if LJ_TARGET_X86
-diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
-index 8a9ee24..bb6593a 100644
---- a/src/lj_emit_mips.h
-+++ b/src/lj_emit_mips.h
-@@ -12,6 +12,8 @@ static intptr_t get_k64val(IRIns *ir)
- return (intptr_t)ir_kgc(ir);
- } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
- return (intptr_t)ir_kptr(ir);
-+ } else if (LJ_SOFTFP && ir->o == IR_KNUM) {
-+ return (intptr_t)ir_knum(ir)->u64;
- } else {
- lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
- return ir->i; /* Sign-extended. */
-diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
-index dfdee2d..849d7a2 100644
---- a/src/lj_ffrecord.c
-+++ b/src/lj_ffrecord.c
-@@ -1012,7 +1012,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
- handle_num:
- tra = lj_ir_tonum(J, tra);
- tr = lj_ir_call(J, id, tr, trsf, tra);
-- if (LJ_SOFTFP) lj_needsplit(J);
-+ if (LJ_SOFTFP32) lj_needsplit(J);
- break;
- case STRFMT_STR:
- if (!tref_isstr(tra)) {
-diff --git a/src/lj_ircall.h b/src/lj_ircall.h
-index 973c36e..7312006 100644
---- a/src/lj_ircall.h
-+++ b/src/lj_ircall.h
-@@ -51,7 +51,7 @@ typedef struct CCallInfo {
- #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
- #define CCI_XA (1u << CCI_XARGS_SHIFT)
-
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
- #else
- #define CCI_XNARGS(ci) CCI_NARGS((ci))
-@@ -78,13 +78,19 @@ typedef struct CCallInfo {
- #define IRCALLCOND_SOFTFP_FFI(x) NULL
- #endif
-
--#if LJ_SOFTFP && LJ_TARGET_MIPS32
-+#if LJ_SOFTFP && LJ_TARGET_MIPS
- #define IRCALLCOND_SOFTFP_MIPS(x) x
- #else
- #define IRCALLCOND_SOFTFP_MIPS(x) NULL
- #endif
-
--#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32)
-+#if LJ_SOFTFP && LJ_TARGET_MIPS64
-+#define IRCALLCOND_SOFTFP_MIPS64(x) x
-+#else
-+#define IRCALLCOND_SOFTFP_MIPS64(x) NULL
-+#endif
-+
-+#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
-
- #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
- #define IRCALLCOND_FP64_FFI(x) x
-@@ -112,6 +118,14 @@ typedef struct CCallInfo {
- #define XA2_FP 0
- #endif
-
-+#if LJ_SOFTFP32
-+#define XA_FP32 CCI_XA
-+#define XA2_FP32 (CCI_XA+CCI_XA)
-+#else
-+#define XA_FP32 0
-+#define XA2_FP32 0
-+#endif
-+
- #if LJ_32
- #define XA_64 CCI_XA
- #define XA2_64 (CCI_XA+CCI_XA)
-@@ -181,20 +195,21 @@ typedef struct CCallInfo {
- _(ANY, pow, 2, N, NUM, XA2_FP) \
- _(ANY, atan2, 2, N, NUM, XA2_FP) \
- _(ANY, ldexp, 2, N, NUM, XA_FP) \
-- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
-- _(SOFTFP, softfp_add, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_div, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \
-+ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
-+ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \
- _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \
-- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \
-- _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \
-- _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \
-+ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \
-+ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
- _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
- _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
-- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \
-- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \
-+ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
-+ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
- _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
- _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
- _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
-diff --git a/src/lj_iropt.h b/src/lj_iropt.h
-index 73aef0e..a59ba3f 100644
---- a/src/lj_iropt.h
-+++ b/src/lj_iropt.h
-@@ -150,7 +150,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
- /* Optimization passes. */
- LJ_FUNC void lj_opt_dce(jit_State *J);
- LJ_FUNC int lj_opt_loop(jit_State *J);
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- LJ_FUNC void lj_opt_split(jit_State *J);
- #else
- #define lj_opt_split(J) UNUSED(J)
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index 2fa8efc..f37e792 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -374,7 +374,7 @@ enum {
- ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
-
- /* Set/reset flag to activate the SPLIT pass for the current trace. */
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- #define lj_needsplit(J) (J->needsplit = 1)
- #define lj_resetsplit(J) (J->needsplit = 0)
- #else
-@@ -437,7 +437,7 @@ typedef struct jit_State {
- MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
-
- PostProc postproc; /* Required post-processing after execution. */
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- uint8_t needsplit; /* Need SPLIT pass. */
- #endif
- uint8_t retryrec; /* Retry recording. */
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index 52372c3..c7e4742 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -924,6 +924,9 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
-
- #if LJ_SOFTFP
- LJ_ASMF int32_t lj_vm_tobit(double x);
-+#if LJ_TARGET_MIPS64
-+LJ_ASMF int32_t lj_vm_tointg(double x);
-+#endif
- #endif
-
- static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
-index fc93520..79ac3cc 100644
---- a/src/lj_opt_split.c
-+++ b/src/lj_opt_split.c
-@@ -8,7 +8,7 @@
-
- #include "lj_obj.h"
-
--#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
-+#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
-
- #include "lj_err.h"
- #include "lj_buf.h"
-diff --git a/src/lj_snap.c b/src/lj_snap.c
-index bb063c2..44fa379 100644
---- a/src/lj_snap.c
-+++ b/src/lj_snap.c
-@@ -93,7 +93,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
- (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
- sn |= SNAP_NORESTORE;
- }
-- if (LJ_SOFTFP && irt_isnum(ir->t))
-+ if (LJ_SOFTFP32 && irt_isnum(ir->t))
- sn |= SNAP_SOFTFPNUM;
- map[n++] = sn;
- }
-@@ -374,7 +374,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
- break;
- }
- }
-- } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
-+ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
- ref++;
- } else if (ir->o == IR_PVAL) {
- ref = ir->op1 + REF_BIAS;
-@@ -486,7 +486,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
- } else {
- IRType t = irt_type(ir->t);
- uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
-- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
-+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
- if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
- tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
- }
-@@ -520,7 +520,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
- if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
- if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
- snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
-- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
-+ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
- irs+1 < irlast && (irs+1)->o == IR_HIOP)
- snap_pref(J, T, map, nent, seen, (irs+1)->op2);
- }
-@@ -579,10 +579,10 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
- lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
- val = snap_pref(J, T, map, nent, seen, irc->op1);
- val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
-- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
-+ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
- irs+1 < irlast && (irs+1)->o == IR_HIOP) {
- IRType t = IRT_I64;
-- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
-+ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
- t = IRT_NUM;
- lj_needsplit(J);
- if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
-@@ -635,7 +635,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
- int32_t *sps = &ex->spill[regsp_spill(rs)];
- if (irt_isinteger(t)) {
- setintV(o, *sps);
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- } else if (irt_isnum(t)) {
- o->u64 = *(uint64_t *)sps;
- #endif
-@@ -660,6 +660,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
- #if !LJ_SOFTFP
- } else if (irt_isnum(t)) {
- setnumV(o, ex->fpr[r-RID_MIN_FPR]);
-+#elif LJ_64 /* && LJ_SOFTFP */
-+ } else if (irt_isnum(t)) {
-+ o->u64 = ex->gpr[r-RID_MIN_GPR];
- #endif
- #if LJ_64 && !LJ_GC64
- } else if (irt_is64(t)) {
-@@ -813,7 +816,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
- val = lj_tab_set(J->L, t, &tmp);
- /* NOBARRIER: The table is new (marked white). */
- snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
-- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
-+ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
- snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
- val->u32.hi = tmp.u32.lo;
- }
-@@ -874,7 +877,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
- continue;
- }
- snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
-- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
-+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
- TValue tmp;
- snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
- o->u32.hi = tmp.u32.lo;
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index c06270a..75b38de 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -1980,6 +1980,38 @@ static void build_subroutines(BuildCtx *ctx)
- |1:
- | jr ra
- |. move CRET1, r0
-+ |
-+ |// FP number to int conversion with a check for soft-float.
-+ |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
-+ |->vm_tointg:
-+ |.if JIT
-+ | dsll CRET2, CARG1, 1
-+ | beqz CRET2, >2
-+ |. li TMP0, 1076
-+ | dsrl AT, CRET2, 53
-+ | dsubu TMP0, TMP0, AT
-+ | sltiu AT, TMP0, 54
-+ | beqz AT, >1
-+ |. dextm CRET2, CRET2, 0, 20
-+ | dinsu CRET2, AT, 21, 21
-+ | slt AT, CARG1, r0
-+ | dsrlv CRET1, CRET2, TMP0
-+ | dsubu CARG1, r0, CRET1
-+ | movn CRET1, CARG1, AT
-+ | li CARG1, 64
-+ | subu TMP0, CARG1, TMP0
-+ | dsllv CRET2, CRET2, TMP0 // Integer check.
-+ | sextw AT, CRET1
-+ | xor AT, CRET1, AT // Range check.
-+ | jr ra
-+ |. movz CRET2, AT, CRET2
-+ |1:
-+ | jr ra
-+ |. li CRET2, 1
-+ |2:
-+ | jr ra
-+ |. move CRET1, r0
-+ |.endif
- |.endif
- |
- |.macro .ffunc_bit, name
-@@ -2665,6 +2697,23 @@ static void build_subroutines(BuildCtx *ctx)
- |. li CRET1, 0
- |.endif
- |
-+ |.macro sfmin_max, name, intins
-+ |->vm_sf .. name:
-+ |.if JIT and not FPU
-+ | move TMP2, ra
-+ | bal ->vm_sfcmpolt
-+ |. nop
-+ | move ra, TMP2
-+ | move TMP0, CRET1
-+ | move CRET1, CARG1
-+ | jr ra
-+ |. intins CRET1, CARG2, TMP0
-+ |.endif
-+ |.endmacro
-+ |
-+ | sfmin_max min, movz
-+ | sfmin_max max, movn
-+ |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
- |//-----------------------------------------------------------------------
---
-2.20.1
-
diff --git a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch b/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch
deleted file mode 100644
index 133018d..0000000
--- a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From b0ecc6dd65a0b40e1868f20719c4f7c4880dc32d Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 8 Jun 2017 00:15:15 +0200
-Subject: [PATCH 06/72] FreeBSD/x64: Avoid changing resource limits, if not
- needed.
-
----
- src/lj_alloc.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_alloc.c b/src/lj_alloc.c
-index 95d15d0..9fc761c 100644
---- a/src/lj_alloc.c
-+++ b/src/lj_alloc.c
-@@ -343,7 +343,7 @@ static void *CALL_MMAP(size_t size)
- }
- #endif
-
--#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-
- #include <sys/resource.h>
-
---
-2.20.1
-
diff --git a/0007-Remove-unused-define.patch b/0007-Remove-unused-define.patch
deleted file mode 100644
index c4729e1..0000000
--- a/0007-Remove-unused-define.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 6a71e71c1430e5a8f794a52cb2da66e2693db796 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 11 Jun 2017 10:02:08 +0200
-Subject: [PATCH 07/72] Remove unused define.
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Suggested by 罗泽轩.
----
- src/lj_def.h | 1 -
- 1 file changed, 1 deletion(-)
-
-diff --git a/src/lj_def.h b/src/lj_def.h
-index 2d8fff6..e67bb24 100644
---- a/src/lj_def.h
-+++ b/src/lj_def.h
-@@ -80,7 +80,6 @@ typedef unsigned int uintptr_t;
- #define LJ_MIN_SBUF 32 /* Min. string buffer length. */
- #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
- #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
--#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
-
- /* JIT compiler limits. */
- #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
---
-2.20.1
-
diff --git a/0008-Modify-fix-for-warning-from-ar.patch b/0008-Modify-fix-for-warning-from-ar.patch
deleted file mode 100644
index 4d9b0e4..0000000
--- a/0008-Modify-fix-for-warning-from-ar.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 82151a4514e6538086f3f5e01cb8d4b22287b14f Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 12 Jun 2017 09:24:00 +0200
-Subject: [PATCH 08/72] Modify fix for warning from 'ar'.
-
----
- src/Makefile | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index f7f81a4..24e8c0e 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -208,7 +208,7 @@ TARGET_CC= $(STATIC_CC)
- TARGET_STCC= $(STATIC_CC)
- TARGET_DYNCC= $(DYNAMIC_CC)
- TARGET_LD= $(CROSS)$(CC)
--TARGET_AR= $(CROSS)ar rcus 2>/dev/null
-+TARGET_AR= $(CROSS)ar rcus
- TARGET_STRIP= $(CROSS)strip
-
- TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
-@@ -293,6 +293,7 @@ ifeq (Windows,$(TARGET_SYS))
- TARGET_XSHLDFLAGS= -shared
- TARGET_DYNXLDOPTS=
- else
-+ TARGET_AR+= 2>/dev/null
- ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
- TARGET_XCFLAGS+= -fno-stack-protector
- endif
---
-2.20.1
-
diff --git a/0009-x64-LJ_GC64-Fix-emit_rma.patch b/0009-x64-LJ_GC64-Fix-emit_rma.patch
deleted file mode 100644
index ff59f09..0000000
--- a/0009-x64-LJ_GC64-Fix-emit_rma.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 7e662e4f87134f1e84f7bea80933e033c5bf53a3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 26 Jul 2017 09:52:53 +0200
-Subject: [PATCH 09/72] x64/LJ_GC64: Fix emit_rma().
-
----
- src/lj_emit_x86.h | 24 +++++++++++++++++++++---
- 1 file changed, 21 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
-index 5207f9d..5b139bd 100644
---- a/src/lj_emit_x86.h
-+++ b/src/lj_emit_x86.h
-@@ -343,9 +343,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
- emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
- } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
- emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
-- } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
-- emit_rmro(as, xo, rr, rr, 0);
-- emit_loadu64(as, rr, (uintptr_t)addr);
-+ } else if (!checki32((intptr_t)addr)) {
-+ Reg ra = (rr & 15);
-+ if (xo != XO_MOV) {
-+ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
-+ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
-+ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
-+ ra = RID_DISPATCH;
-+ if (checku32(dispaddr)) {
-+ emit_loadi(as, ra, (int32_t)dispaddr);
-+ } else { /* Full-size 64 bit load. */
-+ MCode *p = as->mcp;
-+ *(uint64_t *)(p-8) = dispaddr;
-+ p[-9] = (MCode)(XI_MOVri+(ra&7));
-+ p[-10] = 0x48 + ((ra>>3)&1);
-+ p -= 10;
-+ as->mcp = p;
-+ }
-+ if (xo == XO_GROUP3b) emit_i8(as, i8);
-+ }
-+ emit_rmro(as, xo, rr, ra, 0);
-+ emit_loadu64(as, ra, (uintptr_t)addr);
- } else
- #endif
- {
---
-2.20.1
-
diff --git a/0010-PPC-Add-soft-float-support-to-interpreter.patch b/0010-PPC-Add-soft-float-support-to-interpreter.patch
deleted file mode 100644
index 52d3638..0000000
--- a/0010-PPC-Add-soft-float-support-to-interpreter.patch
+++ /dev/null
@@ -1,2761 +0,0 @@
-From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 26 Jul 2017 09:52:19 +0200
-Subject: [PATCH 10/72] PPC: Add soft-float support to interpreter.
-
-Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-Sponsored by Cisco Systems, Inc.
----
- src/host/buildvm_asm.c | 2 +-
- src/lj_arch.h | 29 +-
- src/lj_ccall.c | 38 +-
- src/lj_ccall.h | 4 +-
- src/lj_ccallback.c | 30 +-
- src/lj_frame.h | 2 +-
- src/lj_ircall.h | 2 +-
- src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++-------
- 8 files changed, 1101 insertions(+), 255 deletions(-)
-
-diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
-index ffd1490..43595b3 100644
---- a/src/host/buildvm_asm.c
-+++ b/src/host/buildvm_asm.c
-@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
- #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
- fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
- #endif
--#if LJ_TARGET_PPC && !LJ_TARGET_PS3
-+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
- /* Hard-float ABI. */
- fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
- #endif
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index b770564..0145a7c 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -254,6 +254,29 @@
- #else
- #define LJ_ARCH_BITS 32
- #define LJ_ARCH_NAME "ppc"
-+
-+#if !defined(LJ_ARCH_HASFPU)
-+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-+#define LJ_ARCH_HASFPU 0
-+#else
-+#define LJ_ARCH_HASFPU 1
-+#endif
-+#endif
-+
-+#if !defined(LJ_ABI_SOFTFP)
-+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-+#define LJ_ABI_SOFTFP 1
-+#else
-+#define LJ_ABI_SOFTFP 0
-+#endif
-+#endif
-+#endif
-+
-+#if LJ_ABI_SOFTFP
-+#define LJ_ARCH_NOJIT 1 /* NYI */
-+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-+#else
-+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
- #endif
-
- #define LJ_TARGET_PPC 1
-@@ -262,7 +285,6 @@
- #define LJ_TARGET_MASKSHIFT 0
- #define LJ_TARGET_MASKROT 1
- #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
--#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
-
- #if LJ_TARGET_CONSOLE
- #define LJ_ARCH_PPC32ON64 1
-@@ -415,16 +437,13 @@
- #error "No support for ILP32 model on ARM64"
- #endif
- #elif LJ_TARGET_PPC
--#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
--#error "No support for PowerPC CPUs without double-precision FPU"
--#endif
- #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
- #error "No support for little-endian PPC32"
- #endif
- #if LJ_ARCH_PPC64
- #error "No support for PowerPC 64 bit mode (yet)"
- #endif
--#ifdef __NO_FPRS__
-+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
- #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
- #endif
- #elif LJ_TARGET_MIPS32
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index 5c252e5..799be48 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -387,6 +387,24 @@
- #define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex by value in 2 or 4 GPRs. */
-
-+#define CCALL_HANDLE_GPR \
-+ /* Try to pass argument in GPRs. */ \
-+ if (n > 1) { \
-+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
-+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
-+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-+ else if (ngpr + n > maxgpr) \
-+ ngpr = maxgpr; /* Prevent reordering. */ \
-+ } \
-+ if (ngpr + n <= maxgpr) { \
-+ dp = &cc->gpr[ngpr]; \
-+ ngpr += n; \
-+ goto done; \
-+ } \
-+
-+#if LJ_ABI_SOFTFP
-+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
-+#else
- #define CCALL_HANDLE_REGARG \
- if (isfp) { /* Try to pass argument in FPRs. */ \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
-@@ -395,24 +413,16 @@
- d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
- goto done; \
- } \
-- } else { /* Try to pass argument in GPRs. */ \
-- if (n > 1) { \
-- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
-- if (ctype_isinteger(d->info)) \
-- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-- else if (ngpr + n > maxgpr) \
-- ngpr = maxgpr; /* Prevent reordering. */ \
-- } \
-- if (ngpr + n <= maxgpr) { \
-- dp = &cc->gpr[ngpr]; \
-- ngpr += n; \
-- goto done; \
-- } \
-+ } else { \
-+ CCALL_HANDLE_GPR \
- }
-+#endif
-
-+#if !LJ_ABI_SOFTFP
- #define CCALL_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
-+#endif
-
- #elif LJ_TARGET_MIPS32
- /* -- MIPS o32 calling conventions ---------------------------------------- */
-@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- }
- if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
-
--#if LJ_TARGET_X64 || LJ_TARGET_PPC
-+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
- cc->nfpr = nfpr; /* Required for vararg functions. */
- #endif
- cc->nsp = nsp;
-diff --git a/src/lj_ccall.h b/src/lj_ccall.h
-index 59f6648..6efa48c 100644
---- a/src/lj_ccall.h
-+++ b/src/lj_ccall.h
-@@ -86,9 +86,9 @@ typedef union FPRArg {
- #elif LJ_TARGET_PPC
-
- #define CCALL_NARG_GPR 8
--#define CCALL_NARG_FPR 8
-+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
- #define CCALL_NRET_GPR 4 /* For complex double. */
--#define CCALL_NRET_FPR 1
-+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
- #define CCALL_SPS_EXTRA 4
- #define CCALL_SPS_FREE 0
-
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index 846827b..03494a7 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *cts)
-
- #elif LJ_TARGET_PPC
-
-+#define CALLBACK_HANDLE_GPR \
-+ if (n > 1) { \
-+ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
-+ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
-+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-+ } \
-+ if (ngpr + n <= maxgpr) { \
-+ sp = &cts->cb.gpr[ngpr]; \
-+ ngpr += n; \
-+ goto done; \
-+ }
-+
-+#if LJ_ABI_SOFTFP
-+#define CALLBACK_HANDLE_REGARG \
-+ CALLBACK_HANDLE_GPR \
-+ UNUSED(isfp);
-+#else
- #define CALLBACK_HANDLE_REGARG \
- if (isfp) { \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
-@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *cts)
- goto done; \
- } \
- } else { /* Try to pass argument in GPRs. */ \
-- if (n > 1) { \
-- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
-- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-- } \
-- if (ngpr + n <= maxgpr) { \
-- sp = &cts->cb.gpr[ngpr]; \
-- ngpr += n; \
-- goto done; \
-- } \
-+ CALLBACK_HANDLE_GPR \
- }
-+#endif
-
-+#if !LJ_ABI_SOFTFP
- #define CALLBACK_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
-+#endif
-
- #elif LJ_TARGET_MIPS32
-
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 19c49a4..04cb5a3 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #define CFRAME_OFS_L 36
- #define CFRAME_OFS_PC 32
- #define CFRAME_OFS_MULTRES 28
--#define CFRAME_SIZE 272
-+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
- #define CFRAME_SHIFT_MULTRES 3
- #endif
- #elif LJ_TARGET_MIPS32
-diff --git a/src/lj_ircall.h b/src/lj_ircall.h
-index 7312006..9b3883b 100644
---- a/src/lj_ircall.h
-+++ b/src/lj_ircall.h
-@@ -287,7 +287,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
- #define fp64_f2l __aeabi_f2lz
- #define fp64_f2ul __aeabi_f2ulz
- #endif
--#elif LJ_TARGET_MIPS
-+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
- #define softfp_add __adddf3
- #define softfp_sub __subdf3
- #define softfp_mul __muldf3
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index b4260eb..0839668 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -103,6 +103,18 @@
- |// Fixed register assignments for the interpreter.
- |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
- |
-+|.macro .FPU, a, b
-+|.if FPU
-+| a, b
-+|.endif
-+|.endmacro
-+|
-+|.macro .FPU, a, b, c
-+|.if FPU
-+| a, b, c
-+|.endif
-+|.endmacro
-+|
- |// The following must be C callee-save (but BASE is often refetched).
- |.define BASE, r14 // Base of current Lua stack frame.
- |.define KBASE, r15 // Constants of current Lua function.
-@@ -116,8 +128,10 @@
- |.define TISNUM, r22
- |.define TISNIL, r23
- |.define ZERO, r24
-+|.if FPU
- |.define TOBIT, f30 // 2^52 + 2^51.
- |.define TONUM, f31 // 2^52 + 2^51 + 2^31.
-+|.endif
- |
- |// The following temporaries are not saved across C calls, except for RA.
- |.define RA, r20 // Callee-save.
-@@ -133,6 +147,7 @@
- |
- |// Saved temporaries.
- |.define SAVE0, r21
-+|.define SAVE1, r25
- |
- |// Calling conventions.
- |.define CARG1, r3
-@@ -141,8 +156,10 @@
- |.define CARG4, r6 // Overlaps TMP3.
- |.define CARG5, r7 // Overlaps INS.
- |
-+|.if FPU
- |.define FARG1, f1
- |.define FARG2, f2
-+|.endif
- |
- |.define CRET1, r3
- |.define CRET2, r4
-@@ -213,10 +230,16 @@
- |.endif
- |.else
- |
-+|.if FPU
- |.define SAVE_LR, 276(sp)
- |.define CFRAME_SPACE, 272 // Delta for sp.
- |// Back chain for sp: 272(sp) <-- sp entering interpreter
- |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
-+|.else
-+|.define SAVE_LR, 132(sp)
-+|.define CFRAME_SPACE, 128 // Delta for sp.
-+|// Back chain for sp: 128(sp) <-- sp entering interpreter
-+|.endif
- |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
- |.define SAVE_CR, 52(sp) // 32 bit CR save.
- |.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
-@@ -226,16 +249,25 @@
- |.define SAVE_PC, 32(sp)
- |.define SAVE_MULTRES, 28(sp)
- |.define UNUSED1, 24(sp)
-+|.if FPU
- |.define TMPD_LO, 20(sp)
- |.define TMPD_HI, 16(sp)
- |.define TONUM_LO, 12(sp)
- |.define TONUM_HI, 8(sp)
-+|.else
-+|.define SFSAVE_4, 20(sp)
-+|.define SFSAVE_3, 16(sp)
-+|.define SFSAVE_2, 12(sp)
-+|.define SFSAVE_1, 8(sp)
-+|.endif
- |// Next frame lr: 4(sp)
- |// Back chain for sp: 0(sp) <-- sp while in interpreter
- |
-+|.if FPU
- |.define TMPD_BLO, 23(sp)
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
-+|.endif
- |
- |.endif
- |
-@@ -245,7 +277,7 @@
- |.else
- | stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
- |.endif
--| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
-+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- |.endmacro
- |.macro rest_, reg
- |.if GPR64
-@@ -253,7 +285,7 @@
- |.else
- | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
- |.endif
--| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
-+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- |.endmacro
- |
- |.macro saveregs
-@@ -323,6 +355,7 @@
- |// Trap for not-yet-implemented parts.
- |.macro NYI; tw 4, sp, sp; .endmacro
- |
-+|.if FPU
- |// int/FP conversions.
- |.macro tonum_i, freg, reg
- | xoris reg, reg, 0x8000
-@@ -346,6 +379,7 @@
- |.macro toint, reg, freg
- | toint reg, freg, freg
- |.endmacro
-+|.endif
- |
- |//-----------------------------------------------------------------------
- |
-@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx)
- | beq >2
- |1:
- | addic. TMP1, TMP1, -8
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ |.endif
- | addi RA, RA, 8
-+ |.if FPU
- | stfd f0, 0(BASE)
-+ |.else
-+ | stw CARG1, 0(BASE)
-+ | stw CARG2, 4(BASE)
-+ |.endif
- | addi BASE, BASE, 8
- | bney <1
- |
-@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx)
- | .toc ld TOCREG, SAVE_TOC
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp BASE, L->base
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | li ZERO, 0
-- | stw TMP3, TMPD
-+ | .FPU stw TMP3, TMPD
- | li TMP1, LJ_TFALSE
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | li TISNIL, LJ_TNIL
- | li_vmstate INTERP
-- | lfs TOBIT, TMPD
-+ | .FPU lfs TOBIT, TMPD
- | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | la RA, -8(BASE) // Results start at BASE-8.
-- | stw TMP3, TMPD
-+ | .FPU stw TMP3, TMPD
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | stw TMP1, 0(RA) // Prepend false to error message.
- | li RD, 16 // 2 results: false + error message.
- | st_vmstate
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | b ->vm_returnc
- |
- |//-----------------------------------------------------------------------
-@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *ctx)
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp TMP1, L->top
- | lwz PC, FRAME_PC(BASE)
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | stb CARG3, L->status
-- | stw TMP3, TMPD
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | lfs TOBIT, TMPD
-+ | .FPU stw TMP3, TMPD
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU lfs TOBIT, TMPD
- | sub RD, TMP1, BASE
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | addi RD, RD, 8
-- | stw TMP0, TONUM_HI
-+ | .FPU stw TMP0, TONUM_HI
- | li_vmstate INTERP
- | li ZERO, 0
- | st_vmstate
- | andix. TMP0, PC, FRAME_TYPE
- | mr MULTRES, RD
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | li TISNIL, LJ_TNIL
- | beq ->BC_RET_Z
- | b ->vm_return
-@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *ctx)
- | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp TMP1, L->top
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | add PC, PC, BASE
-- | stw TMP3, TMPD
-+ | .FPU stw TMP3, TMPD
- | li ZERO, 0
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | lfs TOBIT, TMPD
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU lfs TOBIT, TMPD
- | sub PC, PC, TMP2 // PC = frame delta + frame type
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | sub NARGS8:RC, TMP1, BASE
-- | stw TMP0, TONUM_HI
-+ | .FPU stw TMP0, TONUM_HI
- | li_vmstate INTERP
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | li TISNIL, LJ_TNIL
- | st_vmstate
- |
-@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz INS, -4(PC)
- | subi CARG2, RB, 16
- | decode_RB8 SAVE0, INS
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz TMP2, 0(RA)
-+ | lwz TMP3, 4(RA)
-+ |.endif
- | add TMP1, BASE, SAVE0
- | stp BASE, L->base
- | cmplw TMP1, CARG2
- | sub CARG3, CARG2, TMP1
- | decode_RA8 RA, INS
-+ |.if FPU
- | stfd f0, 0(CARG2)
-+ |.else
-+ | stw TMP2, 0(CARG2)
-+ | stw TMP3, 4(CARG2)
-+ |.endif
- | bney ->BC_CAT_Z
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux TMP2, RA, BASE
-+ | stw TMP3, 4(RA)
-+ |.endif
- | b ->cont_nop
- |
- |//-- Table indexing metamethods -----------------------------------------
-@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- | beq >3
-+ |.if FPU
- | lfd f0, 0(CRET1)
-+ |.else
-+ | lwz TMP0, 0(CRET1)
-+ | lwz TMP1, 4(CRET1)
-+ |.endif
- | ins_next1
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- |
- |3: // Call __index metamethod.
-@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *ctx)
- | // Returns cTValue * or NULL.
- | cmplwi CRET1, 0
- | beq >1
-+ |.if FPU
- | lfd f14, 0(CRET1)
-+ |.else
-+ | lwz SAVE0, 0(CRET1)
-+ | lwz SAVE1, 4(CRET1)
-+ |.endif
- | b ->BC_TGETR_Z
- |1:
- | stwx TISNIL, BASE, RA
-@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx)
- | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
-+ |.if FPU
- | lfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP2, RA, BASE
-+ | lwz TMP3, 4(RA)
-+ |.endif
- | beq >3
- | // NOBARRIER: lj_meta_tset ensures the table is not black.
- | ins_next1
-+ |.if FPU
- | stfd f0, 0(CRET1)
-+ |.else
-+ | stw TMP2, 0(CRET1)
-+ | stw TMP3, 4(CRET1)
-+ |.endif
- | ins_next2
- |
- |3: // Call __newindex metamethod.
-@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *ctx)
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 24 // 3 args for func(t, k, v)
-+ |.if FPU
- | stfd f0, 16(BASE) // Copy value to third argument.
-+ |.else
-+ | stw TMP2, 16(BASE)
-+ | stw TMP3, 20(BASE)
-+ |.endif
- | b ->vm_call_dispatch_f
- |
- |->vmeta_tsetr:
-@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *ctx)
- | stw PC, SAVE_PC
- | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
- | // Returns TValue *.
-+ |.if FPU
- | stfd f14, 0(CRET1)
-+ |.else
-+ | stw SAVE0, 0(CRET1)
-+ | stw SAVE1, 4(CRET1)
-+ |.endif
- | b ->cont_nop
- |
- |//-- Comparison metamethods ---------------------------------------------
-@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_ra: // RA = resultptr
- | lwz INS, -4(PC)
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ |.endif
- | decode_RA8 TMP1, INS
-+ |.if FPU
- | stfdx f0, BASE, TMP1
-+ |.else
-+ | stwux CARG1, TMP1, BASE
-+ | stw CARG2, 4(TMP1)
-+ |.endif
- | b ->cont_nop
- |
- |->cont_condt: // RA = resultptr
-@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_n, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG1, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | blt ->fff_fallback
-- | checknum CARG3; bge ->fff_fallback
-+ | checknum CARG1; bge ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_nn, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG1, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG3, 8(BASE)
- | lfd FARG2, 8(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ | lwz CARG3, 8(BASE)
-+ | lwz CARG4, 12(BASE)
-+ |.endif
- | blt ->fff_fallback
-+ | checknum CARG1; bge ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
-- | checknum CARG4; bge ->fff_fallback
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
-@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
- | bge cr1, ->fff_fallback
- | stw CARG3, 0(RA)
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
-+ | addi TMP1, BASE, 8
-+ | add TMP2, RA, NARGS8:RC
- | stw CARG1, 4(RA)
- | beq ->fff_res // Done if exactly 1 argument.
-- | li TMP1, 8
-- | subi RC, RC, 8
- |1:
-- | cmplw TMP1, RC
-- | lfdx f0, BASE, TMP1
-- | stfdx f0, RA, TMP1
-+ | cmplw TMP1, TMP2
-+ |.if FPU
-+ | lfd f0, 0(TMP1)
-+ | stfd f0, 0(TMP1)
-+ |.else
-+ | lwz CARG1, 0(TMP1)
-+ | lwz CARG2, 4(TMP1)
-+ | stw CARG1, -8(TMP1)
-+ | stw CARG2, -4(TMP1)
-+ |.endif
- | addi TMP1, TMP1, 8
- | bney <1
- | b ->fff_res
-@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
- | orc TMP1, TMP2, TMP0
- | addi TMP1, TMP1, ~LJ_TISNUM+1
- | slwi TMP1, TMP1, 3
-+ |.if FPU
- | la TMP2, CFUNC:RB->upvalue
- | lfdx FARG1, TMP2, TMP1
-+ |.else
-+ | add TMP1, CFUNC:RB, TMP1
-+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
-+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
-+ |.endif
- | b ->fff_resn
- |
- |//-- Base library: getters and setters ---------------------------------
-@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG1, L
- | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
- | // Returns cTValue *.
-+ |.if FPU
- | lfd FARG1, 0(CRET1)
-+ |.else
-+ | lwz CARG2, 4(CRET1)
-+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
-+ |.endif
- | b ->fff_resn
- |
- |//-- Base library: conversions ------------------------------------------
-@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
- | // Only handles the number case inline (without a base argument).
- | cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | bne ->fff_fallback // Exactly one argument.
- | checknum CARG1; bgt ->fff_fallback
- | b ->fff_resn
-@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx)
- | cmplwi CRET1, 0
- | li CARG3, LJ_TNIL
- | beq ->fff_restv // End of traversal: return nil.
-- | lfd f0, 8(BASE) // Copy key and value to results.
- | la RA, -8(BASE)
-+ |.if FPU
-+ | lfd f0, 8(BASE) // Copy key and value to results.
- | lfd f1, 16(BASE)
- | stfd f0, 0(RA)
-- | li RD, (2+1)*8
- | stfd f1, 8(RA)
-+ |.else
-+ | lwz CARG1, 8(BASE)
-+ | lwz CARG2, 12(BASE)
-+ | lwz CARG3, 16(BASE)
-+ | lwz CARG4, 20(BASE)
-+ | stw CARG1, 0(RA)
-+ | stw CARG2, 4(RA)
-+ | stw CARG3, 8(RA)
-+ | stw CARG4, 12(RA)
-+ |.endif
-+ | li RD, (2+1)*8
- | b ->fff_res
- |
- |.ffunc_1 pairs
-@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx)
- | bne ->fff_fallback
- #if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
- #else
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | la RA, -8(BASE)
- #endif
- | stw TISNIL, 8(BASE)
- | li RD, (3+1)*8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw TMP0, 0(RA)
-+ | stw TMP1, 4(RA)
-+ |.endif
- | b ->fff_res
- |
- |.ffunc ipairs_aux
-@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx)
- | stfd FARG2, 0(RA)
- |.endif
- | ble >2 // Not in array part?
-+ |.if FPU
- | lwzx TMP2, TMP1, TMP3
- | lfdx f0, TMP1, TMP3
-+ |.else
-+ | lwzux TMP2, TMP1, TMP3
-+ | lwz TMP3, 4(TMP1)
-+ |.endif
- |1:
- | checknil TMP2
- | li RD, (0+1)*8
- | beq ->fff_res // End of iteration, return 0 results.
- | li RD, (2+1)*8
-+ |.if FPU
- | stfd f0, 8(RA)
-+ |.else
-+ | stw TMP2, 8(RA)
-+ | stw TMP3, 12(RA)
-+ |.endif
- | b ->fff_res
- |2: // Check for empty hash part first. Otherwise call C function.
- | lwz TMP0, TAB:CARG1->hmask
-@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx)
- | li RD, (0+1)*8
- | beq ->fff_res
- | lwz TMP2, 0(CRET1)
-+ |.if FPU
- | lfd f0, 0(CRET1)
-+ |.else
-+ | lwz TMP3, 4(CRET1)
-+ |.endif
- | b <1
- |
- |.ffunc_1 ipairs
-@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx)
- | bne ->fff_fallback
- #if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
- #else
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | la RA, -8(BASE)
- #endif
- |.if DUALNUM
-@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | stw ZERO, 12(BASE)
- | li RD, (3+1)*8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw TMP0, 0(RA)
-+ | stw TMP1, 4(RA)
-+ |.endif
- | b ->fff_res
- |
- |//-- Base library: catch errors ----------------------------------------
-@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc xpcall
- | cmplwi NARGS8:RC, 16
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG3, 8(BASE)
-+ |.if FPU
- | lfd FARG2, 8(BASE)
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ | lwz CARG4, 12(BASE)
-+ |.endif
- | blt ->fff_fallback
- | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
- | mr TMP2, BASE
-- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
-+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
- | la BASE, 16(BASE)
- | // Remember active hook before pcall.
- | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
-+ |.if FPU
- | stfd FARG2, 0(TMP2) // Swap function and traceback.
-- | subi NARGS8:RC, NARGS8:RC, 16
- | stfd FARG1, 8(TMP2)
-+ |.else
-+ | stw CARG3, 0(TMP2)
-+ | stw CARG4, 4(TMP2)
-+ | stw CARG1, 8(TMP2)
-+ | stw CARG2, 12(TMP2)
-+ |.endif
-+ | subi NARGS8:RC, NARGS8:RC, 16
- | addi PC, TMP1, 16+FRAME_PCALL
- | b ->vm_call_dispatch
- |
-@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx)
- | stp BASE, L->top
- |2: // Move args to coroutine.
- | cmpw TMP1, NARGS8:RC
-+ |.if FPU
- | lfdx f0, BASE, TMP1
-+ |.else
-+ | add CARG3, BASE, TMP1
-+ | lwz TMP2, 0(CARG3)
-+ | lwz TMP3, 4(CARG3)
-+ |.endif
- | beq >3
-+ |.if FPU
- | stfdx f0, CARG2, TMP1
-+ |.else
-+ | add CARG3, CARG2, TMP1
-+ | stw TMP2, 0(CARG3)
-+ | stw TMP3, 4(CARG3)
-+ |.endif
- | addi TMP1, TMP1, 8
- | b <2
- |3:
-@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx)
- | stp TMP2, L:SAVE0->top // Clear coroutine stack.
- |5: // Move results from coroutine.
- | cmplw TMP1, TMP3
-+ |.if FPU
- | lfdx f0, TMP2, TMP1
- | stfdx f0, BASE, TMP1
-+ |.else
-+ | add CARG3, TMP2, TMP1
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ | add CARG3, BASE, TMP1
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | addi TMP1, TMP1, 8
- | bne <5
- |6:
-@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx)
- | andix. TMP0, PC, FRAME_TYPE
- | la TMP3, -8(TMP3)
- | li TMP1, LJ_TFALSE
-+ |.if FPU
- | lfd f0, 0(TMP3)
-+ |.else
-+ | lwz CARG1, 0(TMP3)
-+ | lwz CARG2, 4(TMP3)
-+ |.endif
- | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
- | li RD, (2+1)*8
- | stw TMP1, -8(BASE) // Prepend false to results.
- | la RA, -8(BASE)
-+ |.if FPU
- | stfd f0, 0(BASE) // Copy error message.
-+ |.else
-+ | stw CARG1, 0(BASE) // Copy error message.
-+ | stw CARG2, 4(BASE)
-+ |.endif
- | b <7
- |.else
- | mr CARG1, L
-@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx)
- | lus CARG1, 0x8000 // -(2^31).
- | beqy ->fff_resi
- |5:
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | blex func
- | b ->fff_resn
- |.endmacro
-@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc math_log
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lfd FARG1, 0(BASE)
-+ | lwz CARG1, 0(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
-- | checknum CARG3; bge ->fff_fallback
-+ | checknum CARG1; bge ->fff_fallback
-+ |.if FPU
-+ | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | blex log
- | b ->fff_resn
- |
-@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- |.ffunc math_ldexp
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-+ | lwz TMP0, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-- | lwz CARG4, 8(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ |.endif
-+ | lwz TMP1, 8(BASE)
- |.if GPR64
- | lwz CARG2, 12(BASE)
-- |.else
-+ |.elif FPU
- | lwz CARG1, 12(BASE)
-+ |.else
-+ | lwz CARG3, 12(BASE)
- |.endif
- | blt ->fff_fallback
-- | checknum CARG3; bge ->fff_fallback
-- | checknum CARG4; bne ->fff_fallback
-+ | checknum TMP0; bge ->fff_fallback
-+ | checknum TMP1; bne ->fff_fallback
- |.else
- |.ffunc_nn math_ldexp
- |.if GPR64
-@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc_n math_frexp
- |.if GPR64
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
-- |.else
-+ |.elif FPU
- | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
-+ |.else
-+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- |.endif
- | lwz PC, FRAME_PC(BASE)
- | blex frexp
-@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.if not DUALNUM
- | tonum_i FARG2, TMP1
- |.endif
-+ |.if FPU
- | stfd FARG1, 0(RA)
-+ |.else
-+ | stw CRET1, 0(RA)
-+ | stw CRET2, 4(RA)
-+ |.endif
- | li RD, (2+1)*8
- |.if DUALNUM
- | stw TISNUM, 8(RA)
-@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc_n math_modf
- |.if GPR64
- | la CARG2, -8(BASE)
-- |.else
-+ |.elif FPU
- | la CARG1, -8(BASE)
-+ |.else
-+ | la CARG3, -8(BASE)
- |.endif
- | lwz PC, FRAME_PC(BASE)
- | blex modf
- | la RA, -8(BASE)
-+ |.if FPU
- | stfd FARG1, 0(BASE)
-+ |.else
-+ | stw CRET1, 0(BASE)
-+ | stw CRET2, 4(BASE)
-+ |.endif
- | li RD, (2+1)*8
- | b ->fff_res
- |
-@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- | .ffunc_1 name
- | checknum CARG3
-- | addi TMP1, BASE, 8
-- | add TMP2, BASE, NARGS8:RC
-+ | addi SAVE0, BASE, 8
-+ | add SAVE1, BASE, NARGS8:RC
- | bne >4
- |1: // Handle integers.
-- | lwz CARG4, 0(TMP1)
-- | cmplw cr1, TMP1, TMP2
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 0(SAVE0)
-+ | cmplw cr1, SAVE0, SAVE1
-+ | lwz CARG2, 4(SAVE0)
- | bge cr1, ->fff_resi
- | checknum CARG4
- | xoris TMP0, CARG1, 0x8000
-@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx)
- |.if GPR64
- | rldicl CARG1, CARG1, 0, 32
- |.endif
-- | addi TMP1, TMP1, 8
-+ | addi SAVE0, SAVE0, 8
- | b <1
- |3:
- | bge ->fff_fallback
- | // Convert intermediate result to number and continue below.
-+ |.if FPU
- | tonum_i FARG1, CARG1
-- | lfd FARG2, 0(TMP1)
-+ | lfd FARG2, 0(SAVE0)
-+ |.else
-+ | mr CARG2, CARG1
-+ | bl ->vm_sfi2d_1
-+ | lwz CARG3, 0(SAVE0)
-+ | lwz CARG4, 4(SAVE0)
-+ |.endif
- | b >6
- |4:
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | bge ->fff_fallback
- |5: // Handle numbers.
-- | lwz CARG4, 0(TMP1)
-- | cmplw cr1, TMP1, TMP2
-- | lfd FARG2, 0(TMP1)
-+ | lwz CARG3, 0(SAVE0)
-+ | cmplw cr1, SAVE0, SAVE1
-+ |.if FPU
-+ | lfd FARG2, 0(SAVE0)
-+ |.else
-+ | lwz CARG4, 4(SAVE0)
-+ |.endif
- | bge cr1, ->fff_resn
-- | checknum CARG4; bge >7
-+ | checknum CARG3; bge >7
- |6:
-+ | addi SAVE0, SAVE0, 8
-+ |.if FPU
- | fsub f0, FARG1, FARG2
-- | addi TMP1, TMP1, 8
- |.if ismax
- | fsel FARG1, f0, FARG1, FARG2
- |.else
- | fsel FARG1, f0, FARG2, FARG1
- |.endif
-+ |.else
-+ | stw CARG1, SFSAVE_1
-+ | stw CARG2, SFSAVE_2
-+ | stw CARG3, SFSAVE_3
-+ | stw CARG4, SFSAVE_4
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ |.if ismax
-+ | blt >8
-+ |.else
-+ | bge >8
-+ |.endif
-+ | lwz CARG1, SFSAVE_1
-+ | lwz CARG2, SFSAVE_2
-+ | b <5
-+ |8:
-+ | lwz CARG1, SFSAVE_3
-+ | lwz CARG2, SFSAVE_4
-+ |.endif
- | b <5
- |7: // Convert integer to number and continue above.
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG3, 4(SAVE0)
- | bne ->fff_fallback
-- | tonum_i FARG2, CARG2
-+ |.if FPU
-+ | tonum_i FARG2, CARG3
-+ |.else
-+ | bl ->vm_sfi2d_2
-+ |.endif
- | b <6
- |.else
- | .ffunc_n name
-@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name
-- | addi TMP1, BASE, 8
-- | add TMP2, BASE, NARGS8:RC
-+ | addi SAVE0, BASE, 8
-+ | add SAVE1, BASE, NARGS8:RC
- |1:
-- | lwz CARG4, 0(TMP1)
-- | cmplw cr1, TMP1, TMP2
-+ | lwz CARG4, 0(SAVE0)
-+ | cmplw cr1, SAVE0, SAVE1
- |.if DUALNUM
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG2, 4(SAVE0)
- |.else
-- | lfd FARG1, 0(TMP1)
-+ | lfd FARG1, 0(SAVE0)
- |.endif
- | bgey cr1, ->fff_resi
- | checknum CARG4
- |.if DUALNUM
-+ |.if FPU
- | bnel ->fff_bitop_fb
- |.else
-+ | beq >3
-+ | stw CARG1, SFSAVE_1
-+ | bl ->fff_bitop_fb
-+ | mr CARG2, CARG1
-+ | lwz CARG1, SFSAVE_1
-+ |3:
-+ |.endif
-+ |.else
- | fadd FARG1, FARG1, TOBIT
- | bge ->fff_fallback
- | stfd FARG1, TMPD
- | lwz CARG2, TMPD_LO
- |.endif
- | ins CARG1, CARG1, CARG2
-- | addi TMP1, TMP1, 8
-+ | addi SAVE0, SAVE0, 8
- | b <1
- |.endmacro
- |
-@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_bit_sh, name, ins, shmod
- |.if DUALNUM
- | .ffunc_2 bit_..name
-+ |.if FPU
- | checknum CARG3; bnel ->fff_tobit_fb
-+ |.else
-+ | checknum CARG3; beq >1
-+ | bl ->fff_tobit_fb
-+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
-+ |1:
-+ |.endif
- | // Note: no inline conversion from number for 2nd argument!
- | checknum CARG4; bne ->fff_fallback
- |.else
-@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_resn:
- | lwz PC, FRAME_PC(BASE)
- | la RA, -8(BASE)
-+ |.if FPU
- | stfd FARG1, -8(BASE)
-+ |.else
-+ | stw CARG1, -8(BASE)
-+ | stw CARG2, -4(BASE)
-+ |.endif
- | b ->fff_res1
- |
- |// Fallback FP number to bit conversion.
- |->fff_tobit_fb:
- |.if DUALNUM
-+ |.if FPU
- | lfd FARG1, 0(BASE)
- | bgt ->fff_fallback
- | fadd FARG1, FARG1, TOBIT
- | stfd FARG1, TMPD
- | lwz CARG1, TMPD_LO
- | blr
-+ |.else
-+ | bgt ->fff_fallback
-+ | mr CARG2, CARG1
-+ | mr CARG1, CARG3
-+ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
-+ |->vm_tobit:
-+ | slwi TMP2, CARG1, 1
-+ | addis TMP2, TMP2, 0x0020
-+ | cmpwi TMP2, 0
-+ | bge >2
-+ | li TMP1, 0x3e0
-+ | srawi TMP2, TMP2, 21
-+ | not TMP1, TMP1
-+ | sub. TMP2, TMP1, TMP2
-+ | cmpwi cr7, CARG1, 0
-+ | blt >1
-+ | slwi TMP1, CARG1, 11
-+ | srwi TMP0, CARG2, 21
-+ | oris TMP1, TMP1, 0x8000
-+ | or TMP1, TMP1, TMP0
-+ | srw CARG1, TMP1, TMP2
-+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
-+ | neg CARG1, CARG1
-+ | blr
-+ |1:
-+ | addi TMP2, TMP2, 21
-+ | srw TMP1, CARG2, TMP2
-+ | slwi CARG2, CARG1, 12
-+ | subfic TMP2, TMP2, 20
-+ | slw TMP0, CARG2, TMP2
-+ | or CARG1, TMP1, TMP0
-+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
-+ | neg CARG1, CARG1
-+ | blr
-+ |2:
-+ | li CARG1, 0
-+ | blr
-+ |.endif
- |.endif
- |->fff_bitop_fb:
- |.if DUALNUM
-- | lfd FARG1, 0(TMP1)
-+ |.if FPU
-+ | lfd FARG1, 0(SAVE0)
- | bgt ->fff_fallback
- | fadd FARG1, FARG1, TOBIT
- | stfd FARG1, TMPD
- | lwz CARG2, TMPD_LO
- | blr
-+ |.else
-+ | bgt ->fff_fallback
-+ | mr CARG1, CARG4
-+ | b ->vm_tobit
-+ |.endif
- |.endif
- |
- |//-----------------------------------------------------------------------
-@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *ctx)
- | decode_RA8 RC, INS // Call base.
- | beq >2
- |1: // Move results down.
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ |.endif
- | addic. TMP1, TMP1, -8
- | addi RA, RA, 8
-+ |.if FPU
- | stfdx f0, BASE, RC
-+ |.else
-+ | add CARG3, BASE, RC
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | addi RC, RC, 8
- | bne <1
- |2:
-@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |.macro savex_, a, b, c, d
-+ |.if FPU
- | stfd f..a, 16+a*8(sp)
- | stfd f..b, 16+b*8(sp)
- | stfd f..c, 16+c*8(sp)
- | stfd f..d, 16+d*8(sp)
-+ |.endif
- |.endmacro
- |
- |->vm_exit_handler:
-@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // Setup type comparison constants.
- | li TISNUM, LJ_TISNUM
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-- | stw TMP3, TMPD
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU stw TMP3, TMPD
- | li ZERO, 0
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | lfs TOBIT, TMPD
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU lfs TOBIT, TMPD
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | li TISNIL, LJ_TNIL
-- | stw TMP0, TONUM_HI
-- | lfs TONUM, TMPD
-+ | .FPU stw TMP0, TONUM_HI
-+ | .FPU lfs TONUM, TMPD
- | // Modified copy of ins_next which handles function header dispatch, too.
- | lwz INS, 0(PC)
- | addi PC, PC, 4
-@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Math helper functions ----------------------------------------------
- |//-----------------------------------------------------------------------
- |
-- |// NYI: Use internal implementations of floor, ceil, trunc.
-+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
-+ |
-+ |.macro sfi2d, AHI, ALO
-+ |.if not FPU
-+ | mr. AHI, ALO
-+ | bclr 12, 2 // Handle zero first.
-+ | srawi TMP0, ALO, 31
-+ | xor TMP1, ALO, TMP0
-+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
-+ | cntlzw AHI, TMP1
-+ | andix. TMP0, TMP0, 0x800 // Mask sign bit.
-+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
-+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
-+ | slwi ALO, TMP1, 21
-+ | or AHI, AHI, TMP0 // Sign | Exponent.
-+ | srwi TMP1, TMP1, 11
-+ | slwi AHI, AHI, 20 // Align left.
-+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
-+ | blr
-+ |.endif
-+ |.endmacro
-+ |
-+ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
-+ |->vm_sfi2d_1:
-+ | sfi2d CARG1, CARG2
-+ |
-+ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
-+ |->vm_sfi2d_2:
-+ | sfi2d CARG3, CARG4
- |
- |->vm_modi:
- | divwo. TMP0, CARG1, CARG2
-@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx)
- | addi DISPATCH, r12, GG_G2DISP
- | stw r11, CTSTATE->cb.slot
- | stw r3, CTSTATE->cb.gpr[0]
-- | stfd f1, CTSTATE->cb.fpr[0]
-+ | .FPU stfd f1, CTSTATE->cb.fpr[0]
- | stw r4, CTSTATE->cb.gpr[1]
-- | stfd f2, CTSTATE->cb.fpr[1]
-+ | .FPU stfd f2, CTSTATE->cb.fpr[1]
- | stw r5, CTSTATE->cb.gpr[2]
-- | stfd f3, CTSTATE->cb.fpr[2]
-+ | .FPU stfd f3, CTSTATE->cb.fpr[2]
- | stw r6, CTSTATE->cb.gpr[3]
-- | stfd f4, CTSTATE->cb.fpr[3]
-+ | .FPU stfd f4, CTSTATE->cb.fpr[3]
- | stw r7, CTSTATE->cb.gpr[4]
-- | stfd f5, CTSTATE->cb.fpr[4]
-+ | .FPU stfd f5, CTSTATE->cb.fpr[4]
- | stw r8, CTSTATE->cb.gpr[5]
-- | stfd f6, CTSTATE->cb.fpr[5]
-+ | .FPU stfd f6, CTSTATE->cb.fpr[5]
- | stw r9, CTSTATE->cb.gpr[6]
-- | stfd f7, CTSTATE->cb.fpr[6]
-+ | .FPU stfd f7, CTSTATE->cb.fpr[6]
- | stw r10, CTSTATE->cb.gpr[7]
-- | stfd f8, CTSTATE->cb.fpr[7]
-+ | .FPU stfd f8, CTSTATE->cb.fpr[7]
- | addi TMP0, sp, CFRAME_SPACE+8
- | stw TMP0, CTSTATE->cb.stack
- | mr CARG1, CTSTATE
-@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx)
- | lp BASE, L:CRET1->base
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp RC, L:CRET1->top
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | li ZERO, 0
- | mr L, CRET1
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | stw TMP0, TONUM_HI
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU stw TMP0, TONUM_HI
- | li TISNIL, LJ_TNIL
- | li_vmstate INTERP
-- | lfs TOBIT, TMPD
-- | stw TMP3, TMPD
-+ | .FPU lfs TOBIT, TMPD
-+ | .FPU stw TMP3, TMPD
- | sub RC, RC, BASE
- | st_vmstate
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | ins_callt
- |.endif
- |
-@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG2, RA
- | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
- | lwz CRET1, CTSTATE->cb.gpr[0]
-- | lfd FARG1, CTSTATE->cb.fpr[0]
-+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
- | lwz CRET2, CTSTATE->cb.gpr[1]
- | b ->vm_leave_unw
- |.endif
-@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx)
- | bge <1
- |2:
- | bney cr1, >3
-- | lfd f1, CCSTATE->fpr[0]
-- | lfd f2, CCSTATE->fpr[1]
-- | lfd f3, CCSTATE->fpr[2]
-- | lfd f4, CCSTATE->fpr[3]
-- | lfd f5, CCSTATE->fpr[4]
-- | lfd f6, CCSTATE->fpr[5]
-- | lfd f7, CCSTATE->fpr[6]
-- | lfd f8, CCSTATE->fpr[7]
-+ | .FPU lfd f1, CCSTATE->fpr[0]
-+ | .FPU lfd f2, CCSTATE->fpr[1]
-+ | .FPU lfd f3, CCSTATE->fpr[2]
-+ | .FPU lfd f4, CCSTATE->fpr[3]
-+ | .FPU lfd f5, CCSTATE->fpr[4]
-+ | .FPU lfd f6, CCSTATE->fpr[5]
-+ | .FPU lfd f7, CCSTATE->fpr[6]
-+ | .FPU lfd f8, CCSTATE->fpr[7]
- |3:
- | lp TMP0, CCSTATE->func
- | lwz CARG2, CCSTATE->gpr[1]
-@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz TMP2, -4(r14)
- | lwz TMP0, 4(r14)
- | stw CARG1, CCSTATE:TMP1->gpr[0]
-- | stfd FARG1, CCSTATE:TMP1->fpr[0]
-+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
- | stw CARG2, CCSTATE:TMP1->gpr[1]
- | mtlr TMP0
- | stw CARG3, CCSTATE:TMP1->gpr[2]
-@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, BASE
-+ | lwzux CARG3, RD, BASE
- | lwz TMP2, -4(PC)
-- | checknum cr0, TMP0
-- | lwz CARG3, 4(RD)
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RD)
- | decode_RD4 TMP2, TMP2
-- | checknum cr1, TMP1
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | checknum cr1, CARG3
-+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
- | bne cr0, >7
- | bne cr1, >8
-- | cmpw CARG2, CARG3
-+ | cmpw CARG2, CARG4
- if (op == BC_ISLT) {
- | bge >2
- } else if (op == BC_ISGE) {
-@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ble >2
- }
- |1:
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |2:
- | ins_next
- |
- |7: // RA is not an integer.
- | bgt cr0, ->vmeta_comp
- | // RA is a number.
-- | lfd f0, 0(RA)
-+ | .FPU lfd f0, 0(RA)
- | bgt cr1, ->vmeta_comp
- | blt cr1, >4
- | // RA is a number, RD is an integer.
-- | tonum_i f1, CARG3
-+ |.if FPU
-+ | tonum_i f1, CARG4
-+ |.else
-+ | bl ->vm_sfi2d_2
-+ |.endif
- | b >5
- |
- |8: // RA is an integer, RD is not an integer.
- | bgt cr1, ->vmeta_comp
- | // RA is an integer, RD is a number.
-+ |.if FPU
- | tonum_i f0, CARG2
-+ |.else
-+ | bl ->vm_sfi2d_1
-+ |.endif
- |4:
-- | lfd f1, 0(RD)
-+ | .FPU lfd f1, 0(RD)
- |5:
-+ |.if FPU
- | fcmpu cr0, f0, f1
-+ |.else
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ |.endif
- if (op == BC_ISLT) {
- | bge <2
- } else if (op == BC_ISGE) {
-@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, BASE
-- | checknum cr0, TMP0
-- | lwz TMP2, -4(PC)
-- | checknum cr1, TMP1
-- | decode_RD4 TMP2, TMP2
-- | lwz CARG3, 4(RD)
-+ | lwzux CARG3, RD, BASE
-+ | checknum cr0, CARG1
-+ | lwz SAVE0, -4(PC)
-+ | checknum cr1, CARG3
-+ | decode_RD4 SAVE0, SAVE0
-+ | lwz CARG4, 4(RD)
- | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
- | ble cr7, ->BC_ISEQN_Z
- } else {
- | ble cr7, ->BC_ISNEN_Z
- }
- |.else
-- | lwzux TMP0, RA, BASE
-- | lwz TMP2, 0(PC)
-+ | lwzux CARG1, RA, BASE
-+ | lwz SAVE0, 0(PC)
- | lfd f0, 0(RA)
- | addi PC, PC, 4
-- | lwzux TMP1, RD, BASE
-- | checknum cr0, TMP0
-- | decode_RD4 TMP2, TMP2
-+ | lwzux CARG3, RD, BASE
-+ | checknum cr0, CARG1
-+ | decode_RD4 SAVE0, SAVE0
- | lfd f1, 0(RD)
-- | checknum cr1, TMP1
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | checknum cr1, CARG3
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- | bge cr0, >5
- | bge cr1, >5
- | fcmpu cr0, f0, f1
- if (vk) {
- | bne >1
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- } else {
- | beq >1
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- }
- |1:
- | ins_next
-@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5: // Either or both types are not numbers.
- |.if not DUALNUM
- | lwz CARG2, 4(RA)
-- | lwz CARG3, 4(RD)
-+ | lwz CARG4, 4(RD)
- |.endif
- |.if FFI
-- | cmpwi cr7, TMP0, LJ_TCDATA
-- | cmpwi cr5, TMP1, LJ_TCDATA
-+ | cmpwi cr7, CARG1, LJ_TCDATA
-+ | cmpwi cr5, CARG3, LJ_TCDATA
- |.endif
-- | not TMP3, TMP0
-- | cmplw TMP0, TMP1
-- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
-+ | not TMP2, CARG1
-+ | cmplw CARG1, CARG3
-+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
- |.if FFI
- | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
- |.endif
-- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
-+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
- |.if FFI
- | beq cr7, ->vmeta_equal_cd
- |.endif
-- | cmplw cr5, CARG2, CARG3
-+ | cmplw cr5, CARG2, CARG4
- | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
- | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
-- | mr SAVE0, PC
-+ | mr SAVE1, PC
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
- if (vk) {
- | bne cr0, >6
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |6:
- } else {
- | beq cr0, >6
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |6:
- }
- |.if DUALNUM
-@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |
- | // Different tables or userdatas. Need to check __eq metamethod.
- | // Field metatable must be at same offset for GCtab and GCudata!
-+ | mr CARG3, CARG4
- | lwz TAB:TMP2, TAB:CARG2->metatable
- | li CARG4, 1-vk // ne = 0 or 1.
- | cmplwi TAB:TMP2, 0
-@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lbz TMP2, TAB:TMP2->nomm
- | andix. TMP2, TMP2, 1<<MM_eq
- | bne <1 // Or 'no __eq' flag set?
-- | mr PC, SAVE0 // Restore old PC.
-+ | mr PC, SAVE1 // Restore old PC.
- | b ->vmeta_equal // Handle __eq metamethod.
- break;
-
-@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, KBASE
-- | checknum cr0, TMP0
-- | lwz TMP2, -4(PC)
-- | checknum cr1, TMP1
-- | decode_RD4 TMP2, TMP2
-- | lwz CARG3, 4(RD)
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | lwzux CARG3, RD, KBASE
-+ | checknum cr0, CARG1
-+ | lwz SAVE0, -4(PC)
-+ | checknum cr1, CARG3
-+ | decode_RD4 SAVE0, SAVE0
-+ | lwz CARG4, 4(RD)
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
- |->BC_ISEQN_Z:
- } else {
-@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | bne cr0, >7
- | bne cr1, >8
-- | cmpw CARG2, CARG3
-+ | cmpw CARG2, CARG4
- |4:
- |.else
- if (vk) {
-@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- } else {
- |->BC_ISNEN_Z: // Dummy label.
- }
-- | lwzx TMP0, BASE, RA
-+ | lwzx CARG1, BASE, RA
- | addi PC, PC, 4
- | lfdx f0, BASE, RA
-- | lwz TMP2, -4(PC)
-+ | lwz SAVE0, -4(PC)
- | lfdx f1, KBASE, RD
-- | decode_RD4 TMP2, TMP2
-- | checknum TMP0
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | decode_RD4 SAVE0, SAVE0
-+ | checknum CARG1
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- | bge >3
- | fcmpu cr0, f0, f1
- |.endif
- if (vk) {
- | bne >1
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |1:
- |.if not FFI
- |3:
-@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.if not FFI
- |3:
- |.endif
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |2:
- }
- | ins_next
- |.if FFI
- |3:
-- | cmpwi TMP0, LJ_TCDATA
-+ | cmpwi CARG1, LJ_TCDATA
- | beq ->vmeta_equal_cd
- | b <1
- |.endif
-@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |7: // RA is not an integer.
- | bge cr0, <3
- | // RA is a number.
-- | lfd f0, 0(RA)
-+ | .FPU lfd f0, 0(RA)
- | blt cr1, >1
- | // RA is a number, RD is an integer.
-- | tonum_i f1, CARG3
-+ |.if FPU
-+ | tonum_i f1, CARG4
-+ |.else
-+ | bl ->vm_sfi2d_2
-+ |.endif
- | b >2
- |
- |8: // RA is an integer, RD is a number.
-+ |.if FPU
- | tonum_i f0, CARG2
-+ |.else
-+ | bl ->vm_sfi2d_1
-+ |.endif
- |1:
-- | lfd f1, 0(RD)
-+ | .FPU lfd f1, 0(RD)
- |2:
-+ |.if FPU
- | fcmpu cr0, f0, f1
-+ |.else
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ |.endif
- | b <4
- |.endif
- break;
-@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add PC, PC, TMP2
- } else {
- | li TMP1, LJ_TFALSE
-+ |.if FPU
- | lfdx f0, BASE, RD
-+ |.else
-+ | lwzux CARG1, RD, BASE
-+ | lwz CARG2, 4(RD)
-+ |.endif
- | cmplw TMP0, TMP1
- if (op == BC_ISTC) {
- | bge >1
-@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | addis PC, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux CARG1, RA, BASE
-+ | stw CARG2, 4(RA)
-+ |.endif
- | add PC, PC, TMP2
- |1:
- }
-@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_MOV:
- | // RA = dst*8, RD = src*8
- | ins_next1
-+ |.if FPU
- | lfdx f0, BASE, RD
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP0, RD, BASE
-+ | lwz TMP1, 4(RD)
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- break;
- case BC_NOT:
-@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzx TMP1, BASE, RB
-+ | lwzx CARG1, BASE, RB
- | .if DUALNUM
-- | lwzx TMP2, KBASE, RC
-+ | lwzx CARG3, KBASE, RC
- | .endif
-+ | .if FPU
- | lfdx f14, BASE, RB
- | lfdx f15, KBASE, RC
-+ | .else
-+ | add TMP1, BASE, RB
-+ | add TMP2, KBASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ | .endif
- | .if DUALNUM
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vn
- | .else
-- | checknum TMP1; bge ->vmeta_arith_vn
-+ | checknum CARG1; bge ->vmeta_arith_vn
- | .endif
- || break;
- ||case 1:
-- | lwzx TMP1, BASE, RB
-+ | lwzx CARG1, BASE, RB
- | .if DUALNUM
-- | lwzx TMP2, KBASE, RC
-+ | lwzx CARG3, KBASE, RC
- | .endif
-+ | .if FPU
- | lfdx f15, BASE, RB
- | lfdx f14, KBASE, RC
-+ | .else
-+ | add TMP1, BASE, RB
-+ | add TMP2, KBASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ | .endif
- | .if DUALNUM
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_nv
- | .else
-- | checknum TMP1; bge ->vmeta_arith_nv
-+ | checknum CARG1; bge ->vmeta_arith_nv
- | .endif
- || break;
- ||default:
-- | lwzx TMP1, BASE, RB
-- | lwzx TMP2, BASE, RC
-+ | lwzx CARG1, BASE, RB
-+ | lwzx CARG3, BASE, RC
-+ | .if FPU
- | lfdx f14, BASE, RB
- | lfdx f15, BASE, RC
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ | .else
-+ | add TMP1, BASE, RB
-+ | add TMP2, BASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ | .endif
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vv
- || break;
-@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | fsub a, b, a // b - floor(b/c)*c
- |.endmacro
- |
-+ |.macro sfpmod
-+ |->BC_MODVN_Z:
-+ | stw CARG1, SFSAVE_1
-+ | stw CARG2, SFSAVE_2
-+ | mr SAVE0, CARG3
-+ | mr SAVE1, CARG4
-+ | blex __divdf3
-+ | blex floor
-+ | mr CARG3, SAVE0
-+ | mr CARG4, SAVE1
-+ | blex __muldf3
-+ | mr CARG3, CRET1
-+ | mr CARG4, CRET2
-+ | lwz CARG1, SFSAVE_1
-+ | lwz CARG2, SFSAVE_2
-+ | blex __subdf3
-+ |.endmacro
-+ |
- |.macro ins_arithfp, fpins
- | ins_arithpre
- |.if "fpins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
-- |.else
-+ |.elif FPU
- | fpins f0, f14, f15
- | ins_next1
- | stfdx f0, BASE, RA
- | ins_next2
-+ |.else
-+ | blex __divdf3 // Only soft-float div uses this macro.
-+ | ins_next1
-+ | stwux CRET1, RA, BASE
-+ | stw CRET2, 4(RA)
-+ | ins_next2
- |.endif
- |.endmacro
- |
-- |.macro ins_arithdn, intins, fpins
-+ |.macro ins_arithdn, intins, fpins, fpcall
- | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, KBASE
-- | lwz CARG1, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG2, 4(RC)
-+ | lwzux CARG1, RB, BASE
-+ | lwzux CARG3, RC, KBASE
-+ | lwz CARG2, 4(RB)
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RC)
-+ | checknum cr1, CARG3
- || break;
- ||case 1:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, KBASE
-- | lwz CARG2, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG1, 4(RC)
-+ | lwzux CARG3, RB, BASE
-+ | lwzux CARG1, RC, KBASE
-+ | lwz CARG4, 4(RB)
-+ | checknum cr0, CARG3
-+ | lwz CARG2, 4(RC)
-+ | checknum cr1, CARG1
- || break;
- ||default:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, BASE
-- | lwz CARG1, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG2, 4(RC)
-+ | lwzux CARG1, RB, BASE
-+ | lwzux CARG3, RC, BASE
-+ | lwz CARG2, 4(RB)
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RC)
-+ | checknum cr1, CARG3
- || break;
- ||}
-- | checknum cr1, TMP2
- | bne >5
- | bne cr1, >5
-- | intins CARG1, CARG1, CARG2
-+ |.if "intins" == "intmod"
-+ | mr CARG1, CARG2
-+ | mr CARG2, CARG4
-+ |.endif
-+ | intins CARG1, CARG2, CARG4
- | bso >4
- |1:
- | ins_next1
-@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | checkov TMP0, <1 // Ignore unrelated overflow.
- | ins_arithfallback b
- |5: // FP variant.
-+ |.if FPU
- ||if (vk == 1) {
- | lfd f15, 0(RB)
-- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | lfd f14, 0(RC)
- ||} else {
- | lfd f14, 0(RB)
-- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | lfd f15, 0(RC)
- ||}
-+ |.endif
-+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | ins_arithfallback bge
- |.if "fpins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |.else
-+ |.if FPU
- | fpins f0, f14, f15
-- | ins_next1
- | stfdx f0, BASE, RA
-+ |.else
-+ |.if "fpcall" == "sfpmod"
-+ | sfpmod
-+ |.else
-+ | blex fpcall
-+ |.endif
-+ | stwux CRET1, RA, BASE
-+ | stw CRET2, 4(RA)
-+ |.endif
-+ | ins_next1
- | b <2
- |.endif
- |.endmacro
- |
-- |.macro ins_arith, intins, fpins
-+ |.macro ins_arith, intins, fpins, fpcall
- |.if DUALNUM
-- | ins_arithdn intins, fpins
-+ | ins_arithdn intins, fpins, fpcall
- |.else
- | ins_arithfp fpins
- |.endif
-@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addo. TMP0, TMP0, TMP3
- | add y, a, b
- |.endmacro
-- | ins_arith addo32., fadd
-+ | ins_arith addo32., fadd, __adddf3
- |.else
-- | ins_arith addo., fadd
-+ | ins_arith addo., fadd, __adddf3
- |.endif
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | subo. TMP0, TMP0, TMP3
- | sub y, a, b
- |.endmacro
-- | ins_arith subo32., fsub
-+ | ins_arith subo32., fsub, __subdf3
- |.else
-- | ins_arith subo., fsub
-+ | ins_arith subo., fsub, __subdf3
- |.endif
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
-- | ins_arith mullwo., fmul
-+ | ins_arith mullwo., fmul, __muldf3
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arithfp fdiv
- break;
- case BC_MODVN:
-- | ins_arith intmod, fpmod
-+ | ins_arith intmod, fpmod, sfpmod
- break;
- case BC_MODNV: case BC_MODVV:
-- | ins_arith intmod, fpmod_
-+ | ins_arith intmod, fpmod_, sfpmod
- break;
- case BC_POW:
- | // NYI: (partial) integer arithmetic.
-- | lwzx TMP1, BASE, RB
-+ | lwzx CARG1, BASE, RB
-+ | lwzx CARG3, BASE, RC
-+ |.if FPU
- | lfdx FARG1, BASE, RB
-- | lwzx TMP2, BASE, RC
- | lfdx FARG2, BASE, RC
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ |.else
-+ | add TMP1, BASE, RB
-+ | add TMP2, BASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ |.endif
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vv
- | blex pow
- | ins_next1
-+ |.if FPU
- | stfdx FARG1, BASE, RA
-+ |.else
-+ | stwux CARG1, RA, BASE
-+ | stw CARG2, 4(RA)
-+ |.endif
- | ins_next2
- break;
-
-@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lp BASE, L->base
- | bne ->vmeta_binop
- | ins_next1
-+ |.if FPU
- | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP0, SAVE0, BASE
-+ | lwz TMP1, 4(SAVE0)
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- break;
-
-@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_KNUM:
- | // RA = dst*8, RD = num_const*8
- | ins_next1
-+ |.if FPU
- | lfdx f0, KBASE, RD
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP0, RD, KBASE
-+ | lwz TMP1, 4(RD)
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- break;
- case BC_KPRI:
-@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwzx UPVAL:RB, LFUNC:RB, RD
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
-+ |.if FPU
- | lfd f0, 0(TMP1)
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwz TMP2, 0(TMP1)
-+ | lwz TMP3, 4(TMP1)
-+ | stwux TMP2, RA, BASE
-+ | stw TMP3, 4(RA)
-+ |.endif
- | ins_next2
- break;
- case BC_USETV:
-@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
-+ |.if FPU
- | lfdux f0, RD, BASE
-+ |.else
-+ | lwzux CARG1, RD, BASE
-+ | lwz CARG3, 4(RD)
-+ |.endif
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | lbz TMP3, UPVAL:RB->marked
- | lwz CARG2, UPVAL:RB->v
- | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP0, UPVAL:RB->closed
- | lwz TMP2, 0(RD)
-+ |.if FPU
- | stfd f0, 0(CARG2)
-+ |.else
-+ | stw CARG1, 0(CARG2)
-+ | stw CARG3, 4(CARG2)
-+ |.endif
- | cmplwi cr1, TMP0, 0
- | lwz TMP1, 4(RD)
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
-@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
-+ |.if FPU
- | lfdx f0, KBASE, RD
-+ |.else
-+ | lwzux TMP2, RD, KBASE
-+ | lwz TMP3, 4(RD)
-+ |.endif
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
-+ |.if FPU
- | stfd f0, 0(TMP1)
-+ |.else
-+ | stw TMP2, 0(TMP1)
-+ | stw TMP3, 4(TMP1)
-+ |.endif
- | ins_next2
- break;
- case BC_USETP:
-@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- | ble ->vmeta_tgetv // Integer key and in array part?
- | lwzx TMP0, TMP1, TMP2
-+ |.if FPU
- | lfdx f14, TMP1, TMP2
-+ |.else
-+ | lwzux SAVE0, TMP1, TMP2
-+ | lwz SAVE1, 4(TMP1)
-+ |.endif
- | checknil TMP0; beq >2
- |1:
- | ins_next1
-+ |.if FPU
- | stfdx f14, BASE, RA
-+ |.else
-+ | stwux SAVE0, RA, BASE
-+ | stw SAVE1, 4(RA)
-+ |.endif
- | ins_next2
- |
- |2: // Check for __index if table value is nil.
-@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | cmplw TMP0, TMP1; bge ->vmeta_tgetb
-+ |.if FPU
- | lwzx TMP1, TMP2, RC
- | lfdx f0, TMP2, RC
-+ |.else
-+ | lwzux TMP1, TMP2, RC
-+ | lwz TMP3, 4(TMP2)
-+ |.endif
- | checknil TMP1; beq >5
- |1:
- | ins_next1
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux TMP1, RA, BASE
-+ | stw TMP3, 4(RA)
-+ |.endif
- | ins_next2
- |
- |5: // Check for __index if table value is nil.
-@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cmplw TMP0, CARG2
- | slwi TMP2, CARG2, 3
- | ble ->vmeta_tgetr // In array part?
-+ |.if FPU
- | lfdx f14, TMP1, TMP2
-+ |.else
-+ | lwzux SAVE0, TMP2, TMP1
-+ | lwz SAVE1, 4(TMP2)
-+ |.endif
- |->BC_TGETR_Z:
- | ins_next1
-+ |.if FPU
- | stfdx f14, BASE, RA
-+ |.else
-+ | stwux SAVE0, RA, BASE
-+ | stw SAVE1, 4(RA)
-+ |.endif
- | ins_next2
- break;
-
-@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ble ->vmeta_tsetv // Integer key and in array part?
- | lwzx TMP2, TMP1, TMP0
- | lbz TMP3, TAB:RB->marked
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | add SAVE1, BASE, RA
-+ | lwz SAVE0, 0(SAVE1)
-+ | lwz SAVE1, 4(SAVE1)
-+ |.endif
- | checknil TMP2; beq >3
- |1:
- | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
-+ |.if FPU
- | stfdx f14, TMP1, TMP0
-+ |.else
-+ | stwux SAVE0, TMP1, TMP0
-+ | stw SAVE1, 4(TMP1)
-+ |.endif
- | bne >7
- |2:
- | ins_next
-@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz NODE:TMP2, TAB:RB->node
- | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | add CARG2, BASE, RA
-+ | lwz SAVE0, 0(CARG2)
-+ | lwz SAVE1, 4(CARG2)
-+ |.endif
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
-@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | checknil CARG2; beq >4 // Key found, but nil value?
- |2:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
-+ |.if FPU
- | stfd f14, NODE:TMP2->val
-+ |.else
-+ | stw SAVE0, NODE:TMP2->val.u32.hi
-+ | stw SAVE1, NODE:TMP2->val.u32.lo
-+ |.endif
- | bne >7
- |3:
- | ins_next
-@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
- | // Returns TValue *.
- | lp BASE, L->base
-+ |.if FPU
- | stfd f14, 0(CRET1)
-+ |.else
-+ | stw SAVE0, 0(CRET1)
-+ | stw SAVE1, 4(CRET1)
-+ |.endif
- | b <3 // No 2nd write barrier needed.
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz TMP2, TAB:RB->array
- | lbz TMP3, TAB:RB->marked
- | cmplw TMP0, TMP1
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | add CARG2, BASE, RA
-+ | lwz SAVE0, 0(CARG2)
-+ | lwz SAVE1, 4(CARG2)
-+ |.endif
- | bge ->vmeta_tsetb
- | lwzx TMP1, TMP2, RC
- | checknil TMP1; beq >5
- |1:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
-+ |.if FPU
- | stfdx f14, TMP2, RC
-+ |.else
-+ | stwux SAVE0, RC, TMP2
-+ | stw SAVE1, 4(RC)
-+ |.endif
- | bne >7
- |2:
- | ins_next
-@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |2:
- | cmplw TMP0, CARG3
- | slwi TMP2, CARG3, 3
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | lwzux SAVE0, RA, BASE
-+ | lwz SAVE1, 4(RA)
-+ |.endif
- | ble ->vmeta_tsetr // In array part?
- | ins_next1
-+ |.if FPU
- | stfdx f14, TMP1, TMP2
-+ |.else
-+ | stwux SAVE0, TMP1, TMP2
-+ | stw SAVE1, 4(TMP1)
-+ |.endif
- | ins_next2
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add TMP1, TMP1, TMP0
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- |3: // Copy result slots to table.
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz SAVE0, 0(RA)
-+ | lwz SAVE1, 4(RA)
-+ |.endif
- | addi RA, RA, 8
- | cmpw cr1, RA, TMP2
-+ |.if FPU
- | stfd f0, 0(TMP1)
-+ |.else
-+ | stw SAVE0, 0(TMP1)
-+ | stw SAVE1, 4(TMP1)
-+ |.endif
- | addi TMP1, TMP1, 8
- | blt cr1, <3
- | bne >7
-@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | beq cr1, >3
- |2:
- | addi TMP3, TMP2, 8
-+ |.if FPU
- | lfdx f0, RA, TMP2
-+ |.else
-+ | add CARG3, RA, TMP2
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | cmplw cr1, TMP3, NARGS8:RC
-+ |.if FPU
- | stfdx f0, BASE, TMP2
-+ |.else
-+ | stwux CARG1, TMP2, BASE
-+ | stw CARG2, 4(TMP2)
-+ |.endif
- | mr TMP2, TMP3
- | bne cr1, <2
- |3:
-@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add BASE, BASE, RA
- | lwz TMP1, -24(BASE)
- | lwz LFUNC:RB, -20(BASE)
-+ |.if FPU
- | lfd f1, -8(BASE)
- | lfd f0, -16(BASE)
-+ |.else
-+ | lwz CARG1, -8(BASE)
-+ | lwz CARG2, -4(BASE)
-+ | lwz CARG3, -16(BASE)
-+ | lwz CARG4, -12(BASE)
-+ |.endif
- | stw TMP1, 0(BASE) // Copy callable.
- | stw LFUNC:RB, 4(BASE)
- | checkfunc TMP1
-- | stfd f1, 16(BASE) // Copy control var.
- | li NARGS8:RC, 16 // Iterators get 2 arguments.
-+ |.if FPU
-+ | stfd f1, 16(BASE) // Copy control var.
- | stfdu f0, 8(BASE) // Copy state.
-+ |.else
-+ | stw CARG1, 16(BASE) // Copy control var.
-+ | stw CARG2, 20(BASE)
-+ | stwu CARG3, 8(BASE) // Copy state.
-+ | stw CARG4, 4(BASE)
-+ |.endif
- | bne ->vmeta_call
- | ins_call
- break;
-@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi TMP3, RC, 3
- | bge >5 // Index points after array part?
- | lwzx TMP2, TMP1, TMP3
-+ |.if FPU
- | lfdx f0, TMP1, TMP3
-+ |.else
-+ | lwzux CARG1, TMP3, TMP1
-+ | lwz CARG2, 4(TMP3)
-+ |.endif
- | checknil TMP2
- | lwz INS, -4(PC)
- | beq >4
-@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- | addi RC, RC, 1
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-+ |.if FPU
- | stfd f0, 8(RA)
-+ |.else
-+ | stw CARG1, 8(RA)
-+ | stw CARG2, 12(RA)
-+ |.endif
- | decode_RD4 TMP1, INS
- | stw RC, -4(RA) // Update control var.
- | add PC, TMP1, TMP3
-@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi RB, RC, 3
- | sub TMP3, TMP3, RB
- | lwzx RB, TMP2, TMP3
-+ |.if FPU
- | lfdx f0, TMP2, TMP3
-+ |.else
-+ | add CARG3, TMP2, TMP3
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | add NODE:TMP3, TMP2, TMP3
- | checknil RB
- | lwz INS, -4(PC)
- | beq >7
-+ |.if FPU
- | lfd f1, NODE:TMP3->key
-+ |.else
-+ | lwz CARG3, NODE:TMP3->key.u32.hi
-+ | lwz CARG4, NODE:TMP3->key.u32.lo
-+ |.endif
- | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
-+ |.if FPU
- | stfd f0, 8(RA)
-+ |.else
-+ | stw CARG1, 8(RA)
-+ | stw CARG2, 12(RA)
-+ |.endif
- | add RC, RC, TMP0
- | decode_RD4 TMP1, INS
-+ |.if FPU
- | stfd f1, 0(RA)
-+ |.else
-+ | stw CARG3, 0(RA)
-+ | stw CARG4, 4(RA)
-+ |.endif
- | addi RC, RC, 1
- | add PC, TMP1, TMP2
- | stw RC, -4(RA) // Update control var.
-@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | subi TMP2, TMP2, 16
- | ble >2 // No vararg slots?
- |1: // Copy vararg slots to destination slots.
-+ |.if FPU
- | lfd f0, 0(RC)
-+ |.else
-+ | lwz CARG1, 0(RC)
-+ | lwz CARG2, 4(RC)
-+ |.endif
- | addi RC, RC, 8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw CARG1, 0(RA)
-+ | stw CARG2, 4(RA)
-+ |.endif
- | cmplw RA, TMP2
- | cmplw cr1, RC, TMP3
- | bge >3 // All destination slots filled?
-@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addi MULTRES, TMP1, 8
- | bgt >7
- |6:
-+ |.if FPU
- | lfd f0, 0(RC)
-+ |.else
-+ | lwz CARG1, 0(RC)
-+ | lwz CARG2, 4(RC)
-+ |.endif
- | addi RC, RC, 8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw CARG1, 0(RA)
-+ | stw CARG2, 4(RA)
-+ |.endif
- | cmplw RC, TMP3
- | addi RA, RA, 8
- | blt <6 // More vararg slots?
-@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | li TMP1, 0
- |2:
- | addi TMP3, TMP1, 8
-+ |.if FPU
- | lfdx f0, RA, TMP1
-+ |.else
-+ | add CARG3, RA, TMP1
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | cmpw TMP3, RC
-+ |.if FPU
- | stfdx f0, TMP2, TMP1
-+ |.else
-+ | add CARG3, TMP2, TMP1
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | beq >3
- | addi TMP1, TMP3, 8
-+ |.if FPU
- | lfdx f1, RA, TMP3
-+ |.else
-+ | add CARG3, RA, TMP3
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | cmpw TMP1, RC
-+ |.if FPU
- | stfdx f1, TMP2, TMP3
-+ |.else
-+ | add CARG3, TMP2, TMP3
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | bne <2
- |3:
- |5:
-@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | subi TMP2, BASE, 8
- | decode_RB8 RB, INS
- if (op == BC_RET1) {
-+ |.if FPU
- | lfd f0, 0(RA)
- | stfd f0, 0(TMP2)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ | stw CARG1, 0(TMP2)
-+ | stw CARG2, 4(TMP2)
-+ |.endif
- }
- |5:
- | cmplw RB, RD
-@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |4:
- | stw CARG1, FORL_IDX*8+4(RA)
- } else {
-- | lwz TMP3, FORL_STEP*8(RA)
-+ | lwz SAVE0, FORL_STEP*8(RA)
- | lwz CARG3, FORL_STEP*8+4(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | lwz CARG2, FORL_STOP*8+4(RA)
-- | cmplw cr7, TMP3, TISNUM
-+ | cmplw cr7, SAVE0, TISNUM
- | cmplw cr1, TMP2, TISNUM
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
-@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- if (vk) {
- |.if DUALNUM
- |9: // FP loop.
-+ |.if FPU
- | lfd f1, FORL_IDX*8(RA)
- |.else
-+ | lwz CARG1, FORL_IDX*8(RA)
-+ | lwz CARG2, FORL_IDX*8+4(RA)
-+ |.endif
-+ |.else
- | lfdux f1, RA, BASE
- |.endif
-+ |.if FPU
- | lfd f3, FORL_STEP*8(RA)
- | lfd f2, FORL_STOP*8(RA)
-- | lwz TMP3, FORL_STEP*8(RA)
- | fadd f1, f1, f3
- | stfd f1, FORL_IDX*8(RA)
-+ |.else
-+ | lwz CARG3, FORL_STEP*8(RA)
-+ | lwz CARG4, FORL_STEP*8+4(RA)
-+ | mr SAVE1, RD
-+ | blex __adddf3
-+ | mr RD, SAVE1
-+ | stw CRET1, FORL_IDX*8(RA)
-+ | stw CRET2, FORL_IDX*8+4(RA)
-+ | lwz CARG3, FORL_STOP*8(RA)
-+ | lwz CARG4, FORL_STOP*8+4(RA)
-+ |.endif
-+ | lwz SAVE0, FORL_STEP*8(RA)
- } else {
- |.if DUALNUM
- |9: // FP loop.
- |.else
- | lwzux TMP1, RA, BASE
-- | lwz TMP3, FORL_STEP*8(RA)
-+ | lwz SAVE0, FORL_STEP*8(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | cmplw cr0, TMP1, TISNUM
-- | cmplw cr7, TMP3, TISNUM
-+ | cmplw cr7, SAVE0, TISNUM
- | cmplw cr1, TMP2, TISNUM
- |.endif
-+ |.if FPU
- | lfd f1, FORL_IDX*8(RA)
-+ |.else
-+ | lwz CARG1, FORL_IDX*8(RA)
-+ | lwz CARG2, FORL_IDX*8+4(RA)
-+ |.endif
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
-+ |.if FPU
- | lfd f2, FORL_STOP*8(RA)
-+ |.else
-+ | lwz CARG3, FORL_STOP*8(RA)
-+ | lwz CARG4, FORL_STOP*8+4(RA)
-+ |.endif
- | bge ->vmeta_for
- }
-- | cmpwi cr6, TMP3, 0
-+ | cmpwi cr6, SAVE0, 0
- if (op != BC_JFORL) {
- | srwi RD, RD, 1
- }
-+ |.if FPU
- | stfd f1, FORL_EXT*8(RA)
-+ |.else
-+ | stw CARG1, FORL_EXT*8(RA)
-+ | stw CARG2, FORL_EXT*8+4(RA)
-+ |.endif
- if (op != BC_JFORL) {
- | add RD, PC, RD
- }
-+ |.if FPU
- | fcmpu cr0, f1, f2
-+ |.else
-+ | mr SAVE1, RD
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ | mr RD, SAVE1
-+ |.endif
- if (op == BC_JFORI) {
- | addis PC, RD, -(BCBIAS_J*4 >> 16)
- }
---
-2.20.1
-
diff --git a/0011-Use-https-for-freelists.org-links.patch b/0011-Use-https-for-freelists.org-links.patch
deleted file mode 100644
index c0c2a19..0000000
--- a/0011-Use-https-for-freelists.org-links.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From f3d75075ed91137699c6071abe49e2252e794a9c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Fri, 18 Aug 2017 12:52:14 +0200
-Subject: [PATCH 11/72] Use https for freelists.org links.
-
----
- doc/ext_ffi_semantics.html | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
-index 899640c..ae3c037 100644
---- a/doc/ext_ffi_semantics.html
-+++ b/doc/ext_ffi_semantics.html
-@@ -844,7 +844,7 @@ place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead.
- <p>
- The main use for parameterized types are libraries implementing abstract
- data types
--(<a href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">»</span> example</a>),
-+(<a href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8"><span class="ext">»</span> example</a>),
- similar to what can be achieved with C++ template metaprogramming.
- Another use case are derived types of anonymous structs, which avoids
- pollution of the global struct namespace.
---
-2.20.1
-
diff --git a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch b/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch
deleted file mode 100644
index 80ca5b0..0000000
--- a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From 6b0824852677cc12570c20a3211fbfe0e4f0ce14 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 28 Aug 2017 10:43:37 +0200
-Subject: [PATCH 12/72] x64/LJ_GC64: Fix fallback case of asm_fuseloadk64().
-
-Contributed by Peter Cawley.
----
- src/lj_asm_x86.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
-index 3e189b1..55c02d2 100644
---- a/src/lj_asm_x86.h
-+++ b/src/lj_asm_x86.h
-@@ -387,6 +387,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
- ir->i = (int32_t)(as->mctop - as->mcbot);
- as->mcbot += 8;
- as->mclim = as->mcbot + MCLIM_REDZONE;
-+ lj_mcode_commitbot(as->J, as->mcbot);
- }
- as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
- as->mrm.base = RID_RIP;
---
-2.20.1
-
diff --git a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch b/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch
deleted file mode 100644
index faaa94a..0000000
--- a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch
+++ /dev/null
@@ -1,751 +0,0 @@
-From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 3 Sep 2017 23:20:53 +0200
-Subject: [PATCH 13/72] PPC: Add soft-float support to JIT compiler backend.
-
-Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-Sponsored by Cisco Systems, Inc.
----
- src/lj_arch.h | 1 -
- src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++-------
- 2 files changed, 278 insertions(+), 44 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 0145a7c..5962f3a 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -273,7 +273,6 @@
- #endif
-
- #if LJ_ABI_SOFTFP
--#define LJ_ARCH_NOJIT 1 /* NYI */
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
- #else
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
-diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
-index 6daa861..1955429 100644
---- a/src/lj_asm_ppc.h
-+++ b/src/lj_asm_ppc.h
-@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
- emit_tab(as, pi, rt, left, right);
- }
-
-+#if !LJ_SOFTFP
- /* Fuse to multiply-add/sub instruction. */
- static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
- {
-@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
- }
- return 0;
- }
-+#endif
-
- /* -- Calls --------------------------------------------------------------- */
-
-@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
- {
- uint32_t n, nargs = CCI_XNARGS(ci);
- int32_t ofs = 8;
-- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
-+ Reg gpr = REGARG_FIRSTGPR;
-+#if !LJ_SOFTFP
-+ Reg fpr = REGARG_FIRSTFPR;
-+#endif
- if ((void *)ci->func)
- emit_call(as, (void *)ci->func);
- for (n = 0; n < nargs; n++) { /* Setup args. */
- IRRef ref = args[n];
- if (ref) {
- IRIns *ir = IR(ref);
-+#if !LJ_SOFTFP
- if (irt_isfp(ir->t)) {
- if (fpr <= REGARG_LASTFPR) {
- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
-@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
- emit_spstore(as, ir, r, ofs);
- ofs += irt_isnum(ir->t) ? 8 : 4;
- }
-- } else {
-+ } else
-+#endif
-+ {
- if (gpr <= REGARG_LASTGPR) {
- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
- ra_leftov(as, gpr, ref);
-@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
- }
- checkmclim(as);
- }
-+#if !LJ_SOFTFP
- if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
- emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
-+#endif
- }
-
- /* Setup result reg/sp for call. Evict scratch regs. */
-@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
- {
- RegSet drop = RSET_SCRATCH;
- int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
-+#if !LJ_SOFTFP
- if ((ci->flags & CCI_NOFPRCLOBBER))
- drop &= ~RSET_FPR;
-+#endif
- if (ra_hasreg(ir->r))
- rset_clear(drop, ir->r); /* Dest reg handled below. */
- if (hiop && ra_hasreg((ir+1)->r))
-@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
- ra_evictset(as, drop); /* Evictions must be performed first. */
- if (ra_used(ir)) {
- lua_assert(!irt_ispri(ir->t));
-- if (irt_isfp(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
- if ((ci->flags & CCI_CASTU64)) {
- /* Use spill slot or temp slots. */
- int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
-@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
-
- /* -- Type conversions ---------------------------------------------------- */
-
-+#if !LJ_SOFTFP
- static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
- {
- RegSet allow = RSET_FPR;
-@@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir)
- emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
- emit_fab(as, PPCI_FADD, tmp, left, right);
- }
-+#endif
-
- static void asm_conv(ASMState *as, IRIns *ir)
- {
- IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
-+#if !LJ_SOFTFP
- int stfp = (st == IRT_NUM || st == IRT_FLOAT);
-+#endif
- IRRef lref = ir->op1;
-- lua_assert(irt_type(ir->t) != st);
- lua_assert(!(irt_isint64(ir->t) ||
- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
-+#if LJ_SOFTFP
-+ /* FP conversions are handled by SPLIT. */
-+ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
-+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
-+#else
-+ lua_assert(irt_type(ir->t) != st);
- if (irt_isfp(ir->t)) {
- Reg dest = ra_dest(as, ir, RSET_FPR);
- if (stfp) { /* FP to FP conversion. */
-@@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
- emit_fb(as, PPCI_FCTIWZ, tmp, left);
- }
- }
-- } else {
-+ } else
-+#endif
-+ {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
- Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
-@@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIns *ir)
- {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
- IRRef args[2];
-- int32_t ofs;
-+ int32_t ofs = SPOFS_TMP;
-+#if LJ_SOFTFP
-+ ra_evictset(as, RSET_SCRATCH);
-+ if (ra_used(ir)) {
-+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
-+ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
-+ int i;
-+ for (i = 0; i < 2; i++) {
-+ Reg r = (ir+i)->r;
-+ if (ra_hasreg(r)) {
-+ ra_free(as, r);
-+ ra_modified(as, r);
-+ emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
-+ }
-+ }
-+ ofs = sps_scale(ir->s & ~1);
-+ } else {
-+ Reg rhi = ra_dest(as, ir+1, RSET_GPR);
-+ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
-+ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
-+ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
-+ }
-+ }
-+#else
- RegSet drop = RSET_SCRATCH;
- if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
- ra_evictset(as, drop);
-+ if (ir->s) ofs = sps_scale(ir->s);
-+#endif
- asm_guardcc(as, CC_EQ);
- emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
- args[0] = ir->op1; /* GCstr *str */
- args[1] = ASMREF_TMP1; /* TValue *n */
- asm_gencall(as, ci, args);
- /* Store the result to the spill slot or temp slots. */
-- ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
- emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
- }
-
-@@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
- Reg src = ra_alloc1(as, ref, allow);
- emit_setgl(as, src, tmptv.gcr);
- }
-- type = ra_allock(as, irt_toitype(ir->t), allow);
-+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-+ type = ra_alloc1(as, ref+1, allow);
-+ else
-+ type = ra_allock(as, irt_toitype(ir->t), allow);
- emit_setgl(as, type, tmptv.it);
- }
- }
-@@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- Reg tisnum = RID_NONE, tmpnum = RID_NONE;
- IRRef refkey = ir->op2;
- IRIns *irkey = IR(refkey);
-+ int isk = irref_isk(refkey);
- IRType1 kt = irkey->t;
- uint32_t khash;
- MCLabel l_end, l_loop, l_next;
-
- rset_clear(allow, tab);
-+#if LJ_SOFTFP
-+ if (!isk) {
-+ key = ra_alloc1(as, refkey, allow);
-+ rset_clear(allow, key);
-+ if (irkey[1].o == IR_HIOP) {
-+ if (ra_hasreg((irkey+1)->r)) {
-+ tmpnum = (irkey+1)->r;
-+ ra_noweak(as, tmpnum);
-+ } else {
-+ tmpnum = ra_allocref(as, refkey+1, allow);
-+ }
-+ rset_clear(allow, tmpnum);
-+ }
-+ }
-+#else
- if (irt_isnum(kt)) {
- key = ra_alloc1(as, refkey, RSET_FPR);
- tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
-@@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- key = ra_alloc1(as, refkey, allow);
- rset_clear(allow, key);
- }
-+#endif
- tmp2 = ra_scratch(as, allow);
- rset_clear(allow, tmp2);
-
-@@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- asm_guardcc(as, CC_EQ);
- else
- emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
-- if (irt_isnum(kt)) {
-+ if (!LJ_SOFTFP && irt_isnum(kt)) {
- emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
- emit_condbranch(as, PPCI_BC, CC_GE, l_next);
- emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
-@@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_ab(as, PPCI_CMPW, tmp2, key);
- emit_condbranch(as, PPCI_BC, CC_NE, l_next);
- }
-- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
-+ if (LJ_SOFTFP && ra_hasreg(tmpnum))
-+ emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
-+ else
-+ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
- if (!irt_ispri(kt))
- emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
- }
-@@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- (((char *)as->mcp-(char *)l_loop) & 0xffffu);
-
- /* Load main position relative to tab->node into dest. */
-- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
-+ khash = isk ? ir_khash(irkey) : 1;
- if (khash == 0) {
- emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
- } else {
- Reg tmphash = tmp1;
-- if (irref_isk(refkey))
-+ if (isk)
- tmphash = ra_allock(as, khash, allow);
- emit_tab(as, PPCI_ADD, dest, dest, tmp1);
- emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
- emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
- emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
- emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
-- if (irref_isk(refkey)) {
-+ if (isk) {
- /* Nothing to do. */
- } else if (irt_isstr(kt)) {
- emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
-@@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
- emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
- emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
-- if (irt_isnum(kt)) {
-+ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
-+#if LJ_SOFTFP
-+ emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
-+ emit_rotlwi(as, dest, tmp1, HASH_ROT1);
-+ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
-+#else
- int32_t ofs = ra_spill(as, irkey);
- emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
- emit_rotlwi(as, dest, tmp1, HASH_ROT1);
- emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
- emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
- emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
-+#endif
- } else {
- emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
- emit_rotlwi(as, dest, tmp1, HASH_ROT1);
-@@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
- case IRT_U8: return PPCI_LBZ;
- case IRT_I16: return PPCI_LHA;
- case IRT_U16: return PPCI_LHZ;
-- case IRT_NUM: return PPCI_LFD;
-- case IRT_FLOAT: return PPCI_LFS;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
-+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
- default: return PPCI_LWZ;
- }
- }
-@@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
- switch (irt_type(ir->t)) {
- case IRT_I8: case IRT_U8: return PPCI_STB;
- case IRT_I16: case IRT_U16: return PPCI_STH;
-- case IRT_NUM: return PPCI_STFD;
-- case IRT_FLOAT: return PPCI_STFS;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
-+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
- default: return PPCI_STW;
- }
- }
-@@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRIns *ir)
-
- static void asm_xload(ASMState *as, IRIns *ir)
- {
-- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-+ Reg dest = ra_dest(as, ir,
-+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
- if (irt_isi8(ir->t))
- emit_as(as, PPCI_EXTSB, dest, dest);
-@@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
- Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
- asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
- } else {
-- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-+ Reg src = ra_alloc1(as, ir->op2,
-+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
- rset_exclude(RSET_GPR, src), ofs);
- }
-@@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
- Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
- RegSet allow = RSET_GPR;
- int32_t ofs = AHUREF_LSX;
-+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
-+ t.irt = IRT_NUM;
-+ if (ra_used(ir+1)) {
-+ type = ra_dest(as, ir+1, allow);
-+ rset_clear(allow, type);
-+ }
-+ ofs = 0;
-+ }
- if (ra_used(ir)) {
-- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-- if (!irt_isnum(t)) ofs = 0;
-- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
-+ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-+ irt_isint(ir->t) || irt_isaddr(ir->t));
-+ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
-+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
- }
- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-@@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
- asm_guardcc(as, CC_GE);
- emit_ab(as, PPCI_CMPLW, type, tisnum);
- if (ra_hasreg(dest)) {
-- if (ofs == AHUREF_LSX) {
-+ if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
- tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
- (idx&255)), (idx>>8)));
- emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
- } else {
-- emit_fai(as, PPCI_LFD, dest, idx, ofs);
-+ emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
-+ ofs+4*LJ_SOFTFP);
- }
- }
- } else {
-@@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
- int32_t ofs = AHUREF_LSX;
- if (ir->r == RID_SINK)
- return;
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- src = ra_alloc1(as, ir->op2, RSET_FPR);
- } else {
- if (!irt_ispri(ir->t)) {
-@@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
- rset_clear(allow, src);
- ofs = 0;
- }
-- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
-+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-+ type = ra_alloc1(as, (ir+1)->op2, allow);
-+ else
-+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
- rset_clear(allow, type);
- }
- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- if (ofs == AHUREF_LSX) {
- emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
- emit_slwi(as, RID_TMP, (idx>>8), 3);
-@@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir)
- IRType1 t = ir->t;
- Reg dest = RID_NONE, type = RID_NONE, base;
- RegSet allow = RSET_GPR;
-+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
-+ if (hiop)
-+ t.irt = IRT_NUM;
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
-- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
-+ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
- lua_assert(LJ_DUALNUM ||
- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
-+#if LJ_SOFTFP
-+ lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
-+ if (hiop && ra_used(ir+1)) {
-+ type = ra_dest(as, ir+1, allow);
-+ rset_clear(allow, type);
-+ }
-+#else
- if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
- dest = ra_scratch(as, RSET_FPR);
- asm_tointg(as, ir, dest);
- t.irt = IRT_NUM; /* Continue with a regular number type check. */
-- } else if (ra_used(ir)) {
-+ } else
-+#endif
-+ if (ra_used(ir)) {
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
-+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
- base = ra_alloc1(as, REF_BASE, allow);
- rset_clear(allow, base);
-- if ((ir->op2 & IRSLOAD_CONVERT)) {
-+ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
- if (irt_isint(t)) {
- emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
- dest = ra_scratch(as, RSET_FPR);
-@@ -994,10 +1097,13 @@ dotypecheck:
- if ((ir->op2 & IRSLOAD_TYPECHECK)) {
- Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
- asm_guardcc(as, CC_GE);
-- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
-+#if !LJ_SOFTFP
- type = RID_TMP;
-+#endif
-+ emit_ab(as, PPCI_CMPLW, type, tisnum);
- }
-- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
-+ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
-+ base, ofs-(LJ_SOFTFP?0:4));
- } else {
- if ((ir->op2 & IRSLOAD_TYPECHECK)) {
- asm_guardcc(as, CC_NE);
-@@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
-
- /* -- Arithmetic and logic operations ------------------------------------- */
-
-+#if !LJ_SOFTFP
- static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
- {
- Reg dest = ra_dest(as, ir, RSET_FPR);
-@@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
- else
- asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
- }
-+#endif
-
- static void asm_add(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
- asm_fparith(as, ir, PPCI_FADD);
-- } else {
-+ } else
-+#endif
-+ {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
- PPCIns pi;
-@@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir)
-
- static void asm_sub(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
- asm_fparith(as, ir, PPCI_FSUB);
-- } else {
-+ } else
-+#endif
-+ {
- PPCIns pi = PPCI_SUBF;
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg left, right;
-@@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
-
- static void asm_mul(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- asm_fparith(as, ir, PPCI_FMUL);
-- } else {
-+ } else
-+#endif
-+ {
- PPCIns pi = PPCI_MULLW;
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-@@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
-
- static void asm_neg(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- asm_fpunary(as, ir, PPCI_FNEG);
-- } else {
-+ } else
-+#endif
-+ {
- Reg dest, left;
- PPCIns pi = PPCI_NEG;
- if (as->flagmcp == as->mcp) {
-@@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
- PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
- #define asm_bror(as, ir) lua_assert(0)
-
-+#if LJ_SOFTFP
-+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
-+{
-+ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
-+ IRRef args[4];
-+ MCLabel l_right, l_end;
-+ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
-+ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
-+ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
-+ PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
-+ righthi = (lefthi >> 8); lefthi &= 255;
-+ rightlo = (leftlo >> 8); leftlo &= 255;
-+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
-+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
-+ l_end = emit_label(as);
-+ if (desthi != righthi) emit_mr(as, desthi, righthi);
-+ if (destlo != rightlo) emit_mr(as, destlo, rightlo);
-+ l_right = emit_label(as);
-+ if (l_end != l_right) emit_jmp(as, l_end);
-+ if (desthi != lefthi) emit_mr(as, desthi, lefthi);
-+ if (destlo != leftlo) emit_mr(as, destlo, leftlo);
-+ if (l_right == as->mcp+1) {
-+ cond ^= 4; l_right = l_end; ++as->mcp;
-+ }
-+ emit_condbranch(as, PPCI_BC, cond, l_right);
-+ ra_evictset(as, RSET_SCRATCH);
-+ emit_cmpi(as, RID_RET, 1);
-+ asm_gencall(as, &ci, args);
-+}
-+#endif
-+
- static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
- {
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- Reg dest = ra_dest(as, ir, RSET_FPR);
- Reg tmp = dest;
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
-@@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
- static void asm_comp(ASMState *as, IRIns *ir)
- {
- PPCCC cc = asm_compmap[ir->o];
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
- right = (left >> 8); left &= 255;
- asm_guardcc(as, (cc >> 4));
-@@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns *ir)
-
- #define asm_equal(as, ir) asm_comp(as, ir)
-
-+#if LJ_SOFTFP
-+/* SFP comparisons. */
-+static void asm_sfpcomp(ASMState *as, IRIns *ir)
-+{
-+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
-+ RegSet drop = RSET_SCRATCH;
-+ Reg r;
-+ IRRef args[4];
-+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
-+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
-+
-+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
-+ if (!rset_test(as->freeset, r) &&
-+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
-+ rset_clear(drop, r);
-+ }
-+ ra_evictset(as, drop);
-+ asm_setupresult(as, ir, ci);
-+ switch ((IROp)ir->o) {
-+ case IR_ULT:
-+ asm_guardcc(as, CC_EQ);
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
-+ case IR_ULE:
-+ asm_guardcc(as, CC_EQ);
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 1);
-+ break;
-+ case IR_GE: case IR_GT:
-+ asm_guardcc(as, CC_EQ);
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 2);
-+ default:
-+ asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
-+ break;
-+ }
-+ asm_gencall(as, ci, args);
-+}
-+#endif
-+
- #if LJ_HASFFI
- /* 64 bit integer comparisons. */
- static void asm_comp64(ASMState *as, IRIns *ir)
-@@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRIns *ir)
- /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
- static void asm_hiop(ASMState *as, IRIns *ir)
- {
--#if LJ_HASFFI
-+#if LJ_HASFFI || LJ_SOFTFP
- /* HIOP is marked as a store because it needs its own DCE logic. */
- int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
- if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
- if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
- as->curins--; /* Always skip the CONV. */
-+#if LJ_HASFFI && !LJ_SOFTFP
- if (usehi || uselo)
- asm_conv64(as, ir);
- return;
-+#endif
- } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
- as->curins--; /* Always skip the loword comparison. */
-+#if LJ_SOFTFP
-+ if (!irt_isint(ir->t)) {
-+ asm_sfpcomp(as, ir-1);
-+ return;
-+ }
-+#endif
-+#if LJ_HASFFI
- asm_comp64(as, ir);
-+#endif
-+ return;
-+#if LJ_SOFTFP
-+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
-+ as->curins--; /* Always skip the loword min/max. */
-+ if (uselo || usehi)
-+ asm_sfpmin_max(as, ir-1);
- return;
-+#endif
- } else if ((ir-1)->o == IR_XSTORE) {
- as->curins--; /* Handle both stores here. */
- if ((ir-1)->r != RID_SINK) {
-@@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns *ir)
- }
- if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
- switch ((ir-1)->o) {
-+#if LJ_HASFFI
- case IR_ADD: as->curins--; asm_add64(as, ir); break;
- case IR_SUB: as->curins--; asm_sub64(as, ir); break;
- case IR_NEG: as->curins--; asm_neg64(as, ir); break;
-+#endif
-+#if LJ_SOFTFP
-+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-+ case IR_STRTO:
-+ if (!uselo)
-+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
-+ break;
-+#endif
- case IR_CALLN:
-+ case IR_CALLS:
- case IR_CALLXS:
- if (!uselo)
- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
- break;
-+#if LJ_SOFTFP
-+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-+#endif
- case IR_CNEWI:
- /* Nothing to do here. Handled by lo op itself. */
- break;
-@@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if ((sn & SNAP_NORESTORE))
- continue;
- if (irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ Reg tmp;
-+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
-+ lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
-+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
-+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
-+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
-+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
-+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
-+#else
- Reg src = ra_alloc1(as, ref, RSET_FPR);
- emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
-+#endif
- } else {
- Reg type;
- RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
-@@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if ((sn & (SNAP_CONT|SNAP_FRAME))) {
- if (s == 0) continue; /* Do not overwrite link to previous frame. */
- type = ra_allock(as, (int32_t)(*flinks--), allow);
-+#if LJ_SOFTFP
-+ } else if ((sn & SNAP_SOFTFPNUM)) {
-+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
-+#endif
- } else {
- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
- }
-@@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
- int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
- asm_collectargs(as, ir, ci, args);
- for (i = 0; i < nargs; i++)
-- if (args[i] && irt_isfp(IR(args[i])->t)) {
-+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
- if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
- } else {
- if (ngpr > 0) ngpr--; else nslots++;
- }
- if (nslots > as->evenspill) /* Leave room for args in stack slots. */
- as->evenspill = nslots;
-- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
-+ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
-+ REGSP_HINT(RID_RET);
- }
-
- static void asm_setup_target(ASMState *as)
---
-2.20.1
-
diff --git a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch b/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch
deleted file mode 100644
index 7e9dd8a..0000000
--- a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 05fbdf565c700365d22e38f11478101a0d92a23e Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 10 Sep 2017 14:05:30 +0200
-Subject: [PATCH 14/72] x64/LJ_GC64: Fix type-check-only variant of SLOAD.
-
-Thanks to Peter Cawley.
----
- src/lj_asm_x86.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
-index 55c02d2..af54dc7 100644
---- a/src/lj_asm_x86.h
-+++ b/src/lj_asm_x86.h
-@@ -1759,7 +1759,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
- emit_i8(as, irt_toitype(t));
- emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
- emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
-- emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4);
-+ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs);
- #else
- } else {
- emit_i8(as, irt_toitype(t));
---
-2.20.1
-
diff --git a/0015-MIPS64-Hide-internal-function.patch b/0015-MIPS64-Hide-internal-function.patch
deleted file mode 100644
index 0e2f4fd..0000000
--- a/0015-MIPS64-Hide-internal-function.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From bf12f1dafb157008b963f829b57b2472b6993cc8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 18 Sep 2017 09:50:22 +0200
-Subject: [PATCH 15/72] MIPS64: Hide internal function.
-
----
- src/lj_ccall.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index 799be48..25e938c 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -848,7 +848,8 @@ noth: /* Not a homogeneous float/double aggregate. */
- return 0; /* Struct is in GPRs. */
- }
-
--void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft)
-+static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
-+ int ft)
- {
- if (LJ_ABI_SOFTFP ? ft :
- ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
---
-2.20.1
-
diff --git a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch b/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch
deleted file mode 100644
index 66f5bf0..0000000
--- a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-commit 6a2d8b0b4d49eb5aac600c219e5903420806e56e
-Merge: bf12f1d 0c0e7b1
-Author: Mike Pall <mike>
-Date: Wed Sep 20 19:42:34 2017 +0200
-
- Merge branch 'master' into v2.1
-
-From 0c0e7b168ea147866835954267c151ef789f64fb Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 20 Sep 2017 19:39:50 +0200
-Subject: [PATCH 16/72] DynASM/x86: Fix potential REL_A overflow.
-
-Thanks to Joshua Haberman.
----
- dynasm/dasm_x86.h | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
-index 90dc5d1..f9260b0 100644
---- a/dynasm/dasm_x86.h
-+++ b/dynasm/dasm_x86.h
-@@ -395,7 +395,8 @@ int dasm_encode(Dst_DECL, void *buffer)
- }
- case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
- b++; n = (int)(ptrdiff_t)D->globals[-n];
-- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
-+ case DASM_REL_A: rel_a:
-+ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
- case DASM_REL_PC: rel_pc: {
- int shrink = *b++;
- int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
---
-2.20.1
-
diff --git a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch b/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch
deleted file mode 100644
index aff6f20..0000000
--- a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From b4ed3219a1a98dd9fe7d1e3eeea3b82f5a780948 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 2 Oct 2017 09:22:46 +0200
-Subject: [PATCH 17/72] LJ_GC64: Fix ir_khash for non-string GCobj.
-
-Contributed by Peter Cawley.
----
- src/lj_asm.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index bed2268..d961927 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -1017,7 +1017,11 @@ static uint32_t ir_khash(IRIns *ir)
- } else {
- lua_assert(irt_isgcv(ir->t));
- lo = u32ptr(ir_kgc(ir));
-+#if LJ_GC64
-+ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
-+#else
- hi = lo + HASH_BIAS;
-+#endif
- }
- return hashrot(lo, hi);
- }
---
-2.20.1
-
diff --git a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch b/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch
deleted file mode 100644
index d604876..0000000
--- a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 850f8c59d3d04a9847f21f32a6c36d8269b5b6b1 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 2 Oct 2017 23:10:56 +0200
-Subject: [PATCH 18/72] LJ_GC64: Make ASMREF_L references 64 bit.
-
-Reported by Yichun Zhang.
----
- src/lj_asm.c | 1 +
- src/lj_ir.h | 4 +++-
- src/lj_opt_sink.c | 1 +
- 3 files changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index d961927..753fe6b 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -2015,6 +2015,7 @@ static void asm_setup_regsp(ASMState *as)
- ir->prev = REGSP_INIT;
- if (irt_is64(ir->t) && ir->o != IR_KNULL) {
- #if LJ_GC64
-+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
- ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
- #else
- /* Make life easier for backends by putting address of constant in i. */
-diff --git a/src/lj_ir.h b/src/lj_ir.h
-index 34c2785..8057a75 100644
---- a/src/lj_ir.h
-+++ b/src/lj_ir.h
-@@ -377,10 +377,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
- #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
-
- #if LJ_GC64
-+/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */
- #define IRT_IS64 \
- ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
- (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
-- (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA))
-+ (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\
-+ (1u<<IRT_NIL))
- #elif LJ_64
- #define IRT_IS64 \
- ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
-diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
-index 929ccb6..a16d112 100644
---- a/src/lj_opt_sink.c
-+++ b/src/lj_opt_sink.c
-@@ -219,6 +219,7 @@ static void sink_sweep_ins(jit_State *J)
- for (ir = IR(J->cur.nk); ir < irbase; ir++) {
- irt_clearmark(ir->t);
- ir->prev = REGSP_INIT;
-+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
- if (irt_is64(ir->t) && ir->o != IR_KNULL)
- ir++;
- }
---
-2.20.1
-
diff --git a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch b/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch
deleted file mode 100644
index c999ce8..0000000
--- a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 9f0caad0e43f97a4613850b3874b851cb1bc301d Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 8 Nov 2017 12:53:05 +0100
-Subject: [PATCH 19/72] Fix FOLD rule for strength reduction of widening.
-
-Reported by Matthew Burk.
----
- src/lj_opt_fold.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
-index 3d0e35a..5dc7ae3 100644
---- a/src/lj_opt_fold.c
-+++ b/src/lj_opt_fold.c
-@@ -1052,7 +1052,7 @@ LJFOLDF(simplify_conv_sext)
- if (ref == J->scev.idx) {
- IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
- lua_assert(irt_isint(J->scev.t));
-- if (lo && IR(lo)->i + ofs >= 0) {
-+ if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
- ok_reduce:
- #if LJ_TARGET_X64
- /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
---
-2.20.1
-
diff --git a/0020-ARM64-Fix-assembly-of-HREFK.patch b/0020-ARM64-Fix-assembly-of-HREFK.patch
deleted file mode 100644
index 3200304..0000000
--- a/0020-ARM64-Fix-assembly-of-HREFK.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From 06cd9fce7df440323647174f1ca4a01281ec8acd Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 8 Nov 2017 12:53:48 +0100
-Subject: [PATCH 20/72] ARM64: Fix assembly of HREFK.
-
-Reported by Jason Teplitz.
----
- src/lj_asm_arm64.h | 11 +++++------
- 1 file changed, 5 insertions(+), 6 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 8fd92e7..cbb186d 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -869,14 +869,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
- int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
- int32_t kofs = ofs + (int32_t)offsetof(Node, key);
- int bigofs = !emit_checkofs(A64I_LDRx, ofs);
-- RegSet allow = RSET_GPR;
- Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
-- Reg node = ra_alloc1(as, ir->op1, allow);
-- Reg key = ra_scratch(as, rset_clear(allow, node));
-- Reg idx = node;
-+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
-+ Reg key, idx = node;
-+ RegSet allow = rset_exclude(RSET_GPR, node);
- uint64_t k;
- lua_assert(ofs % sizeof(Node) == 0);
-- rset_clear(allow, key);
- if (bigofs) {
- idx = dest;
- rset_clear(allow, dest);
-@@ -892,7 +890,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
- } else {
- k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
- }
-- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow));
-+ key = ra_scratch(as, allow);
-+ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
- emit_lso(as, A64I_LDRx, key, idx, kofs);
- if (bigofs)
- emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
---
-2.20.1
-
diff --git a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch b/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch
deleted file mode 100644
index 80fad2f..0000000
--- a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From 99cdfbf6a1e8856f64908072ef10443a7eab14f2 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 8 Nov 2017 12:54:03 +0100
-Subject: [PATCH 21/72] MIPS64: Fix register allocation in assembly of HREF.
-
-Contributed by James Cowgill.
----
- src/lj_asm_mips.h | 42 +++++++++++++++++++++++++-----------------
- 1 file changed, 25 insertions(+), 17 deletions(-)
-
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index 1406a87..3a4679b 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -859,6 +859,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- Reg dest = ra_dest(as, ir, allow);
- Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
- Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
-+#if LJ_64
-+ Reg cmp64 = RID_NONE;
-+#endif
- IRRef refkey = ir->op2;
- IRIns *irkey = IR(refkey);
- int isk = irref_isk(refkey);
-@@ -901,6 +904,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- #endif
- tmp2 = ra_scratch(as, allow);
- rset_clear(allow, tmp2);
-+#if LJ_64
-+ if (LJ_SOFTFP || !irt_isnum(kt)) {
-+ /* Allocate cmp64 register used for 64-bit comparisons */
-+ if (LJ_SOFTFP && irt_isnum(kt)) {
-+ cmp64 = key;
-+ } else if (!isk && irt_isaddr(kt)) {
-+ cmp64 = tmp2;
-+ } else {
-+ int64_t k;
-+ if (isk && irt_isaddr(kt)) {
-+ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
-+ } else {
-+ lua_assert(irt_ispri(kt) && !irt_isnil(kt));
-+ k = ~((int64_t)~irt_toitype(ir->t) << 47);
-+ }
-+ cmp64 = ra_allock(as, k, allow);
-+ rset_clear(allow, cmp64);
-+ }
-+ }
-+#endif
-
- /* Key not found in chain: jump to exit (if merged) or load niltv. */
- l_end = emit_label(as);
-@@ -943,24 +966,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
- emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
-- } else if (LJ_SOFTFP && irt_isnum(kt)) {
-- emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
-- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
-- } else if (irt_isaddr(kt)) {
-- Reg refk = tmp2;
-- if (isk) {
-- int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
-- refk = ra_allock(as, k, allow);
-- rset_clear(allow, refk);
-- }
-- emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end);
-- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
- } else {
-- Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
-- rset_clear(allow, pri);
-- lua_assert(irt_ispri(kt) && !irt_isnil(kt));
-- emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end);
-- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
-+ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end);
-+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
- }
- *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
- if (!isk && irt_isaddr(kt)) {
---
-2.20.1
-
diff --git a/0022-ARM64-Fix-xpcall-error-case.patch b/0022-ARM64-Fix-xpcall-error-case.patch
deleted file mode 100644
index ec05a7c..0000000
--- a/0022-ARM64-Fix-xpcall-error-case.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 33082a6f4778aa152f6a4a684a7fe79436f1ecb6 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 16 Nov 2017 12:53:34 +0100
-Subject: [PATCH 22/72] ARM64: Fix xpcall() error case.
-
-Thanks to Stefan Pejic.
----
- src/vm_arm64.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
-index 3eaf376..241c58a 100644
---- a/src/vm_arm64.dasc
-+++ b/src/vm_arm64.dasc
-@@ -1185,12 +1185,12 @@ static void build_subroutines(BuildCtx *ctx)
- | subs NARGS8:RC, NARGS8:RC, #16
- | blo ->fff_fallback
- | mov RB, BASE
-- | add BASE, BASE, #24
- | asr ITYPE, CARG2, #47
- | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
- | cmn ITYPE, #-LJ_TFUNC
- | add PC, TMP0, #24+FRAME_PCALL
- | bne ->fff_fallback // Traceback must be a function.
-+ | add BASE, BASE, #24
- | stp CARG2, CARG1, [RB] // Swap function and traceback.
- | cbz NARGS8:RC, ->vm_call_dispatch
- | b <1
---
-2.20.1
-
diff --git a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch b/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch
deleted file mode 100644
index 740a5a7..0000000
--- a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 7dbf0b05f1228c1c719866db5e5f3d58f87f74c8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 16 Nov 2017 12:58:12 +0100
-Subject: [PATCH 23/72] Fix saved bytecode encapsulated in ELF objects.
-
-Thanks to Dimitry Andric.
----
- src/jit/bcsave.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
-index aa677df..c94064e 100644
---- a/src/jit/bcsave.lua
-+++ b/src/jit/bcsave.lua
-@@ -275,7 +275,7 @@ typedef struct {
- o.sect[2].size = fofs(ofs)
- o.sect[3].type = f32(3) -- .strtab
- o.sect[3].ofs = fofs(sofs + ofs)
-- o.sect[3].size = fofs(#symname+1)
-+ o.sect[3].size = fofs(#symname+2)
- ffi.copy(o.space+ofs+1, symname)
- ofs = ofs + #symname + 2
- o.sect[4].type = f32(1) -- .rodata
---
-2.20.1
-
diff --git a/0024-ARM64-Fix-xpcall-error-case-really.patch b/0024-ARM64-Fix-xpcall-error-case-really.patch
deleted file mode 100644
index ab518e1..0000000
--- a/0024-ARM64-Fix-xpcall-error-case-really.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-From d417ded17945b4211608d497d50b509e0274f5e0 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sat, 18 Nov 2017 12:23:57 +0100
-Subject: [PATCH 24/72] ARM64: Fix xpcall() error case (really).
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Thanks to François Perrad and Stefan Pejic.
----
- src/vm_arm64.dasc | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
-index 241c58a..c55794a 100644
---- a/src/vm_arm64.dasc
-+++ b/src/vm_arm64.dasc
-@@ -1182,7 +1182,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc xpcall
- | ldp CARG1, CARG2, [BASE]
- | ldrb TMP0w, GL->hookmask
-- | subs NARGS8:RC, NARGS8:RC, #16
-+ | subs NARGS8:TMP1, NARGS8:RC, #16
- | blo ->fff_fallback
- | mov RB, BASE
- | asr ITYPE, CARG2, #47
-@@ -1190,6 +1190,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cmn ITYPE, #-LJ_TFUNC
- | add PC, TMP0, #24+FRAME_PCALL
- | bne ->fff_fallback // Traceback must be a function.
-+ | mov NARGS8:RC, NARGS8:TMP1
- | add BASE, BASE, #24
- | stp CARG2, CARG1, [RB] // Swap function and traceback.
- | cbz NARGS8:RC, ->vm_call_dispatch
---
-2.20.1
-
diff --git a/0025-MIPS64-Fix-xpcall-error-case.patch b/0025-MIPS64-Fix-xpcall-error-case.patch
deleted file mode 100644
index 5b17e81..0000000
--- a/0025-MIPS64-Fix-xpcall-error-case.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From ea7071d3c30b6432bfe6f8a9d263e0285cec25e3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sat, 18 Nov 2017 12:25:35 +0100
-Subject: [PATCH 25/72] MIPS64: Fix xpcall() error case.
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Thanks to François Perrad and Stefan Pejic.
----
- src/vm_mips64.dasc | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index 75b38de..a78cd25 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -1399,15 +1399,16 @@ static void build_subroutines(BuildCtx *ctx)
- |. nop
- |
- |.ffunc xpcall
-- | daddiu NARGS8:RC, NARGS8:RC, -16
-+ | daddiu NARGS8:TMP0, NARGS8:RC, -16
- | ld CARG1, 0(BASE)
- | ld CARG2, 8(BASE)
-- | bltz NARGS8:RC, ->fff_fallback
-+ | bltz NARGS8:TMP0, ->fff_fallback
- |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
- | gettp AT, CARG2
- | daddiu AT, AT, -LJ_TFUNC
- | bnez AT, ->fff_fallback // Traceback must be a function.
- |. move TMP2, BASE
-+ | move NARGS8:RC, NARGS8:TMP0
- | daddiu BASE, BASE, 24
- | // Remember active hook before pcall.
- | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
---
-2.20.1
-
diff --git a/0026-Fix-IR_BUFPUT-assembly.patch b/0026-Fix-IR_BUFPUT-assembly.patch
deleted file mode 100644
index c942467..0000000
--- a/0026-Fix-IR_BUFPUT-assembly.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 58d0dde0a2df49abc991decbabff15230010829a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Jan 2018 13:57:00 +0100
-Subject: [PATCH 26/72] Fix IR_BUFPUT assembly.
-
-Thanks to Peter Cawley.
----
- src/lj_asm.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index 753fe6b..5f83779 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -1119,7 +1119,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
- IRRef args[3];
- IRIns *irs;
-- int kchar = -1;
-+ int kchar = -129;
- args[0] = ir->op1; /* SBuf * */
- args[1] = ir->op2; /* GCstr * */
- irs = IR(ir->op2);
-@@ -1127,7 +1127,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
- if (irs->o == IR_KGC) {
- GCstr *s = ir_kstr(irs);
- if (s->len == 1) { /* Optimize put of single-char string constant. */
-- kchar = strdata(s)[0];
-+ kchar = (int8_t)strdata(s)[0]; /* Signed! */
- args[1] = ASMREF_TMP1; /* int, truncated to char */
- ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
- }
-@@ -1154,7 +1154,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
- asm_gencall(as, ci, args);
- if (args[1] == ASMREF_TMP1) {
- Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
-- if (kchar == -1)
-+ if (kchar == -129)
- asm_tvptr(as, tmp, irs->op1);
- else
- ra_allockreg(as, kchar, tmp);
---
-2.20.1
-
diff --git a/0027-Fix-string.format-c-0.patch b/0027-Fix-string.format-c-0.patch
deleted file mode 100644
index caece09..0000000
--- a/0027-Fix-string.format-c-0.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-commit 4660dbfa8a4f9eea5218b739075d04faadfeeef6
-Merge: 58d0dde 430d9f8
-Author: Mike Pall <mike>
-Date: Sun Jan 14 14:26:10 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From 430d9f8f7ebb779948dbd43944b876b1a3f58551 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Jan 2018 14:11:59 +0100
-Subject: [PATCH 27/72] Fix string.format("%c", 0).
-
----
- src/lib_string.c | 15 ++++++++-------
- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/0028-Fix-ARMv8-32-bit-subset-detection.patch b/0028-Fix-ARMv8-32-bit-subset-detection.patch
deleted file mode 100644
index 00687af..0000000
--- a/0028-Fix-ARMv8-32-bit-subset-detection.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 9eaad8574f5b2271b981cd31966b1e832cd8de12 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 18 Jan 2018 12:24:36 +0100
-Subject: [PATCH 28/72] Fix ARMv8 (32 bit subset) detection.
-
-Thanks to Markus Oberhumber.
----
- src/lj_arch.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 5962f3a..fcebd84 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -201,7 +201,7 @@
- #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-
--#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
-+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
- #define LJ_ARCH_VERSION 80
-#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
- #define LJ_ARCH_VERSION 70
---
-2.20.1
-
diff --git a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch b/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch
deleted file mode 100644
index 70ae35a..0000000
--- a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From c88602f080dcafea6ba222a2f7cc1ea0e41ef3cc Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 18 Jan 2018 12:29:39 +0100
-Subject: [PATCH 29/72] Fix LuaJIT API docs for LUAJIT_MODE_*.
-
-Thanks to sunfishgao.
----
- doc/ext_c_api.html | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
-index 041a722..4bb8251 100644
---- a/doc/ext_c_api.html
-+++ b/doc/ext_c_api.html
-@@ -89,8 +89,8 @@ other Lua/C API functions).
- </p>
- <p>
- The third argument specifies the mode, which is 'or'ed with a flag.
--The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on,
--<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or
-+The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off,
-+<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or
- <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
- </p>
- <p>
---
-2.20.1
-
diff --git a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch b/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch
deleted file mode 100644
index 8ee3a17..0000000
--- a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 8071aa4ad65cf09e3b7adda4a7787d8897e5314c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 12:12:29 +0100
-Subject: [PATCH 30/72] MIPS64: Fix soft-float +-0.0 vs. +-0.0 comparison.
-
-Thanks to Stefan Pejic.
----
- src/vm_mips64.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index a78cd25..0a3f8e5 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -2661,7 +2661,7 @@ static void build_subroutines(BuildCtx *ctx)
- |. slt CRET1, CARG2, CARG1
- |8:
- | jr ra
-- |. nop
-+ |. li CRET1, 0
- |9:
- | jr ra
- |. move CRET1, CRET2
---
-2.20.1
-
diff --git a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch b/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch
deleted file mode 100644
index b95ca0c..0000000
--- a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-commit 74c544d68c07bcd416225598cdf15f88e62fd457
-Merge: 8071aa4 b03a56f
-Author: Mike Pall <mike>
-Date: Mon Jan 29 12:53:42 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From b03a56f28ec360bbcf43091afd0607890a4a33c7 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 12:47:08 +0100
-Subject: [PATCH 31/72] FFI: Don't assert on #1LL (5.2 compatibility mode
- only).
-
-Reported by Denis Golovan.
----
- src/lib_ffi.c | 2 +-
- src/lj_carith.c | 9 +++++++++
- src/lj_carith.h | 1 +
- 3 files changed, 11 insertions(+), 1 deletion(-)
-
-diff --git a/src/lib_ffi.c b/src/lib_ffi.c
-index f2f2ede..83483d9 100644
---- a/src/lib_ffi.c
-+++ b/src/lib_ffi.c
-@@ -193,7 +193,7 @@ LJLIB_CF(ffi_meta___eq) LJLIB_REC(cdata_arith MM_eq)
-
- LJLIB_CF(ffi_meta___len) LJLIB_REC(cdata_arith MM_len)
- {
-- return ffi_arith(L);
-+ return lj_carith_len(L);
- }
-
- LJLIB_CF(ffi_meta___lt) LJLIB_REC(cdata_arith MM_lt)
-diff --git a/src/lj_carith.c b/src/lj_carith.c
-index 6224dee..c34596c 100644
---- a/src/lj_carith.c
-+++ b/src/lj_carith.c
-@@ -272,6 +272,15 @@ int lj_carith_op(lua_State *L, MMS mm)
- return lj_carith_meta(L, cts, &ca, mm);
- }
-
-+/* No built-in functionality for length of cdata. */
-+int lj_carith_len(lua_State *L)
-+{
-+ CTState *cts = ctype_cts(L);
-+ CDArith ca;
-+ carith_checkarg(L, cts, &ca);
-+ return lj_carith_meta(L, cts, &ca, MM_len);
-+}
-+
- /* -- 64 bit bit operations helpers --------------------------------------- */
-
- #if LJ_64
-diff --git a/src/lj_carith.h b/src/lj_carith.h
-index 3c15591..82fc824 100644
---- a/src/lj_carith.h
-+++ b/src/lj_carith.h
-@@ -11,6 +11,7 @@
- #if LJ_HASFFI
-
- LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
-+LJ_FUNC int lj_carith_len(lua_State *L);
-
- #if LJ_32
- LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
-
---
-2.20.1
-
diff --git a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch b/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch
deleted file mode 100644
index 192f271..0000000
--- a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch
+++ /dev/null
@@ -1,291 +0,0 @@
-commit 0bf46e1edf94c43795b5e491efe682ab70974ce7
-Merge: 74c544d d4ee803
-Author: Mike Pall <mike>
-Date: Mon Jan 29 13:19:30 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From d4ee80342770d1281e2ce877f8ae8ab1d99e6528 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 13:06:13 +0100
-Subject: [PATCH 32/72] Fix GCC 7 -Wimplicit-fallthrough warnings.
-
----
- dynasm/dasm_arm.h | 2 ++
- dynasm/dasm_mips.h | 1 +
- dynasm/dasm_ppc.h | 1 +
- dynasm/dasm_x86.h | 14 ++++++++++++--
- src/lj_asm.c | 3 ++-
- src/lj_cparse.c | 10 ++++++++++
- src/lj_err.c | 1 +
- src/lj_opt_sink.c | 2 +-
- src/lj_parse.c | 3 ++-
- src/luajit.c | 1 +
- 10 files changed, 33 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
-index a43f7c6..1d404cc 100644
---- a/dynasm/dasm_arm.h
-+++ b/dynasm/dasm_arm.h
-@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...)
- case DASM_IMMV8:
- CK((n & 3) == 0, RANGE_I);
- n >>= 2;
-+ /* fallthrough */
- case DASM_IMML8:
- case DASM_IMML12:
- CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
-@@ -371,6 +372,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;
-diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
-index 7eac669..46af034 100644
---- a/dynasm/dasm_mips.h
-+++ b/dynasm/dasm_mips.h
-@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n);
-diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
-index 6110361..81b9a76 100644
---- a/dynasm/dasm_ppc.h
-+++ b/dynasm/dasm_ppc.h
-@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
-diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
-index f9260b0..8ae911d 100644
---- a/dynasm/dasm_x86.h
-+++ b/dynasm/dasm_x86.h
-@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...)
- switch (action) {
- case DASM_DISP:
- if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
-- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
-+ /* fallthrough */
-+ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
- case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
- case DASM_IMM_D: ofs += 4; break;
- case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
- case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
-- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
-+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
- case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
- case DASM_SPACE: p++; ofs += n; break;
- case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
-@@ -323,11 +324,14 @@ int dasm_link(Dst_DECL, size_t *szp)
- pos += 2;
- break;
- }
-+ /* fallthrough */
- case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
-+ /* fallthrough */
- case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
- case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
- case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
- case DASM_LABEL_LG: p++;
-+ /* fallthrough */
- case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
- case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
- case DASM_EXTERN: p += 2; break;
-@@ -385,12 +389,15 @@ int dasm_encode(Dst_DECL, void *buffer)
- if (mrm != 5) { mm[-1] -= 0x80; break; } }
- if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
- }
-+ /* fallthrough */
- case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
- case DASM_IMM_DB: if (((n+128)&-256) == 0) {
- db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
- } else mark = NULL;
-+ /* fallthrough */
- case DASM_IMM_D: wd: dasmd(n); break;
- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
-+ /* fallthrough */
- case DASM_IMM_W: dasmw(n); break;
- case DASM_VREG: {
- int t = *p++;
-@@ -397,6 +404,7 @@
- }
- case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
- b++; n = (int)(ptrdiff_t)D->globals[-n];
-+ /* fallthrough */
- case DASM_REL_A: rel_a:
- n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
- case DASM_REL_PC: rel_pc: {
-@@ -407,6 +415,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- }
- case DASM_IMM_LG:
- p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
-+ /* fallthrough */
- case DASM_IMM_PC: {
- int *pb = DASM_POS2PTR(D, n);
- n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
-@@ -427,6 +436,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
- case DASM_MARK: mark = cp; break;
- case DASM_ESC: action = *p++;
-+ /* fallthrough */
- default: *cp++ = action; break;
- case DASM_SECTION: case DASM_STOP: goto stop;
- }
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index 02714d4..dd7186f 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -2136,6 +2136,7 @@ static void asm_setup_regsp(ASMState *as)
- case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
- if (REGARG_NUMGPR < 3 && as->evenspill < 3)
- as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
-+ /* fallthrough */
- #if LJ_TARGET_X86 && LJ_HASFFI
- if (0) {
- case IR_CNEW:
-@@ -2176,7 +2177,7 @@ static void asm_setup_regsp(ASMState *as)
- continue;
- #endif
- }
-- /* fallthrough for integer POW */
-+ /* fallthrough */ /* for integer POW */
- case IR_DIV: case IR_MOD:
- if (!irt_isnum(ir->t)) {
- ir->prev = REGSP_HINT(RID_RET);
-diff --git a/src/lj_cparse.c b/src/lj_cparse.c
-index 2ba50a7..f111537 100644
---- a/src/lj_cparse.c
-+++ b/src/lj_cparse.c
-@@ -590,28 +590,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- k->id = k2.id > k3.id ? k2.id : k3.id;
- continue;
- }
-+ /* fallthrough */
- case 1:
- if (cp_opt(cp, CTOK_OROR)) {
- cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 2:
- if (cp_opt(cp, CTOK_ANDAND)) {
- cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 3:
- if (cp_opt(cp, '|')) {
- cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 4:
- if (cp_opt(cp, '^')) {
- cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 5:
- if (cp_opt(cp, '&')) {
- cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 6:
- if (cp_opt(cp, CTOK_EQ)) {
- cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32;
-@@ -620,6 +626,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 7:
- if (cp_opt(cp, '<')) {
- cp_expr_sub(cp, &k2, 8);
-@@ -654,6 +661,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 8:
- if (cp_opt(cp, CTOK_SHL)) {
- cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32;
-@@ -666,6 +674,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- k->u32 = k->u32 >> k2.u32;
- continue;
- }
-+ /* fallthrough */
- case 9:
- if (cp_opt(cp, '+')) {
- cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32;
-@@ -675,6 +684,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- } else if (cp_opt(cp, '-')) {
- cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 10:
- if (cp_opt(cp, '*')) {
- cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result;
-diff --git a/src/lj_err.c b/src/lj_err.c
-index 54f42c3..13a1ded 100644
---- a/src/lj_err.c
-+++ b/src/lj_err.c
-@@ -153,6 +153,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
- case FRAME_CONT: /* Continuation frame. */
- if (frame_iscont_fficb(frame))
- goto unwind_c;
-+ /* fallthrough */
- case FRAME_VARG: /* Vararg frame. */
- frame = frame_prevd(frame);
- break;
-diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
-index 6a00d04..4efe395 100644
---- a/src/lj_opt_sink.c
-+++ b/src/lj_opt_sink.c
-@@ -100,8 +100,8 @@ static void sink_mark_ins(jit_State *J)
- (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP &&
- !sink_checkphi(J, ir, (ir+1)->op2))))
- irt_setmark(ir->t); /* Mark ineligible allocation. */
-- /* fallthrough */
- #endif
-+ /* fallthrough */
- case IR_USTORE:
- irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
- break;
-diff --git a/src/lj_parse.c b/src/lj_parse.c
-index 9e5976f..6785495 100644
---- a/src/lj_parse.c
-+++ b/src/lj_parse.c
-@@ -2696,7 +2696,8 @@ static int parse_stmt(LexState *ls)
- lj_lex_next(ls);
- parse_goto(ls);
- break;
-- } /* else: fallthrough */
-+ }
-+ /* fallthrough */
- default:
- parse_call_assign(ls);
- break;
-diff --git a/src/luajit.c b/src/luajit.c
-index 9e15b26..0e18dc5 100644
---- a/src/luajit.c
-+++ b/src/luajit.c
-@@ -419,6 +419,7 @@ static int collectargs(char **argv, int *flags)
- break;
- case 'e':
- *flags |= FLAGS_EXEC;
-+ /* fallthrough */
- case 'j': /* LuaJIT extension */
- case 'l':
- *flags |= FLAGS_OPTION;
---
-2.20.1
-
diff --git a/0033-Clear-stack-after-print_jit_status-in-CLI.patch b/0033-Clear-stack-after-print_jit_status-in-CLI.patch
deleted file mode 100644
index 53a4acf..0000000
--- a/0033-Clear-stack-after-print_jit_status-in-CLI.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-commit fddef924097f28c46a0a5b45483a6086b33cab81
-Merge: 0bf46e1 03cd5aa
-Author: Mike Pall <mike>
-Date: Mon Jan 29 13:28:53 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From 03cd5aa749c1bc3bb4b7d4289236b6096cb3dc85 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 13:25:51 +0100
-Subject: [PATCH 33/72] Clear stack after print_jit_status() in CLI.
-
-Suggested by Hydroque.
----
- src/luajit.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/luajit.c b/src/luajit.c
-index 0e18dc5..9ede59c 100644
---- a/src/luajit.c
-+++ b/src/luajit.c
-@@ -151,6 +151,7 @@ static void print_jit_status(lua_State *L)
- fputs(s, stdout);
- }
- putc('\n', stdout);
-+ lua_settop(L, 0); /* clear stack */
- }
-
- static void createargtable(lua_State *L, char **argv, int argc, int argf)
---
-2.20.1
-
diff --git a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch b/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch
deleted file mode 100644
index 1b90fb3..0000000
--- a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 046129dbdda5261c1b17469a2895a113d14c070a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 27 Feb 2018 23:02:23 +0100
-Subject: [PATCH 34/72] Fix rechaining of pseudo-resurrected string keys.
-
-This is a serious bug. But extremely hard to reproduce, so it went
-undetected for 8 years. One needs two resurrections with different
-main nodes, which are both in a hash chain which gets relinked on
-key insertion where the colliding node is in a non-main position. Phew.
-
-Thanks to lbeiming.
----
- src/lj_tab.c | 23 +++++++++++++++++++++++
- 1 file changed, 23 insertions(+)
-
-diff --git a/src/lj_tab.c b/src/lj_tab.c
-index 50f447e..f2f3c0b 100644
---- a/src/lj_tab.c
-+++ b/src/lj_tab.c
-@@ -457,6 +457,29 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
- freenode->next = nn->next;
- nn->next = n->next;
- setmref(n->next, nn);
-+ /*
-+ ** Rechaining a resurrected string key creates a new dilemma:
-+ ** Another string key may have originally been resurrected via
-+ ** _any_ of the previous nodes as a chain anchor. Including
-+ ** a node that had to be moved, which makes them unreachable.
-+ ** It's not feasible to check for all previous nodes, so rechain
-+ ** any string key that's currently in a non-main positions.
-+ */
-+ while ((nn = nextnode(freenode))) {
-+ if (tvisstr(&nn->key) && !tvisnil(&nn->val)) {
-+ Node *mn = hashstr(t, strV(&nn->key));
-+ if (mn != freenode) {
-+ freenode->next = nn->next;
-+ nn->next = mn->next;
-+ setmref(mn->next, nn);
-+ } else {
-+ freenode = nn;
-+ }
-+ } else {
-+ freenode = nn;
-+ }
-+ }
-+ break;
- } else {
- freenode = nn;
- }
---
-2.20.1
-
diff --git a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch b/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch
deleted file mode 100644
index 832809e..0000000
--- a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From fe651bf6e2b4d02b624be3c289378c08bab2fa9b Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 27 Feb 2018 23:22:40 +0100
-Subject: [PATCH 35/72] DynASM/x86: Add BMI1 and BMI2 instructions.
-
-Thanks to Peter Cawley.
----
- dynasm/dasm_x86.lua | 19 +++++++++++++++++++
- 1 file changed, 19 insertions(+)
-
-diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
-index 4c031e2..c1d267a 100644
---- a/dynasm/dasm_x86.lua
-+++ b/dynasm/dasm_x86.lua
-@@ -955,6 +955,7 @@ end
- -- "u" Use VEX encoding, vvvv unused.
- -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
- -- removed from the list used by future characters).
-+-- "w" Use VEX encoding, vvvv from 3rd operand.
- -- "L" Force VEX.L
- --
- -- All of the following characters force a flush of the opcode:
-@@ -1677,6 +1678,24 @@ local map_op = {
- -- Intel ADX
- adcx_2 = "rmqd:660F38F6rM",
- adox_2 = "rmqd:F30F38F6rM",
-+
-+ -- BMI1
-+ andn_3 = "rrmqd:0F38VF2rM",
-+ bextr_3 = "rmrqd:0F38wF7rM",
-+ blsi_2 = "rmqd:0F38vF33m",
-+ blsmsk_2 = "rmqd:0F38vF32m",
-+ blsr_2 = "rmqd:0F38vF31m",
-+ tzcnt_2 = "rmqdw:F30FBCrM",
-+
-+ -- BMI2
-+ bzhi_3 = "rmrqd:0F38wF5rM",
-+ mulx_3 = "rrmqd:F20F38VF6rM",
-+ pdep_3 = "rrmqd:F20F38VF5rM",
-+ pext_3 = "rrmqd:F30F38VF5rM",
-+ rorx_3 = "rmSqd:F20F3AuF0rMS",
-+ sarx_3 = "rmrqd:F30F38wF7rM",
-+ shrx_3 = "rmrqd:F20F38wF7rM",
-+ shlx_3 = "rmrqd:660F38wF7rM",
- }
-
- ------------------------------------------------------------------------------
---
-2.20.1
-
diff --git a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch b/0036-Give-expected-results-for-negative-non-base-10-numbe.patch
deleted file mode 100644
index 3279dfe..0000000
--- a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From f3cf0d6e15240098147437fed7bd436ff55fdf8c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 22 Apr 2018 13:14:28 +0200
-Subject: [PATCH 36/72] Give expected results for negative non-base-10 numbers
- in tonumber().
-
-This was undefined in Lua 5.1, but it's defined in 5.2.
----
- src/lib_base.c | 27 ++++++++++++++++++---------
- 1 file changed, 18 insertions(+), 9 deletions(-)
-
-diff --git a/src/lib_base.c b/src/lib_base.c
-index 3a75787..d61e876 100644
---- a/src/lib_base.c
-+++ b/src/lib_base.c
-@@ -287,18 +287,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
- } else {
- const char *p = strdata(lj_lib_checkstr(L, 1));
- char *ep;
-+ unsigned int neg = 0;
- unsigned long ul;
- if (base < 2 || base > 36)
- lj_err_arg(L, 2, LJ_ERR_BASERNG);
-- ul = strtoul(p, &ep, base);
-- if (p != ep) {
-- while (lj_char_isspace((unsigned char)(*ep))) ep++;
-- if (*ep == '\0') {
-- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-- setintV(L->base-1-LJ_FR2, (int32_t)ul);
-- else
-- setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
-- return FFH_RES(1);
-+ while (lj_char_isspace((unsigned char)(*p))) p++;
-+ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
-+ if (lj_char_isalnum((unsigned char)(*p))) {
-+ ul = strtoul(p, &ep, base);
-+ if (p != ep) {
-+ while (lj_char_isspace((unsigned char)(*ep))) ep++;
-+ if (*ep == '\0') {
-+ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
-+ if (neg) ul = -ul;
-+ setintV(L->base-1-LJ_FR2, (int32_t)ul);
-+ } else {
-+ lua_Number n = (lua_Number)ul;
-+ if (neg) n = -n;
-+ setnumV(L->base-1-LJ_FR2, n);
-+ }
-+ return FFH_RES(1);
-+ }
- }
- }
- }
---
-2.20.1
-
diff --git a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch b/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch
deleted file mode 100644
index c0406a5..0000000
--- a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 02b521981a1ab919ff2cd4d9bcaee80baf77dce2 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 22 Apr 2018 13:27:25 +0200
-Subject: [PATCH 37/72] FFI: Add tonumber() specialization for failed
- conversions.
-
-Contributed by Javier Guerra Giraldez.
----
- src/lj_crecord.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/lj_crecord.c b/src/lj_crecord.c
-index 84fc49e..bc88d63 100644
---- a/src/lj_crecord.c
-+++ b/src/lj_crecord.c
-@@ -1661,6 +1661,8 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
- d = ctype_get(cts, CTID_DOUBLE);
- J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]);
- } else {
-+ /* Specialize to the ctype that couldn't be converted. */
-+ argv2cdata(J, J->base[0], &rd->argv[0]);
- J->base[0] = TREF_NIL;
- }
- }
---
-2.20.1
-
diff --git a/0038-Bump-copyright-date-to-2018.patch b/0038-Bump-copyright-date-to-2018.patch
deleted file mode 100644
index 1f9e5eb..0000000
--- a/0038-Bump-copyright-date-to-2018.patch
+++ /dev/null
@@ -1,387 +0,0 @@
-From cf7a0540a3a9f80fc729211eb21d1e9b72acc89c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 25 Apr 2018 12:07:08 +0200
-Subject: [PATCH 38/72] Bump copyright date to 2018.
-
----
- doc/bluequad-print.css | 2 +-
- doc/bluequad.css | 2 +-
- doc/changes.html | 5 ++---
- doc/contact.html | 7 +++----
- doc/ext_c_api.html | 5 ++---
- doc/ext_ffi.html | 5 ++---
- doc/ext_ffi_api.html | 5 ++---
- doc/ext_ffi_semantics.html | 5 ++---
- doc/ext_ffi_tutorial.html | 5 ++---
- doc/ext_jit.html | 5 ++---
- doc/extensions.html | 5 ++---
- doc/faq.html | 5 ++---
- doc/install.html | 5 ++---
- doc/luajit.html | 7 +++----
- doc/running.html | 5 ++---
- doc/status.html | 5 ++---
- 16 files changed, 32 insertions(+), 46 deletions(-)
-
-diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css
-index 62e1c16..d5a3ea3 100644
---- a/doc/bluequad-print.css
-+++ b/doc/bluequad-print.css
-@@ -1,4 +1,4 @@
--/* Copyright (C) 2004-2017 Mike Pall.
-+/* Copyright (C) 2004-2018 Mike Pall.
- *
- * You are welcome to use the general ideas of this design for your own sites.
- * But please do not steal the stylesheet, the layout or the color scheme.
-diff --git a/doc/bluequad.css b/doc/bluequad.css
-index be2c4bf..cfc889a 100644
---- a/doc/bluequad.css
-+++ b/doc/bluequad.css
-@@ -1,4 +1,4 @@
--/* Copyright (C) 2004-2017 Mike Pall.
-+/* Copyright (C) 2004-2018 Mike Pall.
- *
- * You are welcome to use the general ideas of this design for your own sites.
- * But please do not steal the stylesheet, the layout or the color scheme.
-diff --git a/doc/changes.html b/doc/changes.html
-index 4a4d4fb..c1848e8 100644
---- a/doc/changes.html
-+++ b/doc/changes.html
-@@ -3,8 +3,7 @@
- <head>
- <title>LuaJIT Change History</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -1010,7 +1009,7 @@ This is the initial non-public release of LuaJIT.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/contact.html b/doc/contact.html
-index 5e07bde..54ddf74 100644
---- a/doc/contact.html
-+++ b/doc/contact.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Contact</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -91,7 +90,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
- <h2>Copyright</h2>
- <p>
- All documentation is
--Copyright © 2005-2017 Mike Pall.
-+Copyright © 2005-2018 Mike Pall.
- </p>
-
-
-@@ -99,7 +98,7 @@ Copyright © 2005-2017 Mike Pall.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
-index 4bb8251..3825956 100644
---- a/doc/ext_c_api.html
-+++ b/doc/ext_c_api.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Lua/C API Extensions</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -177,7 +176,7 @@ Also note that this mechanism is not without overhead.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
-index d48d77f..74ca294 100644
---- a/doc/ext_ffi.html
-+++ b/doc/ext_ffi.html
-@@ -3,8 +3,7 @@
- <head>
- <title>FFI Library</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -320,7 +319,7 @@ without undue conversion penalties.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
-index 566897c..10f2d02 100644
---- a/doc/ext_ffi_api.html
-+++ b/doc/ext_ffi_api.html
-@@ -3,8 +3,7 @@
- <head>
- <title>ffi.* API Functions</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -556,7 +555,7 @@ named <tt>i</tt>.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
-index ae3c037..218049d 100644
---- a/doc/ext_ffi_semantics.html
-+++ b/doc/ext_ffi_semantics.html
-@@ -3,8 +3,7 @@
- <head>
- <title>FFI Semantics</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -1235,7 +1234,7 @@ compiled.</li>
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
-index 29cf549..cd455cf 100644
---- a/doc/ext_ffi_tutorial.html
-+++ b/doc/ext_ffi_tutorial.html
-@@ -3,8 +3,7 @@
- <head>
- <title>FFI Tutorial</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -591,7 +590,7 @@ it to a local variable in the function scope is unnecessary.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_jit.html b/doc/ext_jit.html
-index 5017e3c..ce6dcd6 100644
---- a/doc/ext_jit.html
-+++ b/doc/ext_jit.html
-@@ -3,8 +3,7 @@
- <head>
- <title>jit.* Library</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -189,7 +188,7 @@ if you want to know more.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/extensions.html b/doc/extensions.html
-index 3d9e82b..fa412e0 100644
---- a/doc/extensions.html
-+++ b/doc/extensions.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Extensions</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -398,7 +397,7 @@ lead to the termination of the process.</li>
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/faq.html b/doc/faq.html
-index afeff94..9338be4 100644
---- a/doc/faq.html
-+++ b/doc/faq.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Frequently Asked Questions (FAQ)</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -174,7 +173,7 @@ the development of certain features, if they are important to you.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/install.html b/doc/install.html
-index 4bcc506..befffa7 100644
---- a/doc/install.html
-+++ b/doc/install.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Installation</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -636,7 +635,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/luajit.html b/doc/luajit.html
-index 0003008..d8f531d 100644
---- a/doc/luajit.html
-+++ b/doc/luajit.html
-@@ -3,8 +3,7 @@
- <head>
- <title>LuaJIT</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -150,7 +149,7 @@ Lua is a powerful, dynamic and light-weight programming language.
- It may be embedded or used as a general-purpose, stand-alone language.
- </p>
- <p>
--LuaJIT is Copyright © 2005-2017 Mike Pall, released under the
-+LuaJIT is Copyright © 2005-2018 Mike Pall, released under the
- <a
href="http://www.opensource.org/licenses/mit-license.php">&l...
class="ext">»</span> MIT open source
license</a>.
- </p>
- <p>
-@@ -224,7 +223,7 @@ Please select a sub-topic in the navigation bar to learn more about
LuaJIT.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/running.html b/doc/running.html
-index 331c22d..08d7f71 100644
---- a/doc/running.html
-+++ b/doc/running.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Running LuaJIT</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -296,7 +295,7 @@ Here are the parameters and their default settings:
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/status.html b/doc/status.html
-index aa8df93..ea61db1 100644
---- a/doc/status.html
-+++ b/doc/status.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Status</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -100,7 +99,7 @@ garbage collector.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
---
-2.20.1
-
diff --git a/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch
b/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch
deleted file mode 100644
index ee383da..0000000
--- a/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-commit 362f034c1b91d52ea2cf971314ed4e0c24348bff
-Merge: 260b9b4 f5d424a
-Author: Mike Pall <mike>
-Date: Sun May 20 12:28:10 2018 +0200
-
- Merge branch 'master' into v2.1
-
-From f5d424afe8b9395f0df05aba905e0e1f6a2262b8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 20 May 2018 12:25:36 +0200
-Subject: [PATCH 39/72] FFI: Make FP to U64 conversions match JIT backend
- behavior.
-
----
- src/lj_obj.h | 18 +++++++++++++-----
- 1 file changed, 13 insertions(+), 5 deletions(-)
-
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index e70b003..2ee526c 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -816,14 +816,22 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-
- #define lj_num2int(n) ((int32_t)(n))
-
-+/*
-+** This must match the JIT backend behavior. In particular for archs
-+** that don't have a common hardware instruction for this conversion.
-+** Note that signed FP to unsigned int conversions have an undefined
-+** result and should never be relied upon in portable FFI code.
-+** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
-+*/
- static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
- {
--#ifdef _MSC_VER
-- if (n >= 9223372036854775808.0) /* They think it's a feature. */
-- return (uint64_t)(int64_t)(n - 18446744073709551616.0);
-- else
-+#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
-+ int64_t i = (int64_t)n;
-+ if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
-+ return (uint64_t)i;
-+#else
-+ return (uint64_t)n;
- #endif
-- return (uint64_t)n;
- }
-
- static LJ_AINLINE int32_t numberVint(cTValue *o)
---
-2.20.1
-
diff --git a/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch
b/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch
deleted file mode 100644
index 01dd836..0000000
--- a/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From fb5e522fbc0750c838ef6a926b11c5d870826183 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 20 May 2018 12:40:33 +0200
-Subject: [PATCH 40/72] x86/x64: Check for jcc when using xor r,r in
- emit_loadi().
-
-Thanks to Peter Cawley.
----
- src/lj_emit_x86.h | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
-index bcceb93..9c371a9 100644
---- a/src/lj_emit_x86.h
-+++ b/src/lj_emit_x86.h
-@@ -268,10 +268,12 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs,
int32_t i)
- /* mov r, i / xor r, r */
- static void emit_loadi(ASMState *as, Reg r, int32_t i)
- {
-- /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
-+ /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. */
- if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
- (as->curins+1 < as->T->nins &&
-- IR(as->curins+1)->o == IR_HIOP)))) {
-+ IR(as->curins+1)->o == IR_HIOP))) &&
-+ !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) ||
-+ (*as->mcp & 0xf0) == XI_JCCs)) {
- emit_rr(as, XO_ARITH(XOg_XOR), r, r);
- } else {
- MCode *p = as->mcp;
---
-2.20.1
-
diff --git a/0041-PPC-NetBSD-Fix-endianess-check.patch
b/0041-PPC-NetBSD-Fix-endianess-check.patch
deleted file mode 100644
index 6800a89..0000000
--- a/0041-PPC-NetBSD-Fix-endianess-check.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-commit d36afcfea57c29fb51060c24679f3b2c07806545
-Merge: b708297 b025b01
-Author: Mike Pall <mike>
-Date: Tue Jun 5 11:39:10 2018 +0200
-
- Merge branch 'master' into v2.1
-
-From b025b01c5b9d23f6218c7d72b7aafa3f1ab1e08a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 11:36:18 +0200
-Subject: [PATCH 41/72] PPC/NetBSD: Fix endianess check.
-
-Thanks to he32 and daurnimator.
----
- src/lj_arch.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index e04c4ee..5f7e445 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -339,7 +339,7 @@
- #error "No support for ILP32 model on ARM64"
- #endif
- #elif LJ_TARGET_PPC
--#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
-+#if !LJ_ARCH_PPC64 && (defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER)
|| (_BYTE_ORDER == _LITTLE_ENDIAN)))
- #error "No support for little-endian PPC32"
- #endif
- #if LJ_ARCH_PPC64
---
-2.20.1
-
diff --git a/0042-DynASM-x86-Add-FMA3-instructions.patch
b/0042-DynASM-x86-Add-FMA3-instructions.patch
deleted file mode 100644
index 0fe390a..0000000
--- a/0042-DynASM-x86-Add-FMA3-instructions.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From cc299958bb412f229844e53473a035c280544ec3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 12:23:13 +0200
-Subject: [PATCH 42/72] DynASM/x86: Add FMA3 instructions.
-
-Thanks to Alexander Nasonov.
----
- dynasm/dasm_x86.lua | 67 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 67 insertions(+)
-
-diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
-index c1d267a..73502f6 100644
---- a/dynasm/dasm_x86.lua
-+++ b/dynasm/dasm_x86.lua
-@@ -1696,6 +1696,73 @@ local map_op = {
- sarx_3 = "rmrqd:F30F38wF7rM",
- shrx_3 = "rmrqd:F20F38wF7rM",
- shlx_3 = "rmrqd:660F38wF7rM",
-+
-+ -- FMA3
-+ vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
-+ vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
-+ vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
-+ vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
-+ vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
-+ vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
-+
-+ vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
-+ vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
-+ vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
-+ vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
-+ vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
-+ vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
-+
-+ vfmadd132pd_3 = "rrmoy:660F38VX98rM",
-+ vfmadd132ps_3 = "rrmoy:660F38V98rM",
-+ vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
-+ vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
-+ vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
-+ vfmadd213ps_3 = "rrmoy:660F38VA8rM",
-+ vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
-+ vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
-+ vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
-+ vfmadd231ps_3 = "rrmoy:660F38VB8rM",
-+ vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
-+ vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
-+
-+ vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
-+ vfmsub132ps_3 = "rrmoy:660F38V9ArM",
-+ vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
-+ vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
-+ vfmsub213pd_3 = "rrmoy:660F38VXAArM",
-+ vfmsub213ps_3 = "rrmoy:660F38VAArM",
-+ vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
-+ vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
-+ vfmsub231pd_3 = "rrmoy:660F38VXBArM",
-+ vfmsub231ps_3 = "rrmoy:660F38VBArM",
-+ vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
-+ vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
-+
-+ vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
-+ vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
-+ vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
-+ vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
-+ vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
-+ vfnmadd213ps_3 = "rrmoy:660F38VACrM",
-+ vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
-+ vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
-+ vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
-+ vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
-+ vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
-+ vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
-+
-+ vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
-+ vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
-+ vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
-+ vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
-+ vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
-+ vfnmsub213ps_3 = "rrmoy:660F38VAErM",
-+ vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
-+ vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
-+ vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
-+ vfnmsub231ps_3 = "rrmoy:660F38VBErM",
-+ vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
-+ vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
- }
-
- ------------------------------------------------------------------------------
---
-2.20.1
-
diff --git a/0043-x86-Disassemble-FMA3-instructions.patch
b/0043-x86-Disassemble-FMA3-instructions.patch
deleted file mode 100644
index 1d64c0a..0000000
--- a/0043-x86-Disassemble-FMA3-instructions.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 55f70823242aa4e6acc248bde5cf8194ba1b27e3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 12:23:29 +0200
-Subject: [PATCH 43/72] x86: Disassemble FMA3 instructions.
-
-Thanks to Alexander Nasonov.
----
- src/jit/dis_x86.lua | 24 +++++++++++++++++++++++-
- 1 file changed, 23 insertions(+), 1 deletion(-)
-
-diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
-index 4371233..3a68c93 100644
---- a/src/jit/dis_x86.lua
-+++ b/src/jit/dis_x86.lua
-@@ -239,6 +239,24 @@
nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
- --8x
- [0x8c] = "||pmaskmovXrvVSm",
- [0x8e] = "||pmaskmovVSmXvr",
-+--9x
-+[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
-+[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
-+[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
-+[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
-+[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
-+--Ax
-+[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
-+[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
-+[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
-+[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
-+[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
-+--Bx
-+[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
-+[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
-+[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
-+[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
-+[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
- --Dx
- [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
- [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
-@@ -483,7 +501,7 @@ local function putpat(ctx, name, pat)
- local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
- local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
-
-- -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
-+ -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
- for p in gmatch(pat, ".") do
- local x = nil
- if p == "V" or p == "U" then
-@@ -506,6 +524,9 @@ local function putpat(ctx, name, pat)
- sz = ctx.o16 and "X" or "M"; ctx.o16 = false
- if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
- regs = map_regs[sz]
-+ elseif p == "H" then
-+ name = name..(ctx.rexw and "d" or "s")
-+ ctx.rexw = false
- elseif p == "S" then
- name = name..lower(sz)
- elseif p == "s" then
-@@ -735,6 +756,7 @@ map_act = {
- V = putpat, U = putpat, T = putpat,
- M = putpat, X = putpat, P = putpat,
- F = putpat, G = putpat, Y = putpat,
-+ H = putpat,
-
- -- Collect prefixes.
- [":"] = function(ctx, name, pat)
---
-2.20.1
-
diff --git a/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch
b/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch
deleted file mode 100644
index 315b528..0000000
--- a/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-From a5a89ab586a3b5bb4f266949bbf3dc2b140e2374 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 12:23:56 +0200
-Subject: [PATCH 44/72] From Lua 5.3: assert() accepts any type of error
- object.
-
----
- doc/extensions.html | 1 +
- src/lib_base.c | 10 +++++-----
- 2 files changed, 6 insertions(+), 5 deletions(-)
-
-diff --git a/doc/extensions.html b/doc/extensions.html
-index 55c4b70..7379041 100644
---- a/doc/extensions.html
-+++ b/doc/extensions.html
-@@ -373,6 +373,7 @@ LuaJIT supports some extensions from Lua 5.3:
- <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8
encoding in string literals.</li>
- <li>The argument table <tt>arg</tt> can be read (and modified) by
<tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
- <li><tt>io.read()</tt> and <tt>file:read()</tt> accept
formats with or without a leading <tt>*</tt>.</li>
-+<li><tt>assert()</tt> accepts any type of error object.</li>
- <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
- <li><tt>coroutine.isyieldable()</tt>.</li>
- <li>Lua/C API extensions:
-diff --git a/src/lib_base.c b/src/lib_base.c
-index d61e876..1cd8305 100644
---- a/src/lib_base.c
-+++ b/src/lib_base.c
-@@ -42,13 +42,13 @@
-
- LJLIB_ASM(assert) LJLIB_REC(.)
- {
-- GCstr *s;
- lj_lib_checkany(L, 1);
-- s = lj_lib_optstr(L, 2);
-- if (s)
-- lj_err_callermsg(L, strdata(s));
-- else
-+ if (L->top == L->base+1)
- lj_err_caller(L, LJ_ERR_ASSERT);
-+ else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
-+ lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
-+ else
-+ lj_err_run(L);
- return FFH_UNREACHABLE;
- }
-
---
-2.20.1
-
diff --git a/0045-Windows-Add-UWP-support-part-1.patch
b/0045-Windows-Add-UWP-support-part-1.patch
deleted file mode 100644
index fcb91fb..0000000
--- a/0045-Windows-Add-UWP-support-part-1.patch
+++ /dev/null
@@ -1,359 +0,0 @@
-From c3c54ce1aef782823936808a75460e6b53aada2c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 17:03:08 +0200
-Subject: [PATCH 45/72] Windows: Add UWP support, part 1.
-
-Contributed by Ben Pye.
----
- doc/ext_ffi_api.html | 2 ++
- src/lib_ffi.c | 3 +++
- src/lib_io.c | 4 ++--
- src/lib_package.c | 24 +++++++++++++++++++++++-
- src/lj_alloc.c | 6 +++---
- src/lj_arch.h | 19 +++++++++++++++++++
- src/lj_ccallback.c | 4 ++--
- src/lj_clib.c | 20 ++++++++++++++++----
- src/lj_mcode.c | 8 ++++----
- src/lj_profile.c | 8 ++++----
- 10 files changed, 78 insertions(+), 20 deletions(-)
-
-diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
-index 25cc974..54ff0ce 100644
---- a/doc/ext_ffi_api.html
-+++ b/doc/ext_ffi_api.html
-@@ -468,6 +468,8 @@ otherwise. The following parameters are currently defined:
- <tr class="odd">
- <td class="abiparam">win</td><td
class="abidesc">Windows variant of the standard ABI</td></tr>
- <tr class="even">
-+<td class="abiparam">uwp</td><td
class="abidesc">Universal Windows Platform</td></tr>
-+<tr class="odd">
- <td class="abiparam">gc64</td><td
class="abidesc">64 bit GC references</td></tr>
- </table>
-
-diff --git a/src/lib_ffi.c b/src/lib_ffi.c
-index 199cfc9..8032411 100644
---- a/src/lib_ffi.c
-+++ b/src/lib_ffi.c
-@@ -746,6 +746,9 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
- #endif
- #if LJ_ABI_WIN
- case H_(4ab624a8,4ab624a8): b = 1; break; /* win */
-+#endif
-+#if LJ_TARGET_UWP
-+ case H_(a40f0bcb,a40f0bcb): b = 1; break; /* uwp */
- #endif
- case H_(3af93066,1f001464): b = 1; break; /* le/be */
- #if LJ_GC64
-diff --git a/src/lib_io.c b/src/lib_io.c
-index 9763ed4..73fd932 100644
---- a/src/lib_io.c
-+++ b/src/lib_io.c
-@@ -99,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
- int stat = -1;
- #if LJ_TARGET_POSIX
- stat = pclose(iof->fp);
--#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
-+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
- stat = _pclose(iof->fp);
- #else
- lua_assert(0);
-@@ -406,7 +406,7 @@ LJLIB_CF(io_open)
-
- LJLIB_CF(io_popen)
- {
--#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
-+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE &&
!LJ_TARGET_UWP)
- const char *fname = strdata(lj_lib_checkstr(L, 1));
- GCstr *s = lj_lib_optstr(L, 2);
- const char *mode = s ? strdata(s) : "r";
-diff --git a/src/lib_package.c b/src/lib_package.c
-index 6fac43e..bedd6d7 100644
---- a/src/lib_package.c
-+++ b/src/lib_package.c
-@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
- BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
- #endif
-
-+#if LJ_TARGET_UWP
-+void *LJ_WIN_LOADLIBA(const char *path)
-+{
-+ DWORD err = GetLastError();
-+ wchar_t wpath[256];
-+ HANDLE lib = NULL;
-+ if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
-+ lib = LoadPackagedLibrary(wpath, 0);
-+ }
-+ SetLastError(err);
-+ return lib;
-+}
-+#endif
-+
- #undef setprogdir
-
- static void setprogdir(lua_State *L)
-@@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib)
-
- static void *ll_load(lua_State *L, const char *path, int gl)
- {
-- HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
-+ HINSTANCE lib = LJ_WIN_LOADLIBA(path);
- if (lib == NULL) pusherror(L);
- UNUSED(gl);
- return lib;
-@@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char
*sym)
- return f;
- }
-
-+#if LJ_TARGET_UWP
-+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
-+#endif
-+
- static const char *ll_bcsym(void *lib, const char *sym)
- {
- if (lib) {
- return (const char *)GetProcAddress((HINSTANCE)lib, sym);
- } else {
-+#if LJ_TARGET_UWP
-+ return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
-+#else
- HINSTANCE h = GetModuleHandleA(NULL);
- const char *p = (const char *)GetProcAddress(h, sym);
- if (p == NULL &&
GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
- (const char *)ll_bcsym, &h))
- p = (const char *)GetProcAddress(h, sym);
- return p;
-+#endif
- }
- }
-
-diff --git a/src/lj_alloc.c b/src/lj_alloc.c
-index 9fc761c..f3b6a54 100644
---- a/src/lj_alloc.c
-+++ b/src/lj_alloc.c
-@@ -167,7 +167,7 @@ static void *DIRECT_MMAP(size_t size)
- static void *CALL_MMAP(size_t size)
- {
- DWORD olderr = GetLastError();
-- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
-+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
- SetLastError(olderr);
- return ptr ? ptr : MFAIL;
- }
-@@ -176,8 +176,8 @@ static void *CALL_MMAP(size_t size)
- static void *DIRECT_MMAP(size_t size)
- {
- DWORD olderr = GetLastError();
-- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-- PAGE_READWRITE);
-+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-+ PAGE_READWRITE);
- SetLastError(olderr);
- return ptr ? ptr : MFAIL;
- }
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index e796912..31a1159 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -135,6 +135,13 @@
- #define LJ_TARGET_GC64 1
- #endif
-
-+#ifdef _UWP
-+#define LJ_TARGET_UWP 1
-+#if LUAJIT_TARGET == LUAJIT_ARCH_X64
-+#define LJ_TARGET_GC64 1
-+#endif
-+#endif
-+
- #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
- #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
- #define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
-@@ -570,6 +577,18 @@
- #define LJ_NO_UNWIND 1
- #endif
-
-+#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_UWP
-+#define LJ_WIN_VALLOC VirtualAllocFromApp
-+#define LJ_WIN_VPROTECT VirtualProtectFromApp
-+extern void *LJ_WIN_LOADLIBA(const char *path);
-+#else
-+#define LJ_WIN_VALLOC VirtualAlloc
-+#define LJ_WIN_VPROTECT VirtualProtect
-+#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
-+#endif
-+#endif
-+
- /* Compatibility with Lua 5.1 vs. 5.2. */
- #ifdef LUAJIT_ENABLE_LUA52COMPAT
- #define LJ_52 1
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index 03494a7..412dbf8 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -267,7 +267,7 @@ static void callback_mcode_new(CTState *cts)
- if (CALLBACK_MAX_SLOT == 0)
- lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
- #if LJ_TARGET_WINDOWS
-- p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
-+ p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
- if (!p)
- lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
- #elif LJ_TARGET_POSIX
-@@ -285,7 +285,7 @@ static void callback_mcode_new(CTState *cts)
- #if LJ_TARGET_WINDOWS
- {
- DWORD oprot;
-- VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot);
-+ LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
- }
- #elif LJ_TARGET_POSIX
- mprotect(p, sz, (PROT_READ|PROT_EXEC));
-diff --git a/src/lj_clib.c b/src/lj_clib.c
-index 6142659..f016b06 100644
---- a/src/lj_clib.c
-+++ b/src/lj_clib.c
-@@ -158,11 +158,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
- /* Default libraries. */
- enum {
- CLIB_HANDLE_EXE,
-+#if !LJ_TARGET_UWP
- CLIB_HANDLE_DLL,
- CLIB_HANDLE_CRT,
- CLIB_HANDLE_KERNEL32,
- CLIB_HANDLE_USER32,
- CLIB_HANDLE_GDI32,
-+#endif
- CLIB_HANDLE_MAX
- };
-
-@@ -208,7 +210,7 @@ static const char *clib_extname(lua_State *L, const char *name)
- static void *clib_loadlib(lua_State *L, const char *name, int global)
- {
- DWORD oldwerr = GetLastError();
-- void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0);
-+ void *h = LJ_WIN_LOADLIBA(clib_extname(L, name));
- if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
- SetLastError(oldwerr);
- UNUSED(global);
-@@ -218,6 +220,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int
global)
- static void clib_unloadlib(CLibrary *cl)
- {
- if (cl->handle == CLIB_DEFHANDLE) {
-+#if !LJ_TARGET_UWP
- MSize i;
- for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) {
- void *h = clib_def_handle[i];
-@@ -226,11 +229,16 @@ static void clib_unloadlib(CLibrary *cl)
- FreeLibrary((HINSTANCE)h);
- }
- }
-+#endif
- } else if (cl->handle) {
- FreeLibrary((HINSTANCE)cl->handle);
- }
- }
-
-+#if LJ_TARGET_UWP
-+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
-+#endif
-+
- static void *clib_getsym(CLibrary *cl, const char *name)
- {
- void *p = NULL;
-@@ -239,6 +247,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
- for (i = 0; i < CLIB_HANDLE_MAX; i++) {
- HINSTANCE h = (HINSTANCE)clib_def_handle[i];
- if (!(void *)h) { /* Resolve default library handles (once). */
-+#if LJ_TARGET_UWP
-+ h = (HINSTANCE)&__ImageBase;
-+#else
- switch (i) {
- case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
NULL, &h); break;
- case CLIB_HANDLE_DLL:
-@@ -249,11 +260,12 @@ static void *clib_getsym(CLibrary *cl, const char *name)
-
GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
- (const char *)&_fmode, &h);
- break;
-- case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0);
break;
-- case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break;
-- case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break;
-+ case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break;
-+ case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break;
-+ case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break;
- }
- if (!h) continue;
-+#endif
- clib_def_handle[i] = (void *)h;
- }
- p = (void *)GetProcAddress(h, name);
-diff --git a/src/lj_mcode.c b/src/lj_mcode.c
-index e46e3ef..64b0ca9 100644
---- a/src/lj_mcode.c
-+++ b/src/lj_mcode.c
-@@ -66,8 +66,8 @@ void lj_mcode_sync(void *start, void *end)
-
- static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
- {
-- void *p = VirtualAlloc((void *)hint, sz,
-- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
-+ void *p = LJ_WIN_VALLOC((void *)hint, sz,
-+ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
- if (!p && !hint)
- lj_trace_err(J, LJ_TRERR_MCODEAL);
- return p;
-@@ -82,7 +82,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
- static int mcode_setprot(void *p, size_t sz, DWORD prot)
- {
- DWORD oprot;
-- return !VirtualProtect(p, sz, prot, &oprot);
-+ return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
- }
-
- #elif LJ_TARGET_POSIX
-@@ -255,7 +255,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
- /* All memory addresses are reachable by relative jumps. */
- static void *mcode_alloc(jit_State *J, size_t sz)
- {
--#ifdef __OpenBSD__
-+#if defined(__OpenBSD__) || LJ_TARGET_UWP
- /* Allow better executable memory allocation for OpenBSD W^X mode. */
- void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
- if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
-diff --git a/src/lj_profile.c b/src/lj_profile.c
-index 116998e..3223697 100644
---- a/src/lj_profile.c
-+++ b/src/lj_profile.c
-@@ -247,7 +247,7 @@ static DWORD WINAPI profile_thread(void *psx)
- {
- ProfileState *ps = (ProfileState *)psx;
- int interval = ps->interval;
--#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
- ps->wmm_tbp(interval);
- #endif
- while (1) {
-@@ -255,7 +255,7 @@ static DWORD WINAPI profile_thread(void *psx)
- if (ps->abort) break;
- profile_trigger(ps);
- }
--#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
- ps->wmm_tep(interval);
- #endif
- return 0;
-@@ -264,9 +264,9 @@ static DWORD WINAPI profile_thread(void *psx)
- /* Start profiling timer thread. */
- static void profile_timer_start(ProfileState *ps)
- {
--#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
- if (!ps->wmm) { /* Load WinMM library on-demand. */
-- ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0);
-+ ps->wmm = LJ_WIN_LOADLIBA("winmm.dll");
- if (ps->wmm) {
- ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm,
"timeBeginPeriod");
- ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm,
"timeEndPeriod");
---
-2.20.1
-
diff --git a/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch
b/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch
deleted file mode 100644
index 69eea9a..0000000
--- a/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From c785131ca5a6d24adc519e5e0bf1b69b671d912f Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 24 Jun 2018 13:18:03 +0200
-Subject: [PATCH 46/72] ARM64: Fix write barrier in BC_USETS.
-
-Contributed by Javier Guerra Giraldez.
----
- src/vm_arm64.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
-index c55794a..fb226e3 100644
---- a/src/vm_arm64.dasc
-+++ b/src/vm_arm64.dasc
-@@ -2780,7 +2780,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |2: // Check if string is white and ensure upvalue is closed.
- | ldrb TMP0w, UPVAL:CARG1->closed
- | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
-- | ccmp TMP0w, #0, #0, ne
-+ | ccmp TMP0w, #0, #4, ne
- | beq <1
- | // Crossed a write barrier. Move the barrier forward.
- | mov CARG1, GL
---
-2.20.1
-
diff --git a/0047-ARM64-Fix-exit-stub-patching.patch
b/0047-ARM64-Fix-exit-stub-patching.patch
deleted file mode 100644
index 740d52e..0000000
--- a/0047-ARM64-Fix-exit-stub-patching.patch
+++ /dev/null
@@ -1,238 +0,0 @@
-From 9da06535092d6d9dec442641a26c64bce5574322 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 24 Jun 2018 14:08:59 +0200
-Subject: [PATCH 47/72] ARM64: Fix exit stub patching.
-
-Contributed by Javier Guerra Giraldez.
----
- src/lj_asm_arm64.h | 64 +++++++++++++++++++++++++------------------
- src/lj_emit_arm64.h | 18 ++++++------
- src/lj_target_arm64.h | 7 +++--
- 3 files changed, 51 insertions(+), 38 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index cbb186d..baafa21 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
- asm_mclimit(as);
- /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ...
*/
- for (i = nexits-1; (int32_t)i >= 0; i--)
-- *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
-- *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
-+ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
-+ *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
- mxp--;
-- *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
-- *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
-+ *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
-+ *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
- as->mctop = mxp;
- }
-
-@@ -77,7 +77,7 @@ static void asm_guardcc(ASMState *as, A64CC cc)
- MCode *p = as->mcp;
- if (LJ_UNLIKELY(p == as->invmcp)) {
- as->loopinv = 1;
-- *p = A64I_B | ((target-p) & 0x03ffffffu);
-+ *p = A64I_B | A64F_S26(target-p);
- emit_cond_branch(as, cc^1, p-1);
- return;
- }
-@@ -91,7 +91,7 @@ static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
- MCode *p = as->mcp;
- if (LJ_UNLIKELY(p == as->invmcp)) {
- as->loopinv = 1;
-- *p = A64I_B | ((target-p) & 0x03ffffffu);
-+ *p = A64I_B | A64F_S26(target-p);
- emit_tnb(as, ai^0x01000000u, r, bit, p-1);
- return;
- }
-@@ -105,7 +105,7 @@ static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
- MCode *p = as->mcp;
- if (LJ_UNLIKELY(p == as->invmcp)) {
- as->loopinv = 1;
-- *p = A64I_B | ((target-p) & 0x03ffffffu);
-+ *p = A64I_B | A64F_S26(target-p);
- emit_cnb(as, ai^0x01000000u, r, p-1);
- return;
- }
-@@ -1850,7 +1850,7 @@ static void asm_loop_fixup(ASMState *as)
- p[-2] |= ((uint32_t)delta & mask) << 5;
- } else {
- ptrdiff_t delta = target - (p - 1);
-- p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu);
-+ p[-1] = A64I_B | A64F_S26(delta);
- }
- }
-
-@@ -1919,7 +1919,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
- }
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
-- p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu);
-+ p[-1] = A64I_B | A64F_S26((target-p)+1);
- }
-
- /* Prepare tail of code. */
-@@ -1982,40 +1982,50 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno,
MCode *target)
- {
- MCode *p = T->mcode;
- MCode *pe = (MCode *)((char *)p + T->szmcode);
-- MCode *cstart = NULL, *cend = p;
-+ MCode *cstart = NULL;
- MCode *mcarea = lj_mcode_patch(J, p, 0);
- MCode *px = exitstub_trace_addr(T, exitno);
-+ /* Note: this assumes a trace exit is only ever patched once. */
- for (; p < pe; p++) {
- /* Look for exitstub branch, replace with branch to target. */
-+ ptrdiff_t delta = target - p;
- MCode ins = A64I_LE(*p);
- if ((ins & 0xff000000u) == 0x54000000u &&
- ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
-- /* Patch bcc exitstub. */
-- *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) &
0x00ffffe0u));
-- cend = p+1;
-- if (!cstart) cstart = p;
-+ /* Patch bcc, if within range. */
-+ if (A64F_S_OK(delta, 19)) {
-+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
-+ if (!cstart) cstart = p;
-+ }
- } else if ((ins & 0xfc000000u) == 0x14000000u &&
- ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
-- /* Patch b exitstub. */
-- *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
-- cend = p+1;
-+ /* Patch b. */
-+ lua_assert(A64F_S_OK(delta, 26));
-+ *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
- if (!cstart) cstart = p;
- } else if ((ins & 0x7e000000u) == 0x34000000u &&
- ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
-- /* Patch cbz/cbnz exitstub. */
-- *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
-- cend = p+1;
-- if (!cstart) cstart = p;
-+ /* Patch cbz/cbnz, if within range. */
-+ if (A64F_S_OK(delta, 19)) {
-+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
-+ if (!cstart) cstart = p;
-+ }
- } else if ((ins & 0x7e000000u) == 0x36000000u &&
- ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
-- /* Patch tbz/tbnz exitstub. */
-- *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) &
0x0007ffe0u));
-- cend = p+1;
-- if (!cstart) cstart = p;
-+ /* Patch tbz/tbnz, if within range. */
-+ if (A64F_S_OK(delta, 14)) {
-+ *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
-+ if (!cstart) cstart = p;
-+ }
- }
- }
-- lua_assert(cstart != NULL);
-- lj_mcode_sync(cstart, cend);
-+ { /* Always patch long-range branch in exit stub itself. */
-+ ptrdiff_t delta = target - px;
-+ lua_assert(A64F_S_OK(delta, 26));
-+ *px = A64I_B | A64F_S26(delta);
-+ if (!cstart) cstart = px;
-+ }
-+ lj_mcode_sync(cstart, px+1);
- lj_mcode_patch(J, mcarea, 1);
- }
-
-diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
-index 6da4c7d..1001b1d 100644
---- a/src/lj_emit_arm64.h
-+++ b/src/lj_emit_arm64.h
-@@ -241,7 +241,7 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
- #define mcpofs(as, k) \
- ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
- #define checkmcpofs(as, k) \
-- ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0)
-+ (A64F_S_OK(mcpofs(as, k)>>2, 19))
-
- static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
-
-@@ -312,7 +312,7 @@ static void emit_cond_branch(ASMState *as, A64CC cond, MCode
*target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(((delta + 0x40000) >> 19) == 0);
-+ lua_assert(A64F_S_OK(delta, 19));
- *p = A64I_BCC | A64F_S19(delta) | cond;
- }
-
-@@ -320,24 +320,24 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(((delta + 0x02000000) >> 26) == 0);
-- *p = ai | ((uint32_t)delta & 0x03ffffffu);
-+ lua_assert(A64F_S_OK(delta, 26));
-+ *p = ai | A64F_S26(delta);
- }
-
- static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0);
-+ lua_assert(bit < 63 && A64F_S_OK(delta, 14));
- if (bit > 31) ai |= A64I_X;
-- *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r;
-+ *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
- }
-
- static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(((delta + 0x40000) >> 19) == 0);
-+ lua_assert(A64F_S_OK(delta, 19));
- *p = ai | A64F_S19(delta) | r;
- }
-
-@@ -347,8 +347,8 @@ static void emit_call(ASMState *as, void *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = (char *)target - (char *)p;
-- if ((((delta>>2) + 0x02000000) >> 26) == 0) {
-- *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu);
-+ if (A64F_S_OK(delta>>2, 26)) {
-+ *p = A64I_BL | A64F_S26(delta>>2);
- } else { /* Target out of range: need indirect call. But don't use R0-R7. */
- Reg r = ra_allock(as, i64ptr(target),
- RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
-index 520023a..a207a2b 100644
---- a/src/lj_target_arm64.h
-+++ b/src/lj_target_arm64.h
-@@ -132,9 +132,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p,
uint32_t exitno)
- #define A64F_IMMR(x) ((x) << 16)
- #define A64F_U16(x) ((x) << 5)
- #define A64F_U12(x) ((x) << 10)
--#define A64F_S26(x) (x)
-+#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
- #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
--#define A64F_S14(x) ((x) << 5)
-+#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
- #define A64F_S9(x) ((x) << 12)
- #define A64F_BIT(x) ((x) << 19)
- #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
-@@ -145,6 +145,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p,
uint32_t exitno)
- #define A64F_LSL16(x) (((x) / 16) << 21)
- #define A64F_BSH(sh) ((sh) << 10)
-
-+/* Check for valid field range. */
-+#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
-+
- typedef enum A64Ins {
- A64I_S = 0x20000000,
- A64I_X = 0x80000000,
---
-2.20.1
-
diff --git a/0048-DynASM-Fix-warning.patch b/0048-DynASM-Fix-warning.patch
deleted file mode 100644
index 4af1c32..0000000
--- a/0048-DynASM-Fix-warning.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 9b41062156779160b88fe5e1eb1ece1ee1fe6a74 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 24 Jun 2018 14:10:21 +0200
-Subject: [PATCH 48/72] DynASM: Fix warning.
-
----
- dynasm/dasm_arm64.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
-index 47e1e07..ff21236 100644
---- a/dynasm/dasm_arm64.h
-+++ b/dynasm/dasm_arm64.h
-@@ -427,6 +427,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
---
-2.20.1
-
diff --git a/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch
b/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch
deleted file mode 100644
index 40bcbce..0000000
--- a/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 26f1023819efb843e10014232cd88bb1d52ea4f5 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 22 Aug 2018 13:35:41 +0200
-Subject: [PATCH 49/72] DynASM/x86: Fix vroundps/vroundpd encoding.
-
-Thanks to Alexander Nasonov.
----
- dynasm/dasm_x86.lua | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
-index 73502f6..7f536af 100644
---- a/dynasm/dasm_x86.lua
-+++ b/dynasm/dasm_x86.lua
-@@ -1537,8 +1537,8 @@ local map_op = {
- vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
- vrsqrtps_2 = "rmoy:0Fu52rM",
- vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
-- vroundpd_3 = "rmioy:660F3AV09rMU",
-- vroundps_3 = "rmioy:660F3AV08rMU",
-+ vroundpd_3 = "rmioy:660F3Au09rMU",
-+ vroundps_3 = "rmioy:660F3Au08rMU",
- vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
- vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
- vshufpd_4 = "rrmioy:660FVC6rMU",
---
-2.20.1
-
diff --git a/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch
b/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch
deleted file mode 100644
index 9b29c4e..0000000
--- a/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 646148e747759f0af3b47f9bd287cedd7e174631 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 13 Sep 2018 17:58:50 +0200
-Subject: [PATCH 50/72] Fix memory probing allocator to check for valid end
- address, too.
-
----
- src/lj_alloc.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_alloc.c b/src/lj_alloc.c
-index f3b6a54..33a2eb8 100644
---- a/src/lj_alloc.c
-+++ b/src/lj_alloc.c
-@@ -255,7 +255,8 @@ static void *mmap_probe(size_t size)
- for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
- void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
- uintptr_t addr = (uintptr_t)p;
-- if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >=
LJ_ALLOC_MMAP_PROBE_LOWER) {
-+ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >=
LJ_ALLOC_MMAP_PROBE_LOWER &&
-+ ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
- /* We got a suitable address. Bump the hint address. */
- hint_addr = addr + size;
- errno = olderr;
---
-2.20.1
-
diff --git a/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch
b/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch
deleted file mode 100644
index 3226e33..0000000
--- a/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 9c1b637898f38dd4606da08ba1a82a174c3e64b6 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Oct 2018 15:12:59 +0200
-Subject: [PATCH 51/72] MIPS/MIPS64: Fix TSETR barrier (again).
-
----
- src/vm_mips.dasc | 2 +-
- src/vm_mips64.dasc | 2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
-index 1afd611..f324812 100644
---- a/src/vm_mips.dasc
-+++ b/src/vm_mips.dasc
-@@ -4317,7 +4317,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next2
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:CARG2, TMP3, TMP0, <2
-+ | barrierback TAB:CARG2, TMP3, CRET1, <2
- break;
-
- case BC_TSETM:
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index 0a3f8e5..1682c81 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -4263,7 +4263,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next2
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:CARG2, TMP3, TMP0, <2
-+ | barrierback TAB:CARG2, TMP3, CRET1, <2
- break;
-
- case BC_TSETM:
---
-2.20.1
-
diff --git a/0052-Actually-implement-maxirconst-trace-limit.patch
b/0052-Actually-implement-maxirconst-trace-limit.patch
deleted file mode 100644
index 5281168..0000000
--- a/0052-Actually-implement-maxirconst-trace-limit.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From 0a9ff94c4a1fcec2c310dcb092da694f23186e23 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Oct 2018 15:21:37 +0200
-Subject: [PATCH 52/72] Actually implement maxirconst trace limit.
-
-Suggested by spacewander.
----
- src/lj_record.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_record.c b/src/lj_record.c
-index 1a2b1c5..7f37d6c 100644
---- a/src/lj_record.c
-+++ b/src/lj_record.c
-@@ -2470,8 +2470,9 @@ void lj_record_ins(jit_State *J)
- #undef rbv
- #undef rcv
-
-- /* Limit the number of recorded IR instructions. */
-- if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
-+ /* Limit the number of recorded IR instructions and constants. */
-+ if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] ||
-+ J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst])
- lj_trace_err(J, LJ_TRERR_TRACEOV);
- }
-
---
-2.20.1
-
diff --git a/0053-Better-detection-of-MinGW-build.patch
b/0053-Better-detection-of-MinGW-build.patch
deleted file mode 100644
index 9805f09..0000000
--- a/0053-Better-detection-of-MinGW-build.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 3404183e2387f48e3464bd79116d3e8021ca781e Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:02:15 +0100
-Subject: [PATCH 53/72] Better detection of MinGW build.
-
----
- src/Makefile | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/Makefile b/src/Makefile
-index 24e8c0e..962aa94 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -165,6 +165,10 @@ else
- HOST_SYS= Windows
- HOST_MSYS= mingw
- endif
-+ ifneq (,$(findstring MSYS,$(HOST_SYS)))
-+ HOST_SYS= Windows
-+ HOST_MSYS= mingw
-+ endif
- ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
- HOST_SYS= Windows
- HOST_MSYS= cygwin
---
-2.20.1
-
diff --git a/0054-Fix-overflow-of-snapshot-map-offset.patch
b/0054-Fix-overflow-of-snapshot-map-offset.patch
deleted file mode 100644
index 723cb74..0000000
--- a/0054-Fix-overflow-of-snapshot-map-offset.patch
+++ /dev/null
@@ -1,131 +0,0 @@
-commit 749e99ce2a88bf337bd2f6279940d6761ce5f616
-Merge: e2cc89b 380e440
-Author: Mike Pall <mike>
-Date: Thu Jan 10 12:24:17 2019 +0100
-
- Merge branch 'master' into v2.1
-
-From 380e4409a70725df85034f02c968b6ebd7a5e513 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:19:30 +0100
-Subject: [PATCH 54/72] Fix overflow of snapshot map offset.
-
-Thanks to Yichun Zhang.
----
- src/lj_jit.h | 10 +++++-----
- src/lj_opt_loop.c | 8 ++++----
- src/lj_snap.c | 6 +++---
- 3 files changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index 3f38d28..0bc6258 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -163,7 +163,7 @@ typedef struct MCLink {
-
- /* Stack snapshot header. */
- typedef struct SnapShot {
-- uint16_t mapofs; /* Offset into snapshot map. */
-+ uint32_t mapofs; /* Offset into snapshot map. */
- IRRef1 ref; /* First IR ref for this snapshot. */
- uint8_t nslots; /* Number of valid slots. */
- uint8_t topslot; /* Maximum frame extent. */
-@@ -217,17 +217,15 @@ typedef enum {
- /* Trace object. */
- typedef struct GCtrace {
- GCHeader;
-- uint8_t topslot; /* Top stack slot already checked to be allocated. */
-- uint8_t linktype; /* Type of link. */
-+ uint16_t nsnap; /* Number of snapshots. */
- IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
- #if LJ_GC64
- uint32_t unused_gc64;
- #endif
- GCRef gclist;
- IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
- IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
-- uint16_t nsnap; /* Number of snapshots. */
-- uint16_t nsnapmap; /* Number of snapshot map elements. */
-+ uint32_t nsnapmap; /* Number of snapshot map elements. */
- SnapShot *snap; /* Snapshot array. */
- SnapEntry *snapmap; /* Snapshot map. */
- GCRef startpt; /* Starting prototype. */
-@@ -241,6 +239,8 @@ typedef struct GCtrace {
- TraceNo1 nextroot; /* Next root trace for same prototype. */
- TraceNo1 nextside; /* Next side trace of same root trace. */
- uint8_t sinktags; /* Trace has SINK tags. */
-+ uint8_t topslot; /* Top stack slot already checked to be allocated. */
-+ uint8_t linktype; /* Type of link. */
- uint8_t unused1;
- #ifdef LUAJIT_USE_GDBJIT
- void *gdbjit_entry; /* GDB JIT entry. */
-diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
-index 36317b3..cc88111 100644
---- a/src/lj_opt_loop.c
-+++ b/src/lj_opt_loop.c
-@@ -223,7 +223,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
- }
- J->guardemit.irt = 0;
- /* Setup new snapshot. */
-- snap->mapofs = (uint16_t)nmapofs;
-+ snap->mapofs = (uint32_t)nmapofs;
- snap->ref = (IRRef1)J->cur.nins;
- snap->nslots = nslots;
- snap->topslot = osnap->topslot;
-@@ -251,7 +251,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
- nmap += nn;
- while (omap < nextmap) /* Copy PC + frame links. */
- *nmap++ = *omap++;
-- J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap);
-+ J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap);
- }
-
- typedef struct LoopState {
-@@ -362,7 +362,7 @@ static void loop_unroll(jit_State *J)
- }
- }
- if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
-- J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
-+ J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs;
- lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
- *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
-
-@@ -376,7 +376,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize
nsnapmap)
- SnapShot *snap = &J->cur.snap[nsnap-1];
- SnapEntry *map = J->cur.snapmap;
- map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC.
*/
-- J->cur.nsnapmap = (uint16_t)nsnapmap;
-+ J->cur.nsnapmap = (uint32_t)nsnapmap;
- J->cur.nsnap = nsnap;
- J->guardemit.irt = 0;
- lj_ir_rollback(J, ins);
-diff --git a/src/lj_snap.c b/src/lj_snap.c
-index e891f7a..73f2500 100644
---- a/src/lj_snap.c
-+++ b/src/lj_snap.c
-@@ -129,11 +129,11 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize
nsnapmap)
- nent = snapshot_slots(J, p, nslots);
- snap->nent = (uint8_t)nent;
- nent += snapshot_framelinks(J, p + nent, &snap->topslot);
-- snap->mapofs = (uint16_t)nsnapmap;
-+ snap->mapofs = (uint32_t)nsnapmap;
- snap->ref = (IRRef1)J->cur.nins;
- snap->nslots = (uint8_t)nslots;
- snap->count = 0;
-- J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
-+ J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
- }
-
- /* Add or merge a snapshot. */
-@@ -294,7 +294,7 @@ void lj_snap_shrink(jit_State *J)
- snap->nent = (uint8_t)m;
- nlim = J->cur.nsnapmap - snap->mapofs - 1;
- while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
-- J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */
-+ J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */
- }
-
- /* -- Snapshot access ----------------------------------------------------- */
---
-2.20.1
-
diff --git a/0055-DynASM-PPC-Fix-shadowed-variable.patch
b/0055-DynASM-PPC-Fix-shadowed-variable.patch
deleted file mode 100644
index 4f80b3a..0000000
--- a/0055-DynASM-PPC-Fix-shadowed-variable.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 20e4c529458fa42ef6651a0042e3955723ee20c2 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:28:24 +0100
-Subject: [PATCH 55/72] DynASM/PPC: Fix shadowed variable.
-
-Cleanup only, bug cannot trigger.
-Thanks to Domingo Alvarez Duarte.
----
- dynasm/dasm_ppc.lua | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
-index 4e1656e..77031fb 100644
---- a/dynasm/dasm_ppc.lua
-+++ b/dynasm/dasm_ppc.lua
-@@ -1056,9 +1056,9 @@ map_op[".template__"] = function(params, template, nparams)
- elseif p == "M" then
- op = op + parse_shiftmask(params[n], false); n = n + 1
- elseif p == "J" or p == "K" then
-- local mode, n, s = parse_label(params[n], false)
-- if p == "K" then n = n + 2048 end
-- waction("REL_"..mode, n, s, 1)
-+ local mode, m, s = parse_label(params[n], false)
-+ if p == "K" then m = m + 2048 end
-+ waction("REL_"..mode, m, s, 1)
- n = n + 1
- elseif p == "0" then
- if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
---
-2.20.1
-
diff --git a/0056-DynASM-MIPS-Fix-shadowed-variable.patch b/0056-DynASM-MIPS-Fix-shadowed-variable.patch
deleted file mode 100644
index e3fc081..0000000
--- a/0056-DynASM-MIPS-Fix-shadowed-variable.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 5c911998a3c85d024a8006feafc68d0b4c962fd8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:32:08 +0100
-Subject: [PATCH 56/72] DynASM/MIPS: Fix shadowed variable.
-
-Cleanup only, bug cannot trigger.
-Thanks to Domingo Alvarez Duarte.
----
- dynasm/dasm_mips.lua | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
-index 8e250ce..af53042 100644
---- a/dynasm/dasm_mips.lua
-+++ b/dynasm/dasm_mips.lua
-@@ -757,9 +757,9 @@ map_op[".template__"] = function(params, template, nparams)
- elseif p == "X" then
- op = op + parse_index(params[n]); n = n + 1
- elseif p == "B" or p == "J" then
-- local mode, n, s = parse_label(params[n], false)
-- if p == "B" then n = n + 2048 end
-- waction("REL_"..mode, n, s, 1)
-+ local mode, m, s = parse_label(params[n], false)
-+ if p == "B" then m = m + 2048 end
-+ waction("REL_"..mode, m, s, 1)
- n = n + 1
- elseif p == "A" then
- op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
---
-2.20.1
-
diff --git a/0057-Fix-MinGW-build.patch b/0057-Fix-MinGW-build.patch
deleted file mode 100644
index d23aa4c..0000000
--- a/0057-Fix-MinGW-build.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 61464b0a5b685489bee7b6680c0e9663f2143a84 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:37:09 +0100
-Subject: [PATCH 57/72] Fix MinGW build.
-
-Thanks to Victor Bombi.
----
- src/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index 962aa94..2c780de 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -194,7 +194,7 @@ CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
- LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
-
- HOST_CC= $(CC)
--HOST_RM= rm -f
-+HOST_RM?= rm -f
- # If left blank, minilua is built and used. You can supply an installed
- # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
- HOST_LUA=
---
-2.20.1
-
diff --git a/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch b/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch
deleted file mode 100644
index 40324b7..0000000
--- a/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From fc63c938b522e147ea728b75f385728bf4a8fc35 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:47:28 +0100
-Subject: [PATCH 58/72] Fix os.date() for wider libc strftime() compatibility.
-
-Thanks to Jesper Lundgren.
----
- src/lib_os.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/lib_os.c b/src/lib_os.c
-index 9e78d49..ffbc3fd 100644
---- a/src/lib_os.c
-+++ b/src/lib_os.c
-@@ -205,12 +205,12 @@ LJLIB_CF(os_date)
- setboolfield(L, "isdst", stm->tm_isdst);
- } else if (*s) {
- SBuf *sb = &G(L)->tmpbuf;
-- MSize sz = 0;
-+ MSize sz = 0, retry = 4;
- const char *q;
- for (q = s; *q; q++)
- sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
- setsbufL(sb, L);
-- for (;;) {
-+ while (retry--) { /* Limit growth for invalid format or empty result. */
- char *buf = lj_buf_need(sb, sz);
- size_t len = strftime(buf, sbufsz(sb), s, stm);
- if (len) {
---
-2.20.1
-
diff --git a/0059-Improve-luaL_addlstring.patch b/0059-Improve-luaL_addlstring.patch
deleted file mode 100644
index 0bf7cf5..0000000
--- a/0059-Improve-luaL_addlstring.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From f0e865dd4861520258299d0f2a56491bd9d602e1 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 13:09:17 +0100
-Subject: [PATCH 59/72] Improve luaL_addlstring().
-
-Thanks to Domingo Alvarez Duarte.
----
- src/lib_aux.c | 11 +++++++++--
- 1 file changed, 9 insertions(+), 2 deletions(-)
-
-diff --git a/src/lib_aux.c b/src/lib_aux.c
-index c40565c..2682a38 100644
---- a/src/lib_aux.c
-+++ b/src/lib_aux.c
-@@ -218,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
-
- LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
- {
-- while (l--)
-- luaL_addchar(B, *s++);
-+ if (l <= bufffree(B)) {
-+ memcpy(B->p, s, l);
-+ B->p += l;
-+ } else {
-+ emptybuffer(B);
-+ lua_pushlstring(B->L, s, l);
-+ B->lvl++;
-+ adjuststack(B);
-+ }
- }
-
- LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
---
-2.20.1
-
diff --git a/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch b/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch
deleted file mode 100644
index 8fe6e8e..0000000
--- a/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 43a3893b0d7d82bfbfd13bf458a5906f755989c9 Mon Sep 17 00:00:00 2001
-From: Patrick Galizia <pgalizia.qdt(a)qualcommdatacenter.com>
-Date: Fri, 24 Aug 2018 11:02:15 -0400
-Subject: [PATCH 60/72] Fix arm64 register allocation issue for XLOAD.
-
-For the arm64 implementation of asm_xload(), it is possible for
-the dest register selected to be the same as one of the source
-registers generated in the asm_fusexref() call. To prevent this,
-exclude the dest register from the list of allowed registers for
-that call.
-
-Thanks to Javier for guidance as well as his script to replicate
-the issue.
----
- src/lj_asm_arm64.h | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index baafa21..045f260 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -1008,7 +1008,8 @@ static void asm_xload(ASMState *as, IRIns *ir)
- {
- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
-+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1,
-+ rset_exclude(RSET_GPR, dest));
- }
-
- static void asm_xstore(ASMState *as, IRIns *ir)
---
-2.20.1
-
diff --git a/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch b/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch
deleted file mode 100644
index 1f58f88..0000000
--- a/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From ec0d8427ade9346d356623072fcb91c2d11e3bda Mon Sep 17 00:00:00 2001
-From: Patrick Galizia <pgalizia.qdt(a)qualcommdatacenter.com>
-Date: Wed, 28 Nov 2018 14:14:35 -0500
-Subject: [PATCH 61/72] Fix arm64 register allocation issue for XLOAD.
-
-For arm64, it's possible for both IRRefs to fail asm_isk32(), but
-one of them pass irref_isk(). Add a secondary check for the latter
-call if both asm_isk32() calls fail.
----
- src/lj_asm_arm64.h | 18 +++++++++++++-----
- 1 file changed, 13 insertions(+), 5 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 045f260..ce49cde 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -295,9 +295,18 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
- } else if (asm_isk32(as, ir->op1, &ofs)) {
- ref = ir->op2;
- } else {
-- Reg rn = ra_alloc1(as, ir->op1, allow);
-- IRIns *irr = IR(ir->op2);
-+ IRRef ref1 = ir->op1;
-+ IRRef ref2 = ir->op2;
-+ Reg rn;
-+ IRIns *irr;
- uint32_t m;
-+
-+ if (irref_isk(ir->op1)) {
-+ ref1 = ir->op2;
-+ ref2 = ir->op1;
-+ }
-+ rn = ra_alloc1(as, ref1, allow);
-+ irr = IR(ref2);
- if (irr+1 == ir && !ra_used(irr) &&
- irr->o == IR_ADD && irref_isk(irr->op2)) {
- ofs = sizeof(GCstr) + IR(irr->op2)->i;
-@@ -307,7 +316,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
- goto skipopm;
- }
- }
-- m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
-+ m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn));
- ofs = sizeof(GCstr);
- skipopm:
- emit_lso(as, ai, rd, rd, ofs);
-@@ -1008,8 +1017,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
- {
- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1,
-- rset_exclude(RSET_GPR, dest));
-+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
- }
-
- static void asm_xstore(ASMState *as, IRIns *ir)
---
-2.20.1
-
diff --git a/0062-Remove-redundant-emit_check_ofs.patch b/0062-Remove-redundant-emit_check_ofs.patch
deleted file mode 100644
index 9b34eab..0000000
--- a/0062-Remove-redundant-emit_check_ofs.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 1fae7b08e319ba4028d303b09de72b026109a269 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Fri, 22 Feb 2019 19:05:38 +0000
-Subject: [PATCH 62/72] Remove redundant emit_check_ofs
-
-Even if the offset is a constant, it is not 32-bit since it failed
-that check earlier before it came here. The code is thus useless and
-hence removed. This also fixes inconsistencies with op1/op2 renaming
-that were introduced in PR #438. They were never triggered because
-the code path is effectively dead for arm64.
----
- src/lj_asm_arm64.h | 15 +--------------
- 1 file changed, 1 insertion(+), 14 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index ce49cde..c214e10 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -298,27 +298,14 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
- IRRef ref1 = ir->op1;
- IRRef ref2 = ir->op2;
- Reg rn;
-- IRIns *irr;
-- uint32_t m;
-
- if (irref_isk(ir->op1)) {
- ref1 = ir->op2;
- ref2 = ir->op1;
- }
- rn = ra_alloc1(as, ref1, allow);
-- irr = IR(ref2);
-- if (irr+1 == ir && !ra_used(irr) &&
-- irr->o == IR_ADD && irref_isk(irr->op2)) {
-- ofs = sizeof(GCstr) + IR(irr->op2)->i;
-- if (emit_checkofs(ai, ofs)) {
-- Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
-- m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
-- goto skipopm;
-- }
-- }
-- m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn));
-+ uint32_t m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn));
- ofs = sizeof(GCstr);
-- skipopm:
- emit_lso(as, ai, rd, rd, ofs);
- emit_dn(as, A64I_ADDx^m, rd, rn);
- return;
---
-2.20.1
-
diff --git a/0063-aarch64-Use-the-xzr-register-whenever-possible.patch b/0063-aarch64-Use-the-xzr-register-whenever-possible.patch
deleted file mode 100644
index c2b0505..0000000
--- a/0063-aarch64-Use-the-xzr-register-whenever-possible.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 8fc4ce1c981967fccd5366ace6add6d14cfcde89 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Mon, 25 Feb 2019 14:40:39 +0000
-Subject: [PATCH 63/72] aarch64: Use the xzr register whenever possible
-
-Using the xzr register for store inputs and the second operand of
-arithmetic operations frees up a register for use elsewhere.
----
- src/lj_asm_arm64.h | 31 ++++++++++++++++++++++++++++---
- 1 file changed, 28 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index c214e10..a826687 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -1007,10 +1007,30 @@ static void asm_xload(ASMState *as, IRIns *ir)
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
- }
-
-+static int maybe_zero_val(ASMState *as, IRRef ref)
-+{
-+ IRIns *ir = IR(ref);
-+
-+ switch(ir->o) {
-+ case IR_KNULL:
-+ return 1;
-+ case IR_KINT:
-+ return 0 == ir->i;
-+ case IR_KINT64:
-+ return 0 == ir_kint64(ir)->u64;
-+ }
-+
-+ return 0;
-+}
-+
- static void asm_xstore(ASMState *as, IRIns *ir)
- {
- if (ir->r != RID_SINK) {
-- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-+ Reg src;
-+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
-+ src = RID_ZERO;
-+ else
-+ src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
- rset_exclude(RSET_GPR, src));
- }
-@@ -1198,7 +1218,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
- /* Initialize immutable cdata object. */
- if (ir->o == IR_CNEWI) {
- int32_t ofs = sizeof(GCcdata);
-- Reg r = ra_alloc1(as, ir->op2, allow);
-+ Reg r;
-+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
-+ r = RID_ZERO;
-+ else
-+ r = ra_alloc1(as, ir->op2, allow);
-+
- lua_assert(sz == 4 || sz == 8);
- emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
- } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
-@@ -1214,7 +1239,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
-
- /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
- {
-- Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
-+ Reg r = id == 0 ? RID_ZERO : (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
- emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
- emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
- emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
---
-2.20.1
-
diff --git a/0065-Add-support-for-FNMADD-and-FNMSUB.patch b/0065-Add-support-for-FNMADD-and-FNMSUB.patch
deleted file mode 100644
index c1762f4..0000000
--- a/0065-Add-support-for-FNMADD-and-FNMSUB.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From e99ac1bc2df5c1d138bbc98d35d1a1892144cf2b Mon Sep 17 00:00:00 2001
-From: Sameera Deshpande <sameera.deshpande(a)linaro.org>
-Date: Fri, 15 Feb 2019 07:46:16 +0530
-Subject: [PATCH 65/72] Add support for FNMADD and FNMSUB.
-
----
- src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++-
- 1 file changed, 31 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index a826687..470e65d 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -344,6 +344,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
- return 0;
- }
-
-+/* Fuse FP neg-multiply-add/sub. */
-+static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
-+{
-+ IRRef ref = ir->op1;
-+ IRIns *irn = IR(ref);
-+ if (irn->o != IR_ADD && irn->o != IR_SUB)
-+ return 0;
-+
-+ if (!mayfuse(as, ref))
-+ return 0;
-+
-+ IRRef lref = irn->op1, rref = irn->op2;
-+ IRIns *irm;
-+ if (lref != rref &&
-+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
-+ ra_noreg(irm->r)) ||
-+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
-+ (rref = lref, ra_noreg(irm->r))))) {
-+ Reg dest = ra_dest(as, ir, RSET_FPR);
-+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
-+ Reg left = ra_alloc2(as, irm,
-+ rset_exclude(rset_exclude(RSET_FPR, dest), add));
-+ Reg right = (left >> 8); left &= 255;
- emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31));
-+ return 1;
-+ }
-+ return 0;
-+}
-+
- /* Fuse BAND + BSHL/BSHR into UBFM. */
- static int asm_fuseandshift(ASMState *as, IRIns *ir)
- {
-@@ -1481,7 +1510,8 @@ static void asm_mod(ASMState *as, IRIns *ir)
- static void asm_neg(ASMState *as, IRIns *ir)
- {
- if (irt_isnum(ir->t)) {
-- asm_fpunary(as, ir, A64I_FNEGd);
-+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
-+ asm_fpunary(as, ir, A64I_FNEGd);
- return;
- }
- asm_intneg(as, ir);
---
-2.20.1
-
diff --git a/0066-Fix-os.date-for-timezone-change-awareness.patch b/0066-Fix-os.date-for-timezone-change-awareness.patch
deleted file mode 100644
index afab3fe..0000000
--- a/0066-Fix-os.date-for-timezone-change-awareness.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 372bb8b22546663ba57e69fad75c97cfd004ac63 Mon Sep 17 00:00:00 2001
-From: Vivien HENRIET <bubuabu(a)bubuabu.org>
-Date: Wed, 30 Jan 2019 23:44:51 +0100
-Subject: [PATCH 66/72] Fix os.date() for timezone change awareness
-
-On POSIX targets, system timezone changes are not taken into account.
-To reproduce,
-1. call os.date()
-2. change your timezone
-3. call os.date() within the same luajit instance
-
-On POSIX targets, os.date uses localtime_r to retrieve the time.
-On other targets, the function localtime is used. But there is a behaviour
-difference between these two functions: localtime acts as if it called tzset,
-which localtime_r doesn't.
-
-To fix the issue tzset is called before localtime_r.
----
- src/lib_os.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/lib_os.c b/src/lib_os.c
-index ffbc3fd..09dc737 100644
---- a/src/lib_os.c
-+++ b/src/lib_os.c
-@@ -185,6 +185,7 @@ LJLIB_CF(os_date)
- #endif
- } else {
- #if LJ_TARGET_POSIX
-+ tzset();
- stm = localtime_r(&t, &rtm);
- #else
- stm = localtime(&t);
---
-2.20.1
-
diff --git a/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch b/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch
deleted file mode 100644
index 7f27204..0000000
--- a/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 49f19e7b31fc033ac1e9208580b5be31e2b66b19 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Thu, 14 Mar 2019 23:08:24 +0530
-Subject: [PATCH 67/72] Revert "FFI: Make FP to U64 conversions match JIT
- backend behavior."
-
-This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8.
-
-The patch breaks test 279, i.e.
-
- assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL")
-
-The patch was put in to make the JIT and interpreter behaviour
-consistent[1] for float to unsigned int conversions but it ended up
-making things worse. There needs to be a better fix for this.
-
-[1] https://github.com/LuaJIT/LuaJIT/pull/415
----
- src/lj_obj.h | 18 +++++-------------
- 1 file changed, 5 insertions(+), 13 deletions(-)
-
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index 72b7ace..c7e4742 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -942,22 +942,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-
- #define lj_num2int(n) ((int32_t)(n))
-
--/*
--** This must match the JIT backend behavior. In particular for archs
--** that don't have a common hardware instruction for this conversion.
--** Note that signed FP to unsigned int conversions have an undefined
--** result and should never be relied upon in portable FFI code.
--** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
--*/
- static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
- {
--#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
-- int64_t i = (int64_t)n;
-- if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
-- return (uint64_t)i;
--#else
-- return (uint64_t)n;
-+#ifdef _MSC_VER
-+ if (n >= 9223372036854775808.0) /* They think it's a feature. */
-+ return (uint64_t)(int64_t)(n - 18446744073709551616.0);
-+ else
- #endif
-+ return (uint64_t)n;
- }
-
- static LJ_AINLINE int32_t numberVint(cTValue *o)
---
-2.20.1
-
diff --git a/0068-bench-Fix-build-warnings.patch b/0068-bench-Fix-build-warnings.patch
deleted file mode 100644
index 5ee8bc7..0000000
--- a/0068-bench-Fix-build-warnings.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-commit 0513e634f0013083d29af9f5762b225297d3ad6c (HEAD -> v2.1, origin/v2.1)
-Author: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Fri Apr 12 20:42:55 2019 +0530
-
- Remove built binary from git
-
- Oops.
-
-From 9b4f498707569f3ecf81a0561a0d3d91570cec3d Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Fri, 15 Mar 2019 15:51:02 +0530
-Subject: [PATCH 68/72] bench: Fix build warnings
-
----
- bench/Makefile | 2 +-
- bench/luajit-bench | Bin 571144 -> 571224 bytes
- bench/luajit-bench.c | 1 +
- 3 files changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/bench/Makefile b/bench/Makefile
-index d0c1e8d..87d213a 100644
---- a/bench/Makefile
-+++ b/bench/Makefile
-@@ -44,7 +44,7 @@ endif
- LUAJIT_A = ../src/$(FILE_A)
-
- $(BENCH_BIN): $(LUAJIT_A) $(BENCH_BIN).c Makefile
-- $(CC) $@.c $(DURATION) -g -O3 -c -o $@.o -I ../src
-+ $(CC) $@.c -std=gnu11 $(DURATION) -g -O3 -c -o $@.o -I ../src
- $(CC) $@.o -lpthread $< -lm -ldl -o $@
-
- # Build the luajit static library if it doesn't exist.
-diff --git a/bench/luajit-bench.c b/bench/luajit-bench.c
-index e7b068d..6603132 100644
---- a/bench/luajit-bench.c
-+++ b/bench/luajit-bench.c
-@@ -39,6 +39,7 @@
- #include <argp.h>
- #include <sys/param.h>
- #include <string.h>
-+#include <time.h>
-
- #include "lua.h"
- #include "lualib.h"
---
-2.20.1
-
diff --git a/0069-Guard-against-undefined-behaviour-when-casting-from-.patch b/0069-Guard-against-undefined-behaviour-when-casting-from-.patch
deleted file mode 100644
index e498f62..0000000
--- a/0069-Guard-against-undefined-behaviour-when-casting-from-.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 454bea87cff4ff3cd2fd9ae34a3718dd200ce0fb Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Sun, 17 Mar 2019 11:34:04 +0530
-Subject: [PATCH 69/72] Guard against undefined behaviour when casting from
- float to unsigned
-
-Only range (-1.0, UINT64_MAX) can be safely converted to unsigned
-directly, and (-INT64_MAX, INT64_MAX) through a cast to int64_t first.
-The remaining range is undefined.
-
-TODO: Do the same for JIT as well as for float to other ranges.
----
- src/lj_obj.h | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index c7e4742..4ff5944 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -944,12 +944,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-
- static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
- {
-+ /* Undefined behaviour. This is deliberately not a full check because we
-+ don't want to slow down compliant code. */
-+ lua_assert(n >= -9223372036854775809.0);
- #ifdef _MSC_VER
- if (n >= 9223372036854775808.0) /* They think it's a feature. */
- return (uint64_t)(int64_t)(n - 18446744073709551616.0);
- else
- #endif
-- return (uint64_t)n;
-+ if (n > -1.0)
-+ return (uint64_t)n;
-+ else
-+ return (uint64_t)(int64_t)n;
- }
-
- static LJ_AINLINE int32_t numberVint(cTValue *o)
---
-2.20.1
-
diff --git a/0070-Fix-build-erro-with-fnmsub-fusing.patch b/0070-Fix-build-erro-with-fnmsub-fusing.patch
deleted file mode 100644
index a506e4f..0000000
--- a/0070-Fix-build-erro-with-fnmsub-fusing.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From ddca2290b8fa73fc32e88f83105219a1f2be75ff Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Mon, 25 Mar 2019 17:56:53 +0530
-Subject: [PATCH 70/72] Fix build erro with fnmsub fusing
-
----
- src/lj_asm_arm64.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 470e65d..42a4fae 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -1510,7 +1510,7 @@ static void asm_mod(ASMState *as, IRIns *ir)
- static void asm_neg(ASMState *as, IRIns *ir)
- {
- if (irt_isnum(ir->t)) {
-- if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
-+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd))
- asm_fpunary(as, ir, A64I_FNEGd);
- return;
- }
---
-2.20.1
-
diff --git a/0071-aarch64-better-float-to-unsigned-int-conversion.patch b/0071-aarch64-better-float-to-unsigned-int-conversion.patch
deleted file mode 100644
index 305f07b..0000000
--- a/0071-aarch64-better-float-to-unsigned-int-conversion.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From 70e65633d892765bcbaad3493e5b690abd5402f2 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Thu, 28 Mar 2019 09:19:34 +0530
-Subject: [PATCH 71/72] aarch64: better float to unsigned int conversion
-
-A straight float to unsigned conversion has a limited range of (-1.0,
-UTYPE_MAX) which should be fine in general but for the sake of
-consistency across the interpreter and the JIT compiler, it is
-necessary to work a wee bit harder to expand this range to (TYPE_MIN,
-UTYPE_MAX), which can be done with a simple range check. This adds a
-couple of branches but only one of the branches should have a
-noticeable performance impact on most processors with branch
-predictors, and that too only if the input number varies wildly in
-range.
-
-This currently works only for 64-bit conversions, 32-bit is still WIP.
----
- src/lj_asm_arm64.h | 30 ++++++++++++++++++++++--------
- src/lj_target_arm64.h | 1 +
- 2 files changed, 23 insertions(+), 8 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 42a4fae..c72144a 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -594,14 +594,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
- } else {
- Reg left = ra_alloc1(as, lref, RSET_FPR);
- Reg dest = ra_dest(as, ir, RSET_GPR);
-- A64Ins ai = irt_is64(ir->t) ?
-- (st == IRT_NUM ?
-- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
-- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
-- (st == IRT_NUM ?
-- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
-- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
-- emit_dn(as, ai, dest, (left & 31));
-+
-+ A64Ins ai_signed = st == IRT_NUM ?
-+ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) :
-+ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32);
-+
-+ if (irt_isi64(ir->t) || irt_isint(ir->t))
-+ emit_dn(as, ai_signed, dest, (left & 31));
-+ else {
-+ A64Ins ai_unsigned = st == IRT_NUM ?
-+ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) :
-+ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32);
-+
-+ MCLabel l_done = emit_label(as);
-+ emit_dn(as, ai_unsigned, dest, (left & 31));
-+ MCLabel l_signed = emit_label(as);
-+ emit_jmp(as, l_done);
-+ emit_dn(as, ai_signed, dest, (left & 31));
-+ /* The valid range for float to unsigned int conversion is (-1.0,
-+ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */
-+ emit_cond_branch(as, CC_PL, l_signed);
-+ emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0);
-+ }
- }
- } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
- Reg dest = ra_dest(as, ir, RSET_GPR);
-diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
-index a207a2b..2f8357f 100644
---- a/src/lj_target_arm64.h
-+++ b/src/lj_target_arm64.h
-@@ -279,6 +279,7 @@ typedef enum A64Ins {
- A64I_STPs = 0x2d000000,
- A64I_STPd = 0x6d000000,
- A64I_FCMPd = 0x1e602000,
-+ A64I_FCMPZs = 0x1e202008,
- A64I_FCMPZd = 0x1e602008,
- A64I_FCSELd = 0x1e600c00,
- A64I_FRINTMd = 0x1e654000,
---
-2.20.1
-
diff --git a/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch b/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch
deleted file mode 100644
index 20cb957..0000000
--- a/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From f2779155495aee6583abaff4700a7acda80864ef Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Thu, 28 Mar 2019 10:50:23 +0530
-Subject: [PATCH 72/72] Better behaviour for float to uint32_t conversions
-
-This is the uint32_t part of the float to unsigned int conversions for
-the interpreter. The cast ends up working correctly for x86 but not
-for aarch64 since fcvtzu sets the result to zero on negative inputs.
-Work slightly harder to make sure that negative number inputs behave
-like x86.
-
-This fixes the interpreter but not the JIT compiler, which errors out
-during the narrowing pass.
----
- src/lj_cconv.c | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_cconv.c b/src/lj_cconv.c
-index 13b8230..bf8f8e8 100644
---- a/src/lj_cconv.c
-+++ b/src/lj_cconv.c
-@@ -196,7 +196,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
- else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
- else *(int8_t *)dp = (int8_t)i;
- } else if (dsize == 4) {
-- *(uint32_t *)dp = (uint32_t)n;
-+ /* Undefined behaviour. This is deliberately not a full check because we
-+ * don't want to slow down compliant code. */
-+ lua_assert(n >= -2147483649.0);
-+ if (n > -1.0)
-+ *(uint32_t *)dp = (uint32_t)n;
-+ else
-+ *(uint32_t *)dp = (uint32_t)(int32_t)n;
- } else if (dsize == 8) {
- if (!(dinfo & CTF_UNSIGNED))
- *(int64_t *)dp = (int64_t)n;
---
-2.20.1
-
diff --git a/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch b/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch
deleted file mode 100644
index 44aeea4..0000000
--- a/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From 24429cc95657332e3953a21581d3220884da3d75 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Wed, 8 May 2019 22:14:00 +0530
-Subject: [PATCH] arm: Fix up condition codes for conditional arithmetic insn
-
-When an arithmetic instruction such as add or sub are combined with a
-subsequent compare with zero, its following conditional branch code
-needs fixing up. This is necessary because one could generate an add
-with a subtract of the negative but such a substitution, while correct
-on its own, will change the effect on condition flags since while
-addition of two positive numbers may signal an overflow, addition of a
-positive and a negative number may not. So if earlier the condition
-code was GE, it needs to be fixed up to PL to remain correct.
-
-We did that for bit operations but not for arithmetic, so do that now.
----
- src/lj_asm_arm.h | 38 ++++++++++++++++++++------------------
- 1 file changed, 20 insertions(+), 18 deletions(-)
-
-diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
-index 37bfa40f..e585b4c2 100644
---- a/src/lj_asm_arm.h
-+++ b/src/lj_asm_arm.h
-@@ -1412,13 +1412,28 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
- emit_dn(as, ai^m, dest, left);
- }
-
--static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
-+static ARMIns maybe_drop_zero_cmp(ASMState *as, ARMIns ai)
- {
-- if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
-+ if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
-+ uint32_t cc = (as->mcp[1] >> 28);
- as->flagmcp = NULL;
-- as->mcp++;
-- ai |= ARMI_S;
-+ if (cc <= CC_NE) {
-+ as->mcp++;
-+ ai |= ARMI_S;
-+ } else if (cc == CC_GE) {
-+ *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-+ ai |= ARMI_S;
-+ } else if (cc == CC_LT) {
-+ *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-+ ai |= ARMI_S;
-+ } /* else: other conds don't work with bit ops. */
- }
-+ return ai;
-+}
-+
-+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
-+{
-+ ai = maybe_drop_zero_cmp(as, ai);
- asm_intop(as, ir, ai);
- }
-
-@@ -1514,20 +1529,7 @@ static void asm_neg(ASMState *as, IRIns *ir)
-
- static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
- {
-- if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
-- uint32_t cc = (as->mcp[1] >> 28);
-- as->flagmcp = NULL;
-- if (cc <= CC_NE) {
-- as->mcp++;
-- ai |= ARMI_S;
-- } else if (cc == CC_GE) {
-- *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-- ai |= ARMI_S;
-- } else if (cc == CC_LT) {
-- *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-- ai |= ARMI_S;
-- } /* else: other conds don't work with bit ops. */
-- }
-+ ai = maybe_drop_zero_cmp(as, ai);
- if (ir->op2 == 0) {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
---
-2.21.0
-
diff --git a/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch b/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch
deleted file mode 100644
index 939ac87..0000000
--- a/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch
+++ /dev/null
@@ -1,160 +0,0 @@
-From a6a2720ddc22f9f62f119325881d05722c4f392e Mon Sep 17 00:00:00 2001
-From: Thibault Charbonnier <thibaultcha(a)me.com>
-Date: Tue, 19 Mar 2019 13:52:51 -0700
-Subject: [PATCH 1/3] bugfix: fixed a segfault when unsinking 64-bit pointers.
-
-The unsinking code was not using the correct layout for GC64 IR
-constants (value in adjacent slot) for this case.
-
-This patch is a derivative of
-https://github.com/raptorjit/raptorjit/pull/246 ported for LuaJIT
-itself.
-
-Fixed after an intense debugging session with @lukego.
-
-Co-authored-by: Luke Gorrie <lukego(a)gmail.com>
----
- src/lj_ir.h | 12 ++++++------
- src/lj_snap.c | 2 +-
- 2 files changed, 7 insertions(+), 7 deletions(-)
-
-diff --git a/src/lj_ir.h b/src/lj_ir.h
-index 8057a750..a46b561f 100644
---- a/src/lj_ir.h
-+++ b/src/lj_ir.h
-@@ -562,6 +562,11 @@ typedef union IRIns {
- TValue tv; /* TValue constant (overlaps entire slot). */
- } IRIns;
-
-+#define ir_isk64(ir) ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
-+ (LJ_GC64 && \
-+ ((ir)->o == IR_KGC || \
-+ (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)))
-+
- #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
- #define ir_kstr(ir) (gco2str(ir_kgc((ir))))
- #define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
-@@ -569,12 +574,7 @@ typedef union IRIns {
- #define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
- #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
- #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
--#define ir_k64(ir) \
-- check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
-- (LJ_GC64 && \
-- ((ir)->o == IR_KGC || \
-- (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \
-- &(ir)[1].tv)
-+#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv)
- #define ir_kptr(ir) \
- check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
- mref((ir)[LJ_GC64].ptr, void))
-diff --git a/src/lj_snap.c b/src/lj_snap.c
-index ceaf2ca5..75888d80 100644
---- a/src/lj_snap.c
-+++ b/src/lj_snap.c
-@@ -688,7 +688,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
- int32_t *src;
- uint64_t tmp;
- if (irref_isk(ref)) {
-- if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
-+ if (ir_isk64(ir)) {
- src = (int32_t *)&ir[1];
- } else if (sz == 8) {
- tmp = (uint64_t)(uint32_t)ir->i;
---
-2.21.0
-
-
-From f36cddf49b664d713bfa7c332673bdc66861d2ad Mon Sep 17 00:00:00 2001
-From: Thibault Charbonnier <thibaultcha(a)me.com>
-Date: Tue, 19 Mar 2019 13:49:18 -0700
-Subject: [PATCH 2/3] tests: ffi: added a test case unsinking a 64-bit pointer
- from a constant.
-
-This test case reproduces the issue observed at:
-https://github.com/openresty/lua-resty-core/issues/232 and was
-contributed by @lukego and myself.
-
-Co-authored-by: Luke Gorrie <lukego(a)gmail.com>
----
- test/ffi/unsink_64_kptr.lua | 26 ++++++++++++++++++++++++++
- 1 file changed, 26 insertions(+)
- create mode 100644 test/ffi/unsink_64_kptr.lua
-
-diff --git a/test/ffi/unsink_64_kptr.lua b/test/ffi/unsink_64_kptr.lua
-new file mode 100644
-index 00000000..7fab0e89
---- /dev/null
-+++ b/test/ffi/unsink_64_kptr.lua
-@@ -0,0 +1,26 @@
-+local ffi = require("ffi")
-+
-+local array = ffi.new("struct { int x; } [1]")
-+
-+-- This test forces the VM to unsink a pointer that was constructed
-+-- from a constant. The IR will include a 'cnewi' instruction to
-+-- allocate an FFI pointer object, the pointer value will be an IR
-+-- constant, the allocation will be sunk, and the allocation will
-+-- at some point be "unsunk" due to a reference in the snapshot for
-+-- a taken exit.
-+
-+-- Note: JIT will recognize <array> as a "singleton" and allow its
-+-- address to be inlined ("constified") instead of looking up the
-+-- upvalue at runtime.
-+
-+local function fn(i)
-+ local struct = array[0] -- Load pointer that the JIT will constify.
-+ if i == 1000 then end -- Force trace exit when i==1000.
-+ struct.x = 0 -- Ensure that 'struct' is live after exit.
-+end
-+
-+-- Loop over the function to make it compile and take a trace exit
-+-- during the final iteration.
-+for i = 1, 1000 do
-+ fn(i)
-+end
---
-2.21.0
-
-
-From 7b2f874b8061f206b22c04aee336b15030213637 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Tue, 14 May 2019 22:01:37 +0530
-Subject: [PATCH 3/3] Make unsink_64_kptr usable in the testsuite
-
----
- test/lib/ffi/index | 1 +
- test/{ => lib}/ffi/unsink_64_kptr.lua | 6 ++++--
- 2 files changed, 5 insertions(+), 2 deletions(-)
- rename test/{ => lib}/ffi/unsink_64_kptr.lua (93%)
-
-diff --git a/test/lib/ffi/index b/test/lib/ffi/index
-index 59e36dd8..7933c5a7 100644
---- a/test/lib/ffi/index
-+++ b/test/lib/ffi/index
-@@ -10,3 +10,4 @@ jit_struct.lua
- meta_tostring.lua
- redir.lua
- type_punning.lua
-+unsink_64_kptr.lua
-diff --git a/test/ffi/unsink_64_kptr.lua b/test/lib/ffi/unsink_64_kptr.lua
-similarity index 93%
-rename from test/ffi/unsink_64_kptr.lua
-rename to test/lib/ffi/unsink_64_kptr.lua
-index 7fab0e89..f285d9ff 100644
---- a/test/ffi/unsink_64_kptr.lua
-+++ b/test/lib/ffi/unsink_64_kptr.lua
-@@ -21,6 +21,8 @@ end
-
- -- Loop over the function to make it compile and take a trace exit
- -- during the final iteration.
--for i = 1, 1000 do
-- fn(i)
-+do --- unsink 64-bit pointers
-+ for i = 1, 1000 do
-+ fn(i)
-+ end
- end
---
-2.21.0
-
diff --git a/0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch b/luajit-2.1-fedora.patch
similarity index 99%
rename from 0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch
rename to luajit-2.1-fedora.patch
index fb2b611..e84dfa1 100644
--- a/0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch
+++ b/luajit-2.1-fedora.patch
@@ -1,15 +1,7 @@
-commit 0513e634f0013083d29af9f5762b225297d3ad6c (HEAD -> v2.1, origin/v2.1)
-Author: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Fri Apr 12 20:42:55 2019 +0530
-
- Remove built binary from git
-
- Oops.
-
-From 48eb69061df1da9d843707ec1d6b854255a3c87d Mon Sep 17 00:00:00 2001
+From 86a1a5033a3eb07e694f8e7f7024550928191024 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Tue, 12 Mar 2019 12:56:01 +0530
-Subject: [PATCH 64/72] Merge in LuaJIT-test-cleanup into the main repo
+Date: Thu, 21 Oct 2021 11:04:58 +0200
+Subject: [PATCH 01/10] Merge in LuaJIT-test-cleanup into the main repo
The tests and benchmarks in the LuaJIT-test-cleanup repo are more or
less complete and with scaffolding added, they can now be called
@@ -24,7 +16,6 @@ taken) and LuaJIT itself to allow for a more succint copyright notice
that credits authors in addition to Mike Pall in the COPYRIGHT file.
---
CONTRIBUTORS | 17 +
- COPYRIGHT | 3 +-
Makefile | 19 +-
bench/FASTA_10000 | 1671 +
bench/FASTA_1000000 | 166671 ++++++++++++++++++++++
@@ -38,9 +29,9 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
bench/SUMCOL_1.txt | 1000 +
bench/SUMCOL_100 | 100 +
bench/SUMCOL_1000 | 1000 +
- bench/TEST_md5sum.txt | 20 +
+ bench/TEST_md5sum.txt | 19 +
bench/TEST_md5sum_arm64.txt | 15 +
- bench/array3d.lua | 59 +
+ bench/array3d.lua | 58 +
bench/binary-trees.lua | 47 +
bench/chameneos.lua | 68 +
bench/coroutine-ring.lua | 42 +
@@ -49,12 +40,11 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
bench/fasta.lua | 95 +
bench/k-nucleotide.lua | 62 +
bench/life.lua | 111 +
- bench/luajit-bench | Bin 0 -> 571144 bytes
- bench/luajit-bench.c | 283 +
+ bench/luajit-bench.c | 284 +
bench/luajit-bench.lua | 53 +
bench/mandelbrot-bit.lua | 33 +
bench/mandelbrot.lua | 23 +
- bench/md5.lua | 183 +
+ bench/md5.lua | 182 +
bench/meteor.lua | 220 +
bench/nbody.lua | 119 +
bench/nsieve-bit-fp.lua | 37 +
@@ -66,7 +56,7 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
bench/recursive-ack.lua | 8 +
bench/recursive-fib.lua | 7 +
bench/revcomp.lua | 39 +
- bench/scimark-2010-12-20.lua | 400 +
+ bench/scimark-2010-12-20.lua | 399 +
bench/scimark-fft.lua | 1 +
bench/scimark-lu.lua | 1 +
bench/scimark-sor.lua | 1 +
@@ -79,14 +69,14 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/bc/constov.lua | 16 +
test/bc/index | 1 +
test/common/expect_error.lua | 16 +
- test/common/ffi_util.inc | 41 +
+ test/common/ffi_util.inc | 40 +
test/common/test_runner_canary.lua | 1 +
test/computations.lua | 113 +
test/index | 6 +
test/lang/andor.lua | 61 +
test/lang/assignment.lua | 46 +
test/lang/compare.lua | 323 +
- test/lang/compare_nan.lua | 99 +
+ test/lang/compare_nan.lua | 98 +
test/lang/concat.lua | 112 +
test/lang/constant/index | 2 +
test/lang/constant/number.lua | 12 +
@@ -100,9 +90,9 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lang/meta/arith.lua | 118 +
test/lang/meta/arith_jit.lua | 68 +
test/lang/meta/call.lua | 81 +
- test/lang/meta/cat.lua | 61 +
+ test/lang/meta/cat.lua | 60 +
test/lang/meta/comp.lua | 120 +
- test/lang/meta/comp_jit.lua | 104 +
+ test/lang/meta/comp_jit.lua | 103 +
test/lang/meta/debuginfo.lua | 81 +
test/lang/meta/eq.lua | 30 +
test/lang/meta/eq_jit.lua | 35 +
@@ -111,14 +101,14 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lang/meta/index.lua | 60 +
test/lang/meta/len.lua | 42 +
test/lang/meta/newindex.lua | 69 +
- test/lang/meta/nomm.lua | 21 +
+ test/lang/meta/nomm.lua | 20 +
test/lang/modulo.lua | 46 +
test/lang/self.lua | 19 +
test/lang/table.lua | 32 +
test/lang/tail_recursion.lua | 20 +
test/lang/upvalue/closure.lua | 84 +
test/lang/upvalue/index | 1 +
- test/lang/vararg_jit.lua | 95 +
+ test/lang/vararg_jit.lua | 94 +
test/lib/base/assert.lua | 33 +
test/lib/base/error.lua | 43 +
test/lib/base/getfenv.lua | 13 +
@@ -135,36 +125,36 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lib/contents.lua | 158 +
test/lib/coroutine/index | 1 +
test/lib/coroutine/yield.lua | 109 +
- test/lib/ffi/bit64.lua | 130 +
+ test/lib/ffi/bit64.lua | 129 +
test/lib/ffi/cdata_var.lua | 47 +
- test/lib/ffi/copy_fill.lua | 64 +
- test/lib/ffi/err.lua | 35 +
- test/lib/ffi/ffi_arith_ptr.lua | 106 +
- test/lib/ffi/ffi_bitfield.lua | 108 +
- test/lib/ffi/ffi_call.lua | 266 +
- test/lib/ffi/ffi_callback.lua | 158 +
- test/lib/ffi/ffi_const.lua | 113 +
- test/lib/ffi/ffi_convert.lua | 787 +
- test/lib/ffi/ffi_enum.lua | 57 +
- test/lib/ffi/ffi_gcstep_recursive.lua | 66 +
- test/lib/ffi/ffi_jit_arith.lua | 155 +
- test/lib/ffi/ffi_jit_call.lua | 154 +
+ test/lib/ffi/copy_fill.lua | 63 +
+ test/lib/ffi/err.lua | 34 +
+ test/lib/ffi/ffi_arith_ptr.lua | 105 +
+ test/lib/ffi/ffi_bitfield.lua | 107 +
+ test/lib/ffi/ffi_call.lua | 265 +
+ test/lib/ffi/ffi_callback.lua | 157 +
+ test/lib/ffi/ffi_const.lua | 112 +
+ test/lib/ffi/ffi_convert.lua | 786 +
+ test/lib/ffi/ffi_enum.lua | 56 +
+ test/lib/ffi/ffi_gcstep_recursive.lua | 65 +
+ test/lib/ffi/ffi_jit_arith.lua | 154 +
+ test/lib/ffi/ffi_jit_call.lua | 153 +
test/lib/ffi/ffi_jit_conv.lua | 277 +
- test/lib/ffi/ffi_lex_number.lua | 51 +
- test/lib/ffi/ffi_metatype.lua | 245 +
- test/lib/ffi/ffi_new.lua | 106 +
- test/lib/ffi/ffi_parse_array.lua | 78 +
- test/lib/ffi/ffi_parse_basic.lua | 131 +
- test/lib/ffi/ffi_parse_cdef.lua | 77 +
- test/lib/ffi/ffi_parse_struct.lua | 259 +
+ test/lib/ffi/ffi_lex_number.lua | 50 +
+ test/lib/ffi/ffi_metatype.lua | 244 +
+ test/lib/ffi/ffi_new.lua | 105 +
+ test/lib/ffi/ffi_parse_array.lua | 77 +
+ test/lib/ffi/ffi_parse_basic.lua | 130 +
+ test/lib/ffi/ffi_parse_cdef.lua | 76 +
+ test/lib/ffi/ffi_parse_struct.lua | 258 +
test/lib/ffi/ffi_tabov.lua | 12 +
test/lib/ffi/index | 12 +
test/lib/ffi/istype.lua | 88 +
test/lib/ffi/jit_array.lua | 104 +
test/lib/ffi/jit_complex.lua | 109 +
test/lib/ffi/jit_misc.lua | 109 +
- test/lib/ffi/jit_struct.lua | 201 +
- test/lib/ffi/meta_tostring.lua | 55 +
+ test/lib/ffi/jit_struct.lua | 200 +
+ test/lib/ffi/meta_tostring.lua | 54 +
test/lib/ffi/redir.lua | 19 +
test/lib/ffi/type_punning.lua | 138 +
test/lib/index | 8 +
@@ -187,50 +177,50 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lib/string/sub.lua | 189 +
test/lib/table/concat.lua | 55 +
test/lib/table/index | 6 +
- test/lib/table/insert.lua | 17 +
- test/lib/table/misc.lua | 58 +
+ test/lib/table/insert.lua | 16 +
+ test/lib/table/misc.lua | 55 +
test/lib/table/new.lua | 11 +
test/lib/table/pack.lua | 7 +
test/lib/table/remove.lua | 42 +
test/lib/table/sort.lua | 27 +
- test/misc/alias_alloc.lua | 54 +
- test/misc/api_call.lua | 98 +
- test/misc/catch_wrap.lua | 45 +
- test/misc/coro_traceback.lua | 8 +
- test/misc/coro_yield.lua | 111 +
+ test/misc/alias_alloc.lua | 53 +
+ test/misc/api_call.lua | 97 +
+ test/misc/catch_wrap.lua | 44 +
+ test/misc/coro_traceback.lua | 7 +
+ test/misc/coro_yield.lua | 110 +
test/misc/debug_gc.lua | 47 +
- test/misc/dualnum.lua | 47 +
- test/misc/for_dir.lua | 13 +
- test/misc/fori_coerce.lua | 33 +
- test/misc/gc_rechain.lua | 32 +
- test/misc/gc_trace.lua | 37 +
- test/misc/gcstep.lua | 33 +
- test/misc/hook_active.lua | 95 +
- test/misc/hook_line.lua | 41 +
- test/misc/hook_norecord.lua | 12 +
- test/misc/hook_record.lua | 8 +
- test/misc/hook_top.lua | 55 +
- test/misc/jit_flush.lua | 50 +
- test/misc/lightud.lua | 88 +
- test/misc/loop_unroll.lua | 35 +
+ test/misc/dualnum.lua | 46 +
+ test/misc/for_dir.lua | 12 +
+ test/misc/fori_coerce.lua | 32 +
+ test/misc/gc_rechain.lua | 31 +
+ test/misc/gc_trace.lua | 36 +
+ test/misc/gcstep.lua | 32 +
+ test/misc/hook_active.lua | 94 +
+ test/misc/hook_line.lua | 40 +
+ test/misc/hook_norecord.lua | 11 +
+ test/misc/hook_record.lua | 7 +
+ test/misc/hook_top.lua | 54 +
+ test/misc/jit_flush.lua | 49 +
+ test/misc/lightud.lua | 87 +
+ test/misc/loop_unroll.lua | 34 +
test/misc/parse_comp.lua | 13 +
test/misc/parse_esc.lua | 7 +
- test/misc/parse_misc.lua | 31 +
- test/misc/phi_conv.lua | 53 +
- test/misc/recurse_deep.lua | 29 +
- test/misc/recurse_tail.lua | 22 +
- test/misc/stack_gc.lua | 15 +
- test/misc/stack_purge.lua | 25 +
- test/misc/stackov.lua | 40 +
+ test/misc/parse_misc.lua | 30 +
+ test/misc/phi_conv.lua | 52 +
+ test/misc/recurse_deep.lua | 28 +
+ test/misc/recurse_tail.lua | 21 +
+ test/misc/stack_gc.lua | 14 +
+ test/misc/stack_purge.lua | 24 +
+ test/misc/stackov.lua | 39 +
test/misc/stackovc.lua | 4 +
- test/misc/tcall_base.lua | 20 +
+ test/misc/tcall_base.lua | 19 +
test/misc/tcall_loop.lua | 8 +
- test/misc/tonumber_scan.lua | 180 +
- test/misc/uclo.lua | 91 +
- test/misc/unordered_jit.lua | 96 +
+ test/misc/tonumber_scan.lua | 179 +
+ test/misc/uclo.lua | 90 +
+ test/misc/unordered_jit.lua | 95 +
test/misc/wbarrier.lua | 7 +
- test/misc/wbarrier_jit.lua | 18 +
- test/misc/wbarrier_obar.lua | 22 +
+ test/misc/wbarrier_jit.lua | 17 +
+ test/misc/wbarrier_obar.lua | 21 +
test/opt/dse/array.lua | 197 +
test/opt/dse/field.lua | 70 +
test/opt/dse/index | 2 +
@@ -251,11 +241,11 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/opt/sink/nosink.lua | 109 +
test/src/cpptest.cpp | 129 +
test/src/ctest.c | 339 +
- test/sysdep/catch_cpp.lua | 71 +
+ test/sysdep/catch_cpp.lua | 70 +
test/sysdep/ffi_include_gtk.lua | 9 +
- test/sysdep/ffi_include_std.lua | 36 +
- test/sysdep/ffi_lib_c.lua | 87 +
- test/sysdep/ffi_lib_z.lua | 107 +
+ test/sysdep/ffi_include_std.lua | 35 +
+ test/sysdep/ffi_lib_c.lua | 86 +
+ test/sysdep/ffi_lib_z.lua | 106 +
test/test.lua | 416 +
test/trace/exit_frame.lua | 79 +
test/trace/exit_growstack.lua | 28 +
@@ -269,8 +259,8 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/trace/snap.lua | 47 +
test/trace/stitch.lua | 19 +
test/unportable/ffi_arith_int64.lua | 68 +
- test/unportable/math_special.lua | 55 +
- 247 files changed, 186644 insertions(+), 5 deletions(-)
+ test/unportable/math_special.lua | 54 +
+ 245 files changed, 186570 insertions(+), 4 deletions(-)
create mode 100644 CONTRIBUTORS
create mode 100644 bench/FASTA_10000
create mode 100644 bench/FASTA_1000000
@@ -295,7 +285,6 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
create mode 100644 bench/fasta.lua
create mode 100644 bench/k-nucleotide.lua
create mode 100644 bench/life.lua
- create mode 100755 bench/luajit-bench
create mode 100644 bench/luajit-bench.c
create mode 100644 bench/luajit-bench.lua
create mode 100644 bench/mandelbrot-bit.lua
@@ -519,7 +508,7 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
-index 0000000..a1c9209
+index 00000000..a1c9209b
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,17 @@
@@ -540,22 +529,8 @@ index 0000000..a1c9209
+Siddhesh Poyarekar
+Vlad Krasnov
+William Adams
-diff --git a/COPYRIGHT b/COPYRIGHT
-index 6ed4002..1e5c442 100644
---- a/COPYRIGHT
-+++ b/COPYRIGHT
-@@ -1,7 +1,8 @@
- ===============================================================================
- LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
-
--Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-+Copyright (C) 2005-2019 Mike Pall. All rights reserved.
-+Copyright (C) 2015-2019 LuaJIT Contributors, see CONTRIBUTORS file for a list.
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
diff --git a/Makefile b/Makefile
-index 0f93308..923bf72 100644
+index aa1b84bd..cb2b3418 100644
--- a/Makefile
+++ b/Makefile
@@ -106,14 +106,14 @@ endif
@@ -604,7 +579,7 @@ index 0f93308..923bf72 100644
##############################################################################
diff --git a/bench/FASTA_10000 b/bench/FASTA_10000
new file mode 100644
-index 0000000..fb23263
+index 00000000..fb232633
--- /dev/null
+++ b/bench/FASTA_10000
@@ -0,0 +1,1671 @@
@@ -2281,7 +2256,7 @@ index 0000000..fb23263
+gagatacctttgcaattttt
diff --git a/bench/FASTA_1000000 b/bench/FASTA_1000000
new file mode 100644
-index 0000000..bafe0c5
+index 00000000..bafe0c5d
--- /dev/null
+++ b/bench/FASTA_1000000
@@ -0,0 +1,166671 @@
@@ -168958,7 +168933,7 @@ index 0000000..bafe0c5
+tacactgatacgaattattt
diff --git a/bench/Makefile b/bench/Makefile
new file mode 100644
-index 0000000..d0c1e8d
+index 00000000..87d213a5
--- /dev/null
+++ b/bench/Makefile
@@ -0,0 +1,56 @@
@@ -169008,7 +168983,7 @@ index 0000000..d0c1e8d
+LUAJIT_A = ../src/$(FILE_A)
+
+$(BENCH_BIN): $(LUAJIT_A) $(BENCH_BIN).c Makefile
-+ $(CC) $@.c $(DURATION) -g -O3 -c -o $@.o -I ../src
++ $(CC) $@.c -std=gnu11 $(DURATION) -g -O3 -c -o $@.o -I ../src
+ $(CC) $@.o -lpthread $< -lm -ldl -o $@
+
+# Build the luajit static library if it doesn't exist.
@@ -169020,7 +168995,7 @@ index 0000000..d0c1e8d
+endif
diff --git a/bench/PARAM_arm.txt b/bench/PARAM_arm.txt
new file mode 100644
-index 0000000..a07fd01
+index 00000000..a07fd010
--- /dev/null
+++ b/bench/PARAM_arm.txt
@@ -0,0 +1,29 @@
@@ -169055,7 +169030,7 @@ index 0000000..a07fd01
+sum-file 1000 SUMCOL_1000
diff --git a/bench/PARAM_arm64.txt b/bench/PARAM_arm64.txt
new file mode 100644
-index 0000000..1c27638
+index 00000000..1c276385
--- /dev/null
+++ b/bench/PARAM_arm64.txt
@@ -0,0 +1,29 @@
@@ -169090,7 +169065,7 @@ index 0000000..1c27638
+sum-file 2e7 SUMCOL_1000
diff --git a/bench/PARAM_mips.txt b/bench/PARAM_mips.txt
new file mode 100644
-index 0000000..e6bcadb
+index 00000000..e6bcadba
--- /dev/null
+++ b/bench/PARAM_mips.txt
@@ -0,0 +1,29 @@
@@ -169125,7 +169100,7 @@ index 0000000..e6bcadb
+sum-file 100 SUMCOL_100
diff --git a/bench/PARAM_ppc.txt b/bench/PARAM_ppc.txt
new file mode 100644
-index 0000000..c8319a1
+index 00000000..c8319a15
--- /dev/null
+++ b/bench/PARAM_ppc.txt
@@ -0,0 +1,29 @@
@@ -169160,7 +169135,7 @@ index 0000000..c8319a1
+sum-file 1000 SUMCOL_1000
diff --git a/bench/PARAM_x86.txt b/bench/PARAM_x86.txt
new file mode 100644
-index 0000000..87088d7
+index 00000000..87088d7b
--- /dev/null
+++ b/bench/PARAM_x86.txt
@@ -0,0 +1,29 @@
@@ -169195,7 +169170,7 @@ index 0000000..87088d7
+sum-file 5000 SUMCOL_5000
diff --git a/bench/README b/bench/README
new file mode 100644
-index 0000000..16f55cb
+index 00000000..16f55cbb
--- /dev/null
+++ b/bench/README
@@ -0,0 +1,37 @@
@@ -169238,7 +169213,7 @@ index 0000000..16f55cb
+results using the benchmark binary with that of the script.
diff --git a/bench/SUMCOL_1.txt b/bench/SUMCOL_1.txt
new file mode 100644
-index 0000000..956aba1
+index 00000000..956aba14
--- /dev/null
+++ b/bench/SUMCOL_1.txt
@@ -0,0 +1,1000 @@
@@ -170244,7 +170219,7 @@ index 0000000..956aba1
+264
diff --git a/bench/SUMCOL_100 b/bench/SUMCOL_100
new file mode 100644
-index 0000000..daf0c7b
+index 00000000..daf0c7bb
--- /dev/null
+++ b/bench/SUMCOL_100
@@ -0,0 +1,100 @@
@@ -170350,7 +170325,7 @@ index 0000000..daf0c7b
+264
diff --git a/bench/SUMCOL_1000 b/bench/SUMCOL_1000
new file mode 100644
-index 0000000..956aba1
+index 00000000..956aba14
--- /dev/null
+++ b/bench/SUMCOL_1000
@@ -0,0 +1,1000 @@
@@ -171356,10 +171331,10 @@ index 0000000..956aba1
+264
diff --git a/bench/TEST_md5sum.txt b/bench/TEST_md5sum.txt
new file mode 100644
-index 0000000..15aa8a1
+index 00000000..7d417a88
--- /dev/null
+++ b/bench/TEST_md5sum.txt
-@@ -0,0 +1,20 @@
+@@ -0,0 +1,19 @@
+binarytrees 10 7202f4e13df7abc5ad8c07f05fe9d644
+chameneos 1e5 a629ce12f63050c6656bce175258cf8f
+cheapconcr 1000 d29799d1e263810a4db7bbf43ca66499
@@ -171379,10 +171354,9 @@ index 0000000..15aa8a1
+revcomp x 47de276e2f72519b57b82da39f4c7592 <FASTA_10000
+spectralnorm 200 25f44bd552ccd9faa0ee2ae5617947e2
+sumfile x 2ebd3caa45b31a2e74e436b645eab4b0 <SUMCOL_100
-+
diff --git a/bench/TEST_md5sum_arm64.txt b/bench/TEST_md5sum_arm64.txt
new file mode 100644
-index 0000000..deab02e
+index 00000000..deab02e5
--- /dev/null
+++ b/bench/TEST_md5sum_arm64.txt
@@ -0,0 +1,15 @@
@@ -171403,10 +171377,10 @@ index 0000000..deab02e
+sum-file x 5d6b881128665a84e8863cac991b18a2 SUMCOL_100
diff --git a/bench/array3d.lua b/bench/array3d.lua
new file mode 100644
-index 0000000..d638e4d
+index 00000000..0c83c6c8
--- /dev/null
+++ b/bench/array3d.lua
-@@ -0,0 +1,59 @@
+@@ -0,0 +1,58 @@
+
+local function array_set(self, x, y, z, p)
+ assert(x >= 0 and x < self.nx, "x outside PA")
@@ -171465,10 +171439,9 @@ index 0000000..d638e4d
+ arr:set(x, y, z, x*x)
+end
+assert(arr.image[dim^3-1] == (dim-1)^2)
-+
diff --git a/bench/binary-trees.lua b/bench/binary-trees.lua
new file mode 100644
-index 0000000..bf04046
+index 00000000..bf040466
--- /dev/null
+++ b/bench/binary-trees.lua
@@ -0,0 +1,47 @@
@@ -171521,7 +171494,7 @@ index 0000000..bf04046
+ maxdepth, ItemCheck(longlivedtree)))
diff --git a/bench/chameneos.lua b/bench/chameneos.lua
new file mode 100644
-index 0000000..78b64c3
+index 00000000..78b64c3f
--- /dev/null
+++ b/bench/chameneos.lua
@@ -0,0 +1,68 @@
@@ -171595,7 +171568,7 @@ index 0000000..78b64c3
+io.write(schedule(threads), "\n")
diff --git a/bench/coroutine-ring.lua b/bench/coroutine-ring.lua
new file mode 100644
-index 0000000..1e8c5ef
+index 00000000..1e8c5ef6
--- /dev/null
+++ b/bench/coroutine-ring.lua
@@ -0,0 +1,42 @@
@@ -171643,7 +171616,7 @@ index 0000000..1e8c5ef
+io.write(id, "\n")
diff --git a/bench/euler14-bit.lua b/bench/euler14-bit.lua
new file mode 100644
-index 0000000..537f2bf
+index 00000000..537f2bf3
--- /dev/null
+++ b/bench/euler14-bit.lua
@@ -0,0 +1,22 @@
@@ -171671,7 +171644,7 @@ index 0000000..537f2bf
+io.write("Found ", n, " (chain length: ", m, ")\n")
diff --git a/bench/fannkuch.lua b/bench/fannkuch.lua
new file mode 100644
-index 0000000..2a4cd42
+index 00000000..2a4cd426
--- /dev/null
+++ b/bench/fannkuch.lua
@@ -0,0 +1,50 @@
@@ -171727,7 +171700,7 @@ index 0000000..2a4cd42
+io.write("Pfannkuchen(", n, ") = ", fannkuch(n), "\n")
diff --git a/bench/fasta.lua b/bench/fasta.lua
new file mode 100644
-index 0000000..7ce6080
+index 00000000..7ce60804
--- /dev/null
+++ b/bench/fasta.lua
@@ -0,0 +1,95 @@
@@ -171828,7 +171801,7 @@ index 0000000..7ce6080
+make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, N*5)
diff --git a/bench/k-nucleotide.lua b/bench/k-nucleotide.lua
new file mode 100644
-index 0000000..b97e394
+index 00000000..b97e394c
--- /dev/null
+++ b/bench/k-nucleotide.lua
@@ -0,0 +1,62 @@
@@ -171896,7 +171869,7 @@ index 0000000..b97e394
+count(seq, "GGTATTTTAATTTATAGT")
diff --git a/bench/life.lua b/bench/life.lua
new file mode 100644
-index 0000000..911d9fe
+index 00000000..911d9fe1
--- /dev/null
+++ b/bench/life.lua
@@ -0,0 +1,111 @@
@@ -172013,10 +171986,10 @@ index 0000000..911d9fe
+LIFE(40,20)
diff --git a/bench/luajit-bench.c b/bench/luajit-bench.c
new file mode 100644
-index 0000000..e7b068d
+index 00000000..6603132b
--- /dev/null
+++ b/bench/luajit-bench.c
-@@ -0,0 +1,283 @@
+@@ -0,0 +1,284 @@
+/* Benchmark driver.
+ *
+ * Copyright (C) 2019 Vlad Krasnov
@@ -172058,6 +172031,7 @@ index 0000000..e7b068d
+#include <argp.h>
+#include <sys/param.h>
+#include <string.h>
++#include <time.h>
+
+#include "lua.h"
+#include "lualib.h"
@@ -172302,7 +172276,7 @@ index 0000000..e7b068d
+}
diff --git a/bench/luajit-bench.lua b/bench/luajit-bench.lua
new file mode 100644
-index 0000000..7238725
+index 00000000..72387254
--- /dev/null
+++ b/bench/luajit-bench.lua
@@ -0,0 +1,53 @@
@@ -172361,7 +172335,7 @@ index 0000000..7238725
+end
diff --git a/bench/mandelbrot-bit.lua b/bench/mandelbrot-bit.lua
new file mode 100644
-index 0000000..91d9697
+index 00000000..91d96975
--- /dev/null
+++ b/bench/mandelbrot-bit.lua
@@ -0,0 +1,33 @@
@@ -172400,7 +172374,7 @@ index 0000000..91d9697
+end
diff --git a/bench/mandelbrot.lua b/bench/mandelbrot.lua
new file mode 100644
-index 0000000..0ef595a
+index 00000000..0ef595a2
--- /dev/null
+++ b/bench/mandelbrot.lua
@@ -0,0 +1,23 @@
@@ -172429,10 +172403,10 @@ index 0000000..0ef595a
+end
diff --git a/bench/md5.lua b/bench/md5.lua
new file mode 100644
-index 0000000..fdf6b4a
+index 00000000..c4c087ee
--- /dev/null
+++ b/bench/md5.lua
-@@ -0,0 +1,183 @@
+@@ -0,0 +1,182 @@
+
+local bit = require("bit")
+local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot
@@ -172615,10 +172589,9 @@ index 0000000..fdf6b4a
+ res = md5(txt)
+end
+assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd')
-+
diff --git a/bench/meteor.lua b/bench/meteor.lua
new file mode 100644
-index 0000000..80588ab
+index 00000000..80588ab5
--- /dev/null
+++ b/bench/meteor.lua
@@ -0,0 +1,220 @@
@@ -172844,7 +172817,7 @@ index 0000000..80588ab
+printresult()
diff --git a/bench/nbody.lua b/bench/nbody.lua
new file mode 100644
-index 0000000..e0ff8f7
+index 00000000..e0ff8f77
--- /dev/null
+++ b/bench/nbody.lua
@@ -0,0 +1,119 @@
@@ -172969,7 +172942,7 @@ index 0000000..e0ff8f7
+io.write( string.format("%0.9f",energy(bodies, nbody)), "\n")
diff --git a/bench/nsieve-bit-fp.lua b/bench/nsieve-bit-fp.lua
new file mode 100644
-index 0000000..3971ec1
+index 00000000..3971ec1f
--- /dev/null
+++ b/bench/nsieve-bit-fp.lua
@@ -0,0 +1,37 @@
@@ -173012,7 +172985,7 @@ index 0000000..3971ec1
+end
diff --git a/bench/nsieve-bit.lua b/bench/nsieve-bit.lua
new file mode 100644
-index 0000000..820a372
+index 00000000..820a3726
--- /dev/null
+++ b/bench/nsieve-bit.lua
@@ -0,0 +1,27 @@
@@ -173045,7 +173018,7 @@ index 0000000..820a372
+end
diff --git a/bench/nsieve.lua b/bench/nsieve.lua
new file mode 100644
-index 0000000..6de0524
+index 00000000..6de0524f
--- /dev/null
+++ b/bench/nsieve.lua
@@ -0,0 +1,21 @@
@@ -173072,7 +173045,7 @@ index 0000000..6de0524
+end
diff --git a/bench/partialsums.lua b/bench/partialsums.lua
new file mode 100644
-index 0000000..09ac02f
+index 00000000..09ac02f9
--- /dev/null
+++ b/bench/partialsums.lua
@@ -0,0 +1,29 @@
@@ -173107,7 +173080,7 @@ index 0000000..09ac02f
+pr("%.9f\tGregory\n", a9)
diff --git a/bench/pidigits-nogmp.lua b/bench/pidigits-nogmp.lua
new file mode 100644
-index 0000000..63a1cb0
+index 00000000..63a1cb0e
--- /dev/null
+++ b/bench/pidigits-nogmp.lua
@@ -0,0 +1,100 @@
@@ -173213,7 +173186,7 @@ index 0000000..63a1cb0
+end
diff --git a/bench/ray.lua b/bench/ray.lua
new file mode 100644
-index 0000000..86f159b
+index 00000000..873cc995
--- /dev/null
+++ b/bench/ray.lua
@@ -0,0 +1,135 @@
@@ -173346,7 +173319,7 @@ index 0000000..86f159b
+ for d = y, y+.99, iss do
+ for e = x, x+.99, iss do
+ dir[1], dir[2], dir[3] = unitise(e, d, n)
-+ g = g + ray_trace(light, camera, dir, scene)
++ g = g + ray_trace(light, camera, dir, scene)
+ end
+ end
+ io.write(string.char(math.floor(0.5 + g*gf)))
@@ -173354,7 +173327,7 @@ index 0000000..86f159b
+end
diff --git a/bench/recursive-ack.lua b/bench/recursive-ack.lua
new file mode 100644
-index 0000000..fad3058
+index 00000000..fad30589
--- /dev/null
+++ b/bench/recursive-ack.lua
@@ -0,0 +1,8 @@
@@ -173368,7 +173341,7 @@ index 0000000..fad3058
+io.write("Ack(3,", N ,"): ", Ack(3,N), "\n")
diff --git a/bench/recursive-fib.lua b/bench/recursive-fib.lua
new file mode 100644
-index 0000000..53b6f96
+index 00000000..53b6f96c
--- /dev/null
+++ b/bench/recursive-fib.lua
@@ -0,0 +1,7 @@
@@ -173381,7 +173354,7 @@ index 0000000..53b6f96
+io.write(string.format("Fib(%d): %d\n", n, fib(n)))
diff --git a/bench/revcomp.lua b/bench/revcomp.lua
new file mode 100644
-index 0000000..90b3d5c
+index 00000000..90b3d5c5
--- /dev/null
+++ b/bench/revcomp.lua
@@ -0,0 +1,39 @@
@@ -173426,10 +173399,10 @@ index 0000000..90b3d5c
+writerev(t, n)
diff --git a/bench/scimark-2010-12-20.lua b/bench/scimark-2010-12-20.lua
new file mode 100644
-index 0000000..353acb7
+index 00000000..25f34eeb
--- /dev/null
+++ b/bench/scimark-2010-12-20.lua
-@@ -0,0 +1,400 @@
+@@ -0,0 +1,399 @@
+------------------------------------------------------------------------------
+-- Lua SciMark (2010-12-20).
+--
@@ -173829,38 +173802,37 @@ index 0000000..353acb7
+end
+printf("\nSciMark %8.2f [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT)
+io.flush()
-+
diff --git a/bench/scimark-fft.lua b/bench/scimark-fft.lua
new file mode 100644
-index 0000000..c05bb69
+index 00000000..c05bb69a
--- /dev/null
+++ b/bench/scimark-fft.lua
@@ -0,0 +1 @@
+require("scimark_lib").FFT(1024)(tonumber(arg and arg[1]) or 50000)
diff --git a/bench/scimark-lu.lua b/bench/scimark-lu.lua
new file mode 100644
-index 0000000..7636d99
+index 00000000..7636d994
--- /dev/null
+++ b/bench/scimark-lu.lua
@@ -0,0 +1 @@
+require("scimark_lib").LU(100)(tonumber(arg and arg[1]) or 5000)
diff --git a/bench/scimark-sor.lua b/bench/scimark-sor.lua
new file mode 100644
-index 0000000..e537e98
+index 00000000..e537e986
--- /dev/null
+++ b/bench/scimark-sor.lua
@@ -0,0 +1 @@
+require("scimark_lib").SOR(100)(tonumber(arg and arg[1]) or 50000)
diff --git a/bench/scimark-sparse.lua b/bench/scimark-sparse.lua
new file mode 100644
-index 0000000..01a2258
+index 00000000..01a2258d
--- /dev/null
+++ b/bench/scimark-sparse.lua
@@ -0,0 +1 @@
+require("scimark_lib").SPARSE(1000, 5000)(tonumber(arg and arg[1]) or 150000)
diff --git a/bench/scimark_lib.lua b/bench/scimark_lib.lua
new file mode 100644
-index 0000000..aeffd75
+index 00000000..aeffd75a
--- /dev/null
+++ b/bench/scimark_lib.lua
@@ -0,0 +1,297 @@
@@ -174163,7 +174135,7 @@ index 0000000..aeffd75
+return benchmarks
diff --git a/bench/series.lua b/bench/series.lua
new file mode 100644
-index 0000000..f766cb3
+index 00000000..f766cb32
--- /dev/null
+++ b/bench/series.lua
@@ -0,0 +1,34 @@
@@ -174203,7 +174175,7 @@ index 0000000..f766cb3
+ n, tm, (2*n-1)/tm))
diff --git a/bench/spectral-norm.lua b/bench/spectral-norm.lua
new file mode 100644
-index 0000000..ecc8011
+index 00000000..ecc80112
--- /dev/null
+++ b/bench/spectral-norm.lua
@@ -0,0 +1,40 @@
@@ -174249,7 +174221,7 @@ index 0000000..ecc8011
+io.write(string.format("%0.9f\n", math.sqrt(vBv / vv)))
diff --git a/bench/sum-file.lua b/bench/sum-file.lua
new file mode 100644
-index 0000000..a16632b
+index 00000000..a16632b1
--- /dev/null
+++ b/bench/sum-file.lua
@@ -0,0 +1,8 @@
@@ -174263,7 +174235,7 @@ index 0000000..a16632b
+io.write(sum, "\n")
diff --git a/test/README.md b/test/README.md
new file mode 100644
-index 0000000..ff16ac8
+index 00000000..ff16ac8e
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,110 @@
@@ -174379,7 +174351,7 @@ index 0000000..ff16ac8
+After that, consult the README file by Mike in the directory above this one.
diff --git a/test/bc/constov.lua b/test/bc/constov.lua
new file mode 100644
-index 0000000..5827840
+index 00000000..5827840b
--- /dev/null
+++ b/test/bc/constov.lua
@@ -0,0 +1,16 @@
@@ -174401,14 +174373,14 @@ index 0000000..5827840
+end
diff --git a/test/bc/index b/test/bc/index
new file mode 100644
-index 0000000..dead10f
+index 00000000..dead10f5
--- /dev/null
+++ b/test/bc/index
@@ -0,0 +1 @@
+constov.lua +slow
diff --git a/test/common/expect_error.lua b/test/common/expect_error.lua
new file mode 100644
-index 0000000..e155090
+index 00000000..e155090e
--- /dev/null
+++ b/test/common/expect_error.lua
@@ -0,0 +1,16 @@
@@ -174430,10 +174402,10 @@ index 0000000..e155090
+end
diff --git a/test/common/ffi_util.inc b/test/common/ffi_util.inc
new file mode 100644
-index 0000000..1eee8dd
+index 00000000..1fa28f3b
--- /dev/null
+++ b/test/common/ffi_util.inc
-@@ -0,0 +1,41 @@
+@@ -0,0 +1,40 @@
+-- This should be turned into a proper module and not use globals.
+-- Or combined into a generiv test utility module. With FFI
+-- functionality turned off, if the FFI module is not built-in.
@@ -174474,17 +174446,16 @@ index 0000000..1eee8dd
+ fp:close()
+ ffi.cdef(s)
+end
-+
diff --git a/test/common/test_runner_canary.lua b/test/common/test_runner_canary.lua
new file mode 100644
-index 0000000..fc9cadc
+index 00000000..fc9cadc6
--- /dev/null
+++ b/test/common/test_runner_canary.lua
@@ -0,0 +1 @@
+return "canary is alive"
diff --git a/test/computations.lua b/test/computations.lua
new file mode 100644
-index 0000000..4fce7fc
+index 00000000..64b36af1
--- /dev/null
+++ b/test/computations.lua
@@ -0,0 +1,113 @@
@@ -174494,7 +174465,7 @@ index 0000000..4fce7fc
+ if n == 0 then return Ack(m-1, 1) end
+ return Ack(m-1, (Ack(m, n-1))) -- The parentheses are deliberate.
+ end
-+
++
+ assert(Ack(3,5) == 253)
+end
+
@@ -174504,7 +174475,7 @@ index 0000000..4fce7fc
+ if n == 0 then return Ack(m-1, 1) end
+ return (Ack(m-1, (Ack(m, n-1)))) -- The parentheses are deliberate.
+ end
-+
++
+ assert(Ack(3,5) == 253)
+end
+
@@ -174516,7 +174487,7 @@ index 0000000..4fce7fc
+ end
+ return x
+ end
-+
++
+ assert(fac(10) == 3628800)
+end
+
@@ -174565,7 +174536,7 @@ index 0000000..4fce7fc
+ end
+ return count
+ end
-+
++
+ assert(nsieve(100) == 25)
+ assert(nsieve(12345) == 1474)
+end
@@ -174575,7 +174546,7 @@ index 0000000..4fce7fc
+ if n == 1 then return 1 end
+ return n + sum(n-1)
+ end
-+
++
+ for i=1, 100 do
+ assert(sum(i) == i*(i+1)/2)
+ end
@@ -174587,7 +174558,7 @@ index 0000000..4fce7fc
+ if n == 1 then return 1 end
+ return abs(n + sum(n-1))
+ end
-+
++
+ for i=1, 100 do
+ assert(sum(i) == i*(i+1)/2)
+ end
@@ -174603,7 +174574,7 @@ index 0000000..4fce7fc
+end
diff --git a/test/index b/test/index
new file mode 100644
-index 0000000..bd4081e
+index 00000000..bd4081e3
--- /dev/null
+++ b/test/index
@@ -0,0 +1,6 @@
@@ -174615,7 +174586,7 @@ index 0000000..bd4081e
+opt +jit
diff --git a/test/lang/andor.lua b/test/lang/andor.lua
new file mode 100644
-index 0000000..55b2c75
+index 00000000..55b2c756
--- /dev/null
+++ b/test/lang/andor.lua
@@ -0,0 +1,61 @@
@@ -174682,7 +174653,7 @@ index 0000000..55b2c75
+end
diff --git a/test/lang/assignment.lua b/test/lang/assignment.lua
new file mode 100644
-index 0000000..e9745ef
+index 00000000..e9745ef6
--- /dev/null
+++ b/test/lang/assignment.lua
@@ -0,0 +1,46 @@
@@ -174734,7 +174705,7 @@ index 0000000..e9745ef
+end
diff --git a/test/lang/compare.lua b/test/lang/compare.lua
new file mode 100644
-index 0000000..09c5488
+index 00000000..09c5488d
--- /dev/null
+++ b/test/lang/compare.lua
@@ -0,0 +1,323 @@
@@ -175063,10 +175034,10 @@ index 0000000..09c5488
+end
diff --git a/test/lang/compare_nan.lua b/test/lang/compare_nan.lua
new file mode 100644
-index 0000000..878f39a
+index 00000000..dd152fab
--- /dev/null
+++ b/test/lang/compare_nan.lua
-@@ -0,0 +1,99 @@
+@@ -0,0 +1,98 @@
+
+local function check(a, b)
+ if a ~= b then
@@ -175165,10 +175136,9 @@ index 0000000..878f39a
+ check(not (1==nan), true)
+ check(not (1~=nan), false)
+end
-+
diff --git a/test/lang/concat.lua b/test/lang/concat.lua
new file mode 100644
-index 0000000..04d665b
+index 00000000..04d665b2
--- /dev/null
+++ b/test/lang/concat.lua
@@ -0,0 +1,112 @@
@@ -175286,7 +175256,7 @@ index 0000000..04d665b
+end
diff --git a/test/lang/constant/index b/test/lang/constant/index
new file mode 100644
-index 0000000..e738357
+index 00000000..e738357d
--- /dev/null
+++ b/test/lang/constant/index
@@ -0,0 +1,2 @@
@@ -175294,7 +175264,7 @@ index 0000000..e738357
+table.lua
diff --git a/test/lang/constant/number.lua b/test/lang/constant/number.lua
new file mode 100644
-index 0000000..fb67356
+index 00000000..fb67356e
--- /dev/null
+++ b/test/lang/constant/number.lua
@@ -0,0 +1,12 @@
@@ -175312,7 +175282,7 @@ index 0000000..fb67356
+end
diff --git a/test/lang/constant/table.lua b/test/lang/constant/table.lua
new file mode 100644
-index 0000000..899d0f6
+index 00000000..899d0f67
--- /dev/null
+++ b/test/lang/constant/table.lua
@@ -0,0 +1,15 @@
@@ -175333,7 +175303,7 @@ index 0000000..899d0f6
+end
diff --git a/test/lang/coroutine.lua b/test/lang/coroutine.lua
new file mode 100644
-index 0000000..405135c
+index 00000000..405135c9
--- /dev/null
+++ b/test/lang/coroutine.lua
@@ -0,0 +1,8 @@
@@ -175347,7 +175317,7 @@ index 0000000..405135c
+end
diff --git a/test/lang/for.lua b/test/lang/for.lua
new file mode 100644
-index 0000000..4982b32
+index 00000000..4982b32b
--- /dev/null
+++ b/test/lang/for.lua
@@ -0,0 +1,45 @@
@@ -175398,7 +175368,7 @@ index 0000000..4982b32
+end
diff --git a/test/lang/gc.lua b/test/lang/gc.lua
new file mode 100644
-index 0000000..35e6a1f
+index 00000000..35e6a1f3
--- /dev/null
+++ b/test/lang/gc.lua
@@ -0,0 +1,42 @@
@@ -175446,7 +175416,7 @@ index 0000000..35e6a1f
+end
diff --git a/test/lang/goto.lua b/test/lang/goto.lua
new file mode 100644
-index 0000000..1563a23
+index 00000000..978476c8
--- /dev/null
+++ b/test/lang/goto.lua
@@ -0,0 +1,149 @@
@@ -175460,7 +175430,7 @@ index 0000000..1563a23
+ assert(ok, err)
+ end
+end
-+
++
+do --- Basic goto and label semantics.
+ -- Error: duplicate label.
+ expect("::a:: ::a::", "'a'")
@@ -175601,7 +175571,7 @@ index 0000000..1563a23
+end
diff --git a/test/lang/index b/test/lang/index
new file mode 100644
-index 0000000..88e2edf
+index 00000000..88e2edfa
--- /dev/null
+++ b/test/lang/index
@@ -0,0 +1,18 @@
@@ -175625,7 +175595,7 @@ index 0000000..88e2edf
+meta
diff --git a/test/lang/length.lua b/test/lang/length.lua
new file mode 100644
-index 0000000..67c68ae
+index 00000000..67c68ae7
--- /dev/null
+++ b/test/lang/length.lua
@@ -0,0 +1,23 @@
@@ -175654,7 +175624,7 @@ index 0000000..67c68ae
+end
diff --git a/test/lang/meta/arith.lua b/test/lang/meta/arith.lua
new file mode 100644
-index 0000000..17de4c8
+index 00000000..17de4c8c
--- /dev/null
+++ b/test/lang/meta/arith.lua
@@ -0,0 +1,118 @@
@@ -175778,7 +175748,7 @@ index 0000000..17de4c8
+end
diff --git a/test/lang/meta/arith_jit.lua b/test/lang/meta/arith_jit.lua
new file mode 100644
-index 0000000..2cb35db
+index 00000000..2cb35dbb
--- /dev/null
+++ b/test/lang/meta/arith_jit.lua
@@ -0,0 +1,68 @@
@@ -175852,7 +175822,7 @@ index 0000000..2cb35db
+end
diff --git a/test/lang/meta/call.lua b/test/lang/meta/call.lua
new file mode 100644
-index 0000000..c77c0dd
+index 00000000..c77c0dd8
--- /dev/null
+++ b/test/lang/meta/call.lua
@@ -0,0 +1,81 @@
@@ -175939,10 +175909,10 @@ index 0000000..c77c0dd
+end
diff --git a/test/lang/meta/cat.lua b/test/lang/meta/cat.lua
new file mode 100644
-index 0000000..48a89e4
+index 00000000..3a5db6fc
--- /dev/null
+++ b/test/lang/meta/cat.lua
-@@ -0,0 +1,61 @@
+@@ -0,0 +1,60 @@
+local function create(cat, v1, v2)
+ local meta = { __concat = cat }
+ return setmetatable({v1}, meta), setmetatable({v2}, meta)
@@ -176003,10 +175973,9 @@ index 0000000..48a89e4
+ for i=1,100 do y = a..b.. 1 .. "z" end
+ assert(y == "ab1z")
+end
-+
diff --git a/test/lang/meta/comp.lua b/test/lang/meta/comp.lua
new file mode 100644
-index 0000000..23f18b0
+index 00000000..23f18b08
--- /dev/null
+++ b/test/lang/meta/comp.lua
@@ -0,0 +1,120 @@
@@ -176132,10 +176101,10 @@ index 0000000..23f18b0
+end
diff --git a/test/lang/meta/comp_jit.lua b/test/lang/meta/comp_jit.lua
new file mode 100644
-index 0000000..d0a19d8
+index 00000000..0bf07b9f
--- /dev/null
+++ b/test/lang/meta/comp_jit.lua
-@@ -0,0 +1,104 @@
+@@ -0,0 +1,103 @@
+do --- coverage
+ local lt, le = false, false
+ local t, u = {}, {}
@@ -176239,10 +176208,9 @@ index 0000000..d0a19d8
+ assert(not ok)
+ end
+end
-+
diff --git a/test/lang/meta/debuginfo.lua b/test/lang/meta/debuginfo.lua
new file mode 100644
-index 0000000..a99941f
+index 00000000..a99941fa
--- /dev/null
+++ b/test/lang/meta/debuginfo.lua
@@ -0,0 +1,81 @@
@@ -176329,7 +176297,7 @@ index 0000000..a99941f
+end
diff --git a/test/lang/meta/eq.lua b/test/lang/meta/eq.lua
new file mode 100644
-index 0000000..ebf6043
+index 00000000..ebf60435
--- /dev/null
+++ b/test/lang/meta/eq.lua
@@ -0,0 +1,30 @@
@@ -176365,7 +176333,7 @@ index 0000000..ebf6043
+end
diff --git a/test/lang/meta/eq_jit.lua b/test/lang/meta/eq_jit.lua
new file mode 100644
-index 0000000..47e1420
+index 00000000..47e14207
--- /dev/null
+++ b/test/lang/meta/eq_jit.lua
@@ -0,0 +1,35 @@
@@ -176406,7 +176374,7 @@ index 0000000..47e1420
+end
diff --git a/test/lang/meta/framegap.lua b/test/lang/meta/framegap.lua
new file mode 100644
-index 0000000..0080633
+index 00000000..0080633a
--- /dev/null
+++ b/test/lang/meta/framegap.lua
@@ -0,0 +1,24 @@
@@ -176436,7 +176404,7 @@ index 0000000..0080633
+end
diff --git a/test/lang/meta/index b/test/lang/meta/index
new file mode 100644
-index 0000000..f114e78
+index 00000000..f114e78d
--- /dev/null
+++ b/test/lang/meta/index
@@ -0,0 +1,14 @@
@@ -176456,7 +176424,7 @@ index 0000000..f114e78
+debuginfo.lua
diff --git a/test/lang/meta/index.lua b/test/lang/meta/index.lua
new file mode 100644
-index 0000000..4d6d0ff
+index 00000000..4d6d0ffe
--- /dev/null
+++ b/test/lang/meta/index.lua
@@ -0,0 +1,60 @@
@@ -176522,7 +176490,7 @@ index 0000000..4d6d0ff
+end
diff --git a/test/lang/meta/len.lua b/test/lang/meta/len.lua
new file mode 100644
-index 0000000..2410daa
+index 00000000..2410daa6
--- /dev/null
+++ b/test/lang/meta/len.lua
@@ -0,0 +1,42 @@
@@ -176570,7 +176538,7 @@ index 0000000..2410daa
+end
diff --git a/test/lang/meta/newindex.lua b/test/lang/meta/newindex.lua
new file mode 100644
-index 0000000..6c46b8c
+index 00000000..6c46b8cb
--- /dev/null
+++ b/test/lang/meta/newindex.lua
@@ -0,0 +1,69 @@
@@ -176645,10 +176613,10 @@ index 0000000..6c46b8c
+end
diff --git a/test/lang/meta/nomm.lua b/test/lang/meta/nomm.lua
new file mode 100644
-index 0000000..2b3db86
+index 00000000..e41f72f4
--- /dev/null
+++ b/test/lang/meta/nomm.lua
-@@ -0,0 +1,21 @@
+@@ -0,0 +1,20 @@
+
+do --- untitled
+ local keys = {}
@@ -176669,10 +176637,9 @@ index 0000000..2b3db86
+ end
+ assert(x == 95)
+end
-+
diff --git a/test/lang/modulo.lua b/test/lang/modulo.lua
new file mode 100644
-index 0000000..eddaea7
+index 00000000..eddaea77
--- /dev/null
+++ b/test/lang/modulo.lua
@@ -0,0 +1,46 @@
@@ -176724,7 +176691,7 @@ index 0000000..eddaea7
+end
diff --git a/test/lang/self.lua b/test/lang/self.lua
new file mode 100644
-index 0000000..d374666
+index 00000000..d3746664
--- /dev/null
+++ b/test/lang/self.lua
@@ -0,0 +1,19 @@
@@ -176749,7 +176716,7 @@ index 0000000..d374666
+end
diff --git a/test/lang/table.lua b/test/lang/table.lua
new file mode 100644
-index 0000000..3ff38cf
+index 00000000..3ff38cfe
--- /dev/null
+++ b/test/lang/table.lua
@@ -0,0 +1,32 @@
@@ -176787,7 +176754,7 @@ index 0000000..3ff38cf
+end
diff --git a/test/lang/tail_recursion.lua b/test/lang/tail_recursion.lua
new file mode 100644
-index 0000000..78f071f
+index 00000000..78f071fd
--- /dev/null
+++ b/test/lang/tail_recursion.lua
@@ -0,0 +1,20 @@
@@ -176813,7 +176780,7 @@ index 0000000..78f071f
+end
diff --git a/test/lang/upvalue/closure.lua b/test/lang/upvalue/closure.lua
new file mode 100644
-index 0000000..faa4de1
+index 00000000..faa4de1c
--- /dev/null
+++ b/test/lang/upvalue/closure.lua
@@ -0,0 +1,84 @@
@@ -176903,17 +176870,17 @@ index 0000000..faa4de1
+end
diff --git a/test/lang/upvalue/index b/test/lang/upvalue/index
new file mode 100644
-index 0000000..3c170db
+index 00000000..3c170db9
--- /dev/null
+++ b/test/lang/upvalue/index
@@ -0,0 +1 @@
+closure.lua
diff --git a/test/lang/vararg_jit.lua b/test/lang/vararg_jit.lua
new file mode 100644
-index 0000000..4e78f96
+index 00000000..50729f5e
--- /dev/null
+++ b/test/lang/vararg_jit.lua
-@@ -0,0 +1,95 @@
+@@ -0,0 +1,94 @@
+
+do --- 1
+ local function f(a, b, c, ...)
@@ -177008,10 +176975,9 @@ index 0000000..4e78f96
+ f(nil)
+ f()
+end
-+
diff --git a/test/lib/base/assert.lua b/test/lib/base/assert.lua
new file mode 100644
-index 0000000..9c30ba0
+index 00000000..9c30ba02
--- /dev/null
+++ b/test/lib/base/assert.lua
@@ -0,0 +1,33 @@
@@ -177050,7 +177016,7 @@ index 0000000..9c30ba0
+end
diff --git a/test/lib/base/error.lua b/test/lib/base/error.lua
new file mode 100644
-index 0000000..9193085
+index 00000000..91930854
--- /dev/null
+++ b/test/lib/base/error.lua
@@ -0,0 +1,43 @@
@@ -177099,7 +177065,7 @@ index 0000000..9193085
+end
diff --git a/test/lib/base/getfenv.lua b/test/lib/base/getfenv.lua
new file mode 100644
-index 0000000..9c00ed7
+index 00000000..9c00ed7c
--- /dev/null
+++ b/test/lib/base/getfenv.lua
@@ -0,0 +1,13 @@
@@ -177118,7 +177084,7 @@ index 0000000..9c00ed7
+end
diff --git a/test/lib/base/getsetmetatable.lua b/test/lib/base/getsetmetatable.lua
new file mode 100644
-index 0000000..7d57343
+index 00000000..7d57343e
--- /dev/null
+++ b/test/lib/base/getsetmetatable.lua
@@ -0,0 +1,33 @@
@@ -177157,7 +177123,7 @@ index 0000000..7d57343
+end
diff --git a/test/lib/base/index b/test/lib/base/index
new file mode 100644
-index 0000000..942c53c
+index 00000000..942c53c0
--- /dev/null
+++ b/test/lib/base/index
@@ -0,0 +1,11 @@
@@ -177174,7 +177140,7 @@ index 0000000..942c53c
+xpcall_jit.lua +compat5.2
diff --git a/test/lib/base/ipairs.lua b/test/lib/base/ipairs.lua
new file mode 100644
-index 0000000..a9de087
+index 00000000..a9de087e
--- /dev/null
+++ b/test/lib/base/ipairs.lua
@@ -0,0 +1,41 @@
@@ -177221,7 +177187,7 @@ index 0000000..a9de087
+end
diff --git a/test/lib/base/next.lua b/test/lib/base/next.lua
new file mode 100644
-index 0000000..0e40615
+index 00000000..0e40615a
--- /dev/null
+++ b/test/lib/base/next.lua
@@ -0,0 +1,17 @@
@@ -177244,7 +177210,7 @@ index 0000000..0e40615
+end
diff --git a/test/lib/base/pairs.lua b/test/lib/base/pairs.lua
new file mode 100644
-index 0000000..4d89d42
+index 00000000..4d89d42d
--- /dev/null
+++ b/test/lib/base/pairs.lua
@@ -0,0 +1,73 @@
@@ -177323,7 +177289,7 @@ index 0000000..4d89d42
+end
diff --git a/test/lib/base/pcall_jit.lua b/test/lib/base/pcall_jit.lua
new file mode 100644
-index 0000000..dc9cd5f
+index 00000000..dc9cd5fa
--- /dev/null
+++ b/test/lib/base/pcall_jit.lua
@@ -0,0 +1,74 @@
@@ -177403,13 +177369,13 @@ index 0000000..dc9cd5f
+end
diff --git a/test/lib/base/select.lua b/test/lib/base/select.lua
new file mode 100644
-index 0000000..8278e5e
+index 00000000..8b2b9467
--- /dev/null
+++ b/test/lib/base/select.lua
@@ -0,0 +1,105 @@
+
+do --- select #
-+-- Test whether select("#", 3, 4) returns the correct number of arguments.
++-- Test whether select("#", 3, 4) returns the correct number of arguments.
+ local x = 0
+ for i=1,100 do
+ x = x + select("#", 3, 4)
@@ -177427,7 +177393,7 @@ index 0000000..8278e5e
+ assert(x == 200)
+end
+
-+do --- select 1
++do --- select 1
+ local x = 0
+ for i=1,100 do
+ x = x + select(1, i)
@@ -177514,7 +177480,7 @@ index 0000000..8278e5e
+end
diff --git a/test/lib/base/tonumber_tostring.lua b/test/lib/base/tonumber_tostring.lua
new file mode 100644
-index 0000000..e7f576c
+index 00000000..e7f576ce
--- /dev/null
+++ b/test/lib/base/tonumber_tostring.lua
@@ -0,0 +1,81 @@
@@ -177601,7 +177567,7 @@ index 0000000..e7f576c
+end
diff --git a/test/lib/base/xpcall_jit.lua b/test/lib/base/xpcall_jit.lua
new file mode 100644
-index 0000000..f4993cc
+index 00000000..f4993cc6
--- /dev/null
+++ b/test/lib/base/xpcall_jit.lua
@@ -0,0 +1,83 @@
@@ -177690,7 +177656,7 @@ index 0000000..f4993cc
+end
diff --git a/test/lib/bit.lua b/test/lib/bit.lua
new file mode 100644
-index 0000000..1adf550
+index 00000000..1adf5507
--- /dev/null
+++ b/test/lib/bit.lua
@@ -0,0 +1,98 @@
@@ -177794,7 +177760,7 @@ index 0000000..1adf550
+end
diff --git a/test/lib/contents.lua b/test/lib/contents.lua
new file mode 100644
-index 0000000..2baacd5
+index 00000000..2baacd5c
--- /dev/null
+++ b/test/lib/contents.lua
@@ -0,0 +1,158 @@
@@ -177958,14 +177924,14 @@ index 0000000..2baacd5
+end
diff --git a/test/lib/coroutine/index b/test/lib/coroutine/index
new file mode 100644
-index 0000000..9c5c17e
+index 00000000..9c5c17ec
--- /dev/null
+++ b/test/lib/coroutine/index
@@ -0,0 +1 @@
+yield.lua
diff --git a/test/lib/coroutine/yield.lua b/test/lib/coroutine/yield.lua
new file mode 100644
-index 0000000..d995bf8
+index 00000000..d995bf87
--- /dev/null
+++ b/test/lib/coroutine/yield.lua
@@ -0,0 +1,109 @@
@@ -178080,10 +178046,10 @@ index 0000000..d995bf8
+end
diff --git a/test/lib/ffi/bit64.lua b/test/lib/ffi/bit64.lua
new file mode 100644
-index 0000000..d1b47be
+index 00000000..ffec0def
--- /dev/null
+++ b/test/lib/ffi/bit64.lua
-@@ -0,0 +1,130 @@
+@@ -0,0 +1,129 @@
+local ffi = require("ffi")
+local bit = require("bit")
+
@@ -178213,10 +178179,9 @@ index 0000000..d1b47be
+ end
+ assert(b == -8881785180777266821LL)
+end
-+
diff --git a/test/lib/ffi/cdata_var.lua b/test/lib/ffi/cdata_var.lua
new file mode 100644
-index 0000000..42d6028
+index 00000000..42d6028a
--- /dev/null
+++ b/test/lib/ffi/cdata_var.lua
@@ -0,0 +1,47 @@
@@ -178269,10 +178234,10 @@ index 0000000..42d6028
+end
diff --git a/test/lib/ffi/copy_fill.lua b/test/lib/ffi/copy_fill.lua
new file mode 100644
-index 0000000..2956381
+index 00000000..d50d7cda
--- /dev/null
+++ b/test/lib/ffi/copy_fill.lua
-@@ -0,0 +1,64 @@
+@@ -0,0 +1,63 @@
+local ffi = require("ffi")
+
+do --- misc
@@ -178336,13 +178301,12 @@ index 0000000..2956381
+ end
+ assert(x == "d" and y == "~")
+end
-+
diff --git a/test/lib/ffi/err.lua b/test/lib/ffi/err.lua
new file mode 100644
-index 0000000..4472365
+index 00000000..8cdf9623
--- /dev/null
+++ b/test/lib/ffi/err.lua
-@@ -0,0 +1,35 @@
+@@ -0,0 +1,34 @@
+local ffi = require("ffi")
+
+do --- error in FFI metamethod: don't print metamethod frame.
@@ -178377,13 +178341,12 @@ index 0000000..4472365
+ local line = debug.getinfo(foo).linedefined+3
+ assert(string.match(err, "traceback:[^:]*:"..line..":"))
+end
-+
diff --git a/test/lib/ffi/ffi_arith_ptr.lua b/test/lib/ffi/ffi_arith_ptr.lua
new file mode 100644
-index 0000000..8cf890c
+index 00000000..82535551
--- /dev/null
+++ b/test/lib/ffi/ffi_arith_ptr.lua
-@@ -0,0 +1,106 @@
+@@ -0,0 +1,105 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -178489,13 +178452,12 @@ index 0000000..8cf890c
+ local ok, err = pcall(function(p) return p[1] end, p)
+ assert(not ok and err:match("size.*unknown"))
+end
-+
diff --git a/test/lib/ffi/ffi_bitfield.lua b/test/lib/ffi/ffi_bitfield.lua
new file mode 100644
-index 0000000..cd0b181
+index 00000000..20b89ad8
--- /dev/null
+++ b/test/lib/ffi/ffi_bitfield.lua
-@@ -0,0 +1,108 @@
+@@ -0,0 +1,107 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -178603,13 +178565,12 @@ index 0000000..cd0b181
+ end
+
+end
-+
diff --git a/test/lib/ffi/ffi_call.lua b/test/lib/ffi/ffi_call.lua
new file mode 100644
-index 0000000..1eb5e90
+index 00000000..1a7f4b1b
--- /dev/null
+++ b/test/lib/ffi/ffi_call.lua
-@@ -0,0 +1,266 @@
+@@ -0,0 +1,265 @@
+
+local ffi = require("ffi")
+
@@ -178875,13 +178836,12 @@ index 0000000..1eb5e90
+ assert(C.stdcall_ff(12.5, -3.25) == 12.5-3.25)
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_callback.lua b/test/lib/ffi/ffi_callback.lua
new file mode 100644
-index 0000000..1fd14bd
+index 00000000..3e2759e2
--- /dev/null
+++ b/test/lib/ffi/ffi_callback.lua
-@@ -0,0 +1,158 @@
+@@ -0,0 +1,157 @@
+
+local ffi = require("ffi")
+
@@ -179039,13 +178999,12 @@ index 0000000..1fd14bd
+ debug.sethook(function() debug.sethook(nil, "", 0); f() end, "",
1)
+ local x
+end
-+
diff --git a/test/lib/ffi/ffi_const.lua b/test/lib/ffi/ffi_const.lua
new file mode 100644
-index 0000000..d42133a
+index 00000000..b2b256d4
--- /dev/null
+++ b/test/lib/ffi/ffi_const.lua
-@@ -0,0 +1,113 @@
+@@ -0,0 +1,112 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -179158,13 +179117,12 @@ index 0000000..d42133a
+ x.ccp = ccxa
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_convert.lua b/test/lib/ffi/ffi_convert.lua
new file mode 100644
-index 0000000..bd3fb1f
+index 00000000..1945760a
--- /dev/null
+++ b/test/lib/ffi/ffi_convert.lua
-@@ -0,0 +1,787 @@
+@@ -0,0 +1,786 @@
+local ffi = require("ffi")
+
+local ctest = require("ctest")
@@ -179951,13 +179909,12 @@ index 0000000..bd3fb1f
+ jit.off(f)
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_enum.lua b/test/lib/ffi/ffi_enum.lua
new file mode 100644
-index 0000000..e8e40ad
+index 00000000..9b63b4ec
--- /dev/null
+++ b/test/lib/ffi/ffi_enum.lua
-@@ -0,0 +1,57 @@
+@@ -0,0 +1,56 @@
+
+local ffi = require("ffi")
+
@@ -180014,13 +179971,12 @@ index 0000000..e8e40ad
+ assert(f("II"))
+ assert(not f(0))
+end
-+
diff --git a/test/lib/ffi/ffi_gcstep_recursive.lua
b/test/lib/ffi/ffi_gcstep_recursive.lua
new file mode 100644
-index 0000000..cb19df1
+index 00000000..22eb81af
--- /dev/null
+++ b/test/lib/ffi/ffi_gcstep_recursive.lua
-@@ -0,0 +1,66 @@
+@@ -0,0 +1,65 @@
+-- From Robert G. Jakabosky, 2012-03-20
+
+local N=tonumber(arg[1] or 10000)
@@ -180086,13 +180042,12 @@ index 0000000..cb19df1
+ end
+ cdata = nil
+end
-+
diff --git a/test/lib/ffi/ffi_jit_arith.lua b/test/lib/ffi/ffi_jit_arith.lua
new file mode 100644
-index 0000000..0554fe6
+index 00000000..0f502784
--- /dev/null
+++ b/test/lib/ffi/ffi_jit_arith.lua
-@@ -0,0 +1,155 @@
+@@ -0,0 +1,154 @@
+local ffi = require("ffi")
+
+do
@@ -180247,13 +180202,12 @@ index 0000000..0554fe6
+ assert(x == 1650)
+ assert(y == 970)
+end
-+
diff --git a/test/lib/ffi/ffi_jit_call.lua b/test/lib/ffi/ffi_jit_call.lua
new file mode 100644
-index 0000000..b79d60b
+index 00000000..ab1e26e3
--- /dev/null
+++ b/test/lib/ffi/ffi_jit_call.lua
-@@ -0,0 +1,154 @@
+@@ -0,0 +1,153 @@
+
+local ffi = require("ffi")
+
@@ -180407,10 +180361,9 @@ index 0000000..b79d60b
+ for i=1,100 do assert(lib.stdcall_ff(12.5, -3.25) == 12.5-3.25) end
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_jit_conv.lua b/test/lib/ffi/ffi_jit_conv.lua
new file mode 100644
-index 0000000..d4707db
+index 00000000..d4707db7
--- /dev/null
+++ b/test/lib/ffi/ffi_jit_conv.lua
@@ -0,0 +1,277 @@
@@ -180693,10 +180646,10 @@ index 0000000..d4707db
+end
diff --git a/test/lib/ffi/ffi_lex_number.lua b/test/lib/ffi/ffi_lex_number.lua
new file mode 100644
-index 0000000..e26650e
+index 00000000..1737a8ba
--- /dev/null
+++ b/test/lib/ffi/ffi_lex_number.lua
-@@ -0,0 +1,51 @@
+@@ -0,0 +1,50 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -180747,13 +180700,12 @@ index 0000000..e26650e
+ ".0ll",
+ "0ii",
+}, function(s) assert(loadstring("return "..s)) end)
-+
diff --git a/test/lib/ffi/ffi_metatype.lua b/test/lib/ffi/ffi_metatype.lua
new file mode 100644
-index 0000000..2db717f
+index 00000000..1d3a20bc
--- /dev/null
+++ b/test/lib/ffi/ffi_metatype.lua
-@@ -0,0 +1,245 @@
+@@ -0,0 +1,244 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -180998,13 +180950,12 @@ index 0000000..2db717f
+ local o = xt()
+ assert(o.x == 99)
+end
-+
diff --git a/test/lib/ffi/ffi_new.lua b/test/lib/ffi/ffi_new.lua
new file mode 100644
-index 0000000..9cdbd53
+index 00000000..5f0c88b4
--- /dev/null
+++ b/test/lib/ffi/ffi_new.lua
-@@ -0,0 +1,106 @@
+@@ -0,0 +1,105 @@
+local ffi = require("ffi")
+local bit = require("bit")
+
@@ -181110,13 +181061,12 @@ index 0000000..9cdbd53
+ local p = ffi.gc(ffi.new("int[1]"), function(x) assert(type(x) ==
"cdata") end)
+ -- test for lua_close() cleanup.
+end
-+
diff --git a/test/lib/ffi/ffi_parse_array.lua b/test/lib/ffi/ffi_parse_array.lua
new file mode 100644
-index 0000000..3a9616d
+index 00000000..08176223
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_array.lua
-@@ -0,0 +1,78 @@
+@@ -0,0 +1,77 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181194,13 +181144,12 @@ index 0000000..3a9616d
+ assert(ffi.sizeof(id, 0x40000000) == nil)
+ assert(ffi.sizeof(id, 0x3ffffffd) == 4+2*0x3ffffffd)
+end
-+
diff --git a/test/lib/ffi/ffi_parse_basic.lua b/test/lib/ffi/ffi_parse_basic.lua
new file mode 100644
-index 0000000..c054bcf
+index 00000000..774d6143
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_basic.lua
-@@ -0,0 +1,131 @@
+@@ -0,0 +1,130 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181331,13 +181280,12 @@ index 0000000..c054bcf
+ 1, 2, "char __attribute__((aligned(8))) const __attribute__((aligned(2)))",
+ 1, 16, "char __attribute__((aligned(8))) const
__attribute__((aligned(16)))",
+}
-+
diff --git a/test/lib/ffi/ffi_parse_cdef.lua b/test/lib/ffi/ffi_parse_cdef.lua
new file mode 100644
-index 0000000..4bb5d90
+index 00000000..43206f31
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_cdef.lua
-@@ -0,0 +1,77 @@
+@@ -0,0 +1,76 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181414,13 +181362,12 @@ index 0000000..4bb5d90
+int ext1;
+extern int ext2;
+]]
-+
diff --git a/test/lib/ffi/ffi_parse_struct.lua b/test/lib/ffi/ffi_parse_struct.lua
new file mode 100644
-index 0000000..16a3d05
+index 00000000..02b17dd6
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_struct.lua
-@@ -0,0 +1,259 @@
+@@ -0,0 +1,258 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181679,10 +181626,9 @@ index 0000000..16a3d05
+ assert(ffi.sizeof("struct foo_packintalign8") == 6)
+ assert(ffi.sizeof("struct foo_packintalign1") == 5)
+end
-+
diff --git a/test/lib/ffi/ffi_tabov.lua b/test/lib/ffi/ffi_tabov.lua
new file mode 100644
-index 0000000..ba62196
+index 00000000..ba621960
--- /dev/null
+++ b/test/lib/ffi/ffi_tabov.lua
@@ -0,0 +1,12 @@
@@ -181700,7 +181646,7 @@ index 0000000..ba62196
+assert(last > 20000)
diff --git a/test/lib/ffi/index b/test/lib/ffi/index
new file mode 100644
-index 0000000..59e36dd
+index 00000000..59e36dd8
--- /dev/null
+++ b/test/lib/ffi/index
@@ -0,0 +1,12 @@
@@ -181718,7 +181664,7 @@ index 0000000..59e36dd
+type_punning.lua
diff --git a/test/lib/ffi/istype.lua b/test/lib/ffi/istype.lua
new file mode 100644
-index 0000000..5aba775
+index 00000000..5aba7759
--- /dev/null
+++ b/test/lib/ffi/istype.lua
@@ -0,0 +1,88 @@
@@ -181812,7 +181758,7 @@ index 0000000..5aba775
+end
diff --git a/test/lib/ffi/jit_array.lua b/test/lib/ffi/jit_array.lua
new file mode 100644
-index 0000000..e8de4af
+index 00000000..e8de4af1
--- /dev/null
+++ b/test/lib/ffi/jit_array.lua
@@ -0,0 +1,104 @@
@@ -181922,7 +181868,7 @@ index 0000000..e8de4af
+end
diff --git a/test/lib/ffi/jit_complex.lua b/test/lib/ffi/jit_complex.lua
new file mode 100644
-index 0000000..3296f0c
+index 00000000..3296f0cb
--- /dev/null
+++ b/test/lib/ffi/jit_complex.lua
@@ -0,0 +1,109 @@
@@ -182037,7 +181983,7 @@ index 0000000..3296f0c
+end
diff --git a/test/lib/ffi/jit_misc.lua b/test/lib/ffi/jit_misc.lua
new file mode 100644
-index 0000000..41e4737
+index 00000000..41e4737b
--- /dev/null
+++ b/test/lib/ffi/jit_misc.lua
@@ -0,0 +1,109 @@
@@ -182152,10 +182098,10 @@ index 0000000..41e4737
+end
diff --git a/test/lib/ffi/jit_struct.lua b/test/lib/ffi/jit_struct.lua
new file mode 100644
-index 0000000..8aa64c1
+index 00000000..ab7ab07f
--- /dev/null
+++ b/test/lib/ffi/jit_struct.lua
-@@ -0,0 +1,201 @@
+@@ -0,0 +1,200 @@
+local ffi = require("ffi")
+
+ffi.cdef[[
@@ -182356,13 +182302,12 @@ index 0000000..8aa64c1
+ end
+ f()
+end
-+
diff --git a/test/lib/ffi/meta_tostring.lua b/test/lib/ffi/meta_tostring.lua
new file mode 100644
-index 0000000..bb065e1
+index 00000000..968eaddf
--- /dev/null
+++ b/test/lib/ffi/meta_tostring.lua
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,54 @@
+local ffi = require("ffi")
+
+ffi.cdef[[
@@ -182417,10 +182362,9 @@ index 0000000..bb065e1
+ x.f[1] = -753.125
+ assert(tostring(x.cf) == "12.5-753.125i")
+end
-+
diff --git a/test/lib/ffi/redir.lua b/test/lib/ffi/redir.lua
new file mode 100644
-index 0000000..c492055
+index 00000000..c492055a
--- /dev/null
+++ b/test/lib/ffi/redir.lua
@@ -0,0 +1,19 @@
@@ -182445,7 +182389,7 @@ index 0000000..c492055
+end
diff --git a/test/lib/ffi/type_punning.lua b/test/lib/ffi/type_punning.lua
new file mode 100644
-index 0000000..ac70b4b
+index 00000000..ac70b4b4
--- /dev/null
+++ b/test/lib/ffi/type_punning.lua
@@ -0,0 +1,138 @@
@@ -182589,7 +182533,7 @@ index 0000000..ac70b4b
+end
diff --git a/test/lib/index b/test/lib/index
new file mode 100644
-index 0000000..cc9d7d7
+index 00000000..cc9d7d73
--- /dev/null
+++ b/test/lib/index
@@ -0,0 +1,8 @@
@@ -182604,7 +182548,7 @@ index 0000000..cc9d7d7
\ No newline at end of file
diff --git a/test/lib/math/abs.lua b/test/lib/math/abs.lua
new file mode 100644
-index 0000000..4223a78
+index 00000000..4223a780
--- /dev/null
+++ b/test/lib/math/abs.lua
@@ -0,0 +1,16 @@
@@ -182626,7 +182570,7 @@ index 0000000..4223a78
+end
diff --git a/test/lib/math/constants.lua b/test/lib/math/constants.lua
new file mode 100644
-index 0000000..ec35b4c
+index 00000000..ec35b4ce
--- /dev/null
+++ b/test/lib/math/constants.lua
@@ -0,0 +1,8 @@
@@ -182640,7 +182584,7 @@ index 0000000..ec35b4c
+end
diff --git a/test/lib/math/index b/test/lib/math/index
new file mode 100644
-index 0000000..944e1ae
+index 00000000..944e1aeb
--- /dev/null
+++ b/test/lib/math/index
@@ -0,0 +1,3 @@
@@ -182649,7 +182593,7 @@ index 0000000..944e1ae
+random.lua
diff --git a/test/lib/math/random.lua b/test/lib/math/random.lua
new file mode 100644
-index 0000000..dc2ca00
+index 00000000..dc2ca00b
--- /dev/null
+++ b/test/lib/math/random.lua
@@ -0,0 +1,47 @@
@@ -182702,7 +182646,7 @@ index 0000000..dc2ca00
+end
diff --git a/test/lib/string/byte.lua b/test/lib/string/byte.lua
new file mode 100644
-index 0000000..697a2c2
+index 00000000..697a2c2e
--- /dev/null
+++ b/test/lib/string/byte.lua
@@ -0,0 +1,92 @@
@@ -182800,7 +182744,7 @@ index 0000000..697a2c2
+end
diff --git a/test/lib/string/char.lua b/test/lib/string/char.lua
new file mode 100644
-index 0000000..544767d
+index 00000000..544767de
--- /dev/null
+++ b/test/lib/string/char.lua
@@ -0,0 +1,29 @@
@@ -182835,7 +182779,7 @@ index 0000000..544767d
+end
diff --git a/test/lib/string/dump.lua b/test/lib/string/dump.lua
new file mode 100644
-index 0000000..216c6eb
+index 00000000..216c6eb8
--- /dev/null
+++ b/test/lib/string/dump.lua
@@ -0,0 +1,31 @@
@@ -182872,14 +182816,14 @@ index 0000000..216c6eb
+end
diff --git a/test/lib/string/format/index b/test/lib/string/format/index
new file mode 100644
-index 0000000..4408853
+index 00000000..44088536
--- /dev/null
+++ b/test/lib/string/format/index
@@ -0,0 +1 @@
+num.lua
diff --git a/test/lib/string/format/num.lua b/test/lib/string/format/num.lua
new file mode 100644
-index 0000000..e8cb33f
+index 00000000..e8cb33f3
--- /dev/null
+++ b/test/lib/string/format/num.lua
@@ -0,0 +1,184 @@
@@ -183069,7 +183013,7 @@ index 0000000..e8cb33f
+end
diff --git a/test/lib/string/index b/test/lib/string/index
new file mode 100644
-index 0000000..c0638e9
+index 00000000..c0638e9c
--- /dev/null
+++ b/test/lib/string/index
@@ -0,0 +1,11 @@
@@ -183086,7 +183030,7 @@ index 0000000..c0638e9
+sub.lua
diff --git a/test/lib/string/len.lua b/test/lib/string/len.lua
new file mode 100644
-index 0000000..8ed7e8a
+index 00000000..8ed7e8ae
--- /dev/null
+++ b/test/lib/string/len.lua
@@ -0,0 +1,14 @@
@@ -183106,7 +183050,7 @@ index 0000000..8ed7e8a
+end
diff --git a/test/lib/string/lower_upper.lua b/test/lib/string/lower_upper.lua
new file mode 100644
-index 0000000..7370c44
+index 00000000..7370c44c
--- /dev/null
+++ b/test/lib/string/lower_upper.lua
@@ -0,0 +1,51 @@
@@ -183163,7 +183107,7 @@ index 0000000..7370c44
+end
diff --git a/test/lib/string/metatable.lua b/test/lib/string/metatable.lua
new file mode 100644
-index 0000000..d39ed43
+index 00000000..d39ed432
--- /dev/null
+++ b/test/lib/string/metatable.lua
@@ -0,0 +1,3 @@
@@ -183172,7 +183116,7 @@ index 0000000..d39ed43
+end
diff --git a/test/lib/string/multiple_functions.lua b/test/lib/string/multiple_functions.lua
new file mode 100644
-index 0000000..7b9d0f1
+index 00000000..7b9d0f13
--- /dev/null
+++ b/test/lib/string/multiple_functions.lua
@@ -0,0 +1,16 @@
@@ -183194,7 +183138,7 @@ index 0000000..7b9d0f1
+end
diff --git a/test/lib/string/rep.lua b/test/lib/string/rep.lua
new file mode 100644
-index 0000000..550c15b
+index 00000000..550c15b8
--- /dev/null
+++ b/test/lib/string/rep.lua
@@ -0,0 +1,68 @@
@@ -183268,7 +183212,7 @@ index 0000000..550c15b
+end
diff --git a/test/lib/string/reverse.lua b/test/lib/string/reverse.lua
new file mode 100644
-index 0000000..deaade7
+index 00000000..deaade7c
--- /dev/null
+++ b/test/lib/string/reverse.lua
@@ -0,0 +1,13 @@
@@ -183287,7 +183231,7 @@ index 0000000..deaade7
+end
diff --git a/test/lib/string/sub.lua b/test/lib/string/sub.lua
new file mode 100644
-index 0000000..ecb8021
+index 00000000..ecb80216
--- /dev/null
+++ b/test/lib/string/sub.lua
@@ -0,0 +1,189 @@
@@ -183482,7 +183426,7 @@ index 0000000..ecb8021
+end
diff --git a/test/lib/table/concat.lua b/test/lib/table/concat.lua
new file mode 100644
-index 0000000..1f2a2f9
+index 00000000..1f2a2f92
--- /dev/null
+++ b/test/lib/table/concat.lua
@@ -0,0 +1,55 @@
@@ -183543,7 +183487,7 @@ index 0000000..1f2a2f9
+end
diff --git a/test/lib/table/index b/test/lib/table/index
new file mode 100644
-index 0000000..bd3af0b
+index 00000000..bd3af0be
--- /dev/null
+++ b/test/lib/table/index
@@ -0,0 +1,6 @@
@@ -183555,10 +183499,10 @@ index 0000000..bd3af0b
+sort.lua
diff --git a/test/lib/table/insert.lua b/test/lib/table/insert.lua
new file mode 100644
-index 0000000..91d4dd8
+index 00000000..30db18c7
--- /dev/null
+++ b/test/lib/table/insert.lua
-@@ -0,0 +1,17 @@
+@@ -0,0 +1,16 @@
+local tinsert = table.insert
+local assert = assert
+
@@ -183575,13 +183519,12 @@ index 0000000..91d4dd8
+ for i=101,200 do tinsert(t, i, i) end
+ assert(#t == 300 and t[101] == 101 and t[200] == 200 and t[300] == 200)
+end
-+
diff --git a/test/lib/table/misc.lua b/test/lib/table/misc.lua
new file mode 100644
-index 0000000..e0e2fc5
+index 00000000..c54188e1
--- /dev/null
+++ b/test/lib/table/misc.lua
-@@ -0,0 +1,58 @@
+@@ -0,0 +1,55 @@
+-- TODO: Organise
+
+-- ABC elim
@@ -183637,12 +183580,9 @@ index 0000000..e0e2fc5
+ assert(t[1] == 1 and t[2] == 2 and t[3] == 3 and t[4] == 9 and t[5] == 10 and
+ t[6] == nil)
+end
-+
-+
-+
diff --git a/test/lib/table/new.lua b/test/lib/table/new.lua
new file mode 100644
-index 0000000..483c129
+index 00000000..483c1298
--- /dev/null
+++ b/test/lib/table/new.lua
@@ -0,0 +1,11 @@
@@ -183659,7 +183599,7 @@ index 0000000..483c129
+end
diff --git a/test/lib/table/pack.lua b/test/lib/table/pack.lua
new file mode 100644
-index 0000000..5bd6ecb
+index 00000000..5bd6ecbe
--- /dev/null
+++ b/test/lib/table/pack.lua
@@ -0,0 +1,7 @@
@@ -183672,7 +183612,7 @@ index 0000000..5bd6ecb
+end
diff --git a/test/lib/table/remove.lua b/test/lib/table/remove.lua
new file mode 100644
-index 0000000..1b24a4f
+index 00000000..1b24a4fb
--- /dev/null
+++ b/test/lib/table/remove.lua
@@ -0,0 +1,42 @@
@@ -183720,7 +183660,7 @@ index 0000000..1b24a4f
+end
diff --git a/test/lib/table/sort.lua b/test/lib/table/sort.lua
new file mode 100644
-index 0000000..6a86fcf
+index 00000000..6a86fcf3
--- /dev/null
+++ b/test/lib/table/sort.lua
@@ -0,0 +1,27 @@
@@ -183753,10 +183693,10 @@ index 0000000..6a86fcf
+end
diff --git a/test/misc/alias_alloc.lua b/test/misc/alias_alloc.lua
new file mode 100644
-index 0000000..02fe618
+index 00000000..6c89baad
--- /dev/null
+++ b/test/misc/alias_alloc.lua
-@@ -0,0 +1,54 @@
+@@ -0,0 +1,53 @@
+
+do
+ local t = {1}
@@ -183810,13 +183750,12 @@ index 0000000..02fe618
+ w[1] = t[1]
+ end
+end
-+
diff --git a/test/misc/api_call.lua b/test/misc/api_call.lua
new file mode 100644
-index 0000000..7dbd5e4
+index 00000000..28ce7d2b
--- /dev/null
+++ b/test/misc/api_call.lua
-@@ -0,0 +1,98 @@
+@@ -0,0 +1,97 @@
+local ctest = require("ctest")
+
+local function ret0() end
@@ -183914,13 +183853,12 @@ index 0000000..7dbd5e4
+test_yield(ctest.resume, coroutine.yield)
+test_yield(coroutine.resume, ctest.yield)
+test_yield(ctest.resume, ctest.yield)
-+
diff --git a/test/misc/catch_wrap.lua b/test/misc/catch_wrap.lua
new file mode 100644
-index 0000000..7f656bc
+index 00000000..8b934066
--- /dev/null
+++ b/test/misc/catch_wrap.lua
-@@ -0,0 +1,45 @@
+@@ -0,0 +1,44 @@
+
+local cp = require("cpptest")
+cp.wrapon()
@@ -183965,13 +183903,12 @@ index 0000000..7f656bc
+ assert(a == false and b == "foo")
+ if unwind then assert(cp.isalloc() == false) end
+end
-+
diff --git a/test/misc/coro_traceback.lua b/test/misc/coro_traceback.lua
new file mode 100644
-index 0000000..2676d2c
+index 00000000..e075f5dd
--- /dev/null
+++ b/test/misc/coro_traceback.lua
-@@ -0,0 +1,8 @@
+@@ -0,0 +1,7 @@
+
+local co = coroutine.create(function()
+ local x = nil
@@ -183979,13 +183916,12 @@ index 0000000..2676d2c
+end)
+assert(coroutine.resume(co) == false)
+debug.traceback(co)
-+
diff --git a/test/misc/coro_yield.lua b/test/misc/coro_yield.lua
new file mode 100644
-index 0000000..ae3206e
+index 00000000..602ba7d5
--- /dev/null
+++ b/test/misc/coro_yield.lua
-@@ -0,0 +1,111 @@
+@@ -0,0 +1,110 @@
+local create = coroutine.create
+local wrap = coroutine.wrap
+local resume = coroutine.resume
@@ -184096,10 +184032,9 @@ index 0000000..ae3206e
+ end,
+ 42)
+end
-+
diff --git a/test/misc/debug_gc.lua b/test/misc/debug_gc.lua
new file mode 100644
-index 0000000..30fb2b9
+index 00000000..30fb2b99
--- /dev/null
+++ b/test/misc/debug_gc.lua
@@ -0,0 +1,47 @@
@@ -184152,10 +184087,10 @@ index 0000000..30fb2b9
+caught = "end"
diff --git a/test/misc/dualnum.lua b/test/misc/dualnum.lua
new file mode 100644
-index 0000000..5f1288c
+index 00000000..059bf21d
--- /dev/null
+++ b/test/misc/dualnum.lua
-@@ -0,0 +1,47 @@
+@@ -0,0 +1,46 @@
+
+-- Positive overflow
+do
@@ -184202,13 +184137,12 @@ index 0000000..5f1288c
+ assert(fmax(-1, -3) == -1)
+ assert(fmax(-3, -1) == -1)
+end
-+
diff --git a/test/misc/for_dir.lua b/test/misc/for_dir.lua
new file mode 100644
-index 0000000..4dd38de
+index 00000000..3146df13
--- /dev/null
+++ b/test/misc/for_dir.lua
-@@ -0,0 +1,13 @@
+@@ -0,0 +1,12 @@
+
+local a,b,c = 10,1,-1
+for i=1,20 do
@@ -184221,13 +184155,12 @@ index 0000000..4dd38de
+ for i=a,b,c do for j=1,10 do end x=x+1 end
+ assert(x == 10)
+end
-+
diff --git a/test/misc/fori_coerce.lua b/test/misc/fori_coerce.lua
new file mode 100644
-index 0000000..7330943
+index 00000000..03dc37cc
--- /dev/null
+++ b/test/misc/fori_coerce.lua
-@@ -0,0 +1,33 @@
+@@ -0,0 +1,32 @@
+
+do
+ local n = 1
@@ -184260,13 +184193,12 @@ index 0000000..7330943
+ end
+ assert(not pcall(f))
+end
-+
diff --git a/test/misc/gc_rechain.lua b/test/misc/gc_rechain.lua
new file mode 100644
-index 0000000..285f408
+index 00000000..c98fa5af
--- /dev/null
+++ b/test/misc/gc_rechain.lua
-@@ -0,0 +1,32 @@
+@@ -0,0 +1,31 @@
+
+do
+ local k
@@ -184298,13 +184230,12 @@ index 0000000..285f408
+
+ assert(t[k] == 4)
+end
-+
diff --git a/test/misc/gc_trace.lua b/test/misc/gc_trace.lua
new file mode 100644
-index 0000000..bc38ce0
+index 00000000..e394bd49
--- /dev/null
+++ b/test/misc/gc_trace.lua
-@@ -0,0 +1,37 @@
+@@ -0,0 +1,36 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184341,13 +184272,12 @@ index 0000000..bc38ce0
+ end
+ jit.attach(reccb)
+end
-+
diff --git a/test/misc/gcstep.lua b/test/misc/gcstep.lua
new file mode 100644
-index 0000000..533356b
+index 00000000..7ee5565b
--- /dev/null
+++ b/test/misc/gcstep.lua
-@@ -0,0 +1,33 @@
+@@ -0,0 +1,32 @@
+
+local function testgc(what, func)
+ collectgarbage()
@@ -184380,13 +184310,12 @@ index 0000000..533356b
+ local s = "x"..i
+ end
+end)
-+
diff --git a/test/misc/hook_active.lua b/test/misc/hook_active.lua
new file mode 100644
-index 0000000..37dfc37
+index 00000000..57532568
--- /dev/null
+++ b/test/misc/hook_active.lua
-@@ -0,0 +1,95 @@
+@@ -0,0 +1,94 @@
+local ctest = require("ctest")
+
+local called = 0
@@ -184481,13 +184410,12 @@ index 0000000..37dfc37
+called = 2
+do local x = 1 end
+assert(called == 2)
-+
diff --git a/test/misc/hook_line.lua b/test/misc/hook_line.lua
new file mode 100644
-index 0000000..36f7108
+index 00000000..6106e492
--- /dev/null
+++ b/test/misc/hook_line.lua
-@@ -0,0 +1,41 @@
+@@ -0,0 +1,40 @@
+local lines = {}
+local function hook()
+ lines[#lines+1] = debug.getinfo(2).currentline
@@ -184528,13 +184456,12 @@ index 0000000..36f7108
+f()
+debug.sethook(nil, "", 0)
+for i=1,#lines do assert(lines[i] ~= 36) end
-+
diff --git a/test/misc/hook_norecord.lua b/test/misc/hook_norecord.lua
new file mode 100644
-index 0000000..8e7cba0
+index 00000000..004f3a3a
--- /dev/null
+++ b/test/misc/hook_norecord.lua
-@@ -0,0 +1,12 @@
+@@ -0,0 +1,11 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184546,13 +184473,12 @@ index 0000000..8e7cba0
+assert(called)
+-- Check that no trace was generated.
+assert(require("jit.util").traceinfo(1) == nil)
-+
diff --git a/test/misc/hook_record.lua b/test/misc/hook_record.lua
new file mode 100644
-index 0000000..6f1646d
+index 00000000..f4283d12
--- /dev/null
+++ b/test/misc/hook_record.lua
-@@ -0,0 +1,8 @@
+@@ -0,0 +1,7 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184560,13 +184486,12 @@ index 0000000..6f1646d
+for i=1,10 do end
+debug.sethook()
+assert((require("jit.util").traceinfo(1)))
-+
diff --git a/test/misc/hook_top.lua b/test/misc/hook_top.lua
new file mode 100644
-index 0000000..f809fce
+index 00000000..3cc7e651
--- /dev/null
+++ b/test/misc/hook_top.lua
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,54 @@
+
+local t = {}
+for i=1,26 do t[i] = string.char(96+i) end
@@ -184621,13 +184546,12 @@ index 0000000..f809fce
+ assert(a == "bar")
+end
+foo5()
-+
diff --git a/test/misc/jit_flush.lua b/test/misc/jit_flush.lua
new file mode 100644
-index 0000000..ead1e4e
+index 00000000..fe1021ce
--- /dev/null
+++ b/test/misc/jit_flush.lua
-@@ -0,0 +1,50 @@
+@@ -0,0 +1,49 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184677,13 +184601,12 @@ index 0000000..ead1e4e
+jit.flush(2) -- ignored
+jit.flush(1) -- ok
+jit.flush(1) -- crashes
-+
diff --git a/test/misc/lightud.lua b/test/misc/lightud.lua
new file mode 100644
-index 0000000..4974d50
+index 00000000..261b106f
--- /dev/null
+++ b/test/misc/lightud.lua
-@@ -0,0 +1,88 @@
+@@ -0,0 +1,87 @@
+local ctest = require("ctest")
+
+local lightud = ctest.lightud
@@ -184771,13 +184694,12 @@ index 0000000..4974d50
+ end, t))
+ assert(x == 16110)
+end
-+
diff --git a/test/misc/loop_unroll.lua b/test/misc/loop_unroll.lua
new file mode 100644
-index 0000000..1700fac
+index 00000000..3dcb3d22
--- /dev/null
+++ b/test/misc/loop_unroll.lua
-@@ -0,0 +1,35 @@
+@@ -0,0 +1,34 @@
+
+-- type instability on loop unroll -> record unroll
+do
@@ -184812,10 +184734,9 @@ index 0000000..1700fac
+ j = j+1
+ until true
+end
-+
diff --git a/test/misc/parse_comp.lua b/test/misc/parse_comp.lua
new file mode 100644
-index 0000000..5e1948d
+index 00000000..5e1948da
--- /dev/null
+++ b/test/misc/parse_comp.lua
@@ -0,0 +1,13 @@
@@ -184834,7 +184755,7 @@ index 0000000..5e1948d
+end
diff --git a/test/misc/parse_esc.lua b/test/misc/parse_esc.lua
new file mode 100644
-index 0000000..4bcce0e
+index 00000000..4bcce0e8
--- /dev/null
+++ b/test/misc/parse_esc.lua
@@ -0,0 +1,7 @@
@@ -184847,10 +184768,10 @@ index 0000000..4bcce0e
+ def"]])() == "abc def")
diff --git a/test/misc/parse_misc.lua b/test/misc/parse_misc.lua
new file mode 100644
-index 0000000..8031ec1
+index 00000000..2c9949e3
--- /dev/null
+++ b/test/misc/parse_misc.lua
-@@ -0,0 +1,31 @@
+@@ -0,0 +1,30 @@
+
+-- Ambiguous syntax: function call vs. new statement.
+if os.getenv("LUA52") then
@@ -184881,13 +184802,12 @@ index 0000000..8031ec1
+assert(#"aäa" == 4)
+assert(#"äöü·€晶" == 14)
+]]))()
-+
diff --git a/test/misc/phi_conv.lua b/test/misc/phi_conv.lua
new file mode 100644
-index 0000000..8d7bea5
+index 00000000..0b7261c8
--- /dev/null
+++ b/test/misc/phi_conv.lua
-@@ -0,0 +1,53 @@
+@@ -0,0 +1,52 @@
+
+local bit = require("bit")
+
@@ -184940,13 +184860,12 @@ index 0000000..8d7bea5
+if jit and jit.status and jit.status() then jit.opt.start("hotloop=1") end
+
+test()
-+
diff --git a/test/misc/recurse_deep.lua b/test/misc/recurse_deep.lua
new file mode 100644
-index 0000000..9b9af29
+index 00000000..f18ff5cb
--- /dev/null
+++ b/test/misc/recurse_deep.lua
-@@ -0,0 +1,29 @@
+@@ -0,0 +1,28 @@
+
+do
+ local function sum(n)
@@ -184975,13 +184894,12 @@ index 0000000..9b9af29
+ end
+ assert(fib(15) == 987)
+end
-+
diff --git a/test/misc/recurse_tail.lua b/test/misc/recurse_tail.lua
new file mode 100644
-index 0000000..ef76443
+index 00000000..d6296e2b
--- /dev/null
+++ b/test/misc/recurse_tail.lua
-@@ -0,0 +1,22 @@
+@@ -0,0 +1,21 @@
+
+do
+ local tr1
@@ -185003,13 +184921,12 @@ index 0000000..ef76443
+ end
+ assert(tr2(200) == 0)
+end
-+
diff --git a/test/misc/stack_gc.lua b/test/misc/stack_gc.lua
new file mode 100644
-index 0000000..656a06a
+index 00000000..f212fec5
--- /dev/null
+++ b/test/misc/stack_gc.lua
-@@ -0,0 +1,15 @@
+@@ -0,0 +1,14 @@
+
+do
+ local t = setmetatable({}, { __index=function(t, k)
@@ -185024,13 +184941,12 @@ index 0000000..656a06a
+ end})
+ local x = t[50]
+end
-+
diff --git a/test/misc/stack_purge.lua b/test/misc/stack_purge.lua
new file mode 100644
-index 0000000..bfaee0f
+index 00000000..de53dea4
--- /dev/null
+++ b/test/misc/stack_purge.lua
-@@ -0,0 +1,25 @@
+@@ -0,0 +1,24 @@
+
+-- Must preserve the modified function slot in the RET snapshot.
+local function a()
@@ -185055,13 +184971,12 @@ index 0000000..bfaee0f
+
+jit.off(c)
+c()
-+
diff --git a/test/misc/stackov.lua b/test/misc/stackov.lua
new file mode 100644
-index 0000000..ef105af
+index 00000000..65c68d95
--- /dev/null
+++ b/test/misc/stackov.lua
-@@ -0,0 +1,40 @@
+@@ -0,0 +1,39 @@
+
+local function f()
+ f()
@@ -185101,10 +185016,9 @@ index 0000000..ef105af
+
+local err, s = xpcall(vcall, debug.traceback, 1)
+assert(err == false)
-+
diff --git a/test/misc/stackovc.lua b/test/misc/stackovc.lua
new file mode 100644
-index 0000000..c00bcbd
+index 00000000..c00bcbd8
--- /dev/null
+++ b/test/misc/stackovc.lua
@@ -0,0 +1,4 @@
@@ -185114,10 +185028,10 @@ index 0000000..c00bcbd
+assert(not ok and string.find(err, "unpack"))
diff --git a/test/misc/tcall_base.lua b/test/misc/tcall_base.lua
new file mode 100644
-index 0000000..c6c4ae1
+index 00000000..52882519
--- /dev/null
+++ b/test/misc/tcall_base.lua
-@@ -0,0 +1,20 @@
+@@ -0,0 +1,19 @@
+
+local r = 0
+local function g()
@@ -185137,10 +185051,9 @@ index 0000000..c6c4ae1
+g() -- Compile this loop first.
+for i=1,50 do f() end
+assert(r == 51)
-+
diff --git a/test/misc/tcall_loop.lua b/test/misc/tcall_loop.lua
new file mode 100644
-index 0000000..d3c6f1a
+index 00000000..d3c6f1a6
--- /dev/null
+++ b/test/misc/tcall_loop.lua
@@ -0,0 +1,8 @@
@@ -185154,10 +185067,10 @@ index 0000000..d3c6f1a
+assert(x == 100)
diff --git a/test/misc/tonumber_scan.lua b/test/misc/tonumber_scan.lua
new file mode 100644
-index 0000000..78e1ca3
+index 00000000..a4f51cee
--- /dev/null
+++ b/test/misc/tonumber_scan.lua
-@@ -0,0 +1,180 @@
+@@ -0,0 +1,179 @@
+local ffi = require("ffi")
+local bit = require("bit")
+
@@ -185337,13 +185250,12 @@ index 0000000..78e1ca3
+-- print(" "..tohex64(u.x)..", \""..s.."\",")
+ end
+end
-+
diff --git a/test/misc/uclo.lua b/test/misc/uclo.lua
new file mode 100644
-index 0000000..bd9bd24
+index 00000000..6b36127e
--- /dev/null
+++ b/test/misc/uclo.lua
-@@ -0,0 +1,91 @@
+@@ -0,0 +1,90 @@
+
+local function test_for()
+ local z1, z2
@@ -185434,13 +185346,12 @@ index 0000000..bd9bd24
+ x = 2
+ assert(f() == 200)
+end
-+
diff --git a/test/misc/unordered_jit.lua b/test/misc/unordered_jit.lua
new file mode 100644
-index 0000000..5ff1a1b
+index 00000000..78ce72d3
--- /dev/null
+++ b/test/misc/unordered_jit.lua
-@@ -0,0 +1,96 @@
+@@ -0,0 +1,95 @@
+
+local nan = 0/0
+local t = {}
@@ -185536,10 +185447,9 @@ index 0000000..5ff1a1b
+do local z; for i=1,100 do z = not (nan >= nan) end; assert(z == true) end
+do local z; for i=1,100 do z = not (nan >= 1) end; assert(z == true) end
+do local z; for i=1,100 do z = not (1 >= nan) end; assert(z == true) end
-+
diff --git a/test/misc/wbarrier.lua b/test/misc/wbarrier.lua
new file mode 100644
-index 0000000..5536625
+index 00000000..5536625a
--- /dev/null
+++ b/test/misc/wbarrier.lua
@@ -0,0 +1,7 @@
@@ -185552,10 +185462,10 @@ index 0000000..5536625
+end
diff --git a/test/misc/wbarrier_jit.lua b/test/misc/wbarrier_jit.lua
new file mode 100644
-index 0000000..2c8dd7f
+index 00000000..bf1fc1e7
--- /dev/null
+++ b/test/misc/wbarrier_jit.lua
-@@ -0,0 +1,18 @@
+@@ -0,0 +1,17 @@
+
+do
+ local t = {[0]={}}
@@ -185573,13 +185483,12 @@ index 0000000..2c8dd7f
+ end
+ f()
+end
-+
diff --git a/test/misc/wbarrier_obar.lua b/test/misc/wbarrier_obar.lua
new file mode 100644
-index 0000000..258db21
+index 00000000..d6504995
--- /dev/null
+++ b/test/misc/wbarrier_obar.lua
-@@ -0,0 +1,22 @@
+@@ -0,0 +1,21 @@
+-- DSE of USTORE must eliminate OBAR, too.
+
+if jit and jit.opt then pcall(jit.opt.start, "-sink") end
@@ -185601,10 +185510,9 @@ index 0000000..258db21
+collectgarbage("setstepmul", 1)
+collectgarbage("restart")
+f()
-+
diff --git a/test/opt/dse/array.lua b/test/opt/dse/array.lua
new file mode 100644
-index 0000000..8c76624
+index 00000000..8c766248
--- /dev/null
+++ b/test/opt/dse/array.lua
@@ -0,0 +1,197 @@
@@ -185807,7 +185715,7 @@ index 0000000..8c76624
+end
diff --git a/test/opt/dse/field.lua b/test/opt/dse/field.lua
new file mode 100644
-index 0000000..d8a5411
+index 00000000..d8a5411c
--- /dev/null
+++ b/test/opt/dse/field.lua
@@ -0,0 +1,70 @@
@@ -185883,7 +185791,7 @@ index 0000000..d8a5411
+end
diff --git a/test/opt/dse/index b/test/opt/dse/index
new file mode 100644
-index 0000000..7b8ad1f
+index 00000000..7b8ad1f4
--- /dev/null
+++ b/test/opt/dse/index
@@ -0,0 +1,2 @@
@@ -185891,14 +185799,14 @@ index 0000000..7b8ad1f
+field.lua
diff --git a/test/opt/fold/index b/test/opt/fold/index
new file mode 100644
-index 0000000..8b4648c
+index 00000000..8b4648c7
--- /dev/null
+++ b/test/opt/fold/index
@@ -0,0 +1 @@
+kfold.lua
diff --git a/test/opt/fold/kfold.lua b/test/opt/fold/kfold.lua
new file mode 100644
-index 0000000..9cd3919
+index 00000000..9cd39190
--- /dev/null
+++ b/test/opt/fold/kfold.lua
@@ -0,0 +1,81 @@
@@ -185985,7 +185893,7 @@ index 0000000..9cd3919
+end
diff --git a/test/opt/fuse.lua b/test/opt/fuse.lua
new file mode 100644
-index 0000000..a68381e
+index 00000000..a68381ef
--- /dev/null
+++ b/test/opt/fuse.lua
@@ -0,0 +1,5 @@
@@ -185996,7 +185904,7 @@ index 0000000..a68381e
+end
diff --git a/test/opt/fwd/hrefk_rollback.lua b/test/opt/fwd/hrefk_rollback.lua
new file mode 100644
-index 0000000..5a6ad87
+index 00000000..5a6ad876
--- /dev/null
+++ b/test/opt/fwd/hrefk_rollback.lua
@@ -0,0 +1,32 @@
@@ -186034,7 +185942,7 @@ index 0000000..5a6ad87
+end
diff --git a/test/opt/fwd/index b/test/opt/fwd/index
new file mode 100644
-index 0000000..5bb1537
+index 00000000..5bb1537f
--- /dev/null
+++ b/test/opt/fwd/index
@@ -0,0 +1,3 @@
@@ -186043,7 +185951,7 @@ index 0000000..5bb1537
+upval.lua
diff --git a/test/opt/fwd/tnew_tdup.lua b/test/opt/fwd/tnew_tdup.lua
new file mode 100644
-index 0000000..9e18fa3
+index 00000000..9e18fa3b
--- /dev/null
+++ b/test/opt/fwd/tnew_tdup.lua
@@ -0,0 +1,69 @@
@@ -186118,7 +186026,7 @@ index 0000000..9e18fa3
+end
diff --git a/test/opt/fwd/upval.lua b/test/opt/fwd/upval.lua
new file mode 100644
-index 0000000..a3e83df
+index 00000000..a3e83dff
--- /dev/null
+++ b/test/opt/fwd/upval.lua
@@ -0,0 +1,50 @@
@@ -186174,7 +186082,7 @@ index 0000000..a3e83df
+end
diff --git a/test/opt/index b/test/opt/index
new file mode 100644
-index 0000000..94d50ae
+index 00000000..94d50aec
--- /dev/null
+++ b/test/opt/index
@@ -0,0 +1,6 @@
@@ -186186,14 +186094,14 @@ index 0000000..94d50ae
+sink +sink
diff --git a/test/opt/loop/index b/test/opt/loop/index
new file mode 100644
-index 0000000..e582023
+index 00000000..e5820234
--- /dev/null
+++ b/test/opt/loop/index
@@ -0,0 +1 @@
+unroll.lua
diff --git a/test/opt/loop/unroll.lua b/test/opt/loop/unroll.lua
new file mode 100644
-index 0000000..6fbd565
+index 00000000..6fbd565a
--- /dev/null
+++ b/test/opt/loop/unroll.lua
@@ -0,0 +1,32 @@
@@ -186231,7 +186139,7 @@ index 0000000..6fbd565
+end
diff --git a/test/opt/sink/alloc.lua b/test/opt/sink/alloc.lua
new file mode 100644
-index 0000000..bb2a0f7
+index 00000000..bb2a0f72
--- /dev/null
+++ b/test/opt/sink/alloc.lua
@@ -0,0 +1,126 @@
@@ -186363,7 +186271,7 @@ index 0000000..bb2a0f7
+end
diff --git a/test/opt/sink/ffi.lua b/test/opt/sink/ffi.lua
new file mode 100644
-index 0000000..0bba097
+index 00000000..0bba0978
--- /dev/null
+++ b/test/opt/sink/ffi.lua
@@ -0,0 +1,121 @@
@@ -186490,7 +186398,7 @@ index 0000000..0bba097
+end
diff --git a/test/opt/sink/ffi_nosink.lua b/test/opt/sink/ffi_nosink.lua
new file mode 100644
-index 0000000..8f7cced
+index 00000000..8f7cced2
--- /dev/null
+++ b/test/opt/sink/ffi_nosink.lua
@@ -0,0 +1,45 @@
@@ -186541,7 +186449,7 @@ index 0000000..8f7cced
+end
diff --git a/test/opt/sink/index b/test/opt/sink/index
new file mode 100644
-index 0000000..8bfa370
+index 00000000..8bfa370e
--- /dev/null
+++ b/test/opt/sink/index
@@ -0,0 +1,4 @@
@@ -186551,7 +186459,7 @@ index 0000000..8bfa370
+ffi_nosink.lua +ffi
diff --git a/test/opt/sink/nosink.lua b/test/opt/sink/nosink.lua
new file mode 100644
-index 0000000..762aace
+index 00000000..762aaced
--- /dev/null
+++ b/test/opt/sink/nosink.lua
@@ -0,0 +1,109 @@
@@ -186666,7 +186574,7 @@ index 0000000..762aace
+end
diff --git a/test/src/cpptest.cpp b/test/src/cpptest.cpp
new file mode 100644
-index 0000000..a5893ed
+index 00000000..a5893ed6
--- /dev/null
+++ b/test/src/cpptest.cpp
@@ -0,0 +1,129 @@
@@ -186801,7 +186709,7 @@ index 0000000..a5893ed
+}
diff --git a/test/src/ctest.c b/test/src/ctest.c
new file mode 100644
-index 0000000..d257567
+index 00000000..d257567b
--- /dev/null
+++ b/test/src/ctest.c
@@ -0,0 +1,339 @@
@@ -187146,10 +187054,10 @@ index 0000000..d257567
+}
diff --git a/test/sysdep/catch_cpp.lua b/test/sysdep/catch_cpp.lua
new file mode 100644
-index 0000000..b225100
+index 00000000..f2cfca64
--- /dev/null
+++ b/test/sysdep/catch_cpp.lua
-@@ -0,0 +1,71 @@
+@@ -0,0 +1,70 @@
+
+local cp = require("cpptest")
+
@@ -187220,10 +187128,9 @@ index 0000000..b225100
+ local a,b,c,d,e,f = cp.usereg(100, 50, foo, false)
+ assert(a==164 and b==312 and c==428 and d==3696 and e==404 and f==404)
+end
-+
diff --git a/test/sysdep/ffi_include_gtk.lua b/test/sysdep/ffi_include_gtk.lua
new file mode 100644
-index 0000000..a4bfcea
+index 00000000..a4bfceac
--- /dev/null
+++ b/test/sysdep/ffi_include_gtk.lua
@@ -0,0 +1,9 @@
@@ -187238,10 +187145,10 @@ index 0000000..a4bfcea
+include"/usr/include/gtk-2.0/gtk/gtk.h"
diff --git a/test/sysdep/ffi_include_std.lua b/test/sysdep/ffi_include_std.lua
new file mode 100644
-index 0000000..b88c82b
+index 00000000..5ef1affa
--- /dev/null
+++ b/test/sysdep/ffi_include_std.lua
-@@ -0,0 +1,36 @@
+@@ -0,0 +1,35 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -187277,13 +187184,12 @@ index 0000000..b88c82b
+ os.remove("/tmp/__tmp.c")
+ ffi.cdef(s)
+end
-+
diff --git a/test/sysdep/ffi_lib_c.lua b/test/sysdep/ffi_lib_c.lua
new file mode 100644
-index 0000000..a368d75
+index 00000000..f0ff0ad9
--- /dev/null
+++ b/test/sysdep/ffi_lib_c.lua
-@@ -0,0 +1,87 @@
+@@ -0,0 +1,86 @@
+local ffi = require("ffi")
+
+ffi.cdef[[
@@ -187370,13 +187276,12 @@ index 0000000..a368d75
+ ffi.load("pthread")
+ end
+end
-+
diff --git a/test/sysdep/ffi_lib_z.lua b/test/sysdep/ffi_lib_z.lua
new file mode 100644
-index 0000000..69a19ae
+index 00000000..91b1272d
--- /dev/null
+++ b/test/sysdep/ffi_lib_z.lua
-@@ -0,0 +1,107 @@
+@@ -0,0 +1,106 @@
+local ffi = require("ffi")
+
+local compress, uncompress
@@ -187483,10 +187388,9 @@ index 0000000..69a19ae
+assert(2*#c < #txt)
+local txt2 = uncompress(c, #txt)
+assert(txt2 == txt)
-+
diff --git a/test/test.lua b/test/test.lua
new file mode 100644
-index 0000000..b064eff
+index 00000000..f5131ba2
--- /dev/null
+++ b/test/test.lua
@@ -0,0 +1,416 @@
@@ -187501,7 +187405,7 @@ index 0000000..b064eff
+
+local function default_tags()
+ local tags = {}
-+
++
+ -- Lua version and features
+ tags.lua = tonumber(_VERSION:match"%d+%.%d+")
+ if table.pack then
@@ -187535,7 +187439,7 @@ index 0000000..b064eff
+ tags[flag:lower()] = true
+ end
+ end
-+
++
+ -- Environment
+ if dirsep == "\\" then
+ tags.windows = true
@@ -187557,7 +187461,7 @@ index 0000000..b064eff
+ tags["abi".. (bytecode:byte(9, 9) * 8)] = true
+ end
+ end
-+
++
+ return tags
+end
+
@@ -187594,7 +187498,7 @@ index 0000000..b064eff
+ want_meta = want_meta,
+ }
+ local result = opts
-+
++
+ local i, tlen = 1, #t
+ local joinedval = ""
+ local function flagval()
@@ -187609,7 +187513,7 @@ index 0000000..b064eff
+ end
+ return val
+ end
-+
++
+ while i <= tlen do
+ local arg = t[i]
+ i = i + 1
@@ -187800,7 +187704,7 @@ index 0000000..b064eff
+ end
+ end
+ seal(_G)
-+
++
+ if getmetatable(package.loaded) == sealed_mt then
+ setmetatable(package.loaded, nil)
+ end
@@ -187908,7 +187812,7 @@ index 0000000..b064eff
+end
diff --git a/test/trace/exit_frame.lua b/test/trace/exit_frame.lua
new file mode 100644
-index 0000000..9537c56
+index 00000000..9537c563
--- /dev/null
+++ b/test/trace/exit_frame.lua
@@ -0,0 +1,79 @@
@@ -187993,7 +187897,7 @@ index 0000000..9537c56
+end
diff --git a/test/trace/exit_growstack.lua b/test/trace/exit_growstack.lua
new file mode 100644
-index 0000000..658a31a
+index 00000000..4aa6fae8
--- /dev/null
+++ b/test/trace/exit_growstack.lua
@@ -0,0 +1,28 @@
@@ -188002,7 +187906,7 @@ index 0000000..658a31a
+ local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a;
+ local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a;
+ local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a;
-+ if i==90 then return end
++ if i==90 then return end
+ end
+ for j=1,5 do
+ collectgarbage() -- Shrink stack.
@@ -188027,7 +187931,7 @@ index 0000000..658a31a
+end
diff --git a/test/trace/exit_jfuncf.lua b/test/trace/exit_jfuncf.lua
new file mode 100644
-index 0000000..67ad7c3
+index 00000000..67ad7c36
--- /dev/null
+++ b/test/trace/exit_jfuncf.lua
@@ -0,0 +1,30 @@
@@ -188063,7 +187967,7 @@ index 0000000..67ad7c3
+end
diff --git a/test/trace/gc64_slot_revival.lua b/test/trace/gc64_slot_revival.lua
new file mode 100644
-index 0000000..40b9d87
+index 00000000..40b9d871
--- /dev/null
+++ b/test/trace/gc64_slot_revival.lua
@@ -0,0 +1,18 @@
@@ -188087,7 +187991,7 @@ index 0000000..40b9d87
+end
diff --git a/test/trace/index b/test/trace/index
new file mode 100644
-index 0000000..ea7a22e
+index 00000000..ea7a22e0
--- /dev/null
+++ b/test/trace/index
@@ -0,0 +1,7 @@
@@ -188100,7 +188004,7 @@ index 0000000..ea7a22e
+stitch.lua
diff --git a/test/trace/phi/copyspill.lua b/test/trace/phi/copyspill.lua
new file mode 100644
-index 0000000..17a8698
+index 00000000..17a8698f
--- /dev/null
+++ b/test/trace/phi/copyspill.lua
@@ -0,0 +1,53 @@
@@ -188159,7 +188063,7 @@ index 0000000..17a8698
+end
diff --git a/test/trace/phi/index b/test/trace/phi/index
new file mode 100644
-index 0000000..74a0733
+index 00000000..74a07333
--- /dev/null
+++ b/test/trace/phi/index
@@ -0,0 +1,3 @@
@@ -188168,7 +188072,7 @@ index 0000000..74a0733
+rotate.lua
diff --git a/test/trace/phi/ref.lua b/test/trace/phi/ref.lua
new file mode 100644
-index 0000000..3662912
+index 00000000..3662912d
--- /dev/null
+++ b/test/trace/phi/ref.lua
@@ -0,0 +1,131 @@
@@ -188305,7 +188209,7 @@ index 0000000..3662912
+end
diff --git a/test/trace/phi/rotate.lua b/test/trace/phi/rotate.lua
new file mode 100644
-index 0000000..cb751e0
+index 00000000..cb751e0b
--- /dev/null
+++ b/test/trace/phi/rotate.lua
@@ -0,0 +1,149 @@
@@ -188460,7 +188364,7 @@ index 0000000..cb751e0
+end
diff --git a/test/trace/snap.lua b/test/trace/snap.lua
new file mode 100644
-index 0000000..ba26326
+index 00000000..ba26326e
--- /dev/null
+++ b/test/trace/snap.lua
@@ -0,0 +1,47 @@
@@ -188513,7 +188417,7 @@ index 0000000..ba26326
+end
diff --git a/test/trace/stitch.lua b/test/trace/stitch.lua
new file mode 100644
-index 0000000..3f7f973
+index 00000000..3f7f9734
--- /dev/null
+++ b/test/trace/stitch.lua
@@ -0,0 +1,19 @@
@@ -188538,7 +188442,7 @@ index 0000000..3f7f973
+end
diff --git a/test/unportable/ffi_arith_int64.lua b/test/unportable/ffi_arith_int64.lua
new file mode 100644
-index 0000000..c05e02a
+index 00000000..c05e02a9
--- /dev/null
+++ b/test/unportable/ffi_arith_int64.lua
@@ -0,0 +1,68 @@
@@ -188612,10 +188516,10 @@ index 0000000..c05e02a
+{471871,702627,720692,1385612,1803393,1171039,1772007,763817,1583994,4486762,2380423,566647,1265370,2319256,770581,1990479,4566660,2319835,566647,1265370,2319256,770581,1990479,4566660,2319835,830322,4833809,4644705,1071753,2822313,7709069,4647021,})
diff --git a/test/unportable/math_special.lua b/test/unportable/math_special.lua
new file mode 100644
-index 0000000..4916101
+index 00000000..ef4de129
--- /dev/null
+++ b/test/unportable/math_special.lua
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,54 @@
+
+local inp = { 0, -"0", 0.5, -0.5, 1, -1, 1/0, -1/0, 0/0 }
+
@@ -188670,7 +188574,443 @@ index 0000000..4916101
+
+-- Pointless: deg, rad, min, max, pow
+-- LATER: %, fmod, frexp, ldexp, modf, sinh, cosh, tanh
+--
+2.31.1
+
+
+From 6508eeb5c841344e3f128267a04e8150dd36f926 Mon Sep 17 00:00:00 2001
+From: Sameera Deshpande <sameera.deshpande(a)linaro.org>
+Date: Fri, 15 Feb 2019 07:46:16 +0530
+Subject: [PATCH 02/10] Add support for FNMADD and FNMSUB.
+
+---
+ src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++-
+ 1 file changed, 31 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 67c53ee2..0e913fa5 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -353,6 +353,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+ return 0;
+ }
+
++/* Fuse FP neg-multiply-add/sub. */
++static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
++{
++ IRRef ref = ir->op1;
++ IRIns *irn = IR(ref);
++ if (irn->o != IR_ADD && irn->o != IR_SUB)
++ return 0;
++
++ if (!mayfuse(as, ref))
++ return 0;
++
++ IRRef lref = irn->op1, rref = irn->op2;
++ IRIns *irm;
++ if (lref != rref &&
++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
++ ra_noreg(irm->r)) ||
++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
++ (rref = lref, ra_noreg(irm->r))))) {
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
++ Reg left = ra_alloc2(as, irm,
++ rset_exclude(rset_exclude(RSET_FPR, dest), add));
++ Reg right = (left >> 8); left &= 255;
++ emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31));
++ return 1;
++ }
++ return 0;
++}
+
+ /* Fuse BAND + BSHL/BSHR into UBFM. */
+ static int asm_fuseandshift(ASMState *as, IRIns *ir)
+ {
+@@ -1466,7 +1495,8 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
+ if (irt_isnum(ir->t)) {
+- asm_fpunary(as, ir, A64I_FNEGd);
++ if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
++ asm_fpunary(as, ir, A64I_FNEGd);
+ return;
+ }
+ asm_intneg(as, ir);
+--
+2.31.1
+
+
+From aa0b2a0c837af307d26468fce05a7c24ab6045d3 Mon Sep 17 00:00:00 2001
+From: Vivien HENRIET <bubuabu(a)bubuabu.org>
+Date: Wed, 30 Jan 2019 23:44:51 +0100
+Subject: [PATCH 03/10] Fix os.date() for timezone change awareness
+
+On POSIX targets, system timezone changes are not taken into account.
+To reproduce,
+1. call os.date()
+2. change your timezone
+3. call os.date() within the same luajit instance
+
+On POSIX targets, os.date uses localtime_r to retrieve the time.
+On other targets, the function localtime is used. But there is a behaviour
+difference between these two functions: localtime acts as if it called tzset,
+which localtime_r doesn't.
+
+To fix the issue tzset is called before localtime_r.
+---
+ src/lib_os.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/lib_os.c b/src/lib_os.c
+index f19b831c..609cb2ec 100644
+--- a/src/lib_os.c
++++ b/src/lib_os.c
+@@ -185,6 +185,7 @@ LJLIB_CF(os_date)
+ #endif
+ } else {
+ #if LJ_TARGET_POSIX
++ tzset();
+ stm = localtime_r(&t, &rtm);
+ #else
+ stm = localtime(&t);
+--
+2.31.1
+
+
+From a62dc6306c4e5a4b672040067c169143da804a4f Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Thu, 14 Mar 2019 23:08:24 +0530
+Subject: [PATCH 04/10] Revert "FFI: Make FP to U64 conversions match JIT
+ backend behavior."
+
+This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8.
+
+The patch breaks test 279, i.e.
+
+ assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL")
+
+The patch was put in to make the JIT and interpreter behaviour
+consistent[1] for float to unsigned int conversions but it ended up
+making things worse. There needs to be a better fix for this.
+
+[1] https://github.com/LuaJIT/LuaJIT/pull/415
+---
+ src/lj_obj.h | 18 +++++-------------
+ 1 file changed, 5 insertions(+), 13 deletions(-)
+
+diff --git a/src/lj_obj.h b/src/lj_obj.h
+index 1a6445fc..97885683 100644
+--- a/src/lj_obj.h
++++ b/src/lj_obj.h
+@@ -995,22 +995,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
+
+ #define lj_num2int(n) ((int32_t)(n))
+
+-/*
+-** This must match the JIT backend behavior. In particular for archs
+-** that don't have a common hardware instruction for this conversion.
+-** Note that signed FP to unsigned int conversions have an undefined
+-** result and should never be relied upon in portable FFI code.
+-** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
+-*/
+ static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
+ {
+-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
+- int64_t i = (int64_t)n;
+- if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
+- return (uint64_t)i;
+-#else
+- return (uint64_t)n;
++#ifdef _MSC_VER
++ if (n >= 9223372036854775808.0) /* They think it's a feature. */
++ return (uint64_t)(int64_t)(n - 18446744073709551616.0);
++ else
+ #endif
++ return (uint64_t)n;
+ }
+
+ static LJ_AINLINE int32_t numberVint(cTValue *o)
+--
+2.31.1
+
+
+From fe2399a76bab67b32409fda1de82c34e8d5d7904 Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Sun, 17 Mar 2019 11:34:04 +0530
+Subject: [PATCH 05/10] Guard against undefined behaviour when casting from
+ float to unsigned
+
+Only range (-1.0, UINT64_MAX) can be safely converted to unsigned
+directly, and (-INT64_MAX, INT64_MAX) through a cast to int64_t first.
+The remaining range is undefined.
+
+TODO: Do the same for JIT as well as for float to other ranges.
+---
+ src/lj_obj.h | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_obj.h b/src/lj_obj.h
+index 97885683..9878059f 100644
+--- a/src/lj_obj.h
++++ b/src/lj_obj.h
+@@ -997,12 +997,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
+
+ static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
+ {
++ /* Undefined behaviour. This is deliberately not a full check because we
++ don't want to slow down compliant code. */
++ lj_assertX(n >= -9223372036854775809.0, "Overflow");
+ #ifdef _MSC_VER
+ if (n >= 9223372036854775808.0) /* They think it's a feature. */
+ return (uint64_t)(int64_t)(n - 18446744073709551616.0);
+ else
+ #endif
+- return (uint64_t)n;
++ if (n > -1.0)
++ return (uint64_t)n;
++ else
++ return (uint64_t)(int64_t)n;
+ }
+
+ static LJ_AINLINE int32_t numberVint(cTValue *o)
+--
+2.31.1
+
+
+From c193115e16a138dac69f774a7f57a5b4cc7f1097 Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Mon, 25 Mar 2019 17:56:53 +0530
+Subject: [PATCH 06/10] Fix build error with fnmsub fusing
+
+---
+ src/lj_asm_arm64.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 0e913fa5..4c7bf401 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -1495,7 +1495,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
+ if (irt_isnum(ir->t)) {
+- if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
++ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd))
+ asm_fpunary(as, ir, A64I_FNEGd);
+ return;
+ }
+--
+2.31.1
+
+
+From b5c2492406bf07ab80e134f351c1066e8f6224f2 Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Thu, 28 Mar 2019 09:19:34 +0530
+Subject: [PATCH 07/10] aarch64: better float to unsigned int conversion
+
+A straight float to unsigned conversion has a limited range of (-1.0,
+UTYPE_MAX) which should be fine in general but for the sake of
+consistency across the interpreter and the JIT compiler, it is
+necessary to work a wee bit harder to expand this range to (TYPE_MIN,
+UTYPE_MAX), which can be done with a simple range check. This adds a
+couple of branches but only one of the branches should have a
+noticeable performance impact on most processors with branch
+predictors, and that too only if the input number varies wildly in
+range.
+
+This currently works only for 64-bit conversions, 32-bit is still WIP.
+---
+ src/lj_asm_arm64.h | 30 ++++++++++++++++++++++--------
+ src/lj_target_arm64.h | 1 +
+ 2 files changed, 23 insertions(+), 8 deletions(-)
+
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 4c7bf401..e7e744a8 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -626,14 +626,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+- A64Ins ai = irt_is64(ir->t) ?
+- (st == IRT_NUM ?
+- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
+- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
+- (st == IRT_NUM ?
+- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
+- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
+- emit_dn(as, ai, dest, (left & 31));
++
++ A64Ins ai_signed = st == IRT_NUM ?
++ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) :
++ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32);
++
++ if (irt_isi64(ir->t) || irt_isint(ir->t))
++ emit_dn(as, ai_signed, dest, (left & 31));
++ else {
++ A64Ins ai_unsigned = st == IRT_NUM ?
++ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) :
++ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32);
++
++ MCLabel l_done = emit_label(as);
++ emit_dn(as, ai_unsigned, dest, (left & 31));
++ MCLabel l_signed = emit_label(as);
++ emit_jmp(as, l_done);
++ emit_dn(as, ai_signed, dest, (left & 31));
++ /* The valid range for float to unsigned int conversion is (-1.0,
++ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */
++ emit_cond_branch(as, CC_PL, l_signed);
++ emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0);
++ }
+ }
+ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
+index 6d39ffb8..370699d9 100644
+--- a/src/lj_target_arm64.h
++++ b/src/lj_target_arm64.h
+@@ -283,6 +283,7 @@ typedef enum A64Ins {
+ A64I_STPs = 0x2d000000,
+ A64I_STPd = 0x6d000000,
+ A64I_FCMPd = 0x1e602000,
++ A64I_FCMPZs = 0x1e202008,
+ A64I_FCMPZd = 0x1e602008,
+ A64I_FCSELd = 0x1e600c00,
+ A64I_FRINTMd = 0x1e654000,
+--
+2.31.1
+
+
+From bd79b1d4596ed6780470c8d02f77b8398d80cd3a Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Thu, 28 Mar 2019 10:50:23 +0530
+Subject: [PATCH 08/10] Better behaviour for float to uint32_t conversions
+
+This is the uint32_t part of the float to unsigned int conversions for
+the interpreter. The cast ends up working correctly for x86 but not
+for aarch64 since fcvtzu sets the result to zero on negative inputs.
+Work slightly harder to make sure that negative number inputs behave
+like x86.
+
+This fixes the interpreter but not the JIT compiler, which errors out
+during the narrowing pass.
+---
+ src/lj_cconv.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_cconv.c b/src/lj_cconv.c
+index 613f66e2..7e8a8b92 100644
+--- a/src/lj_cconv.c
++++ b/src/lj_cconv.c
+@@ -203,7 +203,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
+ else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
+ else *(int8_t *)dp = (int8_t)i;
+ } else if (dsize == 4) {
+- *(uint32_t *)dp = (uint32_t)n;
++ /* Undefined behaviour. This is deliberately not a full check because we
++ * don't want to slow down compliant code. */
++ lj_assertX(n >= -2147483649.0, "Overflow");
++ if (n > -1.0)
++ *(uint32_t *)dp = (uint32_t)n;
++ else
++ *(uint32_t *)dp = (uint32_t)(int32_t)n;
+ } else if (dsize == 8) {
+ if (!(dinfo & CTF_UNSIGNED))
+ *(int64_t *)dp = (int64_t)n;
+--
+2.31.1
+
+
+From a1636c6e1879b5eeb55a51ebba796501c93614dd Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Mon, 17 Jun 2019 13:50:57 +0530
+Subject: [PATCH 09/10] test: Check for package.searchers only in compat5.2
+
+LuaJIT version check for lua will return true for +lua<5.2 since it
+does not fully implement 5.2. Move the (not package.searchers) check
+to +compat5.2 instead of the version check since it is implemented by
+compat5.2.
+---
+ test/lib/contents.lua | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/test/lib/contents.lua b/test/lib/contents.lua
+index 2baacd5c..09866f6f 100644
+--- a/test/lib/contents.lua
++++ b/test/lib/contents.lua
+@@ -121,10 +121,13 @@ end
+
+ do --- pre-5.2 package +lua<5.2
+ assert(package.loaders)
+- assert(not package.searchers)
+ assert(package.seeall)
+ end
+
++do --- 5.2 compat package +compat5.2
++ assert(package.searchers)
++end
++
+ do --- 5.2 package +lua>=5.2
+ assert(not package.loaders)
+ assert(package.searchers)
+--
+2.31.1
+
+
+From ec04137a0873c09eef216b32f3df3b66209f47d5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej(a)sury.org>
+Date: Thu, 19 Nov 2015 16:29:02 +0200
+Subject: [PATCH 10/10] Get rid of LUAJIT_VERSION_SYM that changes ABI on every
+ patch release
+
+---
+ src/lj_dispatch.c | 5 -----
+ src/luajit.c | 2 --
+ src/luajit.h | 3 ---
+ 3 files changed, 10 deletions(-)
+
+diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
+index 7b73d3dd..3f69d0d1 100644
+--- a/src/lj_dispatch.c
++++ b/src/lj_dispatch.c
+@@ -318,11 +318,6 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
+ return 1; /* OK. */
+ }
+
+-/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */
+-LUA_API void LUAJIT_VERSION_SYM(void)
+-{
+-}
+-
+ /* -- Hooks --------------------------------------------------------------- */
+
+ /* This function can be called asynchronously (e.g. during a signal). */
+diff --git a/src/luajit.c b/src/luajit.c
+index 6aed5337..a4a11cb1 100644
+--- a/src/luajit.c
++++ b/src/luajit.c
+@@ -518,8 +518,6 @@ static int pmain(lua_State *L)
+ globalL = L;
+ if (argv[0] && argv[0][0]) progname = argv[0];
+
+- LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
+-
+ argn = collectargs(argv, &flags);
+ if (argn < 0) { /* Invalid args? */
+ print_usage();
+diff --git a/src/luajit.h b/src/luajit.h
+index 2ee1f908..04f6b456 100644
+--- a/src/luajit.h
++++ b/src/luajit.h
+@@ -73,7 +73,4 @@ LUA_API void luaJIT_profile_stop(lua_State *L);
+ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+ int depth, size_t *len);
+
+-/* Enforce (dynamic) linker error for version mismatches. Call from main. */
+-LUA_API void LUAJIT_VERSION_SYM(void);
+-
+ #endif
--
-2.20.1
+2.31.1
diff --git a/luajit-2.1-update.patch b/luajit-2.1-update.patch
new file mode 100644
index 0000000..93fa207
--- /dev/null
+++ b/luajit-2.1-update.patch
@@ -0,0 +1,37572 @@
+diff --git a/COPYRIGHT b/COPYRIGHT
+index 6ed40025..9c2bca55 100644
+--- a/COPYRIGHT
++++ b/COPYRIGHT
+@@ -1,7 +1,7 @@
+ ===============================================================================
+-LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
++LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
+
+-Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+@@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+
+-[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
++[ MIT license: https://www.opensource.org/licenses/mit-license.php ]
+
+ ===============================================================================
+ [ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ]
+@@ -51,6 +51,6 @@ THE SOFTWARE.
+
+ This is a version (aka dlmalloc) of malloc/free/realloc written by
+ Doug Lea and released to the public domain, as explained at
+-http://creativecommons.org/licenses/publicdomain
++https://creativecommons.org/licenses/publicdomain
+
+ ===============================================================================
+diff --git a/Makefile b/Makefile
+index 0f933089..aa1b84bd 100644
+--- a/Makefile
++++ b/Makefile
+@@ -10,7 +10,7 @@
+ # For MSVC, please follow the instructions given in src/msvcbuild.bat.
+ # For MinGW and Cygwin, cd to src and run make with the Makefile there.
+ #
+-# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ ##############################################################################
+
+ MAJVER= 2
+@@ -75,7 +75,7 @@ SYMLINK= ln -sf
+ INSTALL_X= install -m 0755
+ INSTALL_F= install -m 0644
+ UNINSTALL= $(RM)
+-LDCONFIG= ldconfig -n
++LDCONFIG= ldconfig -n 2>/dev/null
+ SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \
+ -e "s|^multilib=.*|multilib=$(MULTILIB)|"
+
+@@ -121,7 +121,7 @@ install: $(INSTALL_DEP)
+ $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+ cd src && test -f $(FILE_SO) && \
+ $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
+- $(LDCONFIG) $(INSTALL_LIB) && \
++ ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \
+ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \
+ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || :
+ cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN)
+diff --git a/README b/README
+index 2b9ae9d2..c9f7d9ad 100644
+--- a/README
++++ b/README
+@@ -3,9 +3,9 @@ README for LuaJIT 2.1.0-beta3
+
+ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
+
+-Project Homepage: http://luajit.org/
++Project Homepage: https://luajit.org/
+
+-LuaJIT is Copyright (C) 2005-2017 Mike Pall.
++LuaJIT is Copyright (C) 2005-2021 Mike Pall.
+ LuaJIT is free software, released under the MIT license.
+ See full Copyright Notice in the COPYRIGHT file or in luajit.h.
+
+diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css
+index 62e1c165..0b385cee 100644
+--- a/doc/bluequad-print.css
++++ b/doc/bluequad-print.css
+@@ -1,4 +1,4 @@
+-/* Copyright (C) 2004-2017 Mike Pall.
++/* Copyright (C) 2004-2021 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+diff --git a/doc/bluequad.css b/doc/bluequad.css
+index be2c4bf2..86cd9ac0 100644
+--- a/doc/bluequad.css
++++ b/doc/bluequad.css
+@@ -1,4 +1,4 @@
+-/* Copyright (C) 2004-2017 Mike Pall.
++/* Copyright (C) 2004-2021 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+diff --git a/doc/changes.html b/doc/changes.html
+deleted file mode 100644
+index a66a8d95..00000000
+--- a/doc/changes.html
++++ /dev/null
+@@ -1,883 +0,0 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
+-<html>
+-<head>
+-<title>LuaJIT Change History</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
+-<meta name="Language" content="en">
+-<link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+-<link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+-<style type="text/css">
+-div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
+-</style>
+-</head>
+-<body>
+-<div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+-</div>
+-<div id="head">
+-<h1>LuaJIT Change History</h1>
+-</div>
+-<div id="nav">
+-<ul><li>
+-<a href="luajit.html">LuaJIT</a>
+-<ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
+-</li><li>
+-<a href="install.html">Installation</a>
+-</li><li>
+-<a href="running.html">Running</a>
+-</li></ul>
+-</li><li>
+-<a href="extensions.html">Extensions</a>
+-<ul><li>
+-<a href="ext_ffi.html">FFI Library</a>
+-<ul><li>
+-<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+-</li><li>
+-<a href="ext_ffi_api.html">ffi.* API</a>
+-</li><li>
+-<a href="ext_ffi_semantics.html">FFI Semantics</a>
+-</li></ul>
+-</li><li>
+-<a href="ext_jit.html">jit.* Library</a>
+-</li><li>
+-<a href="ext_c_api.html">Lua/C API</a>
+-</li><li>
+-<a href="ext_profiler.html">Profiler</a>
+-</li></ul>
+-</li><li>
+-<a href="status.html">Status</a>
+-<ul><li>
+-<a class="current" href="changes.html">Changes</a>
+-</li></ul>
+-</li><li>
+-<a href="faq.html">FAQ</a>
+-</li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+-<a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+-</li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+-</li></ul>
+-</div>
+-<div id="main">
+-<p>
+-This is a list of changes between the released versions of LuaJIT.<br>
+-The current <span style="color: #0000c0;">stable version</span> is
<strong>LuaJIT 2.0.5</strong>.<br>
+-</p>
+-<p>
+-Please check the
+-<a
href="http://luajit.org/changes.html"><span
class="ext">»</span> Online Change
History</a>
+-to see whether newer versions are available.
+-</p>
+-
+-<div class="major" style="background: #d0d0ff;">
+-<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 —
2017-05-01</h2>
+-<ul>
+-<li>Rewrite memory block allocator.</li>
+-<li>Add various extension from Lua 5.2/5.3.</li>
+-<li>Remove old Lua 5.0 compatibility defines.</li>
+-<li>Set arg table before evaluating <tt>LUA_INIT</tt> and
<tt>-e</tt> chunks.</li>
+-<li>Fix FOLD rules for <tt>math.abs()</tt> and FP
negation.</li>
+-<li>Fix soft-float <tt>math.abs()</tt> and negation.</li>
+-<li>Fix formatting of some small denormals at low precision.</li>
+-<li>LJ_GC64: Add JIT compiler support.</li>
+-<li>x64/LJ_GC64: Add JIT compiler backend.</li>
+-<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li>
+-<li>Windows/x86: Add full exception interoperability.</li>
+-<li>ARM64: Add big-endian support.</li>
+-<li>ARM64: Add JIT compiler backend.</li>
+-<li>MIPS: Fix <tt>TSETR</tt> barrier.</li>
+-<li>MIPS: Support MIPS16 interlinking.</li>
+-<li>MIPS soft-float: Fix code generation for
<tt>HREF</tt>.</li>
+-<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li>
+-<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li>
+-<li>FFI: Compile bitfield loads/stores.</li>
+-<li>Various fixes common with the 2.0 branch.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 —
2016-03-03</h2>
+-<ul>
+-<li>Enable trace stitching.</li>
+-<li>Use internal implementation for converting FP numbers to strings.</li>
+-<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string
literals.</li>
+-<li>Add MIPS soft-float support.</li>
+-<li>Switch MIPS port to dual-number mode.</li>
+-<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
+-<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
+-<li>FFI: Parse <tt>#line NN</tt> and
<tt>#NN</tt>.</li>
+-<li>Various minor fixes.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 —
2015-08-25</h2>
+-<p>
+-This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
+-Please take a look at the commit history for more details.
+-</p>
+-<ul>
+-<li>Changes to the VM core:
+-<ul>
+-<li>Add low-overhead profiler (<tt>-jp</tt>).</li>
+-<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47
bit). Interpreter-only for now.</li>
+-<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by
<tt>LJ_GC64</tt> mode.</li>
+-<li>Add <tt>table.new()</tt> and
<tt>table.clear()</tt>.</li>
+-<li>Parse binary number literals (<tt>0bxxx</tt>).</li>
+-</ul></li>
+-<li>Improvements to the JIT compiler:
+-<ul>
+-<li>Add trace stitching (disabled for now).</li>
+-<li>Compile various builtins: <tt>string.char()</tt>,
<tt>string.reverse()</tt>, <tt>string.lower()</tt>,
<tt>string.upper()</tt>, <tt>string.rep()</tt>,
<tt>string.format()</tt>, <tt>table.concat()</tt>,
<tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>,
<tt>debug.getmetatable()</tt>.</li>
+-<li>Compile <tt>string.find()</tt> for fixed string searches (no
patterns).</li>
+-<li>Compile <tt>BC_TSETM</tt>, e.g.
<tt>{1,2,3,f()}</tt>.</li>
+-<li>Compile string concatenations (<tt>BC_CAT</tt>).</li>
+-<li>Compile <tt>__concat</tt> metamethod.</li>
+-<li>Various minor optimizations.</li>
+-</ul></li>
+-<li>Internal Changes:
+-<ul>
+-<li>Add support for embedding LuaJIT bytecode for builtins.</li>
+-<li>Replace various builtins with embedded bytecode.</li>
+-<li>Refactor string buffers and string formatting.</li>
+-<li>Remove obsolete non-truncating number to integer conversions.</li>
+-</ul></li>
+-<li>Ports:
+-<ul>
+-<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li>
+-<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt>
mode).</li>
+-<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt>
mode.</li>
+-<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
+-<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
+-<li>PPC/e500: Drop support for this architecture.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>FFI: Add 64 bit bitwise operations.</li>
+-<li>FFI: Compile VLA/VLS and large cdata allocations with default
initialization.</li>
+-<li>FFI: Compile conversions from functions to function pointers.</li>
+-<li>FFI: Compile lightuserdata to <tt>void *</tt>
conversion.</li>
+-<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
+-<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
+-</ul></li>
+-</ul>
+-</div>
+-
+-<div class="major" style="background: #ffffd0;">
+-<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 — 2017-05-01</h2>
+-<ul>
+-<li>Add workaround for MSVC 2015 stdio changes.</li>
+-<li>Limit mcode alloc probing, depending on the available pool size.</li>
+-<li>Fix overly restrictive range calculation in mcode allocation.</li>
+-<li>Fix out-of-scope goto handling in parser.</li>
+-<li>Remove internal <tt>__mode = "K"</tt> and replace with
safe check.</li>
+-<li>Add "proto" field to
<tt>jit.util.funcinfo()</tt>.</li>
+-<li>Fix GC step size calculation.</li>
+-<li>Initialize <tt>uv->immutable</tt> for upvalues of loaded
chunks.</li>
+-<li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li>
+-<li>Drop leftover regs in 'for' iterator assignment, too.</li>
+-<li>Fix PHI remarking in SINK pass.</li>
+-<li>Don't try to record outermost <tt>pcall()</tt> return to lower
frame.</li>
+-<li>Add guard for obscure aliasing between open upvalues and SSA
slots.</li>
+-<li>Remove assumption that <tt>lj_math_random_step()</tt> doesn't
clobber FPRs.</li>
+-<li>Fix handling of non-numeric strings in arithmetic coercions.</li>
+-<li>Fix recording of <tt>select(n, ...)</tt> with off-trace
varargs</li>
+-<li>Fix install for cross-builds.</li>
+-<li>Don't allocate unused 2nd result register in JIT compiler
backend.</li>
+-<li>Drop marks from replayed instructions when sinking.</li>
+-<li>Fix unsinking check.</li>
+-<li>Properly handle OOM in <tt>trace_save()</tt>.</li>
+-<li>Limit number of arguments given to <tt>io.lines()</tt> and
<tt>fp:lines()</tt>.</li>
+-<li>Fix narrowing of <tt>TOBIT</tt>.</li>
+-<li>OSX: Fix build with recent XCode.</li>
+-<li>x86/x64: Don't spill an explicit <tt>REF_BASE</tt> in the
IR.</li>
+-<li>x86/x64: Fix instruction length decoder.</li>
+-<li>x86/x64: Search for exit jumps with instruction length decoder.</li>
+-<li>ARM: Fix <tt>BLX</tt> encoding for Thumb interworking
calls.</li>
+-<li>MIPS: Don't use <tt>RID_GP</tt> as a scratch
register.</li>
+-<li>MIPS: Fix emitted code for U32 to float conversion.</li>
+-<li>MIPS: Backport workaround for compact unwind tables.</li>
+-<li>MIPS: Fix cross-endian jit.bcsave.</li>
+-<li>MIPS: Fix <tt>BC_ISNEXT</tt> fallback path.</li>
+-<li>MIPS: Fix use of ffgccheck delay slots in interpreter.</li>
+-<li>FFI: Fix FOLD rules for <tt>int64_t</tt> comparisons.</li>
+-<li>FFI: Fix SPLIT pass for <tt>CONV i64.u64</tt>.</li>
+-<li>FFI: Fix <tt>ipairs()</tt> recording.</li>
+-<li>FFI: Don't propagate qualifiers into subtypes of complex.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 — 2015-05-14</h2>
+-<ul>
+-<li>Fix stack check in narrowing optimization.</li>
+-<li>Fix Lua/C API typecheck error for special indexes.</li>
+-<li>Fix string to number conversion.</li>
+-<li>Fix lexer error for chunks without tokens.</li>
+-<li>Don't compile <tt>IR_RETF</tt> after
<tt>CALLT</tt> to ff with-side effects.</li>
+-<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization
in Lua parser.</li>
+-<li>Fix corner case in string to number conversion.</li>
+-<li>Gracefully handle <tt>lua_error()</tt> for a suspended
coroutine.</li>
+-<li>Avoid error messages when building with Clang.</li>
+-<li>Fix snapshot #0 handling for traces with a stack check on entry.</li>
+-<li>Fix fused constant loads under high register pressure.</li>
+-<li>Invalidate backpropagation cache after DCE.</li>
+-<li>Fix ABC elimination.</li>
+-<li>Fix debug info for main chunk of stripped bytecode.</li>
+-<li>Fix FOLD rule for <tt>string.sub(s, ...) == k</tt>.</li>
+-<li>Fix FOLD rule for <tt>STRREF</tt> of
<tt>SNEW</tt>.</li>
+-<li>Fix frame traversal while searching for error function.</li>
+-<li>Prevent GC estimate miscalculation due to buffer growth.</li>
+-<li>Prevent adding side traces for stack checks.</li>
+-<li>Fix top slot calculation for snapshots with continuations.</li>
+-<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li>
+-<li>Add PS Vita port.</li>
+-<li>Fix compatibility issues with Illumos.</li>
+-<li>Fix DragonFly build (unsupported).</li>
+-<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li>
+-<li>x86: Fix argument checks for <tt>ipairs()</tt>
iterator.</li>
+-<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX
Clang.</li>
+-<li>x86: Fix code generation for unused result of
<tt>math.random()</tt>.</li>
+-<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and
<tt>LUAJIT_USE_VALGRIND</tt>.</li>
+-<li>x86/x64: Fix argument check for bit shifts.</li>
+-<li>x86/x64: Fix code generation for fused test/arith ops.</li>
+-<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li>
+-<li>PPC: Fix red zone overflow in machine code generation.</li>
+-<li>PPC: Don't use <tt>mcrxr</tt> on PPE.</li>
+-<li>Various archs: Fix excess stack growth in interpreter.</li>
+-<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV
num.u32</tt>.</li>
+-<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li>
+-<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li>
+-<li>FFI: Fix initialization of unions of subtypes.</li>
+-<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li>
+-<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt>
metamethod resolution for ctypes.</li>
+-<li>FFI: Fix compilation of reference field access.</li>
+-<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li>
+-<li>FFI: Fix recording of indexing a struct pointer ctype object
itself.</li>
+-<li>FFI: Allow non-scalar cdata to be compared for equality by
address.</li>
+-<li>FFI: Fix pseudo type conversions for type punning.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 — 2014-03-12</h2>
+-<ul>
+-<li>Add PS4 port.</li>
+-<li>Add support for multilib distro builds.</li>
+-<li>Fix OSX build.</li>
+-<li>Fix MinGW build.</li>
+-<li>Fix Xbox 360 build.</li>
+-<li>Improve ULOAD forwarding for open upvalues.</li>
+-<li>Fix GC steps threshold handling when called by JIT-compiled code.</li>
+-<li>Fix argument checks for <tt>math.deg()</tt> and
<tt>math.rad()</tt>.</li>
+-<li>Fix <tt>jit.flush(func|true)</tt>.</li>
+-<li>Respect <tt>jit.off(func)</tt> when returning to a function,
too.</li>
+-<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li>
+-<li>Fix line number for relocated bytecode after closure fixup</li>
+-<li>Fix frame traversal for backtraces.</li>
+-<li>Fix ABC elimination.</li>
+-<li>Fix handling of redundant PHIs.</li>
+-<li>Fix snapshot restore for exit to function header.</li>
+-<li>Fix type punning alias analysis for constified pointers</li>
+-<li>Fix call unroll checks in the presence of metamethod frames.</li>
+-<li>Fix initial maxslot for down-recursive traces.</li>
+-<li>Prevent BASE register coalescing if parent uses
<tt>IR_RETF</tt>.</li>
+-<li>Don't purge modified function from stack slots in
<tt>BC_RET</tt>.</li>
+-<li>Fix recording of <tt>BC_VARG</tt>.</li>
+-<li>Don't access dangling reference to reallocated IR.</li>
+-<li>Fix frame depth display for bytecode dump in
<tt>-jdump</tt>.</li>
+-<li>ARM: Fix register allocation when rematerializing FPRs.</li>
+-<li>x64: Fix store to upvalue for lightuserdata values.</li>
+-<li>FFI: Add missing GC steps for callback argument conversions.</li>
+-<li>FFI: Properly unload loaded DLLs.</li>
+-<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li>
+-<li>FFI/x64: Fix passing of vector arguments to calls.</li>
+-<li>FFI: Rehash finalizer table after GC cycle, if needed.</li>
+-<li>FFI: Fix <tt>cts->L</tt> for cdata unsinking in snapshot
restore.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 — 2013-06-03</h2>
+-<ul>
+-<li>Fix memory access check for fast string interning.</li>
+-<li>Fix MSVC intrinsics for older versions.</li>
+-<li>Add missing GC steps for <tt>io.*</tt> functions.</li>
+-<li>Fix spurious red zone overflows in machine code generation.</li>
+-<li>Fix jump-range constrained mcode allocation.</li>
+-<li>Inhibit DSE for implicit loads via calls.</li>
+-<li>Fix builtin string to number conversion for overflow digits.</li>
+-<li>Fix optional argument handling while recording builtins.</li>
+-<li>Fix optional argument handling in
<tt>table.concat()</tt>.</li>
+-<li>Add partial support for building with MingW64 GCC 4.8-SEH.</li>
+-<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt>
FOLD rule.</li>
+-<li>Fix compatibility issues with Illumos.</li>
+-<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li>
+-<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li>
+-<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler
code.</li>
+-<li>FFI: Fix snapshot substitution in SPLIT pass.</li>
+-<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li>
+-<li>FFI: Fix tailcall in lowest frame to C function with bool
result.</li>
+-<li>FFI: Ignore <tt>long</tt> type specifier in
<tt>ffi.istype()</tt>.</li>
+-<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct
returns).</li>
+-<li>FFI: Fix calling conventions for ARM hard-float EABI (nested
structs).</li>
+-<li>FFI: Improve error messages for arithmetic and comparison
operators.</li>
+-<li>FFI: Insert no-op type conversion for pointer to integer cast.</li>
+-<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li>
+-<li>FFI: Must sink <tt>XBAR</tt> together with
<tt>XSTORE</tt>s.</li>
+-<li>FFI: Preserve intermediate string for
<tt>const char *</tt> conversion.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 — 2013-02-19</h2>
+-<ul>
+-<li>Don't clear frame for out-of-memory error.</li>
+-<li>Leave hook when resume catches error thrown from hook.</li>
+-<li>Add missing GC steps for template table creation.</li>
+-<li>Fix discharge order of comparisons in Lua parser.</li>
+-<li>Improve buffer handling for <tt>io.read()</tt>.</li>
+-<li>OSX: Add support for Mach-O object files to <tt>-b</tt>
option.</li>
+-<li>Fix PS3 port.</li>
+-<li>Fix/enable Xbox 360 port.</li>
+-<li>x86/x64: Always mark ref for shift count as non-weak.</li>
+-<li>x64: Don't fuse implicitly 32-to-64 extended operands.</li>
+-<li>ARM: Fix armhf call argument handling.</li>
+-<li>ARM: Fix code generation for integer math.min/math.max.</li>
+-<li>PPC/e500: Fix <tt>lj_vm_floor()</tt> for Inf/NaN.</li>
+-<li>FFI: Change priority of table initializer variants for structs.</li>
+-<li>FFI: Fix code generation for bool call result check on x86/x64.</li>
+-<li>FFI: Load FFI library on-demand for bytecode with cdata literals.</li>
+-<li>FFI: Fix handling of qualified transparent structs/unions.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0">LuaJIT 2.0.0 — 2012-11-08</h2>
+-<ul>
+-<li>Correctness and completeness:
+-<ul>
+- <li>Fix Android/x86 build.</li>
+- <li>Fix recording of equality comparisons with <tt>__eq</tt>
metamethods.</li>
+- <li>Fix detection of immutable upvalues.</li>
+- <li>Replace error with PANIC for callbacks from JIT-compiled code.</li>
+- <li>Fix builtin string to number conversion for
<tt>INT_MIN</tt>.</li>
+- <li>Don't create unneeded array part for template tables.</li>
+- <li>Fix <tt>CONV.num.int</tt> sinking.</li>
+- <li>Don't propagate implicitly widened number to index
metamethods.</li>
+- <li>ARM: Fix ordered comparisons of number vs. non-number.</li>
+- <li>FFI: Fix code generation for replay of sunk float fields.</li>
+- <li>FFI: Fix signedness of bool.</li>
+- <li>FFI: Fix recording of bool call result check on x86/x64.</li>
+- <li>FFI: Fix stack-adjustment for <tt>__thiscall</tt>
callbacks.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta11">LuaJIT 2.0.0-beta11 —
2012-10-16</h2>
+-<ul>
+-<li>New features:
+-<ul>
+- <li>Use ARM VFP instructions, if available (build-time detection).</li>
+- <li>Add support for ARM hard-float EABI
(<tt>armhf</tt>).</li>
+- <li>Add PS3 port.</li>
+- <li>Add many features from Lua 5.2, e.g.
<tt>goto</tt>/labels.
+- Refer to <a href="extensions.html#lua52">this
list</a>.</li>
+- <li>FFI: Add parameterized C types.</li>
+- <li>FFI: Add support for copy constructors.</li>
+- <li>FFI: Equality comparisons never raise an error (treat as unequal
instead).</li>
+- <li>FFI: Box all accessed or returned enums.</li>
+- <li>FFI: Check for <tt>__new</tt> metamethod when calling a
constructor.</li>
+- <li>FFI: Handle <tt>__pairs</tt>/<tt>__ipairs</tt>
metamethods for cdata objects.</li>
+- <li>FFI: Convert <tt>io.*</tt> file handle to <tt>FILE
*</tt> pointer (but as a <tt>void *</tt>).</li>
+- <li>FFI: Detect and support type punning through unions.</li>
+- <li>FFI: Improve various error messages.</li>
+-</ul></li>
+-<li>Build-system reorganization:
+-<ul>
+- <li>Reorganize directory layout:<br>
+- <tt>lib/*</tt> → <tt>src/jit/*</tt><br>
+- <tt>src/buildvm_*.dasc</tt> →
<tt>src/vm_*.dasc</tt><br>
+- <tt>src/buildvm_*.h</tt> → removed<br>
+- <tt>src/buildvm*</tt> →
<tt>src/host/*</tt></li>
+- <li>Add minified Lua interpreter plus Lua BitOp (<tt>minilua</tt>)
to run DynASM.</li>
+- <li>Change DynASM bit operations to use Lua BitOp</li>
+- <li>Translate only <tt>vm_*.dasc</tt> for detected target
architecture.</li>
+- <li>Improve target detection for <tt>msvcbuild.bat</tt>.</li>
+- <li>Fix build issues on Cygwin and MinGW with optional MSys.</li>
+- <li>Handle cross-compiles with FPU/no-FPU or hard-fp/soft-fp ABI
mismatch.</li>
+- <li>Remove some library functions for no-JIT/no-FFI builds.</li>
+- <li>Add uninstall target to top-level Makefile.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+- <li>Preserve snapshot #0 PC for all traces.</li>
+- <li>Fix argument checks for <tt>coroutine.create()</tt>.</li>
+- <li>Command line prints version and JIT status to <tt>stdout</tt>,
not <tt>stderr</tt>.</li>
+- <li>Fix userdata <tt>__gc</tt> separations at Lua state
close.</li>
+- <li>Fix <tt>TDUP</tt> to <tt>HLOAD</tt> forwarding for
<tt>LJ_DUALNUM</tt> builds.</li>
+- <li>Fix buffer check in bytecode writer.</li>
+- <li>Make <tt>os.date()</tt> thread-safe.</li>
+- <li>Add missing declarations for MSVC intrinsics.</li>
+- <li>Fix dispatch table modifications for return hooks.</li>
+- <li>Workaround for MSVC conversion bug (<tt>double</tt> →
<tt>uint32_t</tt> → <tt>int32_t</tt>).</li>
+- <li>Fix FOLD rule <tt>(i-j)-i => 0-j</tt>.</li>
+- <li>Never use DWARF unwinder on Windows.</li>
+- <li>Fix shrinking of direct mapped blocks in builtin allocator.</li>
+- <li>Limit recursion depth in <tt>string.match()</tt> et
al.</li>
+- <li>Fix late despecialization of <tt>ITERN</tt> after loop has been
entered.</li>
+- <li>Fix <tt>'f'</tt> and <tt>'L'</tt>
options for <tt>debug.getinfo()</tt> and
<tt>lua_getinfo()</tt>.</li>
+- <li>Fix <tt>package.searchpath()</tt>.</li>
+- <li>OSX: Change dylib names to be consistent with other platforms.</li>
+- <li>Android: Workaround for broken
<tt>sprintf("%g", -0.0)</tt>.</li>
+- <li>x86: Remove support for ancient CPUs without <tt>CMOV</tt>
(before Pentium Pro).</li>
+- <li>x86: Fix register allocation for calls returning register pair.</li>
+- <li>x86/x64: Fix fusion of unsigned byte comparisons with swapped
operands.</li>
+- <li>ARM: Fix <tt>tonumber()</tt> argument check.</li>
+- <li>ARM: Fix modulo operator and
<tt>math.floor()</tt>/<tt>math.ceil()</tt> for
<tt>inf</tt>/<tt>nan</tt>.</li>
+- <li>ARM: Invoke SPLIT pass for leftover
<tt>IR_TOBIT</tt>.</li>
+- <li>ARM: Fix BASE register coalescing.</li>
+- <li>PPC: Fix interpreter state setup in callbacks.</li>
+- <li>PPC: Fix <tt>string.sub()</tt> range check.</li>
+- <li>MIPS: Support generation of MIPS/MIPSEL bytecode object files.</li>
+- <li>MIPS: Fix calls to
<tt>floor()</tt>/<tt>ceil()</tt><tt>/trunc()</tt>.</li>
+- <li>ARM/PPC: Detect more target architecture variants.</li>
+- <li>ARM/PPC/e500/MIPS: Fix tailcalls from fast functions, esp.
<tt>tostring()</tt>.</li>
+- <li>ARM/PPC/MIPS: Fix rematerialization of FP constants.</li>
+- <li>FFI: Don't call <tt>FreeLibrary()</tt> on our own
EXE/DLL.</li>
+- <li>FFI: Resolve metamethods for constructors, too.</li>
+- <li>FFI: Properly disable callbacks on iOS (would require executable
memory).</li>
+- <li>FFI: Fix cdecl string parsing during recording.</li>
+- <li>FFI: Show address pointed to for <tt>tostring(ref)</tt>,
too.</li>
+- <li>FFI: Fix alignment of C call argument/return structure.</li>
+- <li>FFI: Initialize all fields of standard types.</li>
+- <li>FFI: Fix callback handling when new C types are declared in
callback.</li>
+- <li>FFI: Fix recording of constructors for pointers.</li>
+- <li>FFI: Always resolve metamethods for pointers to structs.</li>
+- <li>FFI: Correctly propagate alignment when interning nested types.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+- <li>Add allocation sinking and store sinking optimization.</li>
+- <li>Constify immutable upvalues.</li>
+- <li>Add builtin string to integer or FP number conversion. Improves
cross-platform consistency and correctness.</li>
+- <li>Create string hash slots in template tables for non-const values, too.
Avoids later table resizes.</li>
+- <li>Eliminate <tt>HREFK</tt> guard for template table
references.</li>
+- <li>Add various new FOLD rules.</li>
+- <li>Don't use stack unwinding for <tt>lua_yield()</tt> (slow on
x64).</li>
+- <li>ARM, PPC, MIPS: Improve <tt>XLOAD</tt> operand fusion and
register hinting.</li>
+- <li>PPC, MIPS: Compile <tt>math.sqrt()</tt> to sqrt instruction, if
available.</li>
+- <li>FFI: Fold <tt>KPTR</tt> + constant offset in SPLIT
pass.</li>
+- <li>FFI: Optimize/inline <tt>ffi.copy()</tt> and
<tt>ffi.fill()</tt>.</li>
+- <li>FFI: Compile and optimize array/struct copies.</li>
+- <li>FFI: Compile <tt>ffi.typeof(cdata|ctype)</tt>,
<tt>ffi.sizeof()</tt>, <tt>ffi.alignof()</tt>,
<tt>ffi.offsetof()</tt> and <tt>ffi.gc()</tt>.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta10">LuaJIT 2.0.0-beta10 —
2012-05-09</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>The MIPS of LuaJIT is complete. It requires a CPU conforming to the
+-MIPS32 R1 architecture with hardware FPU. O32 hard-fp ABI,
+-little-endian or big-endian.</li>
+-<li>Auto-detect target arch via cross-compiler. No need for
+-<tt>TARGET=arch</tt> anymore.</li>
+-<li>Make DynASM compatible with Lua 5.2.</li>
+-<li>From Lua 5.2: Try <tt>__tostring</tt> metamethod on non-string
error
+-messages..</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix parsing of hex literals with exponents.</li>
+-<li>Fix bytecode dump for certain number constants.</li>
+-<li>Fix argument type in error message for relative arguments.</li>
+-<li>Fix argument error handling on Lua stacks without a frame.</li>
+-<li>Add missing mcode limit check in assembler backend.</li>
+-<li>Fix compilation on OpenBSD.</li>
+-<li>Avoid recursive GC steps after GC-triggered trace exit.</li>
+-<li>Replace <tt><unwind.h></tt> definitions with our
own.</li>
+-<li>Fix OSX build issues. Bump minimum required OSX version to 10.4.</li>
+-<li>Fix discharge order of comparisons in Lua parser.</li>
+-<li>Ensure running <tt>__gc</tt> of userdata created in
<tt>__gc</tt>
+-at state close.</li>
+-<li>Limit number of userdata <tt>__gc</tt> separations at state
close.</li>
+-<li>Fix bytecode <tt>JMP</tt> slot range when optimizing
+-<tt>and</tt>/<tt>or</tt> with constant LHS.</li>
+-<li>Fix DSE of <tt>USTORE</tt>.</li>
+-<li>Make <tt>lua_concat()</tt> work from C hook with partial
frame.</li>
+-<li>Add required PHIs for implicit conversions, e.g. via
<tt>XREF</tt>
+-forwarding.</li>
+-<li>Add more comparison variants to Valgrind suppressions file.</li>
+-<li>Disable loading bytecode with an extra header (BOM or
<tt>#!</tt>).</li>
+-<li>Fix PHI stack slot syncing.</li>
+-<li>ARM: Reorder type/value tests to silence Valgrind.</li>
+-<li>ARM: Fix register allocation for <tt>ldrd</tt>-optimized
+-<tt>HREFK</tt>.</li>
+-<li>ARM: Fix conditional branch fixup for <tt>OBAR</tt>.</li>
+-<li>ARM: Invoke SPLIT pass for <tt>double</tt> args in FFI
call.</li>
+-<li>ARM: Handle all <tt>CALL*</tt> ops with
<tt>double</tt> results in
+-SPLIT pass.</li>
+-<li>ARM: Fix rejoin of <tt>POW</tt> in SPLIT pass.</li>
+-<li>ARM: Fix compilation of <tt>math.sinh</tt>,
<tt>math.cosh</tt>,
+-<tt>math.tanh</tt>.</li>
+-<li>ARM, PPC: Avoid pointless arg clearing in
<tt>BC_IFUNCF</tt>.</li>
+-<li>PPC: Fix resume after yield from hook.</li>
+-<li>PPC: Fix argument checking for <tt>rawget()</tt>.</li>
+-<li>PPC: Fix fusion of floating-point
<tt>XLOAD</tt>/<tt>XSTORE</tt>.</li>
+-<li>PPC: Fix <tt>HREFK</tt> code generation for huge
tables.</li>
+-<li>PPC: Use builtin D-Cache/I-Cache sync code.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>Ignore empty statements in <tt>ffi.cdef()</tt>.</li>
+-<li>Ignore number parsing errors while skipping definitions.</li>
+-<li>Don't touch frame in callbacks with tailcalls to fast
functions.</li>
+-<li>Fix library unloading on POSIX systems.</li>
+-<li>Finalize cdata before userdata when closing the state.</li>
+-<li>Change <tt>ffi.load()</tt> library name resolution for
Cygwin.</li>
+-<li>Fix resolving of function name redirects on Windows/x86.</li>
+-<li>Fix symbol resolving error messages on Windows.</li>
+-<li>Fix blacklisting of C functions calling callbacks.</li>
+-<li>Fix result type of pointer difference.</li>
+-<li>Use correct PC in FFI metamethod error message.</li>
+-<li>Allow <tt>'typedef _Bool int BOOL;'</tt> for the Windows
API.</li>
+-<li>Don't record test for bool result of call, if ignored.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta9">LuaJIT 2.0.0-beta9 —
2011-12-14</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>PPC port of LuaJIT is complete. Default is the dual-number port
+-(usually faster). Single-number port selectable via <tt>src/Makefile</tt>
+-at build time.</li>
+-<li>Add FFI callback support.</li>
+-<li>Extend <tt>-b</tt> to generate <tt>.c</tt>,
<tt>.h</tt> or <tt>.obj/.o</tt>
+-files with embedded bytecode.</li>
+-<li>Allow loading embedded bytecode with
<tt>require()</tt>.</li>
+-<li>From Lua 5.2: Change to <tt>'\z'</tt> escape. Reject
undefined escape
+-sequences.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix OSX 10.7 build. Fix <tt>install_name</tt> and versioning on
OSX.</li>
+-<li>Fix iOS build.</li>
+-<li>Install <tt>dis_arm.lua</tt>, too.</li>
+-<li>Mark installed shared library as executable.</li>
+-<li>Add debug option to <tt>msvcbuild.bat</tt> and improve error
handling.</li>
+-<li>Fix data-flow analysis for iterators.</li>
+-<li>Fix forced unwinding triggered by external unwinder.</li>
+-<li>Record missing <tt>for</tt> loop slot loads (return to lower
frame).</li>
+-<li>Always use ANSI variants of Windows system functions.</li>
+-<li>Fix GC barrier for multi-result table constructor
(<tt>TSETM</tt>).</li>
+-<li>Fix/add various FOLD rules.</li>
+-<li>Add potential PHI for number conversions due to type instability.</li>
+-<li>Do not eliminate PHIs only referenced from other PHIs.</li>
+-<li>Correctly anchor implicit number to string conversions in Lua/C
API.</li>
+-<li>Fix various stack limit checks.</li>
+-<li>x64: Use thread-safe exceptions for external unwinding (GCC
platforms).</li>
+-<li>x64: Fix result type of cdata index conversions.</li>
+-<li>x64: Fix <tt>math.random()</tt> and
<tt>bit.bswap()</tt> code generation.</li>
+-<li>x64: Fix <tt>lightuserdata</tt> comparisons.</li>
+-<li>x64: Always extend stack-passed arguments to pointer size.</li>
+-<li>ARM: Many fixes to code generation backend.</li>
+-<li>PPC/e500: Fix dispatch for binop metamethods.</li>
+-<li>PPC/e500: Save/restore condition registers when entering/leaving the
VM.</li>
+-<li>PPC/e500: Fix write barrier in stores of strings to upvalues.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>Fix C comment parsing.</li>
+-<li>Fix snapshot optimization for cdata comparisons.</li>
+-<li>Fix recording of const/enum lookups in namespaces.</li>
+-<li>Fix call argument and return handling for <tt>I8/U8/I16/U16</tt>
types.</li>
+-<li>Fix unfused loads of float fields.</li>
+-<li>Fix <tt>ffi.string()</tt> recording.</li>
+-<li>Save <tt>GetLastError()</tt> around
<tt>ffi.load()</tt> and symbol
+-resolving, too.</li>
+-<li>Improve ld script detection in <tt>ffi.load()</tt>.</li>
+-<li>Record loads/stores to external variables in namespaces.</li>
+-<li>Compile calls to stdcall, fastcall and vararg functions.</li>
+-<li>Treat function ctypes like pointers in comparisons.</li>
+-<li>Resolve <tt>__call</tt> metamethod for pointers, too.</li>
+-<li>Record C function calls with bool return values.</li>
+-<li>Record <tt>ffi.errno()</tt>.</li>
+-<li>x86: Fix number to <tt>uint32_t</tt> conversion
rounding.</li>
+-<li>x86: Fix 64 bit arithmetic in assembler backend.</li>
+-<li>x64: Fix struct-by-value calling conventions.</li>
+-<li>ARM: Ensure invocation of SPLIT pass for float conversions.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Display trace types with <tt>-jv</tt> and
<tt>-jdump</tt>.</li>
+-<li>Record isolated calls. But prefer recording loops over calls.</li>
+-<li>Specialize to prototype for non-monomorphic functions. Solves the
+-trace-explosion problem for closure-heavy programming styles.</li>
+-<li>Always generate a portable <tt>vmdef.lua</tt>. Easier for
distros.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta8">LuaJIT 2.0.0-beta8 —
2011-06-23</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>Soft-float ARM port of LuaJIT is complete.</li>
+-<li>Add support for bytecode loading/saving and <tt>-b</tt> command
line
+-option.</li>
+-<li>From Lua 5.2: <tt>__len</tt> metamethod for tables
+-(disabled by default).</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>ARM: Misc. fixes for interpreter.</li>
+-<li>x86/x64: Fix <tt>bit.*</tt> argument checking in
interpreter.</li>
+-<li>Catch early out-of-memory in memory allocator initialization.</li>
+-<li>Fix data-flow analysis for paths leading to an upvalue close.</li>
+-<li>Fix check for missing arguments in
<tt>string.format()</tt>.</li>
+-<li>Fix Solaris/x86 build (note: not a supported target).</li>
+-<li>Fix recording of loops with instable directions in side traces.</li>
+-<li>x86/x64: Fix fusion of comparisons with
<tt>u8</tt>/<tt>u16</tt>
+-<tt>XLOAD</tt>.</li>
+-<li>x86/x64: Fix register allocation for variable shifts.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>Add <tt>ffi.errno()</tt>. Save
<tt>errno</tt>/<tt>GetLastError()</tt>
+-around allocations etc.</li>
+-<li>Fix <tt>__gc</tt> for VLA/VLS cdata objects.</li>
+-<li>Fix recording of casts from 32 bit cdata pointers to integers.</li>
+-<li><tt>tonumber(cdata)</tt> returns <tt>nil</tt> for
non-numbers.</li>
+-<li>Show address pointed to for
<tt>tostring(pointer)</tt>.</li>
+-<li>Print <tt>NULL</tt> pointers as <tt>"cdata<...
*>: NULL"</tt>.</li>
+-<li>Support <tt>__tostring</tt> metamethod for pointers to structs,
too.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>More tuning for loop unrolling heuristics.</li>
+-<li>Flatten and compress in-memory debug info (saves ~70%).</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta7">LuaJIT 2.0.0-beta7 —
2011-05-05</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>ARM port of the LuaJIT interpreter is complete.</li>
+-<li>FFI library: Add <tt>ffi.gc()</tt>,
<tt>ffi.metatype()</tt>,
+-<tt>ffi.istype()</tt>.</li>
+-<li>FFI library: Resolve ld script redirection in
<tt>ffi.load()</tt>.</li>
+-<li>From Lua 5.2: <tt>package.searchpath()</tt>,
<tt>fp:read("*L")</tt>,
+-<tt>load(string)</tt>.</li>
+-<li>From Lua 5.2, disabled by default: empty statement,
+-<tt>table.unpack()</tt>, modified
<tt>coroutine.running()</tt>.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>FFI library: numerous fixes.</li>
+-<li>Fix type mismatches in store-to-load forwarding.</li>
+-<li>Fix error handling within metamethods.</li>
+-<li>Fix <tt>table.maxn()</tt>.</li>
+-<li>Improve accuracy of <tt>x^-k</tt> on x64.</li>
+-<li>Fix code generation for Intel Atom in x64 mode.</li>
+-<li>Fix narrowing of POW.</li>
+-<li>Fix recording of retried fast functions.</li>
+-<li>Fix code generation for <tt>bit.bnot()</tt> and
multiplies.</li>
+-<li>Fix error location within cpcall frames.</li>
+-<li>Add workaround for old libgcc unwind bug.</li>
+-<li>Fix <tt>lua_yield()</tt> and
<tt>getmetatable(lightuserdata)</tt> on x64.</li>
+-<li>Misc. fixes for PPC/e500 interpreter.</li>
+-<li>Fix stack slot updates for down-recursion.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Add dual-number mode (int/double) for the VM. Enabled for ARM.</li>
+-<li>Improve narrowing of arithmetic operators and <tt>for</tt>
loops.</li>
+-<li>Tune loop unrolling heuristics and increase trace recorder limits.</li>
+-<li>Eliminate dead slots in snapshots using bytecode data-flow
analysis.</li>
+-<li>Avoid phantom stores to proxy tables.</li>
+-<li>Optimize lookups in empty proxy tables.</li>
+-<li>Improve bytecode optimization of
<tt>and</tt>/<tt>or</tt> operators.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta6">LuaJIT 2.0.0-beta6 —
2011-02-11</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>PowerPC/e500v2 port of the LuaJIT interpreter is complete.</li>
+-<li>Various minor features from Lua 5.2: Hex escapes in literals,
+-<tt>'\*'</tt> escape, reversible
<tt>string.format("%q",s)</tt>,
+-<tt>"%g"</tt> pattern, <tt>table.sort</tt> checks
callbacks,
+-<tt>os.exit(status|true|false[,close])</tt>.</li>
+-<li>Lua 5.2 <tt>__pairs</tt> and <tt>__ipairs</tt>
metamethods
+-(disabled by default).</li>
+-<li>Initial release of the FFI library.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix <tt>string.format()</tt> for non-finite numbers.</li>
+-<li>Fix memory leak when compiled to use the built-in allocator.</li>
+-<li>x86/x64: Fix unnecessary resize in <tt>TSETM</tt>
bytecode.</li>
+-<li>Fix various GC issues with traces and
<tt>jit.flush()</tt>.</li>
+-<li>x64: Fix fusion of indexes for array references.</li>
+-<li>x86/x64: Fix stack overflow handling for coroutine results.</li>
+-<li>Enable low-2GB memory allocation on FreeBSD/x64.</li>
+-<li>Fix <tt>collectgarbage("count")</tt> result if more than
2GB is in use.</li>
+-<li>Fix parsing of hex floats.</li>
+-<li>x86/x64: Fix loop branch inversion with trailing
+-<tt>HREF+NE/EQ</tt>.</li>
+-<li>Add <tt>jit.os</tt> string.</li>
+-<li><tt>coroutine.create()</tt> permits running C functions,
too.</li>
+-<li>Fix OSX build to work with newer ld64 versions.</li>
+-<li>Fix bytecode optimization of <tt>and</tt>/<tt>or</tt>
operators.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Emit specialized bytecode for
<tt>pairs()</tt>/<tt>next()</tt>.</li>
+-<li>Improve bytecode coalescing of <tt>nil</tt> constants.</li>
+-<li>Compile calls to vararg functions.</li>
+-<li>Compile <tt>select()</tt>.</li>
+-<li>Improve alias analysis, esp. for loads from allocations.</li>
+-<li>Tuning of various compiler heuristics.</li>
+-<li>Refactor and extend IR conversion instructions.</li>
+-<li>x86/x64: Various backend enhancements related to the FFI.</li>
+-<li>Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta5">LuaJIT 2.0.0-beta5 —
2010-08-24</h2>
+-<ul>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix trace exit dispatch to function headers.</li>
+-<li>Fix Windows and OSX builds with LUAJIT_DISABLE_JIT.</li>
+-<li>Reorganize and fix placement of generated machine code on x64.</li>
+-<li>Fix TNEW in x64 interpreter.</li>
+-<li>Do not eliminate PHIs for values only referenced from side exits.</li>
+-<li>OS-independent canonicalization of strings for non-finite numbers.</li>
+-<li>Fix <tt>string.char()</tt> range check on x64.</li>
+-<li>Fix <tt>tostring()</tt> resolving within
<tt>print()</tt>.</li>
+-<li>Fix error handling for <tt>next()</tt>.</li>
+-<li>Fix passing of constant arguments to external calls on x64.</li>
+-<li>Fix interpreter argument check for two-argument SSE math
functions.</li>
+-<li>Fix C frame chain corruption caused by
<tt>lua_cpcall()</tt>.</li>
+-<li>Fix return from <tt>pcall()</tt> within active hook.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Replace on-trace GC frame syncing with interpreter exit.</li>
+-<li>Improve hash lookup specialization by not removing dead keys during
GC.</li>
+-<li>Turn traces into true GC objects.</li>
+-<li>Avoid starting a GC cycle immediately after library init.</li>
+-<li>Add weak guards to improve dead-code elimination.</li>
+-<li>Speed up string interning.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta4">LuaJIT 2.0.0-beta4 —
2010-03-28</h2>
+-<ul>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix precondition for on-trace creation of table keys.</li>
+-<li>Fix <tt>{f()}</tt> on x64 when table is resized.</li>
+-<li>Fix folding of ordered comparisons with same references.</li>
+-<li>Fix snapshot restores for multi-result bytecodes.</li>
+-<li>Fix potential hang when recording bytecode with nested closures.</li>
+-<li>Fix recording of <tt>getmetatable()</tt>,
<tt>tonumber()</tt> and bad argument types.</li>
+-<li>Fix SLOAD fusion across returns to lower frames.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Add array bounds check elimination. <tt>-Oabc</tt> is enabled by
default.</li>
+-<li>More tuning for x64, e.g. smaller table objects.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta3">LuaJIT 2.0.0-beta3 —
2010-03-07</h2>
+-<ul>
+-<li>LuaJIT x64 port:
+-<ul>
+-<li>Port integrated memory allocator to Linux/x64, Windows/x64 and
OSX/x64.</li>
+-<li>Port interpreter and JIT compiler to x64.</li>
+-<li>Port DynASM to x64.</li>
+-<li>Many 32/64 bit cleanups in the VM.</li>
+-<li>Allow building the interpreter with either x87 or SSE2
arithmetics.</li>
+-<li>Add external unwinding and C++ exception interop (default on x64).</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix constructor bytecode generation for certain conditional
values.</li>
+-<li>Fix some cases of ordered string comparisons.</li>
+-<li>Fix <tt>lua_tocfunction()</tt>.</li>
+-<li>Fix cutoff register in JMP bytecode for some conditional
expressions.</li>
+-<li>Fix PHI marking algorithm for references from variant slots.</li>
+-<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li>
+-<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li>
+-<li>Drive the GC forward on string allocations in the parser.</li>
+-<li>Implement call/return hooks (zero-cost if disabled).</li>
+-<li>Implement yield from C hooks.</li>
+-<li>Disable JIT compiler on older non-SSE2 CPUs instead of aborting.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Compile recursive code (tail-, up- and down-recursion).</li>
+-<li>Improve heuristics for bytecode penalties and blacklisting.</li>
+-<li>Split CALL/FUNC recording and clean up fast function call
semantics.</li>
+-<li>Major redesign of internal function call handling.</li>
+-<li>Improve FOR loop const specialization and integerness checks.</li>
+-<li>Switch to pre-initialized stacks. Avoid frame-clearing.</li>
+-<li>Colocation of prototypes and related data: bytecode, constants, debug
info.</li>
+-<li>Cleanup parser and streamline bytecode generation.</li>
+-<li>Add support for weak IR references to register allocator.</li>
+-<li>Switch to compressed, extensible snapshots.</li>
+-<li>Compile returns to frames below the start frame.</li>
+-<li>Improve alias analysis of upvalues using a disambiguation hash
value.</li>
+-<li>Compile floor/ceil/trunc to SSE2 helper calls or SSE4.1
instructions.</li>
+-<li>Add generic C call handling to IR and backend.</li>
+-<li>Improve KNUM fuse vs. load heuristics.</li>
+-<li>Compile various <tt>io.*()</tt> functions.</li>
+-<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>,
<tt>math.tanh()</tt>
+-and <tt>math.random()</tt>.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 —
2009-11-09</h2>
+-<ul>
+-<li>Reorganize build system. Build static+shared library on POSIX.</li>
+-<li>Allow C++ exception conversion on all platforms
+-using a wrapper function.</li>
+-<li>Automatically catch C++ exceptions and rethrow Lua error
+-(DWARF2 only).</li>
+-<li>Check for the correct x87 FPU precision at strategic points.</li>
+-<li>Always use wrappers for libm functions.</li>
+-<li>Resurrect metamethod name strings before copying them.</li>
+-<li>Mark current trace, even if compiler is idle.</li>
+-<li>Ensure FILE metatable is created only once.</li>
+-<li>Fix type comparisons when different integer types are involved.</li>
+-<li>Fix <tt>getmetatable()</tt> recording.</li>
+-<li>Fix TDUP with dead keys in template table.</li>
+-<li><tt>jit.flush(tr)</tt> returns status.
+-Prevent manual flush of a trace that's still linked.</li>
+-<li>Improve register allocation heuristics for invariant references.</li>
+-<li>Compile the push/pop variants of <tt>table.insert()</tt> and
+-<tt>table.remove()</tt>.</li>
+-<li>Compatibility with MSVC <tt>link /debug</tt>.</li>
+-<li>Fix <tt>lua_iscfunction()</tt>.</li>
+-<li>Fix <tt>math.random()</tt> when compiled with
<tt>-fpic</tt> (OSX).</li>
+-<li>Fix <tt>table.maxn()</tt>.</li>
+-<li>Bump <tt>MACOSX_DEPLOYMENT_TARGET</tt> to
<tt>10.4</tt></li>
+-<li><tt>luaL_check*()</tt> and <tt>luaL_opt*()</tt> now
support
+-negative arguments, too.<br>
+-This matches the behavior of Lua 5.1, but not the specification.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 —
2009-10-31</h2>
+-<ul>
+-<li>This is the first public release of LuaJIT 2.0.</li>
+-<li>The whole VM has been rewritten from the ground up, so there's
+-no point in listing differences over earlier versions.</li>
+-</ul>
+-</div>
+-<br class="flush">
+-</div>
+-<div id="foot">
+-<hr class="hide">
+-Copyright © 2005-2017 Mike Pall
+-<span class="noprint">
+-·
+-<a href="contact.html">Contact</a>
+-</span>
+-</div>
+-</body>
+-</html>
+diff --git a/doc/contact.html b/doc/contact.html
+index fe4751c0..c253a08b 100644
+--- a/doc/contact.html
++++ b/doc/contact.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Contact</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Contact</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,28 +47,23 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+ If you want to report bugs, propose fixes or suggest enhancements,
+ please use the
+-<a
href="https://github.com/LuaJIT/LuaJIT/issues">GitHub issue
tracker</a>.
++<a
href="https://github.com/LuaJIT/LuaJIT/issues"><span
class="ext">»</span> GitHub issue
tracker</a>.
+ </p>
+ <p>
+ Please send general questions to the
+-<a
href="http://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>.
++<a
href="https://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>.
+ </p>
+ <p>
+ You can also send any questions you have directly to me:
+@@ -93,7 +89,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
+ <h2>Copyright</h2>
+ <p>
+ All documentation is
+-Copyright © 2005-2017 Mike Pall.
++Copyright © 2005-2021 Mike Pall.
+ </p>
+
+
+@@ -101,7 +97,7 @@ Copyright © 2005-2017 Mike Pall.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html
+new file mode 100644
+index 00000000..63c2efe3
+--- /dev/null
++++ b/doc/ext_buffer.html
+@@ -0,0 +1,693 @@
++<!DOCTYPE html>
++<html>
++<head>
++<title>String Buffer Library</title>
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
++<meta name="Language" content="en">
++<link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
++<link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
++<style type="text/css">
++.lib {
++ vertical-align: middle;
++ margin-left: 5px;
++ padding: 0 5px;
++ font-size: 60%;
++ border-radius: 5px;
++ background: #c5d5ff;
++ color: #000;
++}
++</style>
++</head>
++<body>
++<div id="site">
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++</div>
++<div id="head">
++<h1>String Buffer Library</h1>
++</div>
++<div id="nav">
++<ul><li>
++<a href="luajit.html">LuaJIT</a>
++<ul><li>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
++</li><li>
++<a href="install.html">Installation</a>
++</li><li>
++<a href="running.html">Running</a>
++</li></ul>
++</li><li>
++<a href="extensions.html">Extensions</a>
++<ul><li>
++<a href="ext_ffi.html">FFI Library</a>
++<ul><li>
++<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
++</li><li>
++<a href="ext_ffi_api.html">ffi.* API</a>
++</li><li>
++<a href="ext_ffi_semantics.html">FFI Semantics</a>
++</li></ul>
++</li><li>
++<a class="current" href="ext_buffer.html">String
Buffers</a>
++</li><li>
++<a href="ext_jit.html">jit.* Library</a>
++</li><li>
++<a href="ext_c_api.html">Lua/C API</a>
++</li><li>
++<a href="ext_profiler.html">Profiler</a>
++</li></ul>
++</li><li>
++<a href="status.html">Status</a>
++</li><li>
++<a href="faq.html">FAQ</a>
++</li><li>
++<a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
++</li><li>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++</li></ul>
++</div>
++<div id="main">
++<p>
++The string buffer library allows <b>high-performance manipulation of
++string-like data</b>.
++</p>
++<p>
++Unlike Lua strings, which are constants, string buffers are
++<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
++can be stored, formatted and encoded into a string buffer and later
++converted, extracted or decoded.
++</p>
++<p>
++The convenient string buffer API simplifies common string manipulation
++tasks, that would otherwise require creating many intermediate strings.
++String buffers improve performance by eliminating redundant memory
++copies, object creation, string interning and garbage collection
++overhead. In conjunction with the FFI library, they allow zero-copy
++operations.
++</p>
++<p>
++The string buffer library also includes a high-performance
++<a href="#serialize">serializer</a> for Lua objects.
++</p>
++
++<h2 id="wip" style="color:#ff0000">Work in
Progress</h2>
++<p>
++<b style="color:#ff0000">This library is a work in progress. More
++functionality will be added soon.</b>
++</p>
++
++<h2 id="use">Using the String Buffer Library</h2>
++<p>
++The string buffer library is built into LuaJIT by default, but it's not
++loaded by default. Add this to the start of every Lua file that needs
++one of its functions:
++</p>
++<pre class="code">
++local buffer = require("string.buffer")
++</pre>
++<p>
++The convention for the syntax shown on this page is that <tt>buffer</tt>
++refers to the buffer library and <tt>buf</tt> refers to an individual
++buffer object.
++</p>
++<p>
++Please note the difference between a Lua function call, e.g.
++<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g.
++<tt>buf:reset()</tt> (with a colon).
++</p>
++
++<h3 id="buffer_object">Buffer Objects</h3>
++<p>
++A buffer object is a garbage-collected Lua object. After creation with
++<tt>buffer.new()</tt>, it can (and should) be reused for many operations.
++When the last reference to a buffer object is gone, it will eventually
++be freed by the garbage collector, along with the allocated buffer
++space.
++</p>
++<p>
++Buffers operate like a FIFO (first-in first-out) data structure. Data
++can be appended (written) to the end of the buffer and consumed (read)
++from the front of the buffer. These operations may be freely mixed.
++</p>
++<p>
++The buffer space that holds the characters is managed automatically
++— it grows as needed and already consumed space is recycled. Use
++<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need
more
++control.
++</p>
++<p>
++The maximum size of a single buffer is the same as the maximum size of a
++Lua string, which is slightly below two gigabytes. For huge data sizes,
++neither strings nor buffers are the right data structure — use the
++FFI library to directly map memory or files up to the virtual memory
++limit of your OS.
++</p>
++
++<h3 id="buffer_overview">Buffer Method Overview</h3>
++<ul>
++<li>
++The <tt>buf:put*()</tt>-like methods append (write) characters to the
++end of the buffer.
++</li>
++<li>
++The <tt>buf:get*()</tt>-like methods consume (read) characters from the
++front of the buffer.
++</li>
++<li>
++Other methods, like <tt>buf:tostring()</tt> only read the buffer
++contents, but don't change the buffer.
++</li>
++<li>
++The <tt>buf:set()</tt> method allows zero-copy consumption of a string
++or an FFI cdata object as a buffer.
++</li>
++<li>
++The FFI-specific methods allow zero-copy read/write-style operations or
++modifying the buffer contents in-place. Please check the
++<a href="#ffi_caveats">FFI caveats</a> below, too.
++</li>
++<li>
++Methods that don't need to return anything specific, return the buffer
++object itself as a convenience. This allows method chaining, e.g.:
++<tt>buf:reset():encode(obj)</tt> or
<tt>buf:skip(len):get()</tt>
++</li>
++</ul>
++
++<h2 id="create">Buffer Creation and Management</h2>
++
++<h3 id="buffer_new"><tt>local buf = buffer.new([size
[,options]])<br>
++local buf = buffer.new([options])</tt></h3>
++<p>
++Creates a new buffer object.
++</p>
++<p>
++The optional <tt>size</tt> argument ensures a minimum initial buffer
++size. This is strictly an optimization when the required buffer size is
++known beforehand. The buffer space will grow as needed, in any case.
++</p>
++<p>
++The optional table <tt>options</tt> sets various
++<a href="#serialize_options">serialization options</a>.
++</p>
++
++<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3>
++<p>
++Reset (empty) the buffer. The allocated buffer space is not freed and
++may be reused.
++</p>
++
++<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
++<p>
++The buffer space of the buffer object is freed. The object itself
++remains intact, empty and may be reused.
++</p>
++<p>
++Note: you normally don't need to use this method. The garbage collector
++automatically frees the buffer space, when the buffer object is
++collected. Use this method, if you need to free the associated memory
++immediately.
++</p>
++
++<h2 id="write">Buffer Writers</h2>
++
++<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj]
[,…])</tt></h3>
++<p>
++Appends a string <tt>str</tt>, a number <tt>num</tt> or any
object
++<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the
buffer.
++Multiple arguments are appended in the given order.
++</p>
++<p>
++Appending a buffer to a buffer is possible and short-circuited
++internally. But it still involves a copy. Better combine the buffer
++writes to use a single buffer.
++</p>
++
++<h3 id="buffer_putf"><tt>buf = buf:putf(format,
…)</tt></h3>
++<p>
++Appends the formatted arguments to the buffer. The <tt>format</tt>
++string supports the same options as <tt>string.format()</tt>.
++</p>
++
++<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata,
len)</tt><span class="lib">FFI</span></h3>
++<p>
++Appends the given <tt>len</tt> number of bytes from the memory pointed
++to by the FFI <tt>cdata</tt> object to the buffer. The object needs to
++be convertible to a (constant) pointer.
++</p>
++
++<h3 id="buffer_set"><tt>buf = buf:set(str)<br>
++buf = buf:set(cdata, len)</tt><span
class="lib">FFI</span></h3>
++<p>
++This method allows zero-copy consumption of a string or an FFI cdata
++object as a buffer. It stores a reference to the passed string
++<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any
buffer
++space originally allocated is freed. This is <i>not</i> an append
++operation, unlike the <tt>buf:put*()</tt> methods.
++</p>
++<p>
++After calling this method, the buffer behaves as if
++<tt>buf:free():put(str)</tt> or
<tt>buf:free():put(cdata, len)</tt>
++had been called. However, the data is only referenced and not copied, as
++long as the buffer is only consumed.
++</p>
++<p>
++In case the buffer is written to later on, the referenced data is copied
++and the object reference is removed (copy-on-write semantics).
++</p>
++<p>
++The stored reference is an anchor for the garbage collector and keeps the
++originally passed string or FFI cdata object alive.
++</p>
++
++<h3 id="buffer_reserve"><tt>ptr, len =
buf:reserve(size)</tt><span class="lib">FFI</span><br>
++<tt>buf = buf:commit(used)</tt><span
class="lib">FFI</span></h3>
++<p>
++The <tt>reserve</tt> method reserves at least <tt>size</tt>
bytes of
++write space in the buffer. It returns a <tt>uint8_t *</tt> FFI
++cdata pointer <tt>ptr</tt> that points to this space.
++</p>
++<p>
++The available length in bytes is returned in <tt>len</tt>. This is at
++least <tt>size</tt> bytes, but may be more to facilitate efficient
++buffer growth. You can either make use of the additional space or ignore
++<tt>len</tt> and only use <tt>size</tt> bytes.
++</p>
++<p>
++The <tt>commit</tt> method appends the <tt>used</tt> bytes of
the
++previously returned write space to the buffer data.
++</p>
++<p>
++This pair of methods allows zero-copy use of C read-style APIs:
++</p>
++<pre class="code">
++local MIN_SIZE = 65536
++repeat
++ local ptr, len = buf:reserve(MIN_SIZE)
++ local n = C.read(fd, ptr, len)
++ if n == 0 then break end -- EOF.
++ if n < 0 then error("read error") end
++ buf:commit(n)
++until false
++</pre>
++<p>
++The reserved write space is <i>not</i> initialized. At least the
++<tt>used</tt> bytes <b>must</b> be written to before calling
the
++<tt>commit</tt> method. There's no need to call the
<tt>commit</tt>
++method, if nothing is added to the buffer (e.g. on error).
++</p>
++
++<h2 id="read">Buffer Readers</h2>
++
++<h3 id="buffer_length"><tt>len = #buf</tt></h3>
++<p>
++Returns the current length of the buffer data in bytes.
++</p>
++
++<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf
[…]</tt></h3>
++<p>
++The Lua concatenation operator <tt>..</tt> also accepts buffers, just
++like strings or numbers. It always returns a string and not a buffer.
++</p>
++<p>
++Note that although this is supported for convenience, this thwarts one
++of the main reasons to use buffers, which is to avoid string
++allocations. Rewrite it with <tt>buf:put()</tt> and
<tt>buf:get()</tt>.
++</p>
++<p>
++Mixing this with unrelated objects that have a <tt>__concat</tt>
++metamethod may not work, since these probably only expect strings.
++</p>
++
++<h3 id="buffer_skip"><tt>buf =
buf:skip(len)</tt></h3>
++<p>
++Skips (consumes) <tt>len</tt> bytes from the buffer up to the current
++length of the buffer data.
++</p>
++
++<h3 id="buffer_get"><tt>str, … = buf:get([len|nil]
[,…])</tt></h3>
++<p>
++Consumes the buffer data and returns one or more strings. If called
++without arguments, the whole buffer data is consumed. If called with a
++number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt>
argument
++consumes the remaining buffer space (this only makes sense as the last
++argument). Multiple arguments consume the buffer data in the given
++order.
++</p>
++<p>
++Note: a zero length or no remaining buffer data returns an empty string
++and not <tt>nil</tt>.
++</p>
++
++<h3 id="buffer_tostring"><tt>str = buf:tostring()<br>
++str = tostring(buf)</tt></h3>
++<p>
++Creates a string from the buffer data, but doesn't consume it. The
++buffer remains unchanged.
++</p>
++<p>
++Buffer objects also define a <tt>__tostring</tt> metamethod. This means
++buffers can be passed to the global <tt>tostring()</tt> function and
++many other functions that accept this in place of strings. The important
++internal uses in functions like <tt>io.write()</tt> are short-circuited
++to avoid the creation of an intermediate string object.
++</p>
++
++<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span
class="lib">FFI</span></h3>
++<p>
++Returns an <tt>uint8_t *</tt> FFI cdata pointer
<tt>ptr</tt> that
++points to the buffer data. The length of the buffer data in bytes is
++returned in <tt>len</tt>.
++</p>
++<p>
++The returned pointer can be directly passed to C functions that expect a
++buffer and a length. You can also do bytewise reads
++(<tt>local x = ptr[i]</tt>) or writes
++(<tt>ptr[i] = 0x40</tt>) of the buffer data.
++</p>
++<p>
++In conjunction with the <tt>skip</tt> method, this allows zero-copy use
++of C write-style APIs:
++</p>
++<pre class="code">
++repeat
++ local ptr, len = buf:ref()
++ if len == 0 then break end
++ local n = C.write(fd, ptr, len)
++ if n < 0 then error("write error") end
++ buf:skip(n)
++until n >= len
++</pre>
++<p>
++Unlike Lua strings, buffer data is <i>not</i> implicitly
++zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that
++expect zero-terminated strings. If you're not using <tt>len</tt>, then
++you're doing something wrong.
++</p>
++
++<h2 id="serialize">Serialization of Lua Objects</h2>
++<p>
++The following functions and methods allow <b>high-speed serialization</b>
++(encoding) of a Lua object into a string and decoding it back to a Lua
++object. This allows convenient storage and transport of <b>structured
++data</b>.
++</p>
++<p>
++The encoded data is in an <a href="#serialize_format">internal binary
++format</a>. The data can be stored in files, binary-transparent
++databases or transmitted to other LuaJIT instances across threads,
++processes or networks.
++</p>
++<p>
++Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
++server-class system, even when serializing many small objects. Decoding
++speed is mostly constrained by object creation cost.
++</p>
++<p>
++The serializer handles most Lua types, common FFI number types and
++nested structures. Functions, thread objects, other FFI cdata and full
++userdata cannot be serialized (yet).
++</p>
++<p>
++The encoder serializes nested structures as trees. Multiple references
++to a single object will be stored separately and create distinct objects
++after decoding. Circular references cause an error.
++</p>
++
++<h3 id="serialize_methods">Serialization Functions and
Methods</h3>
++
++<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br>
++buf = buf:encode(obj)</tt></h3>
++<p>
++Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone
++function returns a string <tt>str</tt>. The buffer method appends the
++encoding to the buffer.
++</p>
++<p>
++<tt>obj</tt> can be any of the supported Lua types — it
doesn't
++need to be a Lua table.
++</p>
++<p>
++This function may throw an error when attempting to serialize
++unsupported object types, circular references or deeply nested tables.
++</p>
++
++<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br>
++obj = buf:decode()</tt></h3>
++<p>
++The stand-alone function de-serializes (decodes) the string
++<tt>str</tt>, the buffer method de-serializes one object from the
++buffer. Both return a Lua object <tt>obj</tt>.
++</p>
++<p>
++The returned object may be any of the supported Lua types —
++even <tt>nil</tt>.
++</p>
++<p>
++This function may throw an error when fed with malformed or incomplete
++encoded data. The stand-alone function throws when there's left-over
++data after decoding a single top-level object. The buffer method leaves
++any left-over data in the buffer.
++</p>
++
++<h3 id="serialize_options">Serialization Options</h3>
++<p>
++The <tt>options</tt> table passed to <tt>buffer.new()</tt> may
contain
++the following members (all optional):
++</p>
++<ul>
++<li>
++<tt>dict</tt> is a Lua table holding a <b>dictionary of
strings</b> that
++commonly occur as table keys of objects you are serializing. These keys
++are compactly encoded as indexes during serialization. A well chosen
++dictionary saves space and improves serialization performance.
++</li>
++<li>
++<tt>metatable</tt> is a Lua table holding a <b>dictionary of
metatables</b>
++for the table objects you are serializing.
++</li>
++</ul>
++<p>
++<tt>dict</tt> needs to be an array of strings and
<tt>metatable</tt> needs
++to be an array of tables. Both starting at index 1 and without holes (no
++<tt>nil</tt> in between). The tables are anchored in the buffer object and
++internally modified into a two-way index (don't do this yourself, just pass
++a plain array). The tables must not be modified after they have been passed
++to <tt>buffer.new()</tt>.
++</p>
++<p>
++The <tt>dict</tt> and <tt>metatable</tt> tables used by the
encoder and
++decoder must be the same. Put the most common entries at the front. Extend
++at the end to ensure backwards-compatibility — older encodings can
++then still be read. You may also set some indexes to <tt>false</tt> to
++explicitly drop backwards-compatibility. Old encodings that use these
++indexes will throw an error when decoded.
++</p>
++<p>
++Metatables that are not found in the <tt>metatable</tt> dictionary are
++ignored when encoding. Decoding returns a table with a <tt>nil</tt>
++metatable.
++</p>
++<p>
++Note: parsing and preparation of the options table is somewhat
++expensive. Create a buffer object only once and recycle it for multiple
++uses. Avoid mixing encoder and decoder buffers, since the
++<tt>buf:set()</tt> method frees the already allocated buffer space:
++</p>
++<pre class="code">
++local options = {
++ dict = { "commonly", "used", "string", "keys"
},
++}
++local buf_enc = buffer.new(options)
++local buf_dec = buffer.new(options)
++
++local function encode(obj)
++ return buf_enc:reset():encode(obj):get()
++end
++
++local function decode(str)
++ return buf_dec:set(str):decode()
++end
++</pre>
++
++<h3 id="serialize_stream">Streaming Serialization</h3>
++<p>
++In some contexts, it's desirable to do piecewise serialization of large
++datasets, also known as <i>streaming</i>.
++</p>
++<p>
++This serialization format can be safely concatenated and supports streaming.
++Multiple encodings can simply be appended to a buffer and later decoded
++individually:
++</p>
++<pre class="code">
++local buf = buffer.new()
++buf:encode(obj1)
++buf:encode(obj2)
++local copy1 = buf:decode()
++local copy2 = buf:decode()
++</pre>
++<p>
++Here's how to iterate over a stream:
++</p>
++<pre class="code">
++while #buf ~= 0 do
++ local obj = buf:decode()
++ -- Do something with obj.
++end
++</pre>
++<p>
++Since the serialization format doesn't prepend a length to its encoding,
++network applications may need to transmit the length, too.
++</p>
++
++<h3 id="serialize_format">Serialization Format Specification</h3>
++<p>
++This serialization format is designed for <b>internal use</b> by LuaJIT
++applications. Serialized data is upwards-compatible and portable across
++all supported LuaJIT platforms.
++</p>
++<p>
++It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
++embedded zeroes and stores embedded Lua string objects unmodified, which
++are 8-bit-clean, too. Encoded data can be safely concatenated for
++streaming and later decoded one top-level object at a time.
++</p>
++<p>
++The encoding is reasonably compact, but tuned for maximum performance,
++not for minimum space usage. It compresses well with any of the common
++byte-oriented data compression algorithms.
++</p>
++<p>
++Although documented here for reference, this format is explicitly
++<b>not</b> intended to be a 'public standard' for structured data
++interchange across computer languages (like JSON or MessagePack). Please
++do not use it as such.
++</p>
++<p>
++The specification is given below as a context-free grammar with a
++top-level <tt>object</tt> as the starting point. Alternatives are
++separated by the <tt>|</tt> symbol and <tt>*</tt> indicates
repeats.
++Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
++either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
++suffix.
++</p>
++<pre>
++object → nil | false | true
++ | null | lightud32 | lightud64
++ | int | num | tab | tab_mt
++ | int64 | uint64 | complex
++ | string
++
++nil → 0x00
++false → 0x01
++true → 0x02
++
++null → 0x03 // NULL lightuserdata
++lightud32 → 0x04 data.I // 32 bit lightuserdata
++lightud64 → 0x05 data.L // 64 bit lightuserdata
++
++int → 0x06 int.I // int32_t
++num → 0x07 double.L
++
++tab → 0x08 // Empty table
++ | 0x09 h.U h*{object object} // Key/value hash
++ | 0x0a a.U a*object // 0-based array
++ | 0x0b a.U a*object h.U h*{object object} // Mixed
++ | 0x0c a.U (a-1)*object // 1-based array
++ | 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
++tab_mt → 0x0e (index-1).U tab // Metatable dict entry
++
++int64 → 0x10 int.L // FFI int64_t
++uint64 → 0x11 uint.L // FFI uint64_t
++complex → 0x12 re.L im.L // FFI complex
++
++string → (0x20+len).U len*char.B
++ | 0x0f (index-1).U // String dict entry
++
++.B = 8 bit
++.I = 32 bit little-endian
++.L = 64 bit little-endian
++.U = prefix-encoded 32 bit unsigned number n:
++ 0x00..0xdf → n.B
++ 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
++ 0x1fe0.. → 0xff n.I
++</pre>
++
++<h2 id="error">Error handling</h2>
++<p>
++Many of the buffer methods can throw an error. Out-of-memory or usage
++errors are best caught with an outer wrapper for larger parts of code.
++There's not much one can do after that, anyway.
++</p>
++<p>
++OTOH you may want to catch some errors individually. Buffer methods need
++to receive the buffer object as the first argument. The Lua colon-syntax
++<tt>obj:method()</tt> does that implicitly. But to wrap a method with
++<tt>pcall()</tt>, the arguments need to be passed like this:
++</p>
++<pre class="code">
++local ok, err = pcall(buf.encode, buf, obj)
++if not ok then
++ -- Handle error in err.
++end
++</pre>
++
++<h2 id="ffi_caveats">FFI caveats</h2>
++<p>
++The string buffer library has been designed to work well together with
++the FFI library. But due to the low-level nature of the FFI library,
++some care needs to be taken:
++</p>
++<p>
++First, please remember that FFI pointers are zero-indexed. The space
++returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts
at the
++returned pointer and ends before <tt>len</tt> bytes after that.
++</p>
++<p>
++I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index
++is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no
valid
++index at all. The returned pointer may even be <tt>NULL</tt>.
++</p>
++<p>
++The space pointed to by the returned pointer is only valid as long as
++the buffer is not modified in any way (neither append, nor consume, nor
++reset, etc.). The pointer is also not a GC anchor for the buffer object
++itself.
++</p>
++<p>
++Buffer data is only guaranteed to be byte-aligned. Casting the returned
++pointer to a data type with higher alignment may cause unaligned
++accesses. It depends on the CPU architecture whether this is allowed or
++not (it's always OK on x86/x64 and mostly OK on other modern
++architectures).
++</p>
++<p>
++FFI pointers or references do not count as GC anchors for an underlying
++object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt>
is
++anchored by <tt>buf:set(array, len)</tt>, but not by
++<tt>buf:set(array+offset, len)</tt>. The addition of the offset
++creates a new pointer, even when the offset is zero. In this case, you
++need to make sure there's still a reference to the original array as
++long as its contents are in use by the buffer.
++</p>
++<p>
++Even though each LuaJIT VM instance is single-threaded (but you can
++create multiple VMs), FFI data structures can be accessed concurrently.
++Be careful when reading/writing FFI cdata from/to buffers to avoid
++concurrent accesses or modifications. In particular, the memory
++referenced by <tt>buf:set(cdata, len)</tt> must not be modified
++while buffer readers are working on it. Shared, but read-only memory
++mappings of files are OK, but only if the file does not change.
++</p>
++<br class="flush">
++</div>
++<div id="foot">
++<hr class="hide">
++Copyright © 2005-2021
++<span class="noprint">
++·
++<a href="contact.html">Contact</a>
++</span>
++</div>
++</body>
++</html>
+diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
+index ad462c63..9f1ad212 100644
+--- a/doc/ext_c_api.html
++++ b/doc/ext_c_api.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Lua/C API Extensions</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Lua/C API Extensions</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a class="current" href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -91,8 +87,8 @@ other Lua/C API functions).
+ </p>
+ <p>
+ The third argument specifies the mode, which is 'or'ed with a flag.
+-The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on,
+-<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or
++The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off,
++<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or
+ <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
+ </p>
+ <p>
+@@ -179,7 +175,7 @@ Also note that this mechanism is not without overhead.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
+index 5e1daaf5..b934dc78 100644
+--- a/doc/ext_ffi.html
++++ b/doc/ext_ffi.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>FFI Library</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>FFI Library</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -322,7 +318,7 @@ without undue conversion penalties.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
+index 91af2e1d..061cc42a 100644
+--- a/doc/ext_ffi_api.html
++++ b/doc/ext_ffi_api.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>ffi.* API Functions</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -16,7 +15,7 @@ td.abiparam { font-weight: bold; width: 6em; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1><tt>ffi.*</tt> API Functions</h1>
+@@ -25,7 +24,7 @@ td.abiparam { font-weight: bold; width: 6em; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -43,6 +42,8 @@ td.abiparam { font-weight: bold; width: 6em; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -51,17 +52,12 @@ td.abiparam { font-weight: bold; width: 6em; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -261,7 +257,7 @@ contents of an <tt>__index</tt> table (if any) may be
modified
+ afterwards. The associated metatable automatically applies to all uses
+ of this type, no matter how the objects are created or where they
+ originate from. Note that pre-defined operations on types have
+-precedence (e.g. declared field names cannot be overriden).
++precedence (e.g. declared field names cannot be overridden).
+ </p>
+ <p>
+ All standard Lua metamethods are implemented. These are called directly,
+@@ -469,6 +465,8 @@ otherwise. The following parameters are currently defined:
+ <tr class="odd">
+ <td class="abiparam">win</td><td
class="abidesc">Windows variant of the standard ABI</td></tr>
+ <tr class="even">
++<td class="abiparam">uwp</td><td
class="abidesc">Universal Windows Platform</td></tr>
++<tr class="odd">
+ <td class="abiparam">gc64</td><td
class="abidesc">64 bit GC references</td></tr>
+ </table>
+
+@@ -560,7 +558,7 @@ named <tt>i</tt>.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
+index 800b6b18..fef39c32 100644
+--- a/doc/ext_ffi_semantics.html
++++ b/doc/ext_ffi_semantics.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>FFI Semantics</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -16,7 +15,7 @@ td.convop { font-style: italic; width: 40%; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>FFI Semantics</h1>
+@@ -25,7 +24,7 @@ td.convop { font-style: italic; width: 40%; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -43,6 +42,8 @@ td.convop { font-style: italic; width: 40%; }
+ <a class="current" href="ext_ffi_semantics.html">FFI
Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -51,17 +52,12 @@ td.convop { font-style: italic; width: 40%; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -678,7 +674,7 @@ through unions is explicitly detected and allowed.
+ <a href="ext_ffi_api.html#ffi_new">constructor</a>. This is
equivalent
+ to <tt>ffi.new(ct, ...)</tt>, unless a <tt>__new</tt> metamethod
is
+ defined. The <tt>__new</tt> metamethod is called with the ctype object
+-plus any other arguments passed to the contructor. Note that you have to
++plus any other arguments passed to the constructor. Note that you have to
+ use <tt>ffi.new</tt> inside of it, since calling
<tt>ct(...)</tt> would
+ cause infinite recursion.</li>
+
+@@ -864,7 +860,7 @@ place of a type, you'd need to use
<tt>ffi.typeof("int")</tt> instead.
+ <p>
+ The main use for parameterized types are libraries implementing abstract
+ data types
+-(<a
href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8&...
class="ext">»</span> example</a>),
++(<a
href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8...
class="ext">»</span> example</a>),
+ similar to what can be achieved with C++ template metaprogramming.
+ Another use case are derived types of anonymous structs, which avoids
+ pollution of the global struct namespace.
+@@ -1225,7 +1221,7 @@ suboptimal performance, especially when used in inner loops:
+ <li>Table initializers.</li>
+ <li>Initialization of nested
<tt>struct</tt>/<tt>union</tt> types.</li>
+ <li>Non-default initialization of VLA/VLS or large C types
+-(> 128 bytes or > 16 array elements.</li>
++(> 128 bytes or > 16 array elements).</li>
+ <li>Bitfield initializations.</li>
+ <li>Pointer differences for element sizes that are not a power of
+ two.</li>
+@@ -1252,7 +1248,7 @@ compiled.</li>
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
+index 36500664..ca71be4d 100644
+--- a/doc/ext_ffi_tutorial.html
++++ b/doc/ext_ffi_tutorial.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>FFI Tutorial</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -18,7 +17,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>FFI Tutorial</h1>
+@@ -27,7 +26,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -45,6 +44,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -53,17 +54,12 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -222,7 +218,7 @@ a fascinating best-selling game is left as an exercise for the
reader.
+ <h2 id="zlib">Accessing the zlib Compression Library</h2>
+ <p>
+ The following code shows how to access the <a
+-href="http://zlib.net/">zlib</a> compression library from Lua code.
++href="https://zlib.net/"><span
class="ext">»</span> zlib</a> compression
library from Lua code.
+ We'll define two convenience wrapper functions that take a string and
+ compress or uncompress it to another string:
+ </p>
+@@ -305,7 +301,7 @@ comes pre-installed. Since <tt>ffi.load()</tt>
automatically adds any
+ missing standard prefixes/suffixes, we can simply load the
+ <tt>"z"</tt> library. On Windows it's named
<tt>zlib1.dll</tt> and
+ you'll have to download it first from the
+-<a
href="http://zlib.net/"><span
class="ext">»</span> zlib site</a>. The check
for
++<a
href="https://zlib.net/"><span
class="ext">»</span> zlib site</a>. The check
for
+ <tt>ffi.os</tt> makes sure we pass the right name to
+ <tt>ffi.load()</tt>.
+ </p>
+@@ -593,7 +589,7 @@ it to a local variable in the function scope is unnecessary.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_jit.html b/doc/ext_jit.html
+index e4088bcb..6dd54c70 100644
+--- a/doc/ext_jit.html
++++ b/doc/ext_jit.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>jit.* Library</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1><tt>jit.*</tt> Library</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a class="current" href="ext_jit.html">jit.*
Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -153,7 +149,7 @@ Contains the target OS name:
+ <h3 id="jit_arch"><tt>jit.arch</tt></h3>
+ <p>
+ Contains the target architecture name:
+-"x86", "x64", "arm", "arm64", "ppc",
"mips" or "mips64".
++"x86", "x64", "arm", "arm64",
"arm64be", "ppc", "mips", "mipsel",
"mips64", "mips64el", "mips64r6", "mips64r6el".
+ </p>
+
+ <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler
optimization control</h2>
+@@ -191,7 +187,7 @@ if you want to know more.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html
+index 71b8c033..2783abdb 100644
+--- a/doc/ext_profiler.html
++++ b/doc/ext_profiler.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Profiler</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Profiler</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -355,7 +351,7 @@ use.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/extensions.html b/doc/extensions.html
+index d7cc9693..748c1793 100644
+--- a/doc/extensions.html
++++ b/doc/extensions.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Extensions</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -28,7 +27,7 @@ td.excinterop {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Extensions</h1>
+@@ -37,7 +36,7 @@ td.excinterop {
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -55,6 +54,8 @@ td.excinterop {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -63,25 +64,20 @@ td.excinterop {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+ LuaJIT is fully upwards-compatible with Lua 5.1. It supports all
+-<a
href="http://www.lua.org/manual/5.1/manual.html#5"><span
class="ext">»</span> standard Lua
++<a
href="https://www.lua.org/manual/5.1/manual.html#5"><span
class="ext">»</span> standard Lua
+ library functions</a> and the full set of
+-<a
href="http://www.lua.org/manual/5.1/manual.html#3"><span
class="ext">»</span> Lua/C API
++<a
href="https://www.lua.org/manual/5.1/manual.html#3"><span
class="ext">»</span> Lua/C API
+ functions</a>.
+ </p>
+ <p>
+@@ -105,7 +101,7 @@ LuaJIT comes with several built-in extension modules:
+ <h3 id="bit"><tt>bit.*</tt> — Bitwise
operations</h3>
+ <p>
+ LuaJIT supports all bitwise operations as defined by
+-<a href="http://bitop.luajit.org"><span
class="ext">»</span> Lua BitOp</a>:
++<a href="https://bitop.luajit.org"><span
class="ext">»</span> Lua BitOp</a>:
+ </p>
+ <pre class="code">
+ bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor
+@@ -114,7 +110,7 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
+ <p>
+ This module is a LuaJIT built-in — you don't need to download or
+ install Lua BitOp. The Lua BitOp site has full documentation for all
+-<a
href="http://bitop.luajit.org/api.html"><span
class="ext">»</span> Lua BitOp API
functions</a>.
++<a
href="https://bitop.luajit.org/api.html"><span
class="ext">»</span> Lua BitOp API
functions</a>.
+ The FFI adds support for
+ <a href="ext_ffi_semantics.html#cdata_arith">64 bit bitwise
operations</a>,
+ using the same API functions.
+@@ -209,9 +205,8 @@ bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
+ </p>
+ <p>
+ Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which
implies
+-a different, incompatible bytecode format for ports that use this mode (e.g.
+-ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified
+-in the future.
++a different, incompatible bytecode format for all 64 bit ports. This may be
++rectified in the future.
+ </p>
+
+ <h3 id="table_new"><tt>table.new(narray, nhash)</tt>
allocates a pre-sized table</h3>
+@@ -254,6 +249,10 @@ for every call. The result is uniformly distributed between 0.0 and
1.0.
+ It's correctly scaled up and rounded for
<tt>math.random(n [,m])</tt> to
+ preserve uniformity.
+ </p>
++<p>
++Important: Neither this nor any other PRNG based on the simplistic
++<tt>math.random()</tt> API is suitable for cryptographic use.
++</p>
+
+ <h3 id="io"><tt>io.*</tt> functions handle 64 bit
file offsets</h3>
+ <p>
+@@ -374,6 +373,7 @@ LuaJIT supports some extensions from Lua 5.3:
+ <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8
encoding in string literals.</li>
+ <li>The argument table <tt>arg</tt> can be read (and modified) by
<tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
+ <li><tt>io.read()</tt> and <tt>file:read()</tt> accept
formats with or without a leading <tt>*</tt>.</li>
++<li><tt>assert()</tt> accepts any type of error object.</li>
+ <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
+ <li><tt>coroutine.isyieldable()</tt>.</li>
+ <li>Lua/C API extensions:
+@@ -394,29 +394,19 @@ the toolchain used to compile LuaJIT:
+ <td class="excinterop">Interoperability</td>
+ </tr>
+ <tr class="odd separate">
+-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
+-<td class="exccompiler">GCC 4.3+, Clang</td>
++<td class="excplatform">External frame unwinding</td>
++<td class="exccompiler">GCC, Clang, MSVC</td>
+ <td class="excinterop"><b style="color:
#00a000;">Full</b></td>
+ </tr>
+ <tr class="even">
+-<td class="excplatform">ARM
<tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
+-<td class="exccompiler">GCC, Clang</td>
+-<td class="excinterop"><b style="color:
#00a000;">Full</b></td>
+-</tr>
+-<tr class="odd">
+-<td class="excplatform">Other platforms, DWARF2 unwinding</td>
++<td class="excplatform">Internal frame unwinding + DWARF2</td>
+ <td class="exccompiler">GCC, Clang</td>
+ <td class="excinterop"><b style="color:
#c06000;">Limited</b></td>
+ </tr>
+-<tr class="even">
+-<td class="excplatform">Windows/x64</td>
+-<td class="exccompiler">MSVC or WinSDK</td>
+-<td class="excinterop"><b style="color:
#00a000;">Full</b></td>
+-</tr>
+ <tr class="odd">
+-<td class="excplatform">Windows/x86</td>
+-<td class="exccompiler">Any</td>
+-<td class="excinterop"><b style="color:
#00a000;">Full</b></td>
++<td class="excplatform">Windows 64 bit</td>
++<td class="exccompiler">non-MSVC</td>
++<td class="excinterop"><b style="color:
#c06000;">Limited</b></td>
+ </tr>
+ <tr class="even">
+ <td class="excplatform">Other platforms</td>
+@@ -437,7 +427,9 @@ the toolchain used to compile LuaJIT:
+ on the C stack. The contents of the C++ exception object
+ pass through unmodified.</li>
+ <li>Lua errors can be caught on the C++ side with
<tt>catch(...)</tt>.
+-The corresponding Lua error message can be retrieved from the Lua stack.</li>
++The corresponding Lua error message can be retrieved from the Lua stack.<br>
++For MSVC for Windows 64 bit this requires compilation of your C++ code
++with <tt>/EHa</tt>.</li>
+ <li>Throwing Lua errors across C++ frames is safe. C++ destructors
+ will be called.</li>
+ </ul>
+@@ -472,7 +464,7 @@ C++ destructors.</li>
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/faq.html b/doc/faq.html
+index 2c930743..1b7cb371 100644
+--- a/doc/faq.html
++++ b/doc/faq.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Frequently Asked Questions (FAQ)</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -14,7 +13,7 @@ dd { margin-left: 1.5em; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Frequently Asked Questions (FAQ)</h1>
+@@ -23,7 +22,7 @@ dd { margin-left: 1.5em; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -41,6 +40,8 @@ dd { margin-left: 1.5em; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -49,67 +50,60 @@ dd { margin-left: 1.5em; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a class="current" href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+-<dl>
++<dl id="info">
+ <dt>Q: Where can I learn more about LuaJIT and Lua?</dt>
+ <dd>
+ <ul style="padding: 0;">
+-<li>The <a
href="http://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>
focuses on topics
++<li>The <a
href="https://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>
focuses on topics
+ related to LuaJIT.</li>
+ <li>The <a
href="http://wiki.luajit.org/"><span
class="ext">»</span> LuaJIT wiki</a> gathers
community
+ resources about LuaJIT.</li>
+ <li>News about Lua itself can be found at the
+-<a
href="http://www.lua.org/lua-l.html"><span
class="ext">»</span> Lua mailing list</a>.
++<a
href="https://www.lua.org/lua-l.html"><span
class="ext">»</span> Lua mailing list</a>.
+ The mailing list archives are worth checking out for older postings
+ about LuaJIT.</li>
+-<li>The <a href="http://lua.org"><span
class="ext">»</span> main
Lua.org site</a> has
complete
+-<a
href="http://www.lua.org/docs.html"><span
class="ext">»</span> documentation</a> of the
language
++<li>The <a href="https://lua.org"><span
class="ext">»</span> main
Lua.org site</a> has
complete
++<a
href="https://www.lua.org/docs.html"><span
class="ext">»</span> documentation</a> of the
language
+ and links to books and papers about Lua.</li>
+ <li>The community-managed <a
href="http://lua-users.org/wiki/"><span
class="ext">»</span> Lua Wiki</a>
+ has information about diverse topics.</li>
+-</ul>
++</ul></dd>
+ </dl>
+
+-<dl>
++<dl id="tech">
+ <dt>Q: Where can I learn more about the compiler technology used by
LuaJIT?</dt>
+ <dd>
+-I'm planning to write more documentation about the internals of LuaJIT.
+-In the meantime, please use the following Google Scholar searches
+-to find relevant papers:<br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=Trace+Compiler">&l...
class="ext">»</span> Trace
Compiler</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=JIT+Compiler"><...
class="ext">»</span> JIT
Compiler</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizat...
class="ext">»</span> Dynamic Language
Optimizations</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=SSA+Form"><span
class="ext">»</span> SSA Form</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Alloc...
class="ext">»</span> Linear Scan Register
Allocation</a><br>
+-Here is a list of the <a
href="http://article.gmane.org/gmane.comp.lang.lua.general/58908&quo...
class="ext">»</span> innovative features in
LuaJIT</a>.<br>
+-And, you know, reading the source is of course the only way to enlightenment. :-)
++Please use the following Google Scholar searches to find relevant papers:<br>
++Search for: <a
href="https://scholar.google.com/scholar?q=Trace+Compiler">&...
class="ext">»</span> Trace
Compiler</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=JIT+Compiler"><...
class="ext">»</span> JIT
Compiler</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=Dynamic+Language+Optimiza...
class="ext">»</span> Dynamic Language
Optimizations</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=SSA+Form"><span
class="ext">»</span> SSA Form</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=Linear+Scan+Register+Allo...
class="ext">»</span> Linear Scan Register
Allocation</a><br>
++Here is a list of the <a
href="http://lua-users.org/lists/lua-l/2009-11/msg00089.html"&g...
class="ext">»</span> innovative features in
LuaJIT</a>.<br>
++And, you know, reading the source is of course the only way to enlightenment.
+ </dd>
+ </dl>
+
+-<dl>
++<dl id="arg">
+ <dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil
value)"?<br>
+ Q: My vararg functions fail after switching to LuaJIT!</dt>
+ <dd>LuaJIT is compatible to the Lua 5.1 language standard. It doesn't
+ support the implicit <tt>arg</tt> parameter for old-style vararg
+ functions from Lua 5.0.<br>Please convert your code to the
+-<a
href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span
class="ext">»</span> Lua 5.1
++<a
href="https://www.lua.org/manual/5.1/manual.html#2.5.9"><...
class="ext">»</span> Lua 5.1
+ vararg syntax</a>.</dd>
+ </dl>
+
+-<dl>
++<dl id="x87">
+ <dt>Q: Why do I get this error: "bad FPU precision"?<br>
+ <dt>Q: I get weird behavior after initializing Direct3D.<br>
+ <dt>Q: Some FPU operations crash after I load a Delphi DLL.<br>
+@@ -127,56 +121,75 @@ Consider testing your application with older versions,
too.<br>
+
+ Similarly, the Borland/Delphi runtime modifies the FPU control word and
+ enables FP exceptions. Of course this violates the Windows ABI, too.
+-Please check the Delphi docs for the Set8087CW method.
+-
++Please check the Delphi docs for the Set8087CW method.</dd>
+ </dl>
+
+-<dl>
++<dl id="ctrlc">
+ <dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt>
+ <dd>The interrupt signal handler sets a Lua debug hook. But this is
+-currently ignored by compiled code (this will eventually be fixed). If
+-your program is running in a tight loop and never falls back to the
+-interpreter, the debug hook never runs and can't throw the
+-"interrupted!" error.<br> In the meantime you have to press Ctrl-C
+-twice to get stop your program. That's similar to when it's stuck
+-running inside a C function under the Lua interpreter.</dd>
++ignored by compiled code. If your program is running in a tight loop
++and never falls back to the interpreter, the debug hook never runs and
++can't throw the "interrupted!" error.<br>
++You have to press Ctrl-C twice to stop your program. That's similar
++to when it's stuck running inside a C function under the Lua
interpreter.</dd>
+ </dl>
+
+-<dl>
+-<dt>Q: Why doesn't my favorite power-patch for Lua apply against
LuaJIT?</dt>
+-<dd>Because it's a completely redesigned VM and has very little code
+-in common with Lua anymore. Also, if the patch introduces changes to
+-the Lua semantics, these would need to be reflected everywhere in the
+-VM, from the interpreter up to all stages of the compiler.<br> Please
+-use only standard Lua language constructs. For many common needs you
+-can use source transformations or use wrapper or proxy functions.
+-The compiler will happily optimize away such indirections.</dd>
++<dl id="order">
++<dt>Q: Table iteration with <tt>pairs()</tt> does not result in the
same order?</dt>
++<dd>The order of table iteration is explicitly <b>undefined</b> by
++the Lua language standard.<br>
++Different Lua implementations or versions may use different orders for
++otherwise identical tables. Different ways of constructing a table may
++result in different orders, too.<br>
++Due to improved VM security, LuaJIT 2.1 may even use a different order
++on separate VM invocations or when string keys are newly interned.<br><br>
++If your program relies on a deterministic order, it has a bug. Rewrite it,
++so it doesn't rely on the key order. Or sort the table keys, if you
must.</dd>
+ </dl>
+
+-<dl>
++<dl id="sandbox">
++<dt>Q: Can Lua code be safely sandboxed?</dt>
++<dd>
++Maybe for an extremely restricted subset of Lua and if you relentlessly
++scrutinize every single interface function you offer to the untrusted code.<br>
++
++Although Lua provides some sandboxing functionality (<tt>setfenv()</tt>,
hooks),
++it's very hard to get this right even for the Lua core libraries. Of course,
++you'll need to inspect any extension library, too. And there are libraries
++that are inherently unsafe, e.g. the <a href="ext_ffi.html">FFI
library</a>.<br>
++
++More reading material at the <a
href="http://lua-users.org/wiki/SandBoxes"><span
class="ext">»</span> Lua Wiki</a> and <a
href="https://en.wikipedia.org/wiki/Sandbox_(computer_security)"...
class="ext">»</span> Wikipedia</a>.<br><br>
++
++Relatedly, <b>loading untrusted bytecode is not safe!</b><br>
++
++It's trivial to crash the Lua or LuaJIT VM with maliciously crafted bytecode.
++This is well known and there's no bytecode verification on purpose, so please
++don't report a bug about it. Check the <tt>mode</tt> parameter for the
++<tt>load*()</tt> functions to disable loading of
bytecode.<br><br>
++
++<b>In general, the only promising approach is to sandbox Lua code at the
++process level and not the VM level.</b>
++</dd>
++</dl>
++
++<dl id="arch">
+ <dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt>
+ <dd>Because it's a compiler — it needs to generate native
+ machine code. This means the code generator must be ported to each
+ architecture. And the fast interpreter is written in assembler and
+ must be ported, too. This is quite an undertaking.<br>
+ The <a href="install.html">install documentation</a> shows the
supported
+-architectures. Other architectures will follow based on sufficient user
+-demand and/or sponsoring.</dd>
+-</dl>
+-
+-<dl>
+-<dt>Q: When will feature X be added? When will the next version be
released?</dt>
+-<dd>When it's ready.<br>
+-C'mon, it's open source — I'm doing it on my own time and
you're
+-getting it for free. You can either contribute a patch or sponsor
+-the development of certain features, if they are important to you.
+-</dd>
++architectures.<br>
++Other architectures may follow based on sufficient user demand and
++market-relevance of the architecture. Sponsoring is required to develop
++the port itself, to integrate it and to continuously maintain it in the
++actively developed branches.</dd>
+ </dl>
+ <br class="flush">
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/install.html b/doc/install.html
+index c491c601..e4af9dde 100644
+--- a/doc/install.html
++++ b/doc/install.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Installation</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -39,7 +38,7 @@ td.compatno {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Installation</h1>
+@@ -48,7 +47,7 @@ td.compatno {
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a class="current" href="install.html">Installation</a>
+ </li><li>
+@@ -66,6 +65,8 @@ td.compatno {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -74,17 +75,12 @@ td.compatno {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -109,22 +105,22 @@ operating systems, CPUs and compilers:
+ <td class="compatcpu">CPU / OS</td>
+ <td class="compatos"><a href="#posix">Linux</a>
or<br><a href="#android">Android</a></td>
+ <td class="compatos"><a href="#posix">*BSD,
Other</a></td>
+-<td class="compatos"><a href="#posix">OSX
10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td>
+-<td class="compatos"><a
href="#windows">Windows<br>XP/Vista/7</a></td>
++<td class="compatos"><a href="#posix">macOS
10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td>
++<td class="compatos"><a href="#windows">Windows
7<br>or later</a></td>
+ </tr>
+ <tr class="odd separate">
+ <td class="compatcpu">x86 (32 bit)</td>
+ <td class="compatos">GCC 4.2+</td>
+ <td class="compatos">GCC 4.2+</td>
+ <td class="compatos">XCode 5.0+<br>Clang</td>
+-<td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW,
Cygwin</td>
++<td class="compatos">MSVC<br>MinGW, Cygwin</td>
+ </tr>
+ <tr class="even">
+ <td class="compatcpu">x64 (64 bit)</td>
+ <td class="compatos">GCC 4.2+</td>
+ <td class="compatos">GCC 4.2+<br>ORBIS (<a
href="#ps4">PS4</a>)</td>
+ <td class="compatos">XCode 5.0+<br>Clang</td>
+-<td class="compatos">MSVC + SDK v7.0<br>WinSDK
v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
++<td class="compatos">MSVC<br>Durango (<a
href="#xboxone">Xbox One</a>)</td>
+ </tr>
+ <tr class="odd">
+ <td class="compatcpu"><a
href="#cross2">ARMv5+<br>ARM9E+</a></td>
+@@ -134,7 +130,7 @@ operating systems, CPUs and compilers:
+ <td class="compatos compatno"> </td>
+ </tr>
+ <tr class="even">
+-<td class="compatcpu"><a
href="#cross2">ARM64</a></td>
++<td class="compatcpu"><a
href="#cross2">ARM64<br>ARM64be</a></td>
+ <td class="compatos">GCC 4.8+</td>
+ <td class="compatos compatno"> </td>
+ <td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
+@@ -148,7 +144,7 @@ operating systems, CPUs and compilers:
+ <td class="compatos">XEDK (<a href="#xbox360">Xbox
360</a>)</td>
+ </tr>
+ <tr class="even">
+-<td class="compatcpu"><a
href="#cross2">MIPS32<br>MIPS64</a></td>
++<td class="compatcpu"><a
href="#cross2">MIPS32<br>MIPS64<br>MIPS64r6</a></td>
+ <td class="compatos">GCC 4.3+</td>
+ <td class="compatos">GCC 4.3+</td>
+ <td class="compatos compatno"> </td>
+@@ -169,22 +165,21 @@ only).</li>
+ <li><tt>src/Makefile</tt> has settings for
<b>compiling</b> LuaJIT
+ under POSIX, MinGW or Cygwin.</li>
+ <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
+-MSVC or WinSDK.</li>
++MSVC (Visual Studio).</li>
+ </ul>
+ <p>
+ Please read the instructions given in these files, before changing
+ any settings.
+ </p>
+ <p>
+-LuaJIT on x64 currently uses 32 bit GC objects by default.
+-<tt>LJ_GC64</tt> mode may be explicitly enabled:
+-add <tt>XCFLAGS=-DLUAJIT_ENABLE_GC64</tt> to the make command or run
+-<tt>msvcbuild gc64</tt> for MSVC/WinSDK. Please check the note
+-about the <a href="extensions.html#string_dump">bytecode
format</a>
+-differences, too.
++All LuaJIT 64 bit ports use 64 bit GC objects by default
(<tt>LJ_GC64</tt>).
++For x64, you can select the old 32-on-64 bit mode by adding
++<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command.
++Please check the note about the
++<a href="extensions.html#string_dump">bytecode format</a>
differences, too.
+ </p>
+
+-<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
++<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2>
+ <h3>Prerequisites</h3>
+ <p>
+ Depending on your distribution, you may need to install a package for
+@@ -192,14 +187,19 @@ GCC, the development headers and/or a complete SDK. E.g. on a
current
+ Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager.
+ </p>
+ <p>
+-Download the current source package of LuaJIT (pick the .tar.gz),
+-if you haven't already done so. Move it to a directory of your choice,
+-open a terminal window and change to this directory. Now unpack the archive
+-and change to the newly created directory:
++The recommended way to fetch the latest version is to do a pull from
++the git repository.
++</p>
++<p>
++Alternatively download the latest source package of LuaJIT (pick the .tar.gz).
++Move it to a directory of your choice, open a terminal window and change
++to this directory. Now unpack the archive and change to the newly created
++directory (replace XX.YY.ZZ with the version you downloaded):
+ </p>
+ <pre class="code">
+-tar zxf LuaJIT-2.0.5.tar.gz
+-cd LuaJIT-2.0.5</pre>
++tar zxf LuaJIT-XX.YY.ZZ.tar.gz
++cd LuaJIT-XX.YY.ZZ
++</pre>
+ <h3>Building LuaJIT</h3>
+ <p>
+ The supplied Makefiles try to auto-detect the settings needed for your
+@@ -223,9 +223,12 @@ You can add an extra prefix to the search paths by appending the
+ make PREFIX=/home/myself/lj2
+ </pre>
+ <p>
+-Note for OSX: if the <tt>MACOSX_DEPLOYMENT_TARGET</tt> environment
+-variable is not set, then it's forced to <tt>10.4</tt>.
++Note for macOS: you <b>must</b> set the
<tt>MACOSX_DEPLOYMENT_TARGET</tt>
++environment variable to a value supported by your toolchain:
+ </p>
++<pre class="code">
++MACOSX_DEPLOYMENT_TARGET=XX.YY make
++</pre>
+ <h3>Installing LuaJIT</h3>
+ <p>
+ The top-level Makefile installs LuaJIT by default under
+@@ -252,27 +255,18 @@ Obviously the prefixes given during build and installation need to
be the same.
+ <p>
+ Either install one of the open source SDKs
+ (<a
href="http://mingw.org/"><span
class="ext">»</span> MinGW</a> or
+-<a
href="http://www.cygwin.com/"><span
class="ext">»</span> Cygwin</a>), which come
with a modified
++<a
href="https://www.cygwin.com/"><span
class="ext">»</span> Cygwin</a>), which come
with a modified
+ GCC plus the required development headers.
++Or install Microsoft's Visual Studio (MSVC).
+ </p>
+ <p>
+-Or install Microsoft's Visual C++ (MSVC). The freely downloadable
+-<a
href="http://www.microsoft.com/Express/VC/"><span
class="ext">»</span> Express Edition</a>
+-works just fine, but only contains an x86 compiler.
+-</p>
+-<p>
+-The freely downloadable
+-<a
href="http://msdn.microsoft.com/en-us/windowsserver/bb980924.aspx&qu...
class="ext">»</span> Windows SDK</a>
+-only comes with command line tools, but this is all you need to build LuaJIT.
+-It contains x86 and x64 compilers.
+-</p>
+-<p>
+-Next, download the source package and unpack it using an archive manager
+-(e.g. the Windows Explorer) to a directory of your choice.
++Next, pull from the git repository or download the source package and
++unpack it using an archive manager (e.g. the Windows Explorer) to
++a directory of your choice.
+ </p>
+ <h3>Building with MSVC</h3>
+ <p>
+-Open a "Visual Studio .NET Command Prompt", <tt>cd</tt> to the
++Open a "Visual Studio Command Prompt" (either x86 or x64),
<tt>cd</tt> to the
+ directory where you've unpacked the sources and run these commands:
+ </p>
+ <pre class="code">
+@@ -280,37 +274,14 @@ cd src
+ msvcbuild
+ </pre>
+ <p>
+-Then follow the installation instructions below.
+-</p>
+-<h3>Building with the Windows SDK</h3>
+-<p>
+-Open a "Windows SDK Command Shell" and select the x86 compiler:
+-</p>
+-<pre class="code">
+-setenv /release /x86
+-</pre>
+-<p>
+-Or select the x64 compiler:
+-</p>
+-<pre class="code">
+-setenv /release /x64
+-</pre>
+-<p>
+-Then <tt>cd</tt> to the directory where you've unpacked the sources
+-and run these commands:
+-</p>
+-<pre class="code">
+-cd src
+-msvcbuild
+-</pre>
+-<p>
++Check the <tt>msvcbuild.bat</tt> file for more options.
+ Then follow the installation instructions below.
+ </p>
+ <h3>Building with MinGW or Cygwin</h3>
+ <p>
+ Open a command prompt window and make sure the MinGW or Cygwin programs
+-are in your path. Then <tt>cd</tt> to the directory where
+-you've unpacked the sources and run this command for MinGW:
++are in your path. Then <tt>cd</tt> to the directory of the git repository
++or where you've unpacked the sources. Then run this command for MinGW:
+ </p>
+ <pre class="code">
+ mingw32-make
+@@ -365,7 +336,7 @@ You need to specify <tt>TARGET_SYS</tt> whenever the host
OS and the
+ target OS differ, or you'll get assembler or linker errors:
+ </p>
+ <ul>
+-<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or
Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples
below.</li>
++<li>E.g. if you're compiling on a Windows or macOS host for embedded Linux or
Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples
below.</li>
+ <li>For a minimal target OS, you may need to disable the built-in allocator in
<tt>src/Makefile</tt> and use
<tt>TARGET_SYS=Other</tt>.</li>
+ <li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the
install step, too.</li>
+ </ul>
+@@ -428,52 +399,31 @@ make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2
-mabi=64"
+ make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
+ </pre>
+ <p>
+-You can cross-compile for <b id="android">Android</b> using the
<a
href="https://developer.android.com/ndk/index.html">Android
NDK</a>.
+-The environment variables need to match the install locations and the
+-desired target platform. E.g. Android 4.0 corresponds to ABI
level 14.
+-For details check the folder <tt>docs</tt> in the NDK directory.
+-</p>
+-<p>
+-Only a few common variations for the different CPUs, ABIs and platforms
+-are listed. Please use your own judgement for which combination you want
+-to build/deploy or which lowest common denominator you want to pick:
++You can cross-compile for <b id="android">Android</b> using the
<a
href="https://developer.android.com/ndk/"><span
class="ext">»</span> Android NDK</a>.
++Please adapt the environment variables to match the install locations and the
++desired target platform. E.g. Android 4.1 corresponds to ABI
level 16.
+ </p>
+ <pre class="code">
+-# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
+-NDK=/opt/android/ndk
+-NDKABI=8
+-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+-
+-# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
+-NDK=/opt/android/ndk
+-NDKABI=14
+-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
+-NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
++# Android/ARM64, aarch64, Android 5.0+ (L)
++NDKDIR=/opt/android/ndk
++NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
++NDKCROSS=$NDKBIN/aarch64-linux-android-
++NDKCC=$NDKBIN/aarch64-linux-android21-clang
++make CROSS=$NDKCROSS \
++ STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
++ TARGET_LD=$NDKCC
+
+-# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS)
+-NDK=/opt/android/ndk
+-NDKABI=14
+-NDKVER=$NDK/toolchains/mipsel-linux-android-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+-
+-# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
+-NDK=/opt/android/ndk
+-NDKABI=14
+-NDKVER=$NDK/toolchains/x86-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
++# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
++NDKDIR=/opt/android/ndk
++NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
++NDKCROSS=$NDKBIN/arm-linux-androideabi-
++NDKCC=$NDKBIN/armv7a-linux-androideabi16-clang
++make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
++ STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
++ TARGET_LD=$NDKCC
+ </pre>
+ <p>
+-You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad)
using the <a
href="http://developer.apple.com/devcenter/ios/index.action">...
class="ext">»</span> iOS SDK</a>:
++You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad)
using the <a
href="https://developer.apple.com/ios/"><span
class="ext">»</span> iOS SDK</a>:
+ </p>
+ <p style="font-size: 8pt;">
+ Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
+@@ -483,13 +433,6 @@ much slower than the JIT compiler. Please complain to Apple, not
me.
+ Or use Android. :-p
+ </p>
+ <pre class="code">
+-# iOS/ARM (32 bit)
+-ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+-ICC=$(xcrun --sdk iphoneos --find clang)
+-ISDKF="-arch armv7 -isysroot $ISDKP"
+-make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \
+- CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+-
+ # iOS/ARM64
+ ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ ICC=$(xcrun --sdk iphoneos --find clang)
+@@ -590,14 +533,6 @@ the DLL). You may link LuaJIT statically on Windows only if you
don't
+ intend to load Lua/C modules at runtime.
+ </li></ul>
+ </li>
+-<li>
+-If you're building a 64 bit application on OSX which links directly or
+-indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode,
+-you need to link your main executable with these flags:
+-<pre class="code">
+--pagezero_size 10000 -image_base 100000000
+-</pre>
+-</li>
+ </ul>
+ <p>Additional hints for initializing LuaJIT using the C API functions:</p>
+ <ul>
+@@ -606,7 +541,7 @@ you need to link your main executable with these flags:
+ for embedding Lua or LuaJIT into your application.</li>
+ <li>Make sure you use <tt>luaL_newstate</tt>. Avoid using
+ <tt>lua_newstate</tt>, since this uses the (slower) default memory
+-allocator from your system (no support for this on x64).</li>
++allocator from your system (no support for this on 64 bit
architectures).</li>
+ <li>Make sure you use <tt>luaL_openlibs</tt> and not the old Lua 5.0
style
+ of calling <tt>luaopen_base</tt> etc. directly.</li>
+ <li>To change or extend the list of standard libraries to load, copy
+@@ -615,7 +550,7 @@ Make sure the <tt>jit</tt> library is loaded or the JIT
compiler
+ will not be activated.</li>
+ <li>The <tt>bit.*</tt> module for bitwise operations
+ is already built-in. There's no need to statically link
+-<a
href="http://bitop.luajit.org/"><span
class="ext">»</span> Lua BitOp</a> to your
application.</li>
++<a
href="https://bitop.luajit.org/"><span
class="ext">»</span> Lua BitOp</a> to your
application.</li>
+ </ul>
+
+ <h2 id="distro">Hints for Distribution Maintainers</h2>
+@@ -682,7 +617,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/luajit.html b/doc/luajit.html
+index ef5b824c..a25267a6 100644
+--- a/doc/luajit.html
++++ b/doc/luajit.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>LuaJIT</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -96,7 +95,7 @@ table.feature small {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>LuaJIT</h1>
+@@ -105,7 +104,7 @@ table.feature small {
+ <ul><li>
+ <a class="current" href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -123,6 +122,8 @@ table.feature small {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -131,36 +132,31 @@ table.feature small {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+ LuaJIT is a <b>Just-In-Time Compiler</b> (JIT) for the
+-<a
href="http://www.lua.org/"><span
class="ext">»</span> Lua</a> programming
language.
++<a
href="https://www.lua.org/"><span
class="ext">»</span> Lua</a> programming
language.
+ Lua is a powerful, dynamic and light-weight programming language.
+ It may be embedded or used as a general-purpose, stand-alone language.
+ </p>
+ <p>
+-LuaJIT is Copyright © 2005-2017 Mike Pall, released under the
+-<a
href="http://www.opensource.org/licenses/mit-license.php">&l...
class="ext">»</span> MIT open source
license</a>.
++LuaJIT is Copyright © 2005-2021 Mike Pall, released under the
++<a
href="https://www.opensource.org/licenses/mit-license.php">&...
class="ext">»</span> MIT open source
license</a>.
+ </p>
+ <p>
+ </p>
+
+ <h2>Compatibility</h2>
+ <table class="feature os os1">
+-<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr>
++<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>macOS</td><td>POSIX</td></tr>
+ </table>
+ <table class="feature os os2">
+ <tr><td><span
style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
+@@ -194,7 +190,7 @@ LuaJIT has been successfully used as a <b>scripting
middleware</b> in
+ games, appliances, network and graphics apps, numerical simulations,
+ trading platforms and many other specialty applications. It scales from
+ embedded devices, smartphones, desktops up to server farms. It combines
+-high flexibility with <a
href="http://luajit.org/performance.html"><span
class="ext">»</span> high performance</a>
++high flexibility with high performance
+ and an unmatched <b>low memory footprint</b>.
+ </p>
+ <p>
+@@ -226,7 +222,7 @@ Please select a sub-topic in the navigation bar to learn more about
LuaJIT.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/running.html b/doc/running.html
+index 666b0abc..b55b8439 100644
+--- a/doc/running.html
++++ b/doc/running.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Running LuaJIT</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -33,7 +32,7 @@ td.param_default {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Running LuaJIT</h1>
+@@ -42,7 +41,7 @@ td.param_default {
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -60,6 +59,8 @@ td.param_default {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -68,17 +69,12 @@ td.param_default {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -95,7 +91,7 @@ The <tt>luajit</tt> stand-alone executable is just a
slightly modified
+ version of the regular <tt>lua</tt> stand-alone executable.
+ It supports the same basic options, too. <tt>luajit -h</tt>
+ prints a short list of the available options. Please have a look at the
+-<a
href="http://www.lua.org/manual/5.1/manual.html#6"><span
class="ext">»</span> Lua manual</a>
++<a
href="https://www.lua.org/manual/5.1/manual.html#6"><span
class="ext">»</span> Lua manual</a>
+ for details.
+ </p>
+ <p>
+@@ -189,8 +185,8 @@ itself. For a description of their options and output format, please
+ read the comment block at the start of their source.
+ They can be found in the <tt>lib</tt> directory of the source
+ distribution or installed under the <tt>jit</tt> directory. By default
+-this is <tt>/usr/local/share/luajit-2.0.5/jit</tt> on POSIX
+-systems.
++this is <tt>/usr/local/share/luajit-XX.YY.ZZ/jit</tt> on POSIX
++systems (replace XX.YY.ZZ by the installed version).
+ </p>
+
+ <h3 id="opt_O"><tt>-O[level]</tt><br>
+@@ -299,7 +295,7 @@ Here are the parameters and their default settings:
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/status.html b/doc/status.html
+index cad6ca65..1d3ba984 100644
+--- a/doc/status.html
++++ b/doc/status.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Status</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -14,7 +13,7 @@ ul li { padding-bottom: 0.3em; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Status</h1>
+@@ -23,7 +22,7 @@ ul li { padding-bottom: 0.3em; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -41,6 +40,8 @@ ul li { padding-bottom: 0.3em; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -49,24 +50,25 @@ ul li { padding-bottom: 0.3em; }
+ </li></ul>
+ </li><li>
+ <a class="current" href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+-<span style="color: #0000c0;">LuaJIT 2.0</span> is the
current
+-<span style="color: #0000c0;">stable branch</span>. This branch is
in
+-feature-freeze — new features will only be added to LuaJIT 2.1.
++This documentation is for LuaJIT 2.1.0-beta3. Please check the <tt>doc</tt>
++directory in each git branch for the version-specific documentation.
++</p>
++<p>
++The currently developed branches are LuaJIT 2.1 and LuaJIT 2.0.
++</p>
++<p>
++LuaJIT 2.0 is in feature-freeze — new features will only
++be added to LuaJIT 2.1.
+ </p>
+
+ <h2>Current Status</h2>
+@@ -90,30 +92,12 @@ The Lua <b>debug API</b> is missing a couple of features
(return
+ hooks for non-Lua functions) and shows slightly different behavior
+ in LuaJIT (no per-coroutine hooks, no tail call counting).
+ </li>
+-<li>
+-Currently some <b>out-of-memory</b> errors from <b>on-trace
code</b> are not
+-handled correctly. The error may fall through an on-trace
+-<tt>pcall</tt> or it may be passed on to the function set with
+-<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new
+-garbage collector.
+-</li>
+-<li>
+-LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the
+-<b>legacy <tt>lightuserdata</tt></b> data type.
+-This is only relevant on x64 systems which use the negative part of the
+-virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems
+-configured with a 48 bit or 52 bit VA.
+-Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside
+-of that range, e.g. variables on the stack. In general, avoid this data
+-type for new code and replace it with (much more performant) FFI bindings.
+-FFI cdata pointers can address the full 64 bit range.
+-</li>
+ </ul>
+ <br class="flush">
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
+index a43f7c66..ebcf4ac0 100644
+--- a/dynasm/dasm_arm.h
++++ b/dynasm/dasm_arm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM ARM encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ case DASM_IMMV8:
+ CK((n & 3) == 0, RANGE_I);
+ n >>= 2;
++ /* fallthrough */
+ case DASM_IMML8:
+ case DASM_IMML12:
+ CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
+@@ -293,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -370,7 +371,11 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;
+diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
+index 32f595af..0c775ae2 100644
+--- a/dynasm/dasm_arm.lua
++++ b/dynasm/dasm_arm.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM ARM module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -9,9 +9,9 @@
+ local _info = {
+ arch = "arm",
+ description = "DynASM ARM module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
+index 47e1e074..d6da4528 100644
+--- a/dynasm/dasm_arm64.h
++++ b/dynasm/dasm_arm64.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM ARM64 encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -21,8 +21,9 @@ enum {
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+- DASM_REL_PC, DASM_LABEL_PC,
++ DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
+ DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
++ DASM_IMMV, DASM_VREG,
+ DASM__MAX
+ };
+
+@@ -39,6 +40,7 @@ enum {
+ #define DASM_S_RANGE_LG 0x13000000
+ #define DASM_S_RANGE_PC 0x14000000
+ #define DASM_S_RANGE_REL 0x15000000
++#define DASM_S_RANGE_VREG 0x16000000
+ #define DASM_S_UNDEF_LG 0x21000000
+ #define DASM_S_UNDEF_PC 0x22000000
+
+@@ -247,7 +249,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+- case DASM_REL_EXT: break;
++ case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+@@ -268,6 +270,11 @@ void dasm_put(Dst_DECL, int start, ...)
+ *pl = pos;
+ }
+ pos++;
++ if ((ins & 0x8000)) ofs += 8;
++ break;
++ case DASM_REL_A:
++ b[pos++] = n;
++ b[pos++] = va_arg(ap, int);
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+@@ -312,13 +319,21 @@ void dasm_put(Dst_DECL, int start, ...)
+ }
+ case DASM_IMML: {
+ #ifdef DASM_CHECKS
+- int scale = (p[-2] >> 30);
++ int scale = (ins & 3);
+ CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) <
4096) ||
+ (unsigned int)(n+256) < 512, RANGE_I);
+ #endif
+ b[pos++] = n;
+ break;
+ }
++ case DASM_IMMV:
++ ofs += 4;
++ b[pos++] = n;
++ break;
++ case DASM_VREG:
++ CK(n < 32, RANGE_VREG);
++ b[pos++] = n;
++ break;
+ }
+ }
+ }
+@@ -348,7 +363,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -375,8 +390,8 @@ int dasm_link(Dst_DECL, size_t *szp)
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+- case DASM_IMML: pos++; break;
+- case DASM_IMM13X: pos += 2; break;
++ case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
++ case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
+ }
+ }
+ stop: (void)0;
+@@ -426,7 +441,13 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
++ n = (int)na;
++ CK((ptrdiff_t)n == na, RANGE_REL);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+@@ -445,8 +466,24 @@ int dasm_encode(Dst_DECL, void *buffer)
+ } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
+ CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+ cp[-1] |= ((n << 3) & 0x0007ffe0);
++ } else if ((ins & 0x8000)) { /* absolute */
++ cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
++ cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
++ cp += 2;
+ }
+ break;
++ case DASM_REL_A: {
++ ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
++ if ((ins & 0x3000) == 0x3000) { /* ADRP */
++ ins &= ~0x1000;
++ na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
++ } else {
++ na = na - (ptrdiff_t)cp + 4;
++ }
++ n = (int)na;
++ CK((ptrdiff_t)n == na, RANGE_REL);
++ goto patchrel;
++ }
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+ break;
+@@ -467,11 +504,17 @@ int dasm_encode(Dst_DECL, void *buffer)
+ cp[-1] |= (dasm_imm13(n, *b++) << 10);
+ break;
+ case DASM_IMML: {
+- int scale = (p[-2] >> 30);
++ int scale = (ins & 3);
+ cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale)
< 4096) ?
+ ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+ break;
+ }
++ case DASM_IMMV:
++ *cp++ = n;
++ break;
++ case DASM_VREG:
++ cp[-1] |= (n & 0x1f) << (ins & 0x1f);
++ break;
+ default: *cp++ = ins; break;
+ }
+ }
+diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
+index 8a5f735d..cb82dc4a 100644
+--- a/dynasm/dasm_arm64.lua
++++ b/dynasm/dasm_arm64.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM ARM64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -9,9 +9,9 @@
+ local _info = {
+ arch = "arm",
+ description = "DynASM ARM64 module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -23,12 +23,12 @@ local _M = { _info = _info }
+ local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+ local assert, setmetatable, rawget = assert, setmetatable, rawget
+ local _s = string
+-local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
++local format, byte, char = _s.format, _s.byte, _s.char
+ local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+ local concat, sort, insert = table.concat, table.sort, table.insert
+ local bit = bit or require("bit")
+ local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+-local ror, tohex = bit.ror, bit.tohex
++local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
+
+ -- Inherited tables and callbacks.
+ local g_opt, g_arch
+@@ -39,7 +39,9 @@ local wline, werror, wfatal, wwarn
+ local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+- "REL_PC", "LABEL_PC", "IMM", "IMM6",
"IMM12", "IMM13W", "IMM13X", "IMML",
++ "REL_PC", "LABEL_PC", "REL_A",
++ "IMM", "IMM6", "IMM12", "IMM13W",
"IMM13X", "IMML", "IMMV",
++ "VREG",
+ }
+
+ -- Maximum number of section buffer positions for dasm_put().
+@@ -246,9 +248,12 @@ local map_cond = {
+
+ local parse_reg_type
+
+-local function parse_reg(expr)
++local function parse_reg(expr, shift)
+ if not expr then werror("expected register name") end
+ local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
++ if not tname then
++ tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
++ end
+ local tp = map_type[tname or expr]
+ if tp then
+ local reg = ovreg or tp.reg
+@@ -266,18 +271,28 @@ local function parse_reg(expr)
+ elseif parse_reg_type ~= rt then
+ werror("register size mismatch")
+ end
+- return r, tp
++ return shl(r, shift), tp
++ end
++ end
++ local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
++ if vreg then
++ if not parse_reg_type then
++ parse_reg_type = vrt
++ elseif parse_reg_type ~= vrt then
++ werror("register size mismatch")
+ end
++ if shift then waction("VREG", shift, vreg) end
++ return 0
+ end
+ werror("bad register name `"..expr.."'")
+ end
+
+ local function parse_reg_base(expr)
+ if expr == "sp" then return 0x3e0 end
+- local base, tp = parse_reg(expr)
++ local base, tp = parse_reg(expr, 5)
+ if parse_reg_type ~= "x" then werror("bad register type") end
+ parse_reg_type = false
+- return shl(base, 5), tp
++ return base, tp
+ end
+
+ local parse_ctx = {}
+@@ -297,7 +312,7 @@ local function parse_number(n)
+ local code = loadenv("return "..n)
+ if code then
+ local ok, y = pcall(code)
+- if ok then return y end
++ if ok and type(y) == "number" then return y end
+ end
+ return nil
+ end
+@@ -403,7 +418,7 @@ local function parse_imm_load(imm, scale)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+- waction("IMML", 0, imm)
++ waction("IMML", scale, imm)
+ return 0
+ end
+ end
+@@ -462,6 +477,7 @@ end
+
+ local function parse_load(params, nparams, n, op)
+ if params[n+2] then werror("too many operands") end
++ local scale = shr(op, 30)
+ local pn, p2 = params[n], params[n+1]
+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+ if not p1 then
+@@ -470,14 +486,13 @@ local function parse_load(params, nparams, n, op)
+ if reg and tailr ~= "" then
+ local base, tp = parse_reg_base(reg)
+ if tp then
+- waction("IMML", 0, format(tp.ctypefmt, tailr))
++ waction("IMML", scale, format(tp.ctypefmt, tailr))
+ return op + base
+ end
+ end
+ end
+ werror("expected address operand")
+ end
+- local scale = shr(op, 30)
+ if p2 then
+ if wb == "!" then werror("bad use of '!'") end
+ op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+@@ -494,7 +509,7 @@ local function parse_load(params, nparams, n, op)
+ op = op + parse_imm_load(imm, scale)
+ else
+ local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+- op = op + shl(parse_reg(p2b), 16) + 0x00200800
++ op = op + parse_reg(p2b, 16) + 0x00200800
+ if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+ werror("bad index register type")
+ end
+@@ -561,14 +576,14 @@ local function parse_load_pair(params, nparams, n, op)
+ end
+
+ local function parse_label(label, def)
+- local prefix = sub(label, 1, 2)
++ local prefix = label:sub(1, 2)
+ -- =>label (pc label reference)
+ if prefix == "=>" then
+- return "PC", 0, sub(label, 3)
++ return "PC", 0, label:sub(3)
+ end
+ -- ->name (global label reference)
+ if prefix == "->" then
+- return "LG", map_global[sub(label, 3)]
++ return "LG", map_global[label:sub(3)]
+ end
+ if def then
+ -- [1-9] (local label definition)
+@@ -586,8 +601,11 @@ local function parse_label(label, def)
+ if extname then
+ return "EXT", map_extern[extname]
+ end
++ -- &expr (pointer)
++ if label:sub(1, 1) == "&" then
++ return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
++ end
+ end
+- werror("bad label `"..label.."'")
+ end
+
+ local function branch_type(op)
+@@ -620,24 +638,24 @@ local function alias_bfx(p)
+ end
+
+ local function alias_bfiz(p)
+- parse_reg(p[1])
++ parse_reg(p[1], 0)
+ if parse_reg_type == "w" then
+- p[3] = "#-("..p[3]:sub(2)..")%32"
++ p[3] = "#(32-("..p[3]:sub(2).."))%32"
+ p[4] = "#("..p[4]:sub(2)..")-1"
+ else
+- p[3] = "#-("..p[3]:sub(2)..")%64"
++ p[3] = "#(64-("..p[3]:sub(2).."))%64"
+ p[4] = "#("..p[4]:sub(2)..")-1"
+ end
+ end
+
+ local alias_lslimm = op_alias("ubfm_4", function(p)
+- parse_reg(p[1])
++ parse_reg(p[1], 0)
+ local sh = p[3]:sub(2)
+ if parse_reg_type == "w" then
+- p[3] = "#-("..sh..")%32"
++ p[3] = "#(32-("..sh.."))%32"
+ p[4] = "#31-("..sh..")"
+ else
+- p[3] = "#-("..sh..")%64"
++ p[3] = "#(64-("..sh.."))%64"
+ p[4] = "#63-("..sh..")"
+ end
+ end)
+@@ -881,25 +899,25 @@ end
+
+ -- Handle opcodes defined with template strings.
+ local function parse_template(params, template, nparams, pos)
+- local op = tonumber(sub(template, 1, 8), 16)
++ local op = tonumber(template:sub(1, 8), 16)
+ local n = 1
+ local rtt = {}
+
+ parse_reg_type = false
+
+ -- Process each character.
+- for p in gmatch(sub(template, 9), ".") do
++ for p in gmatch(template:sub(9), ".") do
+ local q = params[n]
+ if p == "D" then
+- op = op + parse_reg(q); n = n + 1
++ op = op + parse_reg(q, 0); n = n + 1
+ elseif p == "N" then
+- op = op + shl(parse_reg(q), 5); n = n + 1
++ op = op + parse_reg(q, 5); n = n + 1
+ elseif p == "M" then
+- op = op + shl(parse_reg(q), 16); n = n + 1
++ op = op + parse_reg(q, 16); n = n + 1
+ elseif p == "A" then
+- op = op + shl(parse_reg(q), 10); n = n + 1
++ op = op + parse_reg(q, 10); n = n + 1
+ elseif p == "m" then
+- op = op + shl(parse_reg(params[n-1]), 16)
++ op = op + parse_reg(params[n-1], 16)
+
+ elseif p == "p" then
+ if q == "sp" then params[n] = "@x31" end
+@@ -930,8 +948,14 @@ local function parse_template(params, template, nparams, pos)
+
+ elseif p == "B" then
+ local mode, v, s = parse_label(q, false); n = n + 1
++ if not mode then werror("bad label `"..q.."'") end
+ local m = branch_type(op)
+- waction("REL_"..mode, v+m, s, 1)
++ if mode == "A" then
++ waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
++ actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
++ else
++ waction("REL_"..mode, v+m, s, 1)
++ end
+
+ elseif p == "I" then
+ op = op + parse_imm12(q); n = n + 1
+@@ -977,8 +1001,8 @@ function op_template(params, template, nparams)
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "")
end
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+- -- A single opcode needs a maximum of 3 positions.
+- if secpos+3 > maxsecpos then wflush() end
++ -- A single opcode needs a maximum of 4 positions.
++ if secpos+4 > maxsecpos then wflush() end
+ local pos = wpos()
+ local lpos, apos, spos = #actlist, #actargs, secpos
+
+@@ -990,9 +1014,11 @@ function op_template(params, template, nparams)
+ actlist[lpos+1] = nil
+ actlist[lpos+2] = nil
+ actlist[lpos+3] = nil
++ actlist[lpos+4] = nil
+ actargs[apos+1] = nil
+ actargs[apos+2] = nil
+ actargs[apos+3] = nil
++ actargs[apos+4] = nil
+ end
+ error(err, 0)
+ end
+@@ -1036,23 +1062,50 @@ map_op[".label_1"] = function(params)
+ if not params then return "[1-9] | ->global | =>pcexpr" end
+ if secpos+1 > maxsecpos then wflush() end
+ local mode, n, s = parse_label(params[1], true)
+- if mode == "EXT" then werror("bad label definition") end
++ if not mode or mode == "EXT" then werror("bad label definition")
end
+ waction("LABEL_"..mode, n, s, 1)
+ end
+
+ ------------------------------------------------------------------------------
+
+ -- Pseudo-opcodes for data storage.
+-map_op[".long_*"] = function(params)
++local function op_data(params)
+ if not params then return "imm..." end
++ local sz = params.op == ".long" and 4 or 8
+ for _,p in ipairs(params) do
+- local n = tonumber(p)
+- if not n then werror("bad immediate `"..p.."'") end
+- if n < 0 then n = n + 2^32 end
+- wputw(n)
++ local imm = parse_number(p)
++ if imm then
++ local n = tobit(imm)
++ if n == imm or (n < 0 and n + 2^32 == imm) then
++ wputw(n < 0 and n + 2^32 or n)
++ if sz == 8 then
++ wputw(imm < 0 and 0xffffffff or 0)
++ end
++ elseif sz == 4 then
++ werror("bad immediate `"..p.."'")
++ else
++ imm = nil
++ end
++ end
++ if not imm then
++ local mode, v, s = parse_label(p, false)
++ if sz == 4 then
++ if mode then werror("label does not fit into .long") end
++ waction("IMMV", 0, p)
++ elseif mode and mode ~= "A" then
++ waction("REL_"..mode, v+0x8000, s, 1)
++ else
++ if mode == "A" then p = s end
++ waction("IMMV", 0, format("(unsigned int)(%s)", p))
++ waction("IMMV", 0, format("(unsigned int)((unsigned long
long)(%s)>>32)", p))
++ end
++ end
+ if secpos+2 > maxsecpos then wflush() end
+ end
+ end
++map_op[".long_*"] = op_data
++map_op[".quad_*"] = op_data
++map_op[".addr_*"] = op_data
+
+ -- Alignment pseudo-opcode.
+ map_op[".align_1"] = function(params)
+diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
+index 4b49fd8c..b99b56b0 100644
+--- a/dynasm/dasm_mips.h
++++ b/dynasm/dasm_mips.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM MIPS encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -273,7 +273,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -349,19 +349,24 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n);
+ if (ins & 2048)
+- n = n - (int)((char *)cp - base);
+- else
+ n = (n + (int)(size_t)base) & 0x0fffffff;
+- patchrel:
++ else
++ n = n - (int)((char *)cp - base);
++ patchrel: {
++ unsigned int e = 16 + ((ins >> 12) & 15);
+ CK((n & 3) == 0 &&
+- ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
+- ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL);
+- cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff));
++ ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0,
RANGE_REL);
++ cp[-1] |= ((n>>2) & ((1<<e)-1));
++ }
+ break;
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
+index 78a4e34a..59147015 100644
+--- a/dynasm/dasm_mips.lua
++++ b/dynasm/dasm_mips.lua
+@@ -1,19 +1,20 @@
+ ------------------------------------------------------------------------------
+ -- DynASM MIPS32/MIPS64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+ local mips64 = mips64
++local mipsr6 = _map_def.MIPSR6
+
+ -- Module information:
+ local _info = {
+ arch = mips64 and "mips64" or "mips",
+ description = "DynASM MIPS32/MIPS64 module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2016-05-24",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -238,7 +239,6 @@ local map_op = {
+ bne_3 = "14000000STB",
+ blez_2 = "18000000SB",
+ bgtz_2 = "1c000000SB",
+- addi_3 = "20000000TSI",
+ li_2 = "24000000TI",
+ addiu_3 = "24000000TSI",
+ slti_3 = "28000000TSI",
+@@ -248,40 +248,22 @@ local map_op = {
+ ori_3 = "34000000TSU",
+ xori_3 = "38000000TSU",
+ lui_2 = "3c000000TU",
+- beqzl_2 = "50000000SB",
+- beql_3 = "50000000STB",
+- bnezl_2 = "54000000SB",
+- bnel_3 = "54000000STB",
+- blezl_2 = "58000000SB",
+- bgtzl_2 = "5c000000SB",
+- daddi_3 = mips64 and "60000000TSI",
+ daddiu_3 = mips64 and "64000000TSI",
+ ldl_2 = mips64 and "68000000TO",
+ ldr_2 = mips64 and "6c000000TO",
+ lb_2 = "80000000TO",
+ lh_2 = "84000000TO",
+- lwl_2 = "88000000TO",
+ lw_2 = "8c000000TO",
+ lbu_2 = "90000000TO",
+ lhu_2 = "94000000TO",
+- lwr_2 = "98000000TO",
+ lwu_2 = mips64 and "9c000000TO",
+ sb_2 = "a0000000TO",
+ sh_2 = "a4000000TO",
+- swl_2 = "a8000000TO",
+ sw_2 = "ac000000TO",
+- sdl_2 = mips64 and "b0000000TO",
+- sdr_2 = mips64 and "b1000000TO",
+- swr_2 = "b8000000TO",
+- cache_2 = "bc000000NO",
+- ll_2 = "c0000000TO",
+ lwc1_2 = "c4000000HO",
+- pref_2 = "cc000000NO",
+ ldc1_2 = "d4000000HO",
+ ld_2 = mips64 and "dc000000TO",
+- sc_2 = "e0000000TO",
+ swc1_2 = "e4000000HO",
+- scd_2 = mips64 and "f0000000TO",
+ sdc1_2 = "f4000000HO",
+ sd_2 = mips64 and "fc000000TO",
+
+@@ -289,10 +271,6 @@ local map_op = {
+ nop_0 = "00000000",
+ sll_3 = "00000000DTA",
+ sextw_2 = "00000000DT",
+- movf_2 = "00000001DS",
+- movf_3 = "00000001DSC",
+- movt_2 = "00010001DS",
+- movt_3 = "00010001DSC",
+ srl_3 = "00000002DTA",
+ rotr_3 = "00200002DTA",
+ sra_3 = "00000003DTA",
+@@ -301,31 +279,16 @@ local map_op = {
+ rotrv_3 = "00000046DTS",
+ drotrv_3 = mips64 and "00000056DTS",
+ srav_3 = "00000007DTS",
+- jr_1 = "00000008S",
+ jalr_1 = "0000f809S",
+ jalr_2 = "00000009DS",
+- movz_3 = "0000000aDST",
+- movn_3 = "0000000bDST",
+ syscall_0 = "0000000c",
+ syscall_1 = "0000000cY",
+ break_0 = "0000000d",
+ break_1 = "0000000dY",
+ sync_0 = "0000000f",
+- mfhi_1 = "00000010D",
+- mthi_1 = "00000011S",
+- mflo_1 = "00000012D",
+- mtlo_1 = "00000013S",
+ dsllv_3 = mips64 and "00000014DTS",
+ dsrlv_3 = mips64 and "00000016DTS",
+ dsrav_3 = mips64 and "00000017DTS",
+- mult_2 = "00000018ST",
+- multu_2 = "00000019ST",
+- div_2 = "0000001aST",
+- divu_2 = "0000001bST",
+- dmult_2 = mips64 and "0000001cST",
+- dmultu_2 = mips64 and "0000001dST",
+- ddiv_2 = mips64 and "0000001eST",
+- ddivu_2 = mips64 and "0000001fST",
+ add_3 = "00000020DST",
+ move_2 = mips64 and "00000025DS" or "00000021DS",
+ addu_3 = "00000021DST",
+@@ -369,32 +332,9 @@ local map_op = {
+ bgez_2 = "04010000SB",
+ bltzl_2 = "04020000SB",
+ bgezl_2 = "04030000SB",
+- tgei_2 = "04080000SI",
+- tgeiu_2 = "04090000SI",
+- tlti_2 = "040a0000SI",
+- tltiu_2 = "040b0000SI",
+- teqi_2 = "040c0000SI",
+- tnei_2 = "040e0000SI",
+- bltzal_2 = "04100000SB",
+ bal_1 = "04110000B",
+- bgezal_2 = "04110000SB",
+- bltzall_2 = "04120000SB",
+- bgezall_2 = "04130000SB",
+ synci_1 = "041f0000O",
+
+- -- Opcode SPECIAL2.
+- madd_2 = "70000000ST",
+- maddu_2 = "70000001ST",
+- mul_3 = "70000002DST",
+- msub_2 = "70000004ST",
+- msubu_2 = "70000005ST",
+- clz_2 = "70000020DS=",
+- clo_2 = "70000021DS=",
+- dclz_2 = mips64 and "70000024DS=",
+- dclo_2 = mips64 and "70000025DS=",
+- sdbbp_0 = "7000003f",
+- sdbbp_1 = "7000003fY",
+-
+ -- Opcode SPECIAL3.
+ ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
+ dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
+@@ -445,15 +385,6 @@ local map_op = {
+ ctc1_2 = "44c00000TG",
+ mthc1_2 = "44e00000TG",
+
+- bc1f_1 = "45000000B",
+- bc1f_2 = "45000000CB",
+- bc1t_1 = "45010000B",
+- bc1t_2 = "45010000CB",
+- bc1fl_1 = "45020000B",
+- bc1fl_2 = "45020000CB",
+- bc1tl_1 = "45030000B",
+- bc1tl_2 = "45030000CB",
+-
+ ["add.s_3"] = "46000000FGH",
+ ["sub.s_3"] = "46000001FGH",
+ ["mul.s_3"] = "46000002FGH",
+@@ -470,51 +401,11 @@ local map_op = {
+ ["trunc.w.s_2"] = "4600000dFG",
+ ["ceil.w.s_2"] = "4600000eFG",
+ ["floor.w.s_2"] = "4600000fFG",
+- ["movf.s_2"] = "46000011FG",
+- ["movf.s_3"] = "46000011FGC",
+- ["movt.s_2"] = "46010011FG",
+- ["movt.s_3"] = "46010011FGC",
+- ["movz.s_3"] = "46000012FGT",
+- ["movn.s_3"] = "46000013FGT",
+ ["recip.s_2"] = "46000015FG",
+ ["rsqrt.s_2"] = "46000016FG",
+ ["cvt.d.s_2"] = "46000021FG",
+ ["cvt.w.s_2"] = "46000024FG",
+ ["cvt.l.s_2"] = "46000025FG",
+- ["cvt.ps.s_3"] = "46000026FGH",
+- ["c.f.s_2"] = "46000030GH",
+- ["c.f.s_3"] = "46000030VGH",
+- ["c.un.s_2"] = "46000031GH",
+- ["c.un.s_3"] = "46000031VGH",
+- ["c.eq.s_2"] = "46000032GH",
+- ["c.eq.s_3"] = "46000032VGH",
+- ["c.ueq.s_2"] = "46000033GH",
+- ["c.ueq.s_3"] = "46000033VGH",
+- ["c.olt.s_2"] = "46000034GH",
+- ["c.olt.s_3"] = "46000034VGH",
+- ["c.ult.s_2"] = "46000035GH",
+- ["c.ult.s_3"] = "46000035VGH",
+- ["c.ole.s_2"] = "46000036GH",
+- ["c.ole.s_3"] = "46000036VGH",
+- ["c.ule.s_2"] = "46000037GH",
+- ["c.ule.s_3"] = "46000037VGH",
+- ["c.sf.s_2"] = "46000038GH",
+- ["c.sf.s_3"] = "46000038VGH",
+- ["c.ngle.s_2"] = "46000039GH",
+- ["c.ngle.s_3"] = "46000039VGH",
+- ["c.seq.s_2"] = "4600003aGH",
+- ["c.seq.s_3"] = "4600003aVGH",
+- ["c.ngl.s_2"] = "4600003bGH",
+- ["c.ngl.s_3"] = "4600003bVGH",
+- ["c.lt.s_2"] = "4600003cGH",
+- ["c.lt.s_3"] = "4600003cVGH",
+- ["c.nge.s_2"] = "4600003dGH",
+- ["c.nge.s_3"] = "4600003dVGH",
+- ["c.le.s_2"] = "4600003eGH",
+- ["c.le.s_3"] = "4600003eVGH",
+- ["c.ngt.s_2"] = "4600003fGH",
+- ["c.ngt.s_3"] = "4600003fVGH",
+-
+ ["add.d_3"] = "46200000FGH",
+ ["sub.d_3"] = "46200001FGH",
+ ["mul.d_3"] = "46200002FGH",
+@@ -531,130 +422,410 @@ local map_op = {
+ ["trunc.w.d_2"] = "4620000dFG",
+ ["ceil.w.d_2"] = "4620000eFG",
+ ["floor.w.d_2"] = "4620000fFG",
+- ["movf.d_2"] = "46200011FG",
+- ["movf.d_3"] = "46200011FGC",
+- ["movt.d_2"] = "46210011FG",
+- ["movt.d_3"] = "46210011FGC",
+- ["movz.d_3"] = "46200012FGT",
+- ["movn.d_3"] = "46200013FGT",
+ ["recip.d_2"] = "46200015FG",
+ ["rsqrt.d_2"] = "46200016FG",
+ ["cvt.s.d_2"] = "46200020FG",
+ ["cvt.w.d_2"] = "46200024FG",
+ ["cvt.l.d_2"] = "46200025FG",
+- ["c.f.d_2"] = "46200030GH",
+- ["c.f.d_3"] = "46200030VGH",
+- ["c.un.d_2"] = "46200031GH",
+- ["c.un.d_3"] = "46200031VGH",
+- ["c.eq.d_2"] = "46200032GH",
+- ["c.eq.d_3"] = "46200032VGH",
+- ["c.ueq.d_2"] = "46200033GH",
+- ["c.ueq.d_3"] = "46200033VGH",
+- ["c.olt.d_2"] = "46200034GH",
+- ["c.olt.d_3"] = "46200034VGH",
+- ["c.ult.d_2"] = "46200035GH",
+- ["c.ult.d_3"] = "46200035VGH",
+- ["c.ole.d_2"] = "46200036GH",
+- ["c.ole.d_3"] = "46200036VGH",
+- ["c.ule.d_2"] = "46200037GH",
+- ["c.ule.d_3"] = "46200037VGH",
+- ["c.sf.d_2"] = "46200038GH",
+- ["c.sf.d_3"] = "46200038VGH",
+- ["c.ngle.d_2"] = "46200039GH",
+- ["c.ngle.d_3"] = "46200039VGH",
+- ["c.seq.d_2"] = "4620003aGH",
+- ["c.seq.d_3"] = "4620003aVGH",
+- ["c.ngl.d_2"] = "4620003bGH",
+- ["c.ngl.d_3"] = "4620003bVGH",
+- ["c.lt.d_2"] = "4620003cGH",
+- ["c.lt.d_3"] = "4620003cVGH",
+- ["c.nge.d_2"] = "4620003dGH",
+- ["c.nge.d_3"] = "4620003dVGH",
+- ["c.le.d_2"] = "4620003eGH",
+- ["c.le.d_3"] = "4620003eVGH",
+- ["c.ngt.d_2"] = "4620003fGH",
+- ["c.ngt.d_3"] = "4620003fVGH",
+-
+- ["add.ps_3"] = "46c00000FGH",
+- ["sub.ps_3"] = "46c00001FGH",
+- ["mul.ps_3"] = "46c00002FGH",
+- ["abs.ps_2"] = "46c00005FG",
+- ["mov.ps_2"] = "46c00006FG",
+- ["neg.ps_2"] = "46c00007FG",
+- ["movf.ps_2"] = "46c00011FG",
+- ["movf.ps_3"] = "46c00011FGC",
+- ["movt.ps_2"] = "46c10011FG",
+- ["movt.ps_3"] = "46c10011FGC",
+- ["movz.ps_3"] = "46c00012FGT",
+- ["movn.ps_3"] = "46c00013FGT",
+- ["cvt.s.pu_2"] = "46c00020FG",
+- ["cvt.s.pl_2"] = "46c00028FG",
+- ["pll.ps_3"] = "46c0002cFGH",
+- ["plu.ps_3"] = "46c0002dFGH",
+- ["pul.ps_3"] = "46c0002eFGH",
+- ["puu.ps_3"] = "46c0002fFGH",
+- ["c.f.ps_2"] = "46c00030GH",
+- ["c.f.ps_3"] = "46c00030VGH",
+- ["c.un.ps_2"] = "46c00031GH",
+- ["c.un.ps_3"] = "46c00031VGH",
+- ["c.eq.ps_2"] = "46c00032GH",
+- ["c.eq.ps_3"] = "46c00032VGH",
+- ["c.ueq.ps_2"] = "46c00033GH",
+- ["c.ueq.ps_3"] = "46c00033VGH",
+- ["c.olt.ps_2"] = "46c00034GH",
+- ["c.olt.ps_3"] = "46c00034VGH",
+- ["c.ult.ps_2"] = "46c00035GH",
+- ["c.ult.ps_3"] = "46c00035VGH",
+- ["c.ole.ps_2"] = "46c00036GH",
+- ["c.ole.ps_3"] = "46c00036VGH",
+- ["c.ule.ps_2"] = "46c00037GH",
+- ["c.ule.ps_3"] = "46c00037VGH",
+- ["c.sf.ps_2"] = "46c00038GH",
+- ["c.sf.ps_3"] = "46c00038VGH",
+- ["c.ngle.ps_2"] = "46c00039GH",
+- ["c.ngle.ps_3"] = "46c00039VGH",
+- ["c.seq.ps_2"] = "46c0003aGH",
+- ["c.seq.ps_3"] = "46c0003aVGH",
+- ["c.ngl.ps_2"] = "46c0003bGH",
+- ["c.ngl.ps_3"] = "46c0003bVGH",
+- ["c.lt.ps_2"] = "46c0003cGH",
+- ["c.lt.ps_3"] = "46c0003cVGH",
+- ["c.nge.ps_2"] = "46c0003dGH",
+- ["c.nge.ps_3"] = "46c0003dVGH",
+- ["c.le.ps_2"] = "46c0003eGH",
+- ["c.le.ps_3"] = "46c0003eVGH",
+- ["c.ngt.ps_2"] = "46c0003fGH",
+- ["c.ngt.ps_3"] = "46c0003fVGH",
+-
+ ["cvt.s.w_2"] = "46800020FG",
+ ["cvt.d.w_2"] = "46800021FG",
+-
+ ["cvt.s.l_2"] = "46a00020FG",
+ ["cvt.d.l_2"] = "46a00021FG",
+-
+- -- Opcode COP1X.
+- lwxc1_2 = "4c000000FX",
+- ldxc1_2 = "4c000001FX",
+- luxc1_2 = "4c000005FX",
+- swxc1_2 = "4c000008FX",
+- sdxc1_2 = "4c000009FX",
+- suxc1_2 = "4c00000dFX",
+- prefx_2 = "4c00000fMX",
+- ["alnv.ps_4"] = "4c00001eFGHS",
+- ["madd.s_4"] = "4c000020FRGH",
+- ["madd.d_4"] = "4c000021FRGH",
+- ["madd.ps_4"] = "4c000026FRGH",
+- ["msub.s_4"] = "4c000028FRGH",
+- ["msub.d_4"] = "4c000029FRGH",
+- ["msub.ps_4"] = "4c00002eFRGH",
+- ["nmadd.s_4"] = "4c000030FRGH",
+- ["nmadd.d_4"] = "4c000031FRGH",
+- ["nmadd.ps_4"] = "4c000036FRGH",
+- ["nmsub.s_4"] = "4c000038FRGH",
+- ["nmsub.d_4"] = "4c000039FRGH",
+- ["nmsub.ps_4"] = "4c00003eFRGH",
+ }
+
++if mipsr6 then -- Instructions added with MIPSR6.
++
++ for k,v in pairs({
++
++ -- Add immediate to upper bits.
++ aui_3 = "3c000000TSI",
++ daui_3 = mips64 and "74000000TSI",
++ dahi_2 = mips64 and "04060000SI",
++ dati_2 = mips64 and "041e0000SI",
++
++ -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
++
++ -- Compact branches.
++ blezalc_2 = "18000000TB", -- rt != 0.
++ bgezalc_2 = "18000000T=SB", -- rt != 0.
++ bgtzalc_2 = "1c000000TB", -- rt != 0.
++ bltzalc_2 = "1c000000T=SB", -- rt != 0.
++
++ blezc_2 = "58000000TB", -- rt != 0.
++ bgezc_2 = "58000000T=SB", -- rt != 0.
++ bgec_3 = "58000000STB", -- rs != rt.
++ blec_3 = "58000000TSB", -- rt != rs.
++
++ bgtzc_2 = "5c000000TB", -- rt != 0.
++ bltzc_2 = "5c000000T=SB", -- rt != 0.
++ bltc_3 = "5c000000STB", -- rs != rt.
++ bgtc_3 = "5c000000TSB", -- rt != rs.
++
++ bgeuc_3 = "18000000STB", -- rs != rt.
++ bleuc_3 = "18000000TSB", -- rt != rs.
++ bltuc_3 = "1c000000STB", -- rs != rt.
++ bgtuc_3 = "1c000000TSB", -- rt != rs.
++
++ beqzalc_2 = "20000000TB", -- rt != 0.
++ bnezalc_2 = "60000000TB", -- rt != 0.
++ beqc_3 = "20000000STB", -- rs < rt.
++ bnec_3 = "60000000STB", -- rs < rt.
++ bovc_3 = "20000000STB", -- rs >= rt.
++ bnvc_3 = "60000000STB", -- rs >= rt.
++
++ beqzc_2 = "d8000000SK", -- rs != 0.
++ bnezc_2 = "f8000000SK", -- rs != 0.
++ jic_2 = "d8000000TI",
++ jialc_2 = "f8000000TI",
++ bc_1 = "c8000000L",
++ balc_1 = "e8000000L",
++
++ -- Opcode SPECIAL.
++ jr_1 = "00000009S",
++ sdbbp_0 = "0000000e",
++ sdbbp_1 = "0000000eY",
++ lsa_4 = "00000005DSTA",
++ dlsa_4 = mips64 and "00000015DSTA",
++ seleqz_3 = "00000035DST",
++ selnez_3 = "00000037DST",
++ clz_2 = "00000050DS",
++ clo_2 = "00000051DS",
++ dclz_2 = mips64 and "00000052DS",
++ dclo_2 = mips64 and "00000053DS",
++ mul_3 = "00000098DST",
++ muh_3 = "000000d8DST",
++ mulu_3 = "00000099DST",
++ muhu_3 = "000000d9DST",
++ div_3 = "0000009aDST",
++ mod_3 = "000000daDST",
++ divu_3 = "0000009bDST",
++ modu_3 = "000000dbDST",
++ dmul_3 = mips64 and "0000009cDST",
++ dmuh_3 = mips64 and "000000dcDST",
++ dmulu_3 = mips64 and "0000009dDST",
++ dmuhu_3 = mips64 and "000000ddDST",
++ ddiv_3 = mips64 and "0000009eDST",
++ dmod_3 = mips64 and "000000deDST",
++ ddivu_3 = mips64 and "0000009fDST",
++ dmodu_3 = mips64 and "000000dfDST",
++
++ -- Opcode SPECIAL3.
++ align_4 = "7c000220DSTA",
++ dalign_4 = mips64 and "7c000224DSTA",
++ bitswap_2 = "7c000020DT",
++ dbitswap_2 = mips64 and "7c000024DT",
++
++ -- Opcode COP1.
++ bc1eqz_2 = "45200000HB",
++ bc1nez_2 = "45a00000HB",
++
++ ["sel.s_3"] = "46000010FGH",
++ ["seleqz.s_3"] = "46000014FGH",
++ ["selnez.s_3"] = "46000017FGH",
++ ["maddf.s_3"] = "46000018FGH",
++ ["msubf.s_3"] = "46000019FGH",
++ ["rint.s_2"] = "4600001aFG",
++ ["class.s_2"] = "4600001bFG",
++ ["min.s_3"] = "4600001cFGH",
++ ["mina.s_3"] = "4600001dFGH",
++ ["max.s_3"] = "4600001eFGH",
++ ["maxa.s_3"] = "4600001fFGH",
++ ["cmp.af.s_3"] = "46800000FGH",
++ ["cmp.un.s_3"] = "46800001FGH",
++ ["cmp.or.s_3"] = "46800011FGH",
++ ["cmp.eq.s_3"] = "46800002FGH",
++ ["cmp.une.s_3"] = "46800012FGH",
++ ["cmp.ueq.s_3"] = "46800003FGH",
++ ["cmp.ne.s_3"] = "46800013FGH",
++ ["cmp.lt.s_3"] = "46800004FGH",
++ ["cmp.ult.s_3"] = "46800005FGH",
++ ["cmp.le.s_3"] = "46800006FGH",
++ ["cmp.ule.s_3"] = "46800007FGH",
++ ["cmp.saf.s_3"] = "46800008FGH",
++ ["cmp.sun.s_3"] = "46800009FGH",
++ ["cmp.sor.s_3"] = "46800019FGH",
++ ["cmp.seq.s_3"] = "4680000aFGH",
++ ["cmp.sune.s_3"] = "4680001aFGH",
++ ["cmp.sueq.s_3"] = "4680000bFGH",
++ ["cmp.sne.s_3"] = "4680001bFGH",
++ ["cmp.slt.s_3"] = "4680000cFGH",
++ ["cmp.sult.s_3"] = "4680000dFGH",
++ ["cmp.sle.s_3"] = "4680000eFGH",
++ ["cmp.sule.s_3"] = "4680000fFGH",
++
++ ["sel.d_3"] = "46200010FGH",
++ ["seleqz.d_3"] = "46200014FGH",
++ ["selnez.d_3"] = "46200017FGH",
++ ["maddf.d_3"] = "46200018FGH",
++ ["msubf.d_3"] = "46200019FGH",
++ ["rint.d_2"] = "4620001aFG",
++ ["class.d_2"] = "4620001bFG",
++ ["min.d_3"] = "4620001cFGH",
++ ["mina.d_3"] = "4620001dFGH",
++ ["max.d_3"] = "4620001eFGH",
++ ["maxa.d_3"] = "4620001fFGH",
++ ["cmp.af.d_3"] = "46a00000FGH",
++ ["cmp.un.d_3"] = "46a00001FGH",
++ ["cmp.or.d_3"] = "46a00011FGH",
++ ["cmp.eq.d_3"] = "46a00002FGH",
++ ["cmp.une.d_3"] = "46a00012FGH",
++ ["cmp.ueq.d_3"] = "46a00003FGH",
++ ["cmp.ne.d_3"] = "46a00013FGH",
++ ["cmp.lt.d_3"] = "46a00004FGH",
++ ["cmp.ult.d_3"] = "46a00005FGH",
++ ["cmp.le.d_3"] = "46a00006FGH",
++ ["cmp.ule.d_3"] = "46a00007FGH",
++ ["cmp.saf.d_3"] = "46a00008FGH",
++ ["cmp.sun.d_3"] = "46a00009FGH",
++ ["cmp.sor.d_3"] = "46a00019FGH",
++ ["cmp.seq.d_3"] = "46a0000aFGH",
++ ["cmp.sune.d_3"] = "46a0001aFGH",
++ ["cmp.sueq.d_3"] = "46a0000bFGH",
++ ["cmp.sne.d_3"] = "46a0001bFGH",
++ ["cmp.slt.d_3"] = "46a0000cFGH",
++ ["cmp.sult.d_3"] = "46a0000dFGH",
++ ["cmp.sle.d_3"] = "46a0000eFGH",
++ ["cmp.sule.d_3"] = "46a0000fFGH",
++
++ }) do map_op[k] = v end
++
++else -- Instructions removed by MIPSR6.
++
++ for k,v in pairs({
++ -- Traps, don't use.
++ addi_3 = "20000000TSI",
++ daddi_3 = mips64 and "60000000TSI",
++
++ -- Branch on likely, don't use.
++ beqzl_2 = "50000000SB",
++ beql_3 = "50000000STB",
++ bnezl_2 = "54000000SB",
++ bnel_3 = "54000000STB",
++ blezl_2 = "58000000SB",
++ bgtzl_2 = "5c000000SB",
++
++ lwl_2 = "88000000TO",
++ lwr_2 = "98000000TO",
++ swl_2 = "a8000000TO",
++ sdl_2 = mips64 and "b0000000TO",
++ sdr_2 = mips64 and "b1000000TO",
++ swr_2 = "b8000000TO",
++ cache_2 = "bc000000NO",
++ ll_2 = "c0000000TO",
++ pref_2 = "cc000000NO",
++ sc_2 = "e0000000TO",
++ scd_2 = mips64 and "f0000000TO",
++
++ -- Opcode SPECIAL.
++ movf_2 = "00000001DS",
++ movf_3 = "00000001DSC",
++ movt_2 = "00010001DS",
++ movt_3 = "00010001DSC",
++ jr_1 = "00000008S",
++ movz_3 = "0000000aDST",
++ movn_3 = "0000000bDST",
++ mfhi_1 = "00000010D",
++ mthi_1 = "00000011S",
++ mflo_1 = "00000012D",
++ mtlo_1 = "00000013S",
++ mult_2 = "00000018ST",
++ multu_2 = "00000019ST",
++ div_3 = "0000001aST",
++ divu_3 = "0000001bST",
++ ddiv_3 = mips64 and "0000001eST",
++ ddivu_3 = mips64 and "0000001fST",
++ dmult_2 = mips64 and "0000001cST",
++ dmultu_2 = mips64 and "0000001dST",
++
++ -- Opcode REGIMM.
++ tgei_2 = "04080000SI",
++ tgeiu_2 = "04090000SI",
++ tlti_2 = "040a0000SI",
++ tltiu_2 = "040b0000SI",
++ teqi_2 = "040c0000SI",
++ tnei_2 = "040e0000SI",
++ bltzal_2 = "04100000SB",
++ bgezal_2 = "04110000SB",
++ bltzall_2 = "04120000SB",
++ bgezall_2 = "04130000SB",
++
++ -- Opcode SPECIAL2.
++ madd_2 = "70000000ST",
++ maddu_2 = "70000001ST",
++ mul_3 = "70000002DST",
++ msub_2 = "70000004ST",
++ msubu_2 = "70000005ST",
++ clz_2 = "70000020D=TS",
++ clo_2 = "70000021D=TS",
++ dclz_2 = mips64 and "70000024D=TS",
++ dclo_2 = mips64 and "70000025D=TS",
++ sdbbp_0 = "7000003f",
++ sdbbp_1 = "7000003fY",
++
++ -- Opcode COP1.
++ bc1f_1 = "45000000B",
++ bc1f_2 = "45000000CB",
++ bc1t_1 = "45010000B",
++ bc1t_2 = "45010000CB",
++ bc1fl_1 = "45020000B",
++ bc1fl_2 = "45020000CB",
++ bc1tl_1 = "45030000B",
++ bc1tl_2 = "45030000CB",
++
++ ["movf.s_2"] = "46000011FG",
++ ["movf.s_3"] = "46000011FGC",
++ ["movt.s_2"] = "46010011FG",
++ ["movt.s_3"] = "46010011FGC",
++ ["movz.s_3"] = "46000012FGT",
++ ["movn.s_3"] = "46000013FGT",
++ ["cvt.ps.s_3"] = "46000026FGH",
++ ["c.f.s_2"] = "46000030GH",
++ ["c.f.s_3"] = "46000030VGH",
++ ["c.un.s_2"] = "46000031GH",
++ ["c.un.s_3"] = "46000031VGH",
++ ["c.eq.s_2"] = "46000032GH",
++ ["c.eq.s_3"] = "46000032VGH",
++ ["c.ueq.s_2"] = "46000033GH",
++ ["c.ueq.s_3"] = "46000033VGH",
++ ["c.olt.s_2"] = "46000034GH",
++ ["c.olt.s_3"] = "46000034VGH",
++ ["c.ult.s_2"] = "46000035GH",
++ ["c.ult.s_3"] = "46000035VGH",
++ ["c.ole.s_2"] = "46000036GH",
++ ["c.ole.s_3"] = "46000036VGH",
++ ["c.ule.s_2"] = "46000037GH",
++ ["c.ule.s_3"] = "46000037VGH",
++ ["c.sf.s_2"] = "46000038GH",
++ ["c.sf.s_3"] = "46000038VGH",
++ ["c.ngle.s_2"] = "46000039GH",
++ ["c.ngle.s_3"] = "46000039VGH",
++ ["c.seq.s_2"] = "4600003aGH",
++ ["c.seq.s_3"] = "4600003aVGH",
++ ["c.ngl.s_2"] = "4600003bGH",
++ ["c.ngl.s_3"] = "4600003bVGH",
++ ["c.lt.s_2"] = "4600003cGH",
++ ["c.lt.s_3"] = "4600003cVGH",
++ ["c.nge.s_2"] = "4600003dGH",
++ ["c.nge.s_3"] = "4600003dVGH",
++ ["c.le.s_2"] = "4600003eGH",
++ ["c.le.s_3"] = "4600003eVGH",
++ ["c.ngt.s_2"] = "4600003fGH",
++ ["c.ngt.s_3"] = "4600003fVGH",
++ ["movf.d_2"] = "46200011FG",
++ ["movf.d_3"] = "46200011FGC",
++ ["movt.d_2"] = "46210011FG",
++ ["movt.d_3"] = "46210011FGC",
++ ["movz.d_3"] = "46200012FGT",
++ ["movn.d_3"] = "46200013FGT",
++ ["c.f.d_2"] = "46200030GH",
++ ["c.f.d_3"] = "46200030VGH",
++ ["c.un.d_2"] = "46200031GH",
++ ["c.un.d_3"] = "46200031VGH",
++ ["c.eq.d_2"] = "46200032GH",
++ ["c.eq.d_3"] = "46200032VGH",
++ ["c.ueq.d_2"] = "46200033GH",
++ ["c.ueq.d_3"] = "46200033VGH",
++ ["c.olt.d_2"] = "46200034GH",
++ ["c.olt.d_3"] = "46200034VGH",
++ ["c.ult.d_2"] = "46200035GH",
++ ["c.ult.d_3"] = "46200035VGH",
++ ["c.ole.d_2"] = "46200036GH",
++ ["c.ole.d_3"] = "46200036VGH",
++ ["c.ule.d_2"] = "46200037GH",
++ ["c.ule.d_3"] = "46200037VGH",
++ ["c.sf.d_2"] = "46200038GH",
++ ["c.sf.d_3"] = "46200038VGH",
++ ["c.ngle.d_2"] = "46200039GH",
++ ["c.ngle.d_3"] = "46200039VGH",
++ ["c.seq.d_2"] = "4620003aGH",
++ ["c.seq.d_3"] = "4620003aVGH",
++ ["c.ngl.d_2"] = "4620003bGH",
++ ["c.ngl.d_3"] = "4620003bVGH",
++ ["c.lt.d_2"] = "4620003cGH",
++ ["c.lt.d_3"] = "4620003cVGH",
++ ["c.nge.d_2"] = "4620003dGH",
++ ["c.nge.d_3"] = "4620003dVGH",
++ ["c.le.d_2"] = "4620003eGH",
++ ["c.le.d_3"] = "4620003eVGH",
++ ["c.ngt.d_2"] = "4620003fGH",
++ ["c.ngt.d_3"] = "4620003fVGH",
++ ["add.ps_3"] = "46c00000FGH",
++ ["sub.ps_3"] = "46c00001FGH",
++ ["mul.ps_3"] = "46c00002FGH",
++ ["abs.ps_2"] = "46c00005FG",
++ ["mov.ps_2"] = "46c00006FG",
++ ["neg.ps_2"] = "46c00007FG",
++ ["movf.ps_2"] = "46c00011FG",
++ ["movf.ps_3"] = "46c00011FGC",
++ ["movt.ps_2"] = "46c10011FG",
++ ["movt.ps_3"] = "46c10011FGC",
++ ["movz.ps_3"] = "46c00012FGT",
++ ["movn.ps_3"] = "46c00013FGT",
++ ["cvt.s.pu_2"] = "46c00020FG",
++ ["cvt.s.pl_2"] = "46c00028FG",
++ ["pll.ps_3"] = "46c0002cFGH",
++ ["plu.ps_3"] = "46c0002dFGH",
++ ["pul.ps_3"] = "46c0002eFGH",
++ ["puu.ps_3"] = "46c0002fFGH",
++ ["c.f.ps_2"] = "46c00030GH",
++ ["c.f.ps_3"] = "46c00030VGH",
++ ["c.un.ps_2"] = "46c00031GH",
++ ["c.un.ps_3"] = "46c00031VGH",
++ ["c.eq.ps_2"] = "46c00032GH",
++ ["c.eq.ps_3"] = "46c00032VGH",
++ ["c.ueq.ps_2"] = "46c00033GH",
++ ["c.ueq.ps_3"] = "46c00033VGH",
++ ["c.olt.ps_2"] = "46c00034GH",
++ ["c.olt.ps_3"] = "46c00034VGH",
++ ["c.ult.ps_2"] = "46c00035GH",
++ ["c.ult.ps_3"] = "46c00035VGH",
++ ["c.ole.ps_2"] = "46c00036GH",
++ ["c.ole.ps_3"] = "46c00036VGH",
++ ["c.ule.ps_2"] = "46c00037GH",
++ ["c.ule.ps_3"] = "46c00037VGH",
++ ["c.sf.ps_2"] = "46c00038GH",
++ ["c.sf.ps_3"] = "46c00038VGH",
++ ["c.ngle.ps_2"] = "46c00039GH",
++ ["c.ngle.ps_3"] = "46c00039VGH",
++ ["c.seq.ps_2"] = "46c0003aGH",
++ ["c.seq.ps_3"] = "46c0003aVGH",
++ ["c.ngl.ps_2"] = "46c0003bGH",
++ ["c.ngl.ps_3"] = "46c0003bVGH",
++ ["c.lt.ps_2"] = "46c0003cGH",
++ ["c.lt.ps_3"] = "46c0003cVGH",
++ ["c.nge.ps_2"] = "46c0003dGH",
++ ["c.nge.ps_3"] = "46c0003dVGH",
++ ["c.le.ps_2"] = "46c0003eGH",
++ ["c.le.ps_3"] = "46c0003eVGH",
++ ["c.ngt.ps_2"] = "46c0003fGH",
++ ["c.ngt.ps_3"] = "46c0003fVGH",
++
++ -- Opcode COP1X.
++ lwxc1_2 = "4c000000FX",
++ ldxc1_2 = "4c000001FX",
++ luxc1_2 = "4c000005FX",
++ swxc1_2 = "4c000008FX",
++ sdxc1_2 = "4c000009FX",
++ suxc1_2 = "4c00000dFX",
++ prefx_2 = "4c00000fMX",
++ ["alnv.ps_4"] = "4c00001eFGHS",
++ ["madd.s_4"] = "4c000020FRGH",
++ ["madd.d_4"] = "4c000021FRGH",
++ ["madd.ps_4"] = "4c000026FRGH",
++ ["msub.s_4"] = "4c000028FRGH",
++ ["msub.d_4"] = "4c000029FRGH",
++ ["msub.ps_4"] = "4c00002eFRGH",
++ ["nmadd.s_4"] = "4c000030FRGH",
++ ["nmadd.d_4"] = "4c000031FRGH",
++ ["nmadd.ps_4"] = "4c000036FRGH",
++ ["nmsub.s_4"] = "4c000038FRGH",
++ ["nmsub.d_4"] = "4c000039FRGH",
++ ["nmsub.ps_4"] = "4c00003eFRGH",
++
++ }) do map_op[k] = v end
++
++end
++
+ ------------------------------------------------------------------------------
+
+ local function parse_gpr(expr)
+@@ -808,10 +979,12 @@ map_op[".template__"] = function(params, template, nparams)
+ op = op + parse_disp(params[n]); n = n + 1
+ elseif p == "X" then
+ op = op + parse_index(params[n]); n = n + 1
+- elseif p == "B" or p == "J" then
+- local mode, n, s = parse_label(params[n], false)
+- if p == "B" then n = n + 2048 end
+- waction("REL_"..mode, n, s, 1)
++ elseif p == "B" or p == "J" or p == "K" or p == "L" then
++ local mode, m, s = parse_label(params[n], false)
++ if p == "J" then m = m + 0xa800
++ elseif p == "K" then m = m + 0x5000
++ elseif p == "L" then m = m + 0xa000 end
++ waction("REL_"..mode, m, s, 1)
+ n = n + 1
+ elseif p == "A" then
+ op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
+@@ -833,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
+ elseif p == "Z" then
+ op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
+ elseif p == "=" then
+- op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo.
++ n = n - 1 -- Re-use previous parameter for next template char.
+ else
+ assert(false)
+ end
+diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua
+index 5636b23a..8ab5d33a 100644
+--- a/dynasm/dasm_mips64.lua
++++ b/dynasm/dasm_mips64.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM MIPS64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+ -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
+diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
+index 3a7ee9b0..35264f2e 100644
+--- a/dynasm/dasm_ppc.h
++++ b/dynasm/dasm_ppc.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM PPC/PPC64 encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -277,7 +277,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -353,7 +353,11 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
+index f73974dd..ee2afb2e 100644
+--- a/dynasm/dasm_ppc.lua
++++ b/dynasm/dasm_ppc.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM PPC/PPC64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ --
+ -- Support for various extensions contributed by Caio Souza Oliveira.
+@@ -11,9 +11,9 @@
+ local _info = {
+ arch = "ppc",
+ description = "DynASM PPC module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -1722,9 +1722,9 @@ op_template = function(params, template, nparams)
+ elseif p == "M" then
+ op = op + parse_shiftmask(params[n], false); n = n + 1
+ elseif p == "J" or p == "K" then
+- local mode, n, s = parse_label(params[n], false)
+- if p == "K" then n = n + 2048 end
+- waction("REL_"..mode, n, s, 1)
++ local mode, m, s = parse_label(params[n], false)
++ if p == "K" then m = m + 2048 end
++ waction("REL_"..mode, m, s, 1)
+ n = n + 1
+ elseif p == "0" then
+ if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
+diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
+index 59d9e2b2..8914596a 100644
+--- a/dynasm/dasm_proto.h
++++ b/dynasm/dasm_proto.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM encoding engine prototypes.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -10,8 +10,8 @@
+ #include <stddef.h>
+ #include <stdarg.h>
+
+-#define DASM_IDENT "DynASM 1.4.0"
+-#define DASM_VERSION 10400 /* 1.4.0 */
++#define DASM_IDENT "DynASM 1.5.0"
++#define DASM_VERSION 10500 /* 1.5.0 */
+
+ #ifndef Dst_DECL
+ #define Dst_DECL dasm_State **Dst
+diff --git a/dynasm/dasm_x64.lua b/dynasm/dasm_x64.lua
+index e8bdeb37..2c0a0e86 100644
+--- a/dynasm/dasm_x64.lua
++++ b/dynasm/dasm_x64.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM x64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+ -- This module just sets 64 bit mode for the combined x86/x64 module.
+diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
+index bc636357..d8d4928c 100644
+--- a/dynasm/dasm_x86.h
++++ b/dynasm/dasm_x86.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM x86 encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...)
+ switch (action) {
+ case DASM_DISP:
+ if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
+- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
++ /* fallthrough */
++ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
+ case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
+ case DASM_IMM_D: ofs += 4; break;
+ case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
+ case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
+- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
++ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
+ case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
+ case DASM_SPACE: p++; ofs += n; break;
+ case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
+@@ -207,8 +208,8 @@ void dasm_put(Dst_DECL, int start, ...)
+ if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+ if (*p < 0x20 && (n&7) == 4) ofs++;
+ switch ((*p++ >> 3) & 3) {
+- case 3: n |= b[pos-3];
+- case 2: n |= b[pos-2];
++ case 3: n |= b[pos-3]; /* fallthrough */
++ case 2: n |= b[pos-2]; /* fallthrough */
+ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+ }
+ continue;
+@@ -238,8 +239,11 @@ void dasm_put(Dst_DECL, int start, ...)
+ }
+ pos++;
+ ofs += 4; /* Maximum offset needed. */
+- if (action == DASM_REL_LG || action == DASM_REL_PC)
++ if (action == DASM_REL_LG || action == DASM_REL_PC) {
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
++ } else if (sizeof(ptrdiff_t) == 8) {
++ ofs += 4;
++ }
+ break;
+ case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+@@ -304,11 +308,13 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
++ int op = 0;
+ while (1) {
+- int op, action = *p++;
++ int action = *p++;
+ switch (action) {
+- case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
+- case DASM_REL_PC: op = p[-2]; rel_pc: {
++ case DASM_REL_LG: p++;
++ /* fallthrough */
++ case DASM_REL_PC: {
+ int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
+ if (shrink) { /* Shrinkable branch opcode? */
+ int lofs, lpos = b[pos];
+@@ -329,17 +335,21 @@ int dasm_link(Dst_DECL, size_t *szp)
+ pos += 2;
+ break;
+ }
++ /* fallthrough */
+ case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
++ /* fallthrough */
+ case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
+ case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
+ case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
+ case DASM_LABEL_LG: p++;
++ /* fallthrough */
+ case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
+ case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
+ case DASM_EXTERN: p += 2; break;
+- case DASM_ESC: p++; break;
++ case DASM_ESC: op = *p++; break;
+ case DASM_MARK: break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
++ default: op = action; break;
+ }
+ }
+ stop: (void)0;
+@@ -358,10 +368,22 @@ int dasm_link(Dst_DECL, size_t *szp)
+ do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
+ #define dasmd(x) \
+ do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
++#define dasmq(x) \
++ do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
+ #else
+ #define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
+ #define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
++#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0)
+ #endif
++static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
++{
++ if (sizeof(ptrdiff_t) == 8)
++ dasmq((unsigned long long)x);
++ else
++ dasmd((unsigned int)x);
++ return cp;
++}
++#define dasma(x) (cp = dasma_(cp, (x)))
+
+ /* Pass 3: Encode sections. */
+ int dasm_encode(Dst_DECL, void *buffer)
+@@ -391,12 +413,15 @@ int dasm_encode(Dst_DECL, void *buffer)
+ if (mrm != 5) { mm[-1] -= 0x80; break; } }
+ if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
+ }
++ /* fallthrough */
+ case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
+ case DASM_IMM_DB: if (((n+128)&-256) == 0) {
+ db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
+ } else mark = NULL;
++ /* fallthrough */
+ case DASM_IMM_D: wd: dasmd(n); break;
+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
++ /* fallthrough */
+ case DASM_IMM_W: dasmw(n); break;
+ case DASM_VREG: {
+ int t = *p++;
+@@ -421,7 +446,9 @@ int dasm_encode(Dst_DECL, void *buffer)
+ }
+ case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
+ b++; n = (int)(ptrdiff_t)D->globals[-n];
+- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
++ /* fallthrough */
++ case DASM_REL_A: rel_a:
++ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
+ case DASM_REL_PC: rel_pc: {
+ int shrink = *b++;
+ int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
+@@ -431,11 +458,13 @@ int dasm_encode(Dst_DECL, void *buffer)
+ goto wb;
+ }
+ case DASM_IMM_LG:
+- p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
++ p++;
++ if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
++ /* fallthrough */
+ case DASM_IMM_PC: {
+ int *pb = DASM_POS2PTR(D, n);
+- n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
+- goto wd;
++ dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
++ break;
+ }
+ case DASM_LABEL_LG: {
+ int idx = *p++;
+@@ -452,6 +481,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
+ case DASM_MARK: mark = cp; break;
+ case DASM_ESC: action = *p++;
++ /* fallthrough */
+ default: *cp++ = action; break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
+ }
+diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
+index 4c031e2c..fe2cf579 100644
+--- a/dynasm/dasm_x86.lua
++++ b/dynasm/dasm_x86.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM x86/x64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -11,9 +11,9 @@ local x64 = x64
+ local _info = {
+ arch = x64 and "x64" or "x86",
+ description = "DynASM x86/x64 module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -484,6 +484,22 @@ local function wputdarg(n)
+ end
+ end
+
++-- Put signed or unsigned qword or arg.
++local function wputqarg(n)
++ local tn = type(n)
++ if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
++ wputb(band(n, 255))
++ wputb(band(shr(n, 8), 255))
++ wputb(band(shr(n, 16), 255))
++ wputb(shr(n, 24))
++ local sign = n < 0 and 255 or 0
++ wputb(sign); wputb(sign); wputb(sign); wputb(sign)
++ else
++ waction("IMM_D", format("(unsigned int)(%s)", n))
++ waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
++ end
++end
++
+ -- Put operand-size dependent number or arg (defaults to dword).
+ local function wputszarg(sz, n)
+ if not sz or sz == "d" or sz == "q" then wputdarg(n)
+@@ -663,10 +679,16 @@ local function opmodestr(op, args)
+ end
+
+ -- Convert number to valid integer or nil.
+-local function toint(expr)
++local function toint(expr, isqword)
+ local n = tonumber(expr)
+ if n then
+- if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
++ if n % 1 ~= 0 then
++ werror("not an integer number `"..expr.."'")
++ elseif isqword then
++ if n < -2147483648 or n > 2147483647 then
++ n = nil -- Handle it as an expression to avoid precision loss.
++ end
++ elseif n < -2147483648 or n > 4294967295 then
+ werror("bad integer number `"..expr.."'")
+ end
+ return n
+@@ -749,7 +771,7 @@ local function rtexpr(expr)
+ end
+
+ -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
+-local function parseoperand(param)
++local function parseoperand(param, isqword)
+ local t = {}
+
+ local expr = param
+@@ -810,7 +832,7 @@ local function parseoperand(param)
+ if t.disp then break end
+
+ -- [reg+xreg...]
+- local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
++ local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
+ xreg, t.xreg, tp = rtexpr(xreg)
+ if not t.xreg then
+ -- [reg+-expr]
+@@ -837,7 +859,7 @@ local function parseoperand(param)
+ t.disp = dispexpr(tailx)
+ else
+ -- imm or opsize*imm
+- local imm = toint(expr)
++ local imm = toint(expr, isqword)
+ if not imm and sub(expr, 1, 1) == "*" and t.opsize then
+ imm = toint(sub(expr, 2))
+ if imm then
+@@ -955,6 +977,7 @@ end
+ -- "u" Use VEX encoding, vvvv unused.
+ -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the
operand is
+ -- removed from the list used by future characters).
++-- "w" Use VEX encoding, vvvv from 3rd operand.
+ -- "L" Force VEX.L
+ --
+ -- All of the following characters force a flush of the opcode:
+@@ -1536,8 +1559,8 @@ local map_op = {
+ vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
+ vrsqrtps_2 = "rmoy:0Fu52rM",
+ vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
+- vroundpd_3 = "rmioy:660F3AV09rMU",
+- vroundps_3 = "rmioy:660F3AV08rMU",
++ vroundpd_3 = "rmioy:660F3Au09rMU",
++ vroundps_3 = "rmioy:660F3Au08rMU",
+ vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
+ vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
+ vshufpd_4 = "rrmioy:660FVC6rMU",
+@@ -1677,6 +1700,91 @@ local map_op = {
+ -- Intel ADX
+ adcx_2 = "rmqd:660F38F6rM",
+ adox_2 = "rmqd:F30F38F6rM",
++
++ -- BMI1
++ andn_3 = "rrmqd:0F38VF2rM",
++ bextr_3 = "rmrqd:0F38wF7rM",
++ blsi_2 = "rmqd:0F38vF33m",
++ blsmsk_2 = "rmqd:0F38vF32m",
++ blsr_2 = "rmqd:0F38vF31m",
++ tzcnt_2 = "rmqdw:F30FBCrM",
++
++ -- BMI2
++ bzhi_3 = "rmrqd:0F38wF5rM",
++ mulx_3 = "rrmqd:F20F38VF6rM",
++ pdep_3 = "rrmqd:F20F38VF5rM",
++ pext_3 = "rrmqd:F30F38VF5rM",
++ rorx_3 = "rmSqd:F20F3AuF0rMS",
++ sarx_3 = "rmrqd:F30F38wF7rM",
++ shrx_3 = "rmrqd:F20F38wF7rM",
++ shlx_3 = "rmrqd:660F38wF7rM",
++
++ -- FMA3
++ vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
++ vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
++ vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
++ vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
++ vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
++ vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
++
++ vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
++ vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
++ vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
++ vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
++ vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
++ vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
++
++ vfmadd132pd_3 = "rrmoy:660F38VX98rM",
++ vfmadd132ps_3 = "rrmoy:660F38V98rM",
++ vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
++ vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
++ vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
++ vfmadd213ps_3 = "rrmoy:660F38VA8rM",
++ vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
++ vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
++ vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
++ vfmadd231ps_3 = "rrmoy:660F38VB8rM",
++ vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
++ vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
++
++ vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
++ vfmsub132ps_3 = "rrmoy:660F38V9ArM",
++ vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
++ vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
++ vfmsub213pd_3 = "rrmoy:660F38VXAArM",
++ vfmsub213ps_3 = "rrmoy:660F38VAArM",
++ vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
++ vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
++ vfmsub231pd_3 = "rrmoy:660F38VXBArM",
++ vfmsub231ps_3 = "rrmoy:660F38VBArM",
++ vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
++ vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
++
++ vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
++ vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
++ vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
++ vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
++ vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
++ vfnmadd213ps_3 = "rrmoy:660F38VACrM",
++ vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
++ vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
++ vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
++ vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
++ vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
++ vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
++
++ vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
++ vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
++ vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
++ vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
++ vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
++ vfnmsub213ps_3 = "rrmoy:660F38VAErM",
++ vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
++ vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
++ vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
++ vfnmsub231ps_3 = "rrmoy:660F38VBErM",
++ vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
++ vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
+ }
+
+ ------------------------------------------------------------------------------
+@@ -1766,7 +1874,7 @@ end
+
+ ------------------------------------------------------------------------------
+
+-local map_vexarg = { u = false, v = 1, V = 2 }
++local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
+
+ -- Process pattern string.
+ local function dopattern(pat, args, sz, op, needrex)
+@@ -1866,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex)
+ local a = args[narg]
+ narg = narg + 1
+ local mode, imm = a.mode, a.imm
+- if mode == "iJ" and not match("iIJ", c) then
++ if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
+ werror("bad operand size for label")
+ end
+ if c == "S" then
+@@ -2058,14 +2166,16 @@ end
+ local function op_data(params)
+ if not params then return "imm..." end
+ local sz = sub(params.op, 2, 2)
+- if sz == "a" then sz = addrsize end
++ if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
+ for _,p in ipairs(params) do
+- local a = parseoperand(p)
++ local a = parseoperand(p, sz == "q")
+ if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
+ werror("bad mode or size in `"..p.."'")
+ end
+ if a.mode == "iJ" then
+ wputlabel("IMM_", a.imm, 1)
++ elseif sz == "q" then
++ wputqarg(a.imm)
+ else
+ wputszarg(sz, a.imm)
+ end
+@@ -2077,7 +2187,11 @@ map_op[".byte_*"] = op_data
+ map_op[".sbyte_*"] = op_data
+ map_op[".word_*"] = op_data
+ map_op[".dword_*"] = op_data
++map_op[".qword_*"] = op_data
+ map_op[".aword_*"] = op_data
++map_op[".long_*"] = op_data
++map_op[".quad_*"] = op_data
++map_op[".addr_*"] = op_data
+
+ ------------------------------------------------------------------------------
+
+diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
+index 5ec21a79..f4e71eca 100644
+--- a/dynasm/dynasm.lua
++++ b/dynasm/dynasm.lua
+@@ -2,7 +2,7 @@
+ -- DynASM. A dynamic assembler for code generation engines.
+ -- Originally designed and implemented for LuaJIT.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See below for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -10,14 +10,14 @@
+ local _info = {
+ name = "DynASM",
+ description = "A dynamic assembler for code generation engines",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+- url = "http://luajit.org/dynasm.html",
++ url = "https://luajit.org/dynasm.html",
+ license = "MIT",
+ copyright = [[
+-Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+@@ -38,7 +38,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
++[ MIT license: https://www.opensource.org/licenses/mit-license.php ]
+ ]],
+ }
+
+@@ -630,6 +630,7 @@ end
+ -- Load architecture-specific module.
+ local function loadarch(arch)
+ if not match(arch, "^[%w_]+$") then return "bad arch name" end
++ _G._map_def = map_def
+ local ok, m_arch = pcall(require, "dasm_"..arch)
+ if not ok then return "cannot load module: "..m_arch end
+ g_arch = m_arch
+diff --git a/etc/luajit.1 b/etc/luajit.1
+index 0d263db7..2846d073 100644
+--- a/etc/luajit.1
++++ b/etc/luajit.1
+@@ -6,7 +6,7 @@ luajit \- Just-In-Time Compiler for the Lua Language
+ .B luajit
+ [\fIoptions\fR]... [\fIscript\fR [\fIargs\fR]...]
+ .SH "WEB SITE"
+-.IR http://luajit.org
++.IR https://luajit.org
+ .SH DESCRIPTION
+ .PP
+ This is the command-line program to run Lua programs with \fBLuaJIT\fR.
+@@ -74,15 +74,15 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
+ Runs some nested loops and shows the resulting traces.
+ .SH COPYRIGHT
+ .PP
+-\fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall.
++\fBLuaJIT\fR is Copyright \(co 2005-2021 Mike Pall.
+ .br
+ \fBLuaJIT\fR is open source software, released under the MIT license.
+ .SH SEE ALSO
+ .PP
+ More details in the provided HTML docs or at:
+-.IR http://luajit.org
++.IR https://luajit.org
+ .br
+ More about the Lua language can be found at:
+-.IR http://lua.org/docs.html
++.IR https://lua.org/docs.html
+ .PP
+ lua(1)
+diff --git a/etc/luajit.pc b/etc/luajit.pc
+index a78f1746..39e1e577 100644
+--- a/etc/luajit.pc
++++ b/etc/luajit.pc
+@@ -17,7 +17,7 @@ INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver}
+
+ Name: LuaJIT
+ Description: Just-in-time compiler for Lua
+-URL: http://luajit.org
++URL: https://luajit.org
+ Version: ${version}
+ Requires:
+ Libs: -L${libdir} -l${libname}
+diff --git a/src/Makefile b/src/Makefile
+index f56465d1..2538503f 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -7,7 +7,7 @@
+ # Also works with MinGW and Cygwin on Windows.
+ # Please check msvcbuild.bat for building with MSVC on Windows.
+ #
+-# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ ##############################################################################
+
+ MAJVER= 2
+@@ -110,8 +110,8 @@ XCFLAGS=
+ #XCFLAGS+= -DLUAJIT_NUMMODE=1
+ #XCFLAGS+= -DLUAJIT_NUMMODE=2
+ #
+-# Enable GC64 mode for x64.
+-#XCFLAGS+= -DLUAJIT_ENABLE_GC64
++# Disable LJ_GC64 mode for x64.
++#XCFLAGS+= -DLUAJIT_DISABLE_GC64
+ #
+ ##############################################################################
+
+@@ -132,7 +132,6 @@ XCFLAGS=
+ #
+ # This define is required to run LuaJIT under Valgrind. The Valgrind
+ # header files must be installed. You should enable debug information, too.
+-# Use --suppressions=lj.supp to avoid some false positives.
+ #XCFLAGS+= -DLUAJIT_USE_VALGRIND
+ #
+ # This is the client for the GDB JIT API. GDB 7.0 or higher is required
+@@ -158,13 +157,16 @@ XCFLAGS=
+
+ ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
+ HOST_SYS= Windows
+- HOST_RM= del
+ else
+ HOST_SYS:= $(shell uname -s)
+ ifneq (,$(findstring MINGW,$(HOST_SYS)))
+ HOST_SYS= Windows
+ HOST_MSYS= mingw
+ endif
++ ifneq (,$(findstring MSYS,$(HOST_SYS)))
++ HOST_SYS= Windows
++ HOST_MSYS= mingw
++ endif
+ ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
+ HOST_SYS= Windows
+ HOST_MSYS= cygwin
+@@ -191,7 +193,7 @@ CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
+ LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
+
+ HOST_CC= $(CC)
+-HOST_RM= rm -f
++HOST_RM?= rm -f
+ # If left blank, minilua is built and used. You can supply an installed
+ # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
+ HOST_LUA=
+@@ -209,7 +211,7 @@ TARGET_CC= $(STATIC_CC)
+ TARGET_STCC= $(STATIC_CC)
+ TARGET_DYNCC= $(DYNAMIC_CC)
+ TARGET_LD= $(CROSS)$(CC)
+-TARGET_AR= $(CROSS)ar rcus 2>/dev/null
++TARGET_AR= $(CROSS)ar rcus
+ TARGET_STRIP= $(CROSS)strip
+
+ TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
+@@ -217,6 +219,7 @@ TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER)
+ TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
+ TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME)
+ TARGET_DLLNAME= lua$(NODOTABIVER).dll
++TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a
+ TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
+ TARGET_DYNXLDOPTS=
+
+@@ -305,24 +308,22 @@ endif
+ TARGET_SYS?= $(HOST_SYS)
+ ifeq (Windows,$(TARGET_SYS))
+ TARGET_STRIP+= --strip-unneeded
+- TARGET_XSHLDFLAGS= -shared
++ TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME)
+ TARGET_DYNXLDOPTS=
+ else
++ TARGET_AR+= 2>/dev/null
+ ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
+ TARGET_XCFLAGS+= -fno-stack-protector
+ endif
+ ifeq (Darwin,$(TARGET_SYS))
+ ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
+- export MACOSX_DEPLOYMENT_TARGET=10.4
++ $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
+ endif
+ TARGET_STRIP+= -x
++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+ TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
+ TARGET_DYNXLDOPTS=
+ TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+- ifeq (x64,$(TARGET_LJARCH))
+- TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
+- TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
+- endif
+ else
+ ifeq (iOS,$(TARGET_SYS))
+ TARGET_STRIP+= -x
+@@ -333,6 +334,13 @@ ifeq (iOS,$(TARGET_SYS))
+ TARGET_XCFLAGS+= -fno-omit-frame-pointer
+ endif
+ else
++ ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
++ # Find out whether the target toolchain always generates unwind tables.
++ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
++ ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
++ endif
++ endif
+ ifneq (SunOS,$(TARGET_SYS))
+ ifneq (PS3,$(TARGET_SYS))
+ TARGET_XLDFLAGS+= -Wl,-E
+@@ -359,7 +367,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS))
+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
+ else
+ ifeq (iOS,$(TARGET_SYS))
+- HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
++ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1
+ else
+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER
+ endif
+@@ -439,6 +447,9 @@ ifeq (arm,$(TARGET_LJARCH))
+ DASM_AFLAGS+= -D IOS
+ endif
+ else
++ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
++ DASM_AFLAGS+= -D MIPSR6
++endif
+ ifeq (ppc,$(TARGET_LJARCH))
+ ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D SQRT
+@@ -452,9 +463,6 @@ ifeq (ppc,$(TARGET_LJARCH))
+ ifeq (PS3,$(TARGET_SYS))
+ DASM_AFLAGS+= -D PPE -D TOC
+ endif
+- ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
+- DASM_ARCH= ppc64
+- endif
+ endif
+ endif
+ endif
+@@ -476,13 +484,15 @@ LJVM_BOUT= $(LJVM_S)
+ LJVM_MODE= elfasm
+
+ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
+- lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
++ lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
++ lib_buffer.o
+ LJLIB_C= $(LJLIB_O:.o=.c)
+
+-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
++LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
+ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
+- lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
+- lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
++ lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
++ lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
++ lj_api.o lj_profile.o \
+ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
+ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
+@@ -557,6 +567,7 @@ ifeq (Windows,$(HOST_SYS))
+ MINILUA_X= host\minilua
+ BUILDVM_X= host\buildvm
+ ALL_RM:= $(subst /,\,$(ALL_RM))
++ HOST_RM= del
+ endif
+ endif
+
+@@ -597,7 +608,6 @@ E= @echo
+ default all: $(TARGET_T)
+
+ amalg:
+- @grep "^[+|]" ljamalg.c
+ $(MAKE) all "LJCORE_O=ljamalg.o"
+
+ clean:
+@@ -631,7 +641,7 @@ $(MINILUA_T): $(MINILUA_O)
+ $(E) "HOSTLINK $@"
+ $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
+
+-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua
++host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h
+ $(E) "DYNASM $@"
+ $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
+
+diff --git a/src/Makefile.dep b/src/Makefile.dep
+index 2b1cb5ef..1ad6701a 100644
+--- a/src/Makefile.dep
++++ b/src/Makefile.dep
+@@ -1,15 +1,19 @@
+ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
+- lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
++ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h
+ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
+- lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \
+- lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
+- lj_strfmt.h lj_lib.h lj_libdef.h
++ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
++ lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
++ lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \
++ lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h
+ lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
++lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
++ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
++ lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \
++ lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h
+ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
+ lj_libdef.h
+@@ -28,7 +32,7 @@ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
+ lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+- lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
++ lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h
+ lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
+ lj_libdef.h
+@@ -41,16 +45,18 @@ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+-lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
++lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \
++ lj_prng.h
+ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
+ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+- lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
+- lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+- lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \
+- lj_asm_*.h
++ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
++ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
++ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
++ lj_emit_*.h lj_asm_*.h
++lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
+ lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
+ lj_bcdef.h
+ lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+@@ -75,8 +81,8 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h
+ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
+- lj_ccallback.h
++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \
++ lj_cdata.h lj_cconv.h lj_ccallback.h
+ lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
+ lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
+@@ -108,10 +114,10 @@ lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
+ lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_strfmt.h
+ lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
+- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+- lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
+- lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
++ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
++ lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
++ lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
+ lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h lj_vm.h
+@@ -125,21 +131,21 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
+- lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
++ lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
+ lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
+ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
+ lj_strfmt.h
+ lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
+- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
+- lj_bcdump.h lj_lib.h
++ lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \
++ lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h
+ lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+ lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
+- lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
++ lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h
+ lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
+@@ -155,7 +161,7 @@ lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
+ lj_vm.h
+ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
++ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h
+ lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_strscan.h
+@@ -168,6 +174,7 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
+ lj_vm.h lj_vmevent.h
++lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
+ lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
+@@ -175,7 +182,10 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+- lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
++ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
++lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
++ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
++ lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
+ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
+@@ -183,11 +193,13 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
+- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
++ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
++ lj_alloc.h luajit.h
+ lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+- lj_err.h lj_errmsg.h lj_str.h lj_char.h
++ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
+ lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \
++ lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h
+ lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
+ lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+@@ -198,36 +210,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \
+ lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
+- lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h
++ lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
+ lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_gc.h lj_udata.h
++ lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
+ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
+ lj_vm.h lj_vmevent.h
+ lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.h lj_vm.h
+-ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
+- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
+- lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+- lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
+- lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
+- lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
+- lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
+- lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
+- lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
+- lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
+- lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
+- lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
+- lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
+- lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
+- lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
+- lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
+- lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
+- lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
+- lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+- lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+- lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+- lib_ffi.c lib_init.c
++ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
++ lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \
++ lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \
++ lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
++ lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
++ lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
++ lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
++ lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \
++ lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
++ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \
++ lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
++ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
++ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
++ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
++ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
++ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
++ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
++ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
++ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
++ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
++ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
++ lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
++ lib_buffer.c lib_init.c
+ luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
+ host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
+diff --git a/src/host/buildvm.c b/src/host/buildvm.c
+index de23fabd..9dc328fc 100644
+--- a/src/host/buildvm.c
++++ b/src/host/buildvm.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** This is a tool to build the hand-tuned assembler code required for
+ ** LuaJIT's bytecode interpreter. It supports a variety of output formats
+diff --git a/src/host/buildvm.h b/src/host/buildvm.h
+index b90428dc..f81ef7e0 100644
+--- a/src/host/buildvm.h
++++ b/src/host/buildvm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _BUILDVM_H
+diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
+index ffd14903..01a1ba06 100644
+--- a/src/host/buildvm_asm.c
++++ b/src/host/buildvm_asm.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: Assembler source code emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "buildvm.h"
+@@ -144,14 +144,6 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
+ fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
+ (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+ } else if ((ins >> 26) == 18) {
+-#if LJ_ARCH_PPC64
+- const char *suffix = strchr(sym, '@');
+- if (suffix && suffix[1] == 'h') {
+- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+- } else if (suffix && suffix[1] == 'l') {
+- fprintf(ctx->fp, "\tld 12, %s\n", sym);
+- } else
+-#endif
+ fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
+ } else {
+ fprintf(stderr,
+@@ -250,9 +242,6 @@ void emit_asm(BuildCtx *ctx)
+ int i, rel;
+
+ fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+-#if LJ_ARCH_PPC64
+- fprintf(ctx->fp, "\t.abiversion 2\n");
+-#endif
+ fprintf(ctx->fp, "\t.text\n");
+ emit_asm_align(ctx, 4);
+
+@@ -338,7 +327,7 @@ void emit_asm(BuildCtx *ctx)
+ #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
+ fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
+ #endif
+-#if LJ_TARGET_PPC && !LJ_TARGET_PS3
++#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
+ /* Hard-float ABI. */
+ fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
+ #endif
+diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c
+index d579f4d4..02b51c4e 100644
+--- a/src/host/buildvm_fold.c
++++ b/src/host/buildvm_fold.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: IR folding hash table generator.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "buildvm.h"
+diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
+index 2956fdb6..20bb77cd 100644
+--- a/src/host/buildvm_lib.c
++++ b/src/host/buildvm_lib.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: library definition compiler.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "buildvm.h"
+@@ -385,6 +385,8 @@ void emit_lib(BuildCtx *ctx)
+ ok = LJ_HASJIT;
+ else if (!strcmp(buf, "#if LJ_HASFFI\n"))
+ ok = LJ_HASFFI;
++ else if (!strcmp(buf, "#if LJ_HASBUFFER\n"))
++ ok = LJ_HASBUFFER;
+ if (!ok) {
+ int lvl = 1;
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
+index 2eb2bb7b..aa061e6e 100644
+--- a/src/host/buildvm_peobj.c
++++ b/src/host/buildvm_peobj.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: PE object emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Only used for building on Windows, since we cannot assume the presence
+ ** of a suitable assembler. The host and target byte order must match.
+@@ -9,7 +9,7 @@
+ #include "buildvm.h"
+ #include "lj_bc.h"
+
+-#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC
++#if LJ_TARGET_X86ORX64
+
+ /* Context for PE object emitter. */
+ static char *strtab;
+@@ -93,12 +93,6 @@ typedef struct PEsymaux {
+ #define PEOBJ_RELOC_ADDR32NB 0x03
+ #define PEOBJ_RELOC_OFS 0
+ #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
+-#elif LJ_TARGET_PPC
+-#define PEOBJ_ARCH_TARGET 0x01f2
+-#define PEOBJ_RELOC_REL32 0x06
+-#define PEOBJ_RELOC_DIR32 0x02
+-#define PEOBJ_RELOC_OFS (-4)
+-#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */
+ #endif
+
+ /* Section numbers (0-based). */
+@@ -251,15 +245,8 @@ void emit_peobj(BuildCtx *ctx)
+ /* Write .text section. */
+ host_endian.u = 1;
+ if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
+-#if LJ_TARGET_PPC
+- uint32_t *p = (uint32_t *)ctx->code;
+- int n = (int)(ctx->codesz >> 2);
+- for (i = 0; i < n; i++, p++)
+- *p = lj_bswap(*p); /* Byteswap .text section. */
+-#else
+ fprintf(stderr, "Error: different byte order for host and target\n");
+ exit(1);
+-#endif
+ }
+ owrite(ctx, ctx->code, ctx->codesz);
+ for (i = 0; i < ctx->nreloc; i++) {
+diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
+index 6f5a05cc..921769f7 100644
+--- a/src/host/genlibbc.lua
++++ b/src/host/genlibbc.lua
+@@ -2,7 +2,7 @@
+ -- Lua script to dump the bytecode of the library functions written in Lua.
+ -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+ ----------------------------------------------------------------------------
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+
+diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua
+index 50feff01..c05ed63c 100644
+--- a/src/host/genminilua.lua
++++ b/src/host/genminilua.lua
+@@ -2,7 +2,7 @@
+ -- Lua script to generate a customized, minified version of Lua.
+ -- The resulting 'minilua' is used for the build process of LuaJIT.
+ ----------------------------------------------------------------------------
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+
+diff --git a/src/host/minilua.c b/src/host/minilua.c
+index 79150286..cfc7491d 100644
+--- a/src/host/minilua.c
++++ b/src/host/minilua.c
+@@ -1134,7 +1134,7 @@ if(!cl->isC){
+ CallInfo*ci;
+ StkId st,base;
+ Proto*p=cl->p;
+-luaD_checkstack(L,p->maxstacksize);
++luaD_checkstack(L,p->maxstacksize+p->numparams);
+ func=restorestack(L,funcr);
+ if(!p->is_vararg){
+ base=func+1;
+diff --git a/src/jit/bc.lua b/src/jit/bc.lua
+index 193cf01f..e58a3fef 100644
+--- a/src/jit/bc.lua
++++ b/src/jit/bc.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT bytecode listing module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
+index c17c88e0..ab13667a 100644
+--- a/src/jit/bcsave.lua
++++ b/src/jit/bcsave.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT module to save/list bytecode.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+@@ -17,6 +17,10 @@ local bit = require("bit")
+ -- Symbol name prefix for LuaJIT bytecode.
+ local LJBC_PREFIX = "luaJIT_BC_"
+
++local type, assert = type, assert
++local format = string.format
++local tremove, tconcat = table.remove, table.concat
++
+ ------------------------------------------------------------------------------
+
+ local function usage()
+@@ -63,8 +67,18 @@ local map_type = {
+ }
+
+ local map_arch = {
+- x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
+- ppc = true, mips = true, mipsel = true,
++ x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
++ x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
++ arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
++ arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
++ arm64be = { e = "be", b = 64, m = 183, },
++ ppc = { e = "be", b = 32, m = 20, },
++ mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
++ mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
++ mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
++ mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
++ mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
++ mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
+ }
+
+ local map_os = {
+@@ -73,33 +87,33 @@ local map_os = {
+ }
+
+ local function checkarg(str, map, err)
+- str = string.lower(str)
++ str = str:lower()
+ local s = check(map[str], "unknown ", err)
+- return s == true and str or s
++ return type(s) == "string" and s or str
+ end
+
+ local function detecttype(str)
+- local ext = string.match(string.lower(str), "%.(%a+)$")
++ local ext = str:lower():match("%.(%a+)$")
+ return map_type[ext] or "raw"
+ end
+
+ local function checkmodname(str)
+- check(string.match(str, "^[%w_.%-]+$"), "bad module name")
+- return string.gsub(str, "[%.%-]", "_")
++ check(str:match("^[%w_.%-]+$"), "bad module name")
++ return str:gsub("[%.%-]", "_")
+ end
+
+ local function detectmodname(str)
+ if type(str) == "string" then
+- local tail = string.match(str, "[^/\\]+$")
++ local tail = str:match("[^/\\]+$")
+ if tail then str = tail end
+- local head = string.match(str, "^(.*)%.[^.]*$")
++ local head = str:match("^(.*)%.[^.]*$")
+ if head then str = head end
+- str = string.match(str, "^[%w_.%-]+")
++ str = str:match("^[%w_.%-]+")
+ else
+ str = nil
+ end
+ check(str, "cannot derive module name, use -n name")
+- return string.gsub(str, "[%.%-]", "_")
++ return str:gsub("[%.%-]", "_")
+ end
+
+ ------------------------------------------------------------------------------
+@@ -118,8 +132,8 @@ end
+ local function bcsave_c(ctx, output, s)
+ local fp = savefile(output, "w")
+ if ctx.type == "c" then
+- fp:write(string.format([[
+-#ifdef _cplusplus
++ fp:write(format([[
++#ifdef __cplusplus
+ extern "C"
+ #endif
+ #ifdef _WIN32
+@@ -128,7 +142,7 @@ __declspec(dllexport)
+ const unsigned char %s%s[] = {
+ ]], LJBC_PREFIX, ctx.modname))
+ else
+- fp:write(string.format([[
++ fp:write(format([[
+ #define %s%s_SIZE %d
+ static const unsigned char %s%s[] = {
+ ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
+@@ -138,13 +152,13 @@ static const unsigned char %s%s[] = {
+ local b = tostring(string.byte(s, i))
+ m = m + #b + 1
+ if m > 78 then
+- fp:write(table.concat(t, ",", 1, n), ",\n")
++ fp:write(tconcat(t, ",", 1, n), ",\n")
+ n, m = 0, #b + 1
+ end
+ n = n + 1
+ t[n] = b
+ end
+- bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n")
++ bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
+ end
+
+ local function bcsave_elfobj(ctx, output, s, ffi)
+@@ -199,12 +213,8 @@ typedef struct {
+ } ELF64obj;
+ ]]
+ local symname = LJBC_PREFIX..ctx.modname
+- local is64, isbe = false, false
+- if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
+- is64 = true
+- elseif ctx.arch == "ppc" or ctx.arch == "mips" then
+- isbe = true
+- end
++ local ai = assert(map_arch[ctx.arch])
++ local is64, isbe = ai.b == 64, ai.e == "be"
+
+ -- Handle different host/target endianess.
+ local function f32(x) return x end
+@@ -237,10 +247,8 @@ typedef struct {
+ hdr.eendian = isbe and 2 or 1
+ hdr.eversion = 1
+ hdr.type = f16(1)
+- hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
+- if ctx.arch == "mips" or ctx.arch == "mipsel" then
+- hdr.flags = f32(0x50001006)
+- end
++ hdr.machine = f16(ai.m)
++ hdr.flags = f32(ai.f or 0)
+ hdr.version = f32(1)
+ hdr.shofs = fofs(ffi.offsetof(o, "sect"))
+ hdr.ehsize = f16(ffi.sizeof(hdr))
+@@ -275,7 +283,7 @@ typedef struct {
+ o.sect[2].size = fofs(ofs)
+ o.sect[3].type = f32(3) -- .strtab
+ o.sect[3].ofs = fofs(sofs + ofs)
+- o.sect[3].size = fofs(#symname+1)
++ o.sect[3].size = fofs(#symname+2)
+ ffi.copy(o.space+ofs+1, symname)
+ ofs = ofs + #symname + 2
+ o.sect[4].type = f32(1) -- .rodata
+@@ -336,12 +344,8 @@ typedef struct {
+ } PEobj;
+ ]]
+ local symname = LJBC_PREFIX..ctx.modname
+- local is64 = false
+- if ctx.arch == "x86" then
+- symname = "_"..symname
+- elseif ctx.arch == "x64" then
+- is64 = true
+- end
++ local ai = assert(map_arch[ctx.arch])
++ local is64 = ai.b == 64
+ local symexport = " /EXPORT:"..symname..",DATA "
+
+ -- The file format is always little-endian. Swap if the host is big-endian.
+@@ -355,7 +359,7 @@ typedef struct {
+ -- Create PE object and fill in header.
+ local o = ffi.new("PEobj")
+ local hdr = o.hdr
+- hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch])
++ hdr.arch = f16(assert(ai.p))
+ hdr.nsects = f16(2)
+ hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
+ hdr.nsyms = f32(6)
+@@ -605,16 +609,16 @@ local function docmd(...)
+ local n = 1
+ local list = false
+ local ctx = {
+- strip = true, arch = jit.arch, os = string.lower(jit.os),
++ strip = true, arch = jit.arch, os = jit.os:lower(),
+ type = false, modname = false,
+ }
+ while n <= #arg do
+ local a = arg[n]
+- if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then
+- table.remove(arg, n)
++ if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
++ tremove(arg, n)
+ if a == "--" then break end
+ for m=2,#a do
+- local opt = string.sub(a, m, m)
++ local opt = a:sub(m, m)
+ if opt == "l" then
+ list = true
+ elseif opt == "s" then
+@@ -627,13 +631,13 @@ local function docmd(...)
+ if n ~= 1 then usage() end
+ arg[1] = check(loadstring(arg[1]))
+ elseif opt == "n" then
+- ctx.modname = checkmodname(table.remove(arg, n))
++ ctx.modname = checkmodname(tremove(arg, n))
+ elseif opt == "t" then
+- ctx.type = checkarg(table.remove(arg, n), map_type, "file type")
++ ctx.type = checkarg(tremove(arg, n), map_type, "file type")
+ elseif opt == "a" then
+- ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture")
++ ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
+ elseif opt == "o" then
+- ctx.os = checkarg(table.remove(arg, n), map_os, "OS name")
++ ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
+ else
+ usage()
+ end
+diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
+index c2dd7769..ba79c47e 100644
+--- a/src/jit/dis_arm.lua
++++ b/src/jit/dis_arm.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT ARM disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
+index a7173326..ad909fbd 100644
+--- a/src/jit/dis_arm64.lua
++++ b/src/jit/dis_arm64.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT ARM64 disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ --
+ -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+@@ -1089,7 +1089,7 @@ local function disass_ins(ctx)
+ last = "#"..(sf+32 - immr)
+ operands[#operands] = last
+ x = x + 1
+- elseif x >= immr then
++ else
+ name = a2
+ x = x - immr + 1
+ end
+diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
+index 7eb389e2..edcbffa8 100644
+--- a/src/jit/dis_arm64be.lua
++++ b/src/jit/dis_arm64be.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT ARM64BE disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- ARM64 instructions are always little-endian. So just forward to the
+diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
+index a12b8e62..6ad17f54 100644
+--- a/src/jit/dis_mips.lua
++++ b/src/jit/dis_mips.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPS disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT/X license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
+ local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+
+ ------------------------------------------------------------------------------
+--- Primary and extended opcode maps
++-- Extended opcode maps common to all MIPS releases
+ ------------------------------------------------------------------------------
+
+-local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
+ local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
+ local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
+
++local map_cop0 = {
++ shift = 25, mask = 1,
++ [0] = {
++ shift = 21, mask = 15,
++ [0] = "mfc0TDW", [4] = "mtc0TDW",
++ [10] = "rdpgprDT",
++ [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
++ [14] = "wrpgprDT",
++ }, {
++ shift = 0, mask = 63,
++ [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
++ [24] = "eret", [31] = "deret",
++ [32] = "wait",
++ },
++}
++
++------------------------------------------------------------------------------
++-- Primary and extended opcode maps for MIPS R1-R5
++------------------------------------------------------------------------------
++
++local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
++
+ local map_special = {
+ shift = 0, mask = 63,
+ [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
+@@ -87,22 +108,6 @@ local map_regimm = {
+ false, false, false, "synciSO",
+ }
+
+-local map_cop0 = {
+- shift = 25, mask = 1,
+- [0] = {
+- shift = 21, mask = 15,
+- [0] = "mfc0TDW", [4] = "mtc0TDW",
+- [10] = "rdpgprDT",
+- [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
+- [14] = "wrpgprDT",
+- }, {
+- shift = 0, mask = 63,
+- [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
+- [24] = "eret", [31] = "deret",
+- [32] = "wait",
+- },
+-}
+-
+ local map_cop1s = {
+ shift = 0, mask = 63,
+ [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
+@@ -233,6 +238,208 @@ local map_pri = {
+ false, "sdc1HSO", "sdc2TSO", "sdTSO",
+ }
+
++------------------------------------------------------------------------------
++-- Primary and extended opcode maps for MIPS R6
++------------------------------------------------------------------------------
++
++local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
++local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
++local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
++local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
++local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
++local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
++local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
++local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
++
++local map_special_r6 = {
++ shift = 0, mask = 63,
++ [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
++ false, map_srl, "sraDTA",
++ "sllvDTS", false, map_srlv, "sravDTS",
++ "jrS", "jalrD1S", false, false,
++ "syscallY", "breakY", false, "sync",
++ "clzDS", "cloDS", "dclzDS", "dcloDS",
++ "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
++ map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
++ map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
++ "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
++ "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
++ false, false, "sltDST", "sltuDST",
++ "daddDST", "dadduDST", "dsubDST", "dsubuDST",
++ "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
++ "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
++ "dsllDTA", false, "dsrlDTA", "dsraDTA",
++ "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
++}
++
++local map_bshfl_r6 = {
++ shift = 9, mask = 3,
++ [1] = "alignDSTa",
++ _ = {
++ shift = 6, mask = 31,
++ [0] = "bitswapDT",
++ [2] = "wsbhDT",
++ [16] = "sebDT",
++ [24] = "sehDT",
++ }
++}
++
++local map_dbshfl_r6 = {
++ shift = 9, mask = 3,
++ [1] = "dalignDSTa",
++ _ = {
++ shift = 6, mask = 31,
++ [0] = "dbitswapDT",
++ [2] = "dsbhDT",
++ [5] = "dshdDT",
++ }
++}
++
++local map_special3_r6 = {
++ shift = 0, mask = 63,
++ [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
++ [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
++ [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
++}
++
++local map_regimm_r6 = {
++ shift = 16, mask = 31,
++ [0] = "bltzSB", [1] = "bgezSB",
++ [6] = "dahiSI", [30] = "datiSI",
++ [23] = "sigrieI", [31] = "synciSO",
++}
++
++local map_pcrel_r6 = {
++ shift = 19, mask = 3,
++ [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
++ shift = 18, mask = 1,
++ [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
++ }
++}
++
++local map_cop1s_r6 = {
++ shift = 0, mask = 63,
++ [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
++ "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
++ "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
++ "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
++ "sel.sFGH", false, false, false,
++ "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
++ "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
++ "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
++ false, "cvt.d.sFG", false, false,
++ "cvt.w.sFG", "cvt.l.sFG",
++}
++
++local map_cop1d_r6 = {
++ shift = 0, mask = 63,
++ [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
++ "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
++ "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
++ "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
++ "sel.dFGH", false, false, false,
++ "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
++ "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
++ "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
++ "cvt.s.dFG", false, false, false,
++ "cvt.w.dFG", "cvt.l.dFG",
++}
++
++local map_cop1w_r6 = {
++ shift = 0, mask = 63,
++ [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
++ "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
++ "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
++ "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
++ false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
++ false, false, false, false,
++ false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
++ false, false, false, false,
++ "cvt.s.wFG", "cvt.d.wFG",
++}
++
++local map_cop1l_r6 = {
++ shift = 0, mask = 63,
++ [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
++ "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
++ "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
++ "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
++ false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
++ false, false, false, false,
++ false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
++ false, false, false, false,
++ "cvt.s.lFG", "cvt.d.lFG",
++}
++
++local map_cop1_r6 = {
++ shift = 21, mask = 31,
++ [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
++ "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
++ false, "bc1eqzHB", false, false,
++ false, "bc1nezHB", false, false,
++ map_cop1s_r6, map_cop1d_r6, false, false,
++ map_cop1w_r6, map_cop1l_r6,
++}
++
++local function maprs_popTS(rs, rt)
++ if rt == 0 then return 0 elseif rs == 0 then return 1
++ elseif rs == rt then return 2 else return 3 end
++end
++
++local map_pop06_r6 = {
++ maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
++}
++local map_pop07_r6 = {
++ maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
++}
++local map_pop26_r6 = {
++ maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
++}
++local map_pop27_r6 = {
++ maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
++}
++
++local function maprs_popS(rs, rt)
++ if rs == 0 then return 0 else return 1 end
++end
++
++local map_pop66_r6 = {
++ maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
++}
++local map_pop76_r6 = {
++ maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
++}
++
++local function maprs_popST(rs, rt)
++ if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end
++end
++
++local map_pop10_r6 = {
++ maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
++}
++local map_pop30_r6 = {
++ maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
++}
++
++local map_pri_r6 = {
++ [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
++ "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
++ map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
++ "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
++ map_cop0, map_cop1_r6, false, false,
++ false, false, map_pop26_r6, map_pop27_r6,
++ map_pop30_r6, "daddiuTSI", false, false,
++ false, "dauiTSI", false, map_special3_r6,
++ "lbTSO", "lhTSO", false, "lwTSO",
++ "lbuTSO", "lhuTSO", false, false,
++ "sbTSO", "shTSO", false, "swTSO",
++ false, false, false, false,
++ false, "lwc1HSO", "bc#", false,
++ false, "ldc1HSO", map_pop66_r6, "ldTSO",
++ false, "swc1HSO", "balc#", map_pcrel_r6,
++ false, "sdc1HSO", map_pop76_r6, "sdTSO",
++}
++
+ ------------------------------------------------------------------------------
+
+ local map_gpr = {
+@@ -287,10 +494,14 @@ local function disass_ins(ctx)
+ ctx.op = op
+ ctx.rel = nil
+
+- local opat = map_pri[rshift(op, 26)]
++ local opat = ctx.map_pri[rshift(op, 26)]
+ while type(opat) ~= "string" do
+ if not opat then return unknown(ctx) end
+- opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
++ if opat.maprs then
++ opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
++ else
++ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
++ end
+ end
+ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+@@ -314,6 +525,8 @@ local function disass_ins(ctx)
+ x = "f"..band(rshift(op, 21), 31)
+ elseif p == "A" then
+ x = band(rshift(op, 6), 31)
++ elseif p == "a" then
++ x = band(rshift(op, 6), 7)
+ elseif p == "E" then
+ x = band(rshift(op, 6), 31) + 32
+ elseif p == "M" then
+@@ -333,6 +546,10 @@ local function disass_ins(ctx)
+ x = band(rshift(op, 11), 31) - last + 33
+ elseif p == "I" then
+ x = arshift(lshift(op, 16), 16)
++ elseif p == "2" then
++ x = arshift(lshift(op, 13), 11)
++ elseif p == "3" then
++ x = arshift(lshift(op, 14), 11)
+ elseif p == "U" then
+ x = band(op, 0xffff)
+ elseif p == "O" then
+@@ -342,7 +559,15 @@ local function disass_ins(ctx)
+ local index = map_gpr[band(rshift(op, 16), 31)]
+ operands[#operands] = format("%s(%s)", index, last)
+ elseif p == "B" then
+- x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
++ x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
++ ctx.rel = x
++ x = format("0x%08x", x)
++ elseif p == "b" then
++ x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
++ ctx.rel = x
++ x = format("0x%08x", x)
++ elseif p == "#" then
++ x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
+ ctx.rel = x
+ x = format("0x%08x", x)
+ elseif p == "J" then
+@@ -408,6 +633,7 @@ local function create(code, addr, out)
+ ctx.disass = disass_block
+ ctx.hexdump = 8
+ ctx.get = get_be
++ ctx.map_pri = map_pri
+ return ctx
+ end
+
+@@ -417,6 +643,19 @@ local function create_el(code, addr, out)
+ return ctx
+ end
+
++local function create_r6(code, addr, out)
++ local ctx = create(code, addr, out)
++ ctx.map_pri = map_pri_r6
++ return ctx
++end
++
++local function create_r6_el(code, addr, out)
++ local ctx = create(code, addr, out)
++ ctx.get = get_le
++ ctx.map_pri = map_pri_r6
++ return ctx
++end
++
+ -- Simple API: disassemble code (a string) at address and output via out.
+ local function disass(code, addr, out)
+ create(code, addr, out):disass()
+@@ -426,6 +665,14 @@ local function disass_el(code, addr, out)
+ create_el(code, addr, out):disass()
+ end
+
++local function disass_r6(code, addr, out)
++ create_r6(code, addr, out):disass()
++end
++
++local function disass_r6_el(code, addr, out)
++ create_r6_el(code, addr, out):disass()
++end
++
+ -- Return register name for RID.
+ local function regname(r)
+ if r < 32 then return map_gpr[r] end
+@@ -436,8 +683,12 @@ end
+ return {
+ create = create,
+ create_el = create_el,
++ create_r6 = create_r6,
++ create_r6_el = create_r6_el,
+ disass = disass,
+ disass_el = disass_el,
++ disass_r6 = disass_r6,
++ disass_r6_el = disass_r6_el,
+ regname = regname
+ }
+
+diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
+index c4374928..5ad48f8f 100644
+--- a/src/jit/dis_mips64.lua
++++ b/src/jit/dis_mips64.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPS64 disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the big-endian functions from the
+diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
+index 2b1470af..d50e3a18 100644
+--- a/src/jit/dis_mips64el.lua
++++ b/src/jit/dis_mips64el.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPS64EL disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the little-endian functions from the
+diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64r6.lua
+similarity index 67%
+copy from src/jit/dis_mips64el.lua
+copy to src/jit/dis_mips64r6.lua
+index 2b1470af..921b3cbe 100644
+--- a/src/jit/dis_mips64el.lua
++++ b/src/jit/dis_mips64r6.lua
+@@ -1,17 +1,17 @@
+ ----------------------------------------------------------------------------
+--- LuaJIT MIPS64EL disassembler wrapper module.
++-- LuaJIT MIPS64R6 disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+--- This module just exports the little-endian functions from the
++-- This module just exports the r6 big-endian functions from the
+ -- MIPS disassembler module. All the interesting stuff is there.
+ ------------------------------------------------------------------------------
+
+ local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+ return {
+- create = dis_mips.create_el,
+- disass = dis_mips.disass_el,
++ create = dis_mips.create_r6,
++ disass = dis_mips.disass_r6,
+ regname = dis_mips.regname
+ }
+
+diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64r6el.lua
+similarity index 66%
+copy from src/jit/dis_mips64el.lua
+copy to src/jit/dis_mips64r6el.lua
+index 2b1470af..aadef9f3 100644
+--- a/src/jit/dis_mips64el.lua
++++ b/src/jit/dis_mips64r6el.lua
+@@ -1,17 +1,17 @@
+ ----------------------------------------------------------------------------
+--- LuaJIT MIPS64EL disassembler wrapper module.
++-- LuaJIT MIPS64R6EL disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+--- This module just exports the little-endian functions from the
++-- This module just exports the r6 little-endian functions from the
+ -- MIPS disassembler module. All the interesting stuff is there.
+ ------------------------------------------------------------------------------
+
+ local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+ return {
+- create = dis_mips.create_el,
+- disass = dis_mips.disass_el,
++ create = dis_mips.create_r6_el,
++ disass = dis_mips.disass_r6_el,
+ regname = dis_mips.regname
+ }
+
+diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
+index f69b11f0..52cebefb 100644
+--- a/src/jit/dis_mipsel.lua
++++ b/src/jit/dis_mipsel.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPSEL disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the little-endian functions from the
+diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
+index 2aeb1b29..08d742f1 100644
+--- a/src/jit/dis_ppc.lua
++++ b/src/jit/dis_ppc.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT PPC disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT/X license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
+index d5714ee1..2d37423e 100644
+--- a/src/jit/dis_x64.lua
++++ b/src/jit/dis_x64.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT x64 disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the 64 bit functions from the combined
+diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
+index 4371233d..5480854c 100644
+--- a/src/jit/dis_x86.lua
++++ b/src/jit/dis_x86.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT x86/x64 disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+@@ -239,6 +239,24 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+ --8x
+ [0x8c] = "||pmaskmovXrvVSm",
+ [0x8e] = "||pmaskmovVSmXvr",
++--9x
++[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
++[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
++[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
++[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
++[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
++--Ax
++[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
++[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
++[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
++[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
++[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
++--Bx
++[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
++[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
++[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
++[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
++[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
+ --Dx
+ [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
+ [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
+@@ -483,7 +501,7 @@ local function putpat(ctx, name, pat)
+ local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
+ local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
+
+- -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
++ -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
+ for p in gmatch(pat, ".") do
+ local x = nil
+ if p == "V" or p == "U" then
+@@ -506,6 +524,9 @@ local function putpat(ctx, name, pat)
+ sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+ if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
+ regs = map_regs[sz]
++ elseif p == "H" then
++ name = name..(ctx.rexw and "d" or "s")
++ ctx.rexw = false
+ elseif p == "S" then
+ name = name..lower(sz)
+ elseif p == "s" then
+@@ -735,6 +756,7 @@ map_act = {
+ V = putpat, U = putpat, T = putpat,
+ M = putpat, X = putpat, P = putpat,
+ F = putpat, G = putpat, Y = putpat,
++ H = putpat,
+
+ -- Collect prefixes.
+ [":"] = function(ctx, name, pat)
+diff --git a/src/jit/dump.lua b/src/jit/dump.lua
+index 2bea652b..9eda08c4 100644
+--- a/src/jit/dump.lua
++++ b/src/jit/dump.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT compiler dump module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+@@ -102,10 +102,12 @@ end
+ local function fillsymtab(tr, nexit)
+ local t = symtab
+ if nexitsym == 0 then
++ local maskaddr = jit.arch == "arm" and -2
+ local ircall = vmdef.ircall
+ for i=0,#ircall do
+ local addr = ircalladdr(i)
+ if addr ~= 0 then
++ if maskaddr then addr = band(addr, maskaddr) end
+ if addr < 0 then addr = addr + 2^32 end
+ t[addr] = ircall[i]
+ end
+@@ -217,8 +219,10 @@ local function colorize_text(s)
+ return s
+ end
+
+-local function colorize_ansi(s, t)
+- return format(colortype_ansi[t], s)
++local function colorize_ansi(s, t, extra)
++ local out = format(colortype_ansi[t], s)
++ if extra then out = "\027[3m"..out end
++ return out
+ end
+
+ local irtype_ansi = setmetatable({},
+@@ -227,9 +231,10 @@ local irtype_ansi = setmetatable({},
+
+ local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
+
+-local function colorize_html(s, t)
++local function colorize_html(s, t, extra)
+ s = gsub(s, "[<>&]", html_escape)
+- return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
++ return format('<span class="irt_%s%s">%s</span>',
++ irtype_text[t], extra and " irt_extra" or "", s)
+ end
+
+ local irtype_html = setmetatable({},
+@@ -254,6 +259,7 @@ span.irt_tab { color: #c00000; }
+ span.irt_udt, span.irt_lud { color: #00c0c0; }
+ span.irt_num { color: #4040c0; }
+ span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
++span.irt_extra { font-style: italic; }
+ </style>
+ ]]
+
+@@ -269,6 +275,7 @@ local litname = {
+ if band(mode, 8) ~= 0 then s = s.."C" end
+ if band(mode, 16) ~= 0 then s = s.."R" end
+ if band(mode, 32) ~= 0 then s = s.."I" end
++ if band(mode, 64) ~= 0 then s = s.."K" end
+ t[mode] = s
+ return s
+ end}),
+@@ -277,15 +284,18 @@ local litname = {
+ local s = irtype[band(mode, 31)]
+ s = irtype[band(shr(mode, 5), 31)].."."..s
+ if band(mode, 0x800) ~= 0 then s = s.." sext" end
+- local c = shr(mode, 14)
+- if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
++ local c = shr(mode, 12)
++ if c == 1 then s = s.." none"
++ elseif c == 2 then s = s.." index"
++ elseif c == 3 then s = s.." check" end
+ t[mode] = s
+ return s
+ end}),
+ ["FLOAD "] = vmdef.irfield,
+ ["FREF "] = vmdef.irfield,
+ ["FPMATH"] = vmdef.irfpm,
+- ["BUFHDR"] = { [0] = "RESET", "APPEND" },
++ ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
++ ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
+ ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
+ }
+
+@@ -315,7 +325,9 @@ local function formatk(tr, idx, sn)
+ local tn = type(k)
+ local s
+ if tn == "number" then
+- if band(sn or 0, 0x30000) ~= 0 then
++ if t < 12 then
++ s = k == 0 and "NULL" or format("[0x%08x]", k)
++ elseif band(sn or 0, 0x30000) ~= 0 then
+ s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
+ elseif k == 2^52+2^51 then
+ s = "bias"
+@@ -343,7 +355,7 @@ local function formatk(tr, idx, sn)
+ else
+ s = tostring(k) -- For primitives.
+ end
+- s = colorize(format("%-4s", s), t)
++ s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0)
+ if slot then
+ s = format("%s @%d", s, slot)
+ end
+@@ -363,7 +375,7 @@ local function printsnap(tr, snap)
+ out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
+ else
+ local m, ot, op1, op2 = traceir(tr, ref)
+- out:write(colorize(format("%04d", ref), band(ot, 31)))
++ out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0))
+ end
+ out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
+ else
+@@ -582,7 +594,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
+ end
+
+ -- Dump recorded bytecode.
+-local function dump_record(tr, func, pc, depth, callee)
++local function dump_record(tr, func, pc, depth)
+ if depth ~= recdepth then
+ recdepth = depth
+ recprefix = rep(" .", depth)
+@@ -593,7 +605,6 @@ local function dump_record(tr, func, pc, depth, callee)
+ if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
+ else
+ line = "0000 "..recprefix.." FUNCC \n"
+- callee = func
+ end
+ if pc <= 0 then
+ out:write(sub(line, 1, -2), " ; ", fmtfunc(func), "\n")
+@@ -607,12 +618,15 @@ end
+
+ ------------------------------------------------------------------------------
+
++local gpr64 = jit.arch:match("64")
++local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel"
++
+ -- Dump taken trace exits.
+ local function dump_texit(tr, ex, ngpr, nfpr, ...)
+ out:write("---- TRACE ", tr, " exit ", ex, "\n")
+ if dumpmode.X then
+ local regs = {...}
+- if jit.arch == "x64" then
++ if gpr64 then
+ for i=1,ngpr do
+ out:write(format(" %016x", regs[i]))
+ if i % 4 == 0 then out:write("\n") end
+@@ -623,7 +637,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
+ if i % 8 == 0 then out:write("\n") end
+ end
+ end
+- if jit.arch == "mips" or jit.arch == "mipsel" then
++ if fprmips32 then
+ for i=1,nfpr,2 do
+ out:write(format(" %+17.14g", regs[ngpr+i]))
+ if i % 8 == 7 then out:write("\n") end
+diff --git a/src/jit/p.lua b/src/jit/p.lua
+index 7be10586..c9ec1d8b 100644
+--- a/src/jit/p.lua
++++ b/src/jit/p.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT profiler.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+@@ -238,6 +238,7 @@ local function prof_finish()
+ prof_count1 = nil
+ prof_count2 = nil
+ prof_ud = nil
++ if out ~= stdout then out:close() end
+ end
+ end
+
+diff --git a/src/jit/v.lua b/src/jit/v.lua
+index 934de985..83589143 100644
+--- a/src/jit/v.lua
++++ b/src/jit/v.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- Verbose mode of the LuaJIT compiler.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+diff --git a/src/jit/zone.lua b/src/jit/zone.lua
+index fa702c4e..94357854 100644
+--- a/src/jit/zone.lua
++++ b/src/jit/zone.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT profiler zones.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+diff --git a/src/lib_aux.c b/src/lib_aux.c
+index c40565c3..4ef55581 100644
+--- a/src/lib_aux.c
++++ b/src/lib_aux.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Auxiliary library for the Lua/C API.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major parts taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -218,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
+
+ LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
+ {
+- while (l--)
+- luaL_addchar(B, *s++);
++ if (l <= bufffree(B)) {
++ memcpy(B->p, s, l);
++ B->p += l;
++ } else {
++ emptybuffer(B);
++ lua_pushlstring(B->L, s, l);
++ B->lvl++;
++ adjuststack(B);
++ }
+ }
+
+ LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
+@@ -338,17 +345,13 @@ LUALIB_API lua_State *luaL_newstate(void)
+
+ #else
+
+-#include "lj_alloc.h"
+-
+ LUALIB_API lua_State *luaL_newstate(void)
+ {
+ lua_State *L;
+- void *ud = lj_alloc_create();
+- if (ud == NULL) return NULL;
+ #if LJ_64 && !LJ_GC64
+- L = lj_state_newstate(lj_alloc_f, ud);
++ L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL);
+ #else
+- L = lua_newstate(lj_alloc_f, ud);
++ L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL);
+ #endif
+ if (L) G(L)->panic = panic;
+ return L;
+diff --git a/src/lib_base.c b/src/lib_base.c
+index 3a757870..55e3c6b8 100644
+--- a/src/lib_base.c
++++ b/src/lib_base.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Base and coroutine library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -19,6 +19,7 @@
+ #include "lj_gc.h"
+ #include "lj_err.h"
+ #include "lj_debug.h"
++#include "lj_buf.h"
+ #include "lj_str.h"
+ #include "lj_tab.h"
+ #include "lj_meta.h"
+@@ -42,13 +43,13 @@
+
+ LJLIB_ASM(assert) LJLIB_REC(.)
+ {
+- GCstr *s;
+ lj_lib_checkany(L, 1);
+- s = lj_lib_optstr(L, 2);
+- if (s)
+- lj_err_callermsg(L, strdata(s));
+- else
++ if (L->top == L->base+1)
+ lj_err_caller(L, LJ_ERR_ASSERT);
++ else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
++ lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
++ else
++ lj_err_run(L);
+ return FFH_UNREACHABLE;
+ }
+
+@@ -75,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.)
+ /* This solves a circular dependency problem -- change FF_next_N as needed. */
+ LJ_STATIC_ASSERT((int)FF_next == FF_next_N);
+
+-LJLIB_ASM(next)
++LJLIB_ASM(next) LJLIB_REC(.)
+ {
+ lj_lib_checktab(L, 1);
++ lj_err_msg(L, LJ_ERR_NEXTIDX);
+ return FFH_UNREACHABLE;
+ }
+
+@@ -224,9 +226,11 @@ LJLIB_CF(unpack)
+ int32_t n, i = lj_lib_optint(L, 2, 1);
+ int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ?
+ lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t);
++ uint32_t nu;
+ if (i > e) return 0;
+- n = e - i + 1;
+- if (n <= 0 || !lua_checkstack(L, n))
++ nu = (uint32_t)e - (uint32_t)i;
++ n = (int32_t)(nu+1);
++ if (nu >= LUAI_MAXCSTACK || !lua_checkstack(L, n))
+ lj_err_caller(L, LJ_ERR_UNPACK);
+ do {
+ cTValue *tv = lj_tab_getint(t, i);
+@@ -287,18 +291,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
+ } else {
+ const char *p = strdata(lj_lib_checkstr(L, 1));
+ char *ep;
++ unsigned int neg = 0;
+ unsigned long ul;
+ if (base < 2 || base > 36)
+ lj_err_arg(L, 2, LJ_ERR_BASERNG);
+- ul = strtoul(p, &ep, base);
+- if (p != ep) {
+- while (lj_char_isspace((unsigned char)(*ep))) ep++;
+- if (*ep == '\0') {
+- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
+- setintV(L->base-1-LJ_FR2, (int32_t)ul);
+- else
+- setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
+- return FFH_RES(1);
++ while (lj_char_isspace((unsigned char)(*p))) p++;
++ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
++ if (lj_char_isalnum((unsigned char)(*p))) {
++ ul = strtoul(p, &ep, base);
++ if (p != ep) {
++ while (lj_char_isspace((unsigned char)(*ep))) ep++;
++ if (*ep == '\0') {
++ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
++ if (neg) ul = (unsigned long)-(long)ul;
++ setintV(L->base-1-LJ_FR2, (int32_t)ul);
++ } else {
++ lua_Number n = (lua_Number)ul;
++ if (neg) n = -n;
++ setnumV(L->base-1-LJ_FR2, n);
++ }
++ return FFH_RES(1);
++ }
+ }
+ }
+ }
+@@ -395,10 +408,22 @@ LJLIB_CF(load)
+ GCstr *name = lj_lib_optstr(L, 2);
+ GCstr *mode = lj_lib_optstr(L, 3);
+ int status;
+- if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) {
+- GCstr *s = lj_lib_checkstr(L, 1);
++ if (L->base < L->top &&
++ (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) {
++ const char *s;
++ MSize len;
++ if (tvisbuf(L->base)) {
++ SBufExt *sbx = bufV(L->base);
++ s = sbx->r;
++ len = sbufxlen(sbx);
++ if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */
++ } else {
++ GCstr *str = lj_lib_checkstr(L, 1);
++ s = strdata(str);
++ len = str->len;
++ }
+ lua_settop(L, 4); /* Ensure env arg exists. */
+- status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s),
++ status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s,
+ mode ? strdata(mode) : NULL);
+ } else {
+ lj_lib_checkfunc(L, 1);
+@@ -493,7 +518,8 @@ LJLIB_CF(print)
+ lua_gettable(L, LUA_GLOBALSINDEX);
+ tv = L->top-1;
+ }
+- shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
++ shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) &&
++ !gcrefu(basemt_it(G(L), LJ_TNUMX));
+ for (i = 0; i < nargs; i++) {
+ cTValue *o = &L->base[i];
+ const char *str;
+diff --git a/src/lib_bit.c b/src/lib_bit.c
+index c979a448..6fb8ad47 100644
+--- a/src/lib_bit.c
++++ b/src/lib_bit.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Bit manipulation library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lib_bit_c
+diff --git a/src/lib_buffer.c b/src/lib_buffer.c
+new file mode 100644
+index 00000000..2e364861
+--- /dev/null
++++ b/src/lib_buffer.c
+@@ -0,0 +1,356 @@
++/*
++** Buffer library.
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#define lib_buffer_c
++#define LUA_LIB
++
++#include "lua.h"
++#include "lauxlib.h"
++#include "lualib.h"
++
++#include "lj_obj.h"
++
++#if LJ_HASBUFFER
++#include "lj_gc.h"
++#include "lj_err.h"
++#include "lj_buf.h"
++#include "lj_str.h"
++#include "lj_tab.h"
++#include "lj_udata.h"
++#include "lj_meta.h"
++#if LJ_HASFFI
++#include "lj_ctype.h"
++#include "lj_cdata.h"
++#include "lj_cconv.h"
++#endif
++#include "lj_strfmt.h"
++#include "lj_serialize.h"
++#include "lj_lib.h"
++
++/* -- Helper functions ---------------------------------------------------- */
++
++/* Check that the first argument is a string buffer. */
++static SBufExt *buffer_tobuf(lua_State *L)
++{
++ if (!(L->base < L->top && tvisbuf(L->base)))
++ lj_err_argtype(L, 1, "buffer");
++ return bufV(L->base);
++}
++
++/* Ditto, but for writers. */
++static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ setsbufXL_(sbx, L);
++ return sbx;
++}
++
++#define buffer_toudata(sbx) ((GCudata *)(sbx)-1)
++
++/* -- Buffer methods ------------------------------------------------------ */
++
++#define LJLIB_MODULE_buffer_method
++
++LJLIB_CF(buffer_method_free)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ lj_bufx_free(L, sbx);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_reset) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ lj_bufx_reset(sbx);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_skip) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
++ MSize len = sbufxlen(sbx);
++ if (n < len) {
++ sbx->r += n;
++ } else {
++ sbx->r = sbx->w = sbx->b;
++ }
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_set) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ GCobj *ref;
++ const char *p;
++ MSize len;
++#if LJ_HASFFI
++ if (tviscdata(L->base+1)) {
++ CTState *cts = ctype_cts(L);
++ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
++ L->base+1, CCF_ARG(2));
++ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
++ } else
++#endif
++ {
++ GCstr *str = lj_lib_checkstrx(L, 2);
++ p = strdata(str);
++ len = str->len;
++ }
++ lj_bufx_free(L, sbx);
++ lj_bufx_set_cow(L, sbx, p, len);
++ ref = gcV(L->base+1);
++ setgcref(sbx->cowref, ref);
++ lj_gc_objbarrier(L, buffer_toudata(sbx), ref);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_put) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ ptrdiff_t arg, narg = L->top - L->base;
++ for (arg = 1; arg < narg; arg++) {
++ cTValue *o = &L->base[arg], *mo = NULL;
++ retry:
++ if (tvisstr(o)) {
++ lj_buf_putstr((SBuf *)sbx, strV(o));
++ } else if (tvisint(o)) {
++ lj_strfmt_putint((SBuf *)sbx, intV(o));
++ } else if (tvisnum(o)) {
++ lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o));
++ } else if (tvisbuf(o)) {
++ SBufExt *sbx2 = bufV(o);
++ if (sbx2 == sbx) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
++ lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2));
++ } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
++ /* Call __tostring metamethod inline. */
++ copyTV(L, L->top++, mo);
++ copyTV(L, L->top++, o);
++ lua_call(L, 1, 1);
++ o = &L->base[arg]; /* The stack may have been reallocated. */
++ copyTV(L, &L->base[arg], L->top-1);
++ L->top = L->base + narg;
++ goto retry; /* Retry with the result. */
++ } else {
++ lj_err_argtype(L, arg+1, "string/number/__tostring");
++ }
++ /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */
++ }
++ L->top = L->base+1; /* Chain buffer object. */
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method_putf) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
++ L->top = L->base+1; /* Chain buffer object. */
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method_get) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ ptrdiff_t arg, narg = L->top - L->base;
++ if (narg == 1) {
++ narg++;
++ setnilV(L->top++); /* get() is the same as get(nil). */
++ }
++ for (arg = 1; arg < narg; arg++) {
++ TValue *o = &L->base[arg];
++ MSize n = tvisnil(o) ? LJ_MAX_BUF :
++ (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF);
++ MSize len = sbufxlen(sbx);
++ if (n > len) n = len;
++ setstrV(L, o, lj_str_new(L, sbx->r, n));
++ sbx->r += n;
++ }
++ if (sbx->r == sbx->w) sbx->r = sbx->w = sbx->b;
++ lj_gc_check(L);
++ return narg-1;
++}
++
++#if LJ_HASFFI
++LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ const char *p;
++ MSize len;
++ if (tviscdata(L->base+1)) {
++ CTState *cts = ctype_cts(L);
++ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
++ L->base+1, CCF_ARG(2));
++ } else {
++ lj_err_argtype(L, 2, "cdata");
++ }
++ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
++ lj_buf_putmem((SBuf *)sbx, p, len);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_reserve) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
++ GCcdata *cd;
++ lj_buf_more((SBuf *)sbx, sz);
++ ctype_loadffi(L);
++ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
++ *(void **)cdataptr(cd) = sbx->w;
++ setcdataV(L, L->top++, cd);
++ setintV(L->top++, sbufleft(sbx));
++ return 2;
++}
++
++LJLIB_CF(buffer_method_commit) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
++ if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG);
++ sbx->w += len;
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_ref) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ GCcdata *cd;
++ ctype_loadffi(L);
++ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
++ *(void **)cdataptr(cd) = sbx->r;
++ setcdataV(L, L->top++, cd);
++ setintV(L->top++, sbufxlen(sbx));
++ return 2;
++}
++#endif
++
++LJLIB_CF(buffer_method_encode) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ cTValue *o = lj_lib_checkany(L, 2);
++ lj_serialize_put(sbx, o);
++ lj_gc_check(L);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_decode) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ setnilV(L->top++);
++ sbx->r = lj_serialize_get(sbx, L->top-1);
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method___gc)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ lj_bufx_free(L, sbx);
++ return 0;
++}
++
++LJLIB_CF(buffer_method___tostring) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method___len) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ setintV(L->top-1, (int32_t)sbufxlen(sbx));
++ return 1;
++}
++
++LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
++LJLIB_PUSH(top-1) LJLIB_SET(__index)
++
++/* -- Buffer library functions -------------------------------------------- */
++
++#define LJLIB_MODULE_buffer
++
++LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
++
++LJLIB_CF(buffer_new)
++{
++ MSize sz = 0;
++ int targ = 1;
++ GCtab *env, *dict_str = NULL, *dict_mt = NULL;
++ GCudata *ud;
++ SBufExt *sbx;
++ if (L->base < L->top && !tvistab(L->base)) {
++ targ = 2;
++ if (!tvisnil(L->base))
++ sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
++ }
++ if (L->base+targ-1 < L->top) {
++ GCtab *options = lj_lib_checktab(L, targ);
++ cTValue *opt_dict, *opt_mt;
++ opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
++ if (opt_dict && tvistab(opt_dict)) {
++ dict_str = tabV(opt_dict);
++ lj_serialize_dict_prep_str(L, dict_str);
++ }
++ opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
++ if (opt_mt && tvistab(opt_mt)) {
++ dict_mt = tabV(opt_mt);
++ lj_serialize_dict_prep_mt(L, dict_mt);
++ }
++ }
++ env = tabref(curr_func(L)->c.env);
++ ud = lj_udata_new(L, sizeof(SBufExt), env);
++ ud->udtype = UDTYPE_BUFFER;
++ /* NOBARRIER: The GCudata is new (marked white). */
++ setgcref(ud->metatable, obj2gco(env));
++ setudataV(L, L->top++, ud);
++ sbx = (SBufExt *)uddata(ud);
++ lj_bufx_init(L, sbx);
++ setgcref(sbx->dict_str, obj2gco(dict_str));
++ setgcref(sbx->dict_mt, obj2gco(dict_mt));
++ if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
++ return 1;
++}
++
++LJLIB_CF(buffer_encode) LJLIB_REC(.)
++{
++ cTValue *o = lj_lib_checkany(L, 1);
++ setstrV(L, L->top++, lj_serialize_encode(L, o));
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_decode) LJLIB_REC(.)
++{
++ GCstr *str = lj_lib_checkstrx(L, 1);
++ setnilV(L->top++);
++ lj_serialize_decode(L, L->top-1, str);
++ return 1;
++}
++
++/* ------------------------------------------------------------------------ */
++
++#include "lj_libdef.h"
++
++int luaopen_string_buffer(lua_State *L)
++{
++ LJ_LIB_REG(L, NULL, buffer_method);
++ lua_getfield(L, -1, "__tostring");
++ lua_setfield(L, -2, "tostring");
++ LJ_LIB_REG(L, NULL, buffer);
++ return 1;
++}
++
++#endif
+diff --git a/src/lib_debug.c b/src/lib_debug.c
+index f112b5bc..a6acc6f2 100644
+--- a/src/lib_debug.c
++++ b/src/lib_debug.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Debug library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid)
+ int32_t n = lj_lib_checkint(L, 2) - 1;
+ if ((uint32_t)n >= fn->l.nupvalues)
+ lj_err_arg(L, 2, LJ_ERR_IDXRNG);
+- setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
+- (void *)&fn->c.upvalue[n]);
++ lua_pushlightuserdata(L, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
++ (void *)&fn->c.upvalue[n]);
+ return 1;
+ }
+
+@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
+
+ /* ------------------------------------------------------------------------ */
+
+-#define KEY_HOOK ((void *)0x3004)
++#define KEY_HOOK (U64x(80000000,00000000)|'h')
+
+ static void hookf(lua_State *L, lua_Debug *ar)
+ {
+ static const char *const hooknames[] =
+ {"call", "return", "line", "count", "tail return"};
+- lua_pushlightuserdata(L, KEY_HOOK);
++ (L->top++)->u64 = KEY_HOOK;
+ lua_rawget(L, LUA_REGISTRYINDEX);
+ if (lua_isfunction(L, -1)) {
+ lua_pushstring(L, hooknames[(int)ar->event]);
+@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
+ count = luaL_optint(L, arg+3, 0);
+ func = hookf; mask = makemask(smask, count);
+ }
+- lua_pushlightuserdata(L, KEY_HOOK);
++ (L->top++)->u64 = KEY_HOOK;
+ lua_pushvalue(L, arg+1);
+ lua_rawset(L, LUA_REGISTRYINDEX);
+ lua_sethook(L, func, mask, count);
+@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
+ if (hook != NULL && hook != hookf) { /* external hook? */
+ lua_pushliteral(L, "external hook");
+ } else {
+- lua_pushlightuserdata(L, KEY_HOOK);
++ (L->top++)->u64 = KEY_HOOK;
+ lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */
+ }
+ lua_pushstring(L, unmakemask(mask, buff));
+@@ -369,7 +369,8 @@ LJLIB_CF(debug_debug)
+ return 0;
+ if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") ||
+ lua_pcall(L, 0, 0, 0)) {
+- fputs(lua_tostring(L, -1), stderr);
++ const char *s = lua_tostring(L, -1);
++ fputs(s ? s : "(error object is not a string)", stderr);
+ fputs("\n", stderr);
+ }
+ lua_settop(L, 0); /* remove eventual returns */
+diff --git a/src/lib_ffi.c b/src/lib_ffi.c
+index 136e98e8..b4321048 100644
+--- a/src/lib_ffi.c
++++ b/src/lib_ffi.c
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lib_ffi_c
+@@ -573,6 +573,7 @@ LJLIB_CF(ffi_typeinfo)
+ setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+ if (gcref(ct->name)) {
+ GCstr *s = gco2str(gcref(ct->name));
++ if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s));
+ setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+ }
+ lj_gc_check(L);
+@@ -720,47 +721,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.)
+ return 0;
+ }
+
+-#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
+-
+ /* Test ABI string. */
+ LJLIB_CF(ffi_abi) LJLIB_REC(.)
+ {
+ GCstr *s = lj_lib_checkstr(L, 1);
+- int b = 0;
+- switch (s->hash) {
++ int b = lj_cparse_case(s,
+ #if LJ_64
+- case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */
++ "\00564bit"
+ #else
+- case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */
++ "\00532bit"
+ #endif
+ #if LJ_ARCH_HASFPU
+- case H_(e33ee463,e33ee463): b = 1; break; /* fpu */
++ "\003fpu"
+ #endif
+ #if LJ_ABI_SOFTFP
+- case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */
++ "\006softfp"
+ #else
+- case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */
++ "\006hardfp"
+ #endif
+ #if LJ_ABI_EABI
+- case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */
++ "\004eabi"
+ #endif
+ #if LJ_ABI_WIN
+- case H_(4ab624a8,4ab624a8): b = 1; break; /* win */
++ "\003win"
++#endif
++#if LJ_TARGET_UWP
++ "\003uwp"
++#endif
++#if LJ_LE
++ "\002le"
++#else
++ "\002be"
+ #endif
+- case H_(3af93066,1f001464): b = 1; break; /* le/be */
+ #if LJ_GC64
+- case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */
++ "\004gc64"
+ #endif
+- default:
+- break;
+- }
++ ) >= 0;
+ setboolV(L->top-1, b);
+ setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */
+ return 1;
+ }
+
+-#undef H_
+-
+ LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
+
+ LJLIB_CF(ffi_metatype)
+diff --git a/src/lib_init.c b/src/lib_init.c
+index 2ed370e9..56e0619a 100644
+--- a/src/lib_init.c
++++ b/src/lib_init.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Library initialization.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major parts taken verbatim from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+diff --git a/src/lib_io.c b/src/lib_io.c
+index 9763ed46..b9d8cc75 100644
+--- a/src/lib_io.c
++++ b/src/lib_io.c
+@@ -1,6 +1,6 @@
+ /*
+ ** I/O library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -60,12 +60,12 @@ static IOFileUD *io_tofile(lua_State *L)
+ return iof;
+ }
+
+-static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
++static IOFileUD *io_stdfile(lua_State *L, ptrdiff_t id)
+ {
+ IOFileUD *iof = IOSTDF_IOF(L, id);
+ if (iof->fp == NULL)
+ lj_err_caller(L, LJ_ERR_IOSTDCL);
+- return iof->fp;
++ return iof;
+ }
+
+ static IOFileUD *io_file_new(lua_State *L)
+@@ -99,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
+ int stat = -1;
+ #if LJ_TARGET_POSIX
+ stat = pclose(iof->fp);
+-#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
++#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
+ stat = _pclose(iof->fp);
+-#else
+- lua_assert(0);
+- return 0;
+ #endif
+ #if LJ_52
+ iof->fp = NULL;
+@@ -112,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
+ ok = (stat != -1);
+ #endif
+ } else {
+- lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
++ lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF,
++ "close of unknown FILE* type");
+ setnilV(L->top++);
+ lua_pushliteral(L, "cannot close standard file");
+ return 2;
+@@ -180,7 +178,7 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
+ MSize n = (MSize)fread(buf, 1, m, fp);
+ setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+ lj_gc_check(L);
+- return (n > 0 || m == 0);
++ return n > 0;
+ } else {
+ int c = getc(fp);
+ ungetc(c, fp);
+@@ -189,8 +187,9 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
+ }
+ }
+
+-static int io_file_read(lua_State *L, FILE *fp, int start)
++static int io_file_read(lua_State *L, IOFileUD *iof, int start)
+ {
++ FILE *fp = iof->fp;
+ int ok, n, nargs = (int)(L->top - L->base) - start;
+ clearerr(fp);
+ if (nargs == 0) {
+@@ -226,8 +225,9 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
+ return n - start;
+ }
+
+-static int io_file_write(lua_State *L, FILE *fp, int start)
++static int io_file_write(lua_State *L, IOFileUD *iof, int start)
+ {
++ FILE *fp = iof->fp;
+ cTValue *tv;
+ int status = 1;
+ for (tv = L->base+start; tv < L->top; tv++) {
+@@ -255,13 +255,11 @@ static int io_file_iter(lua_State *L)
+ lj_err_caller(L, LJ_ERR_IOCLFL);
+ L->top = L->base;
+ if (n) { /* Copy upvalues with options to stack. */
+- if (n > LUAI_MAXCSTACK)
+- lj_err_caller(L, LJ_ERR_STKOV);
+ lj_state_checkstack(L, (MSize)n);
+ memcpy(L->top, &fn->c.upvalue[1], n*sizeof(TValue));
+ L->top += n;
+ }
+- n = io_file_read(L, iof->fp, 0);
++ n = io_file_read(L, iof, 0);
+ if (ferror(iof->fp))
+ lj_err_callermsg(L, strVdata(L->top-2));
+ if (tvisnil(L->base) && (iof->type & IOFILE_FLAG_CLOSE)) {
+@@ -286,19 +284,25 @@ static int io_file_lines(lua_State *L)
+
+ LJLIB_CF(io_method_close)
+ {
+- IOFileUD *iof = L->base < L->top ? io_tofile(L) :
+- IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
++ IOFileUD *iof;
++ if (L->base < L->top) {
++ iof = io_tofile(L);
++ } else {
++ iof = IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
++ if (iof->fp == NULL)
++ lj_err_caller(L, LJ_ERR_IOCLFL);
++ }
+ return io_file_close(L, iof);
+ }
+
+ LJLIB_CF(io_method_read)
+ {
+- return io_file_read(L, io_tofile(L)->fp, 1);
++ return io_file_read(L, io_tofile(L), 1);
+ }
+
+ LJLIB_CF(io_method_write) LJLIB_REC(io_write 0)
+ {
+- return io_file_write(L, io_tofile(L)->fp, 1);
++ return io_file_write(L, io_tofile(L), 1);
+ }
+
+ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
+@@ -306,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
+ return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
+ }
+
++#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24
++/* The Android NDK is such an unmatched marvel of engineering. */
++extern int fseeko32(FILE *, long int, int) __asm__("fseeko");
++extern long int ftello32(FILE *) __asm__("ftello");
++#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence)))
++#define ftello(fp) (ftello32((fp)))
++#endif
++
+ LJLIB_CF(io_method_seek)
+ {
+ FILE *fp = io_tofile(L)->fp;
+@@ -406,7 +418,7 @@ LJLIB_CF(io_open)
+
+ LJLIB_CF(io_popen)
+ {
+-#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
++#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ GCstr *s = lj_lib_optstr(L, 2);
+ const char *mode = s ? strdata(s) : "r";
+@@ -452,7 +464,7 @@ LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT)
+
+ LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
+ {
+- return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
++ return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)->fp) == 0, NULL);
+ }
+
+ static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
+diff --git a/src/lib_jit.c b/src/lib_jit.c
+index 22ca0a1a..817c2967 100644
+--- a/src/lib_jit.c
++++ b/src/lib_jit.c
+@@ -1,6 +1,6 @@
+ /*
+ ** JIT library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lib_jit_c
+@@ -104,8 +104,8 @@ LJLIB_CF(jit_status)
+ jit_State *J = L2J(L);
+ L->top = L->base;
+ setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
+- flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
+- flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
++ flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
++ flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
+ return (int)(L->top - L->base);
+ #else
+ setboolV(L->top++, 0);
+@@ -113,6 +113,13 @@ LJLIB_CF(jit_status)
+ #endif
+ }
+
++LJLIB_CF(jit_security)
++{
++ int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING);
++ setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3));
++ return 1;
++}
++
+ LJLIB_CF(jit_attach)
+ {
+ #ifdef LUAJIT_DISABLE_VMEVENT
+@@ -227,7 +234,7 @@ LJLIB_CF(jit_util_funcbc)
+ if (pc < pt->sizebc) {
+ BCIns ins = proto_bc(pt)[pc];
+ BCOp op = bc_op(ins);
+- lua_assert(op < BC__MAX);
++ lj_assertL(op < BC__MAX, "bad bytecode op %d", op);
+ setintV(L->top, ins);
+ setintV(L->top+1, lj_bc_mode[op]);
+ L->top += 2;
+@@ -339,11 +346,7 @@ LJLIB_CF(jit_util_tracek)
+ ir = &T->ir[ir->op1];
+ }
+ #if LJ_HASFFI
+- if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
+- ptrdiff_t oldtop = savestack(L, L->top);
+- luaopen_ffi(L); /* Load FFI library on-demand. */
+- L->top = restorestack(L, oldtop);
+- }
++ if (ir->o == IR_KINT64) ctype_loadffi(L);
+ #endif
+ lj_ir_kvalue(L, L->top-2, ir);
+ setintV(L->top-1, (int32_t)irt_type(ir->t));
+@@ -471,7 +474,7 @@ static int jitopt_flag(jit_State *J, const char *str)
+ str += str[2] == '-' ? 3 : 2;
+ set = 0;
+ }
+- for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
++ for (opt = JIT_F_OPT; ; opt <<= 1) {
+ size_t len = *(const uint8_t *)lst;
+ if (len == 0)
+ break;
+@@ -491,7 +494,7 @@ static int jitopt_param(jit_State *J, const char *str)
+ int i;
+ for (i = 0; i < JIT_P__MAX; i++) {
+ size_t len = *(const uint8_t *)lst;
+- lua_assert(len != 0);
++ lj_assertJ(len != 0, "bad JIT_P_STRING");
+ if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
+ int32_t n = 0;
+ const char *p = &str[len+1];
+@@ -540,15 +543,15 @@ LJLIB_CF(jit_opt_start)
+
+ /* Not loaded by default, use: local profile = require("jit.profile") */
+
+-static const char KEY_PROFILE_THREAD = 't';
+-static const char KEY_PROFILE_FUNC = 'f';
++#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t')
++#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f')
+
+ static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+ int vmstate)
+ {
+ TValue key;
+ cTValue *tv;
+- setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
++ key.u64 = KEY_PROFILE_FUNC;
+ tv = lj_tab_get(L, tabV(registry(L)), &key);
+ if (tvisfunc(tv)) {
+ char vmst = (char)vmstate;
+@@ -575,9 +578,9 @@ LJLIB_CF(jit_profile_start)
+ lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
+ TValue key;
+ /* Anchor thread and function in registry. */
+- setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
++ key.u64 = KEY_PROFILE_THREAD;
+ setthreadV(L, lj_tab_set(L, registry, &key), L2);
+- setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
++ key.u64 = KEY_PROFILE_FUNC;
+ setfuncV(L, lj_tab_set(L, registry, &key), func);
+ lj_gc_anybarriert(L, registry);
+ luaJIT_profile_start(L, mode ? strdata(mode) : "",
+@@ -592,9 +595,9 @@ LJLIB_CF(jit_profile_stop)
+ TValue key;
+ luaJIT_profile_stop(L);
+ registry = tabV(registry(L));
+- setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
++ key.u64 = KEY_PROFILE_THREAD;
+ setnilV(lj_tab_set(L, registry, &key));
+- setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
++ key.u64 = KEY_PROFILE_FUNC;
+ setnilV(lj_tab_set(L, registry, &key));
+ lj_gc_anybarriert(L, registry);
+ return 0;
+@@ -640,59 +643,41 @@ JIT_PARAMDEF(JIT_PARAMINIT)
+ #undef JIT_PARAMINIT
+ 0
+ };
+-#endif
+
+ #if LJ_TARGET_ARM && LJ_TARGET_LINUX
+ #include <sys/utsname.h>
+ #endif
+
+-/* Arch-dependent CPU detection. */
+-static uint32_t jit_cpudetect(lua_State *L)
++/* Arch-dependent CPU feature detection. */
++static uint32_t jit_cpudetect(void)
+ {
+ uint32_t flags = 0;
+ #if LJ_TARGET_X86ORX64
++
+ uint32_t vendor[4];
+ uint32_t features[4];
+ if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
+-#if !LJ_HASJIT
+-#define JIT_F_SSE2 2
+-#endif
+- flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
+-#if LJ_HASJIT
+ flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
+ flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
+- if (vendor[2] == 0x6c65746e) { /* Intel. */
+- if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
+- flags |= JIT_F_LEA_AGU;
+- } else if (vendor[2] == 0x444d4163) { /* AMD. */
+- uint32_t fam = (features[0] & 0x0ff00f00);
+- if (fam >= 0x00000f00) /* K8, K10. */
+- flags |= JIT_F_PREFER_IMUL;
+- }
+ if (vendor[0] >= 7) {
+ uint32_t xfeatures[4];
+ lj_vm_cpuid(7, xfeatures);
+ flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
+ }
+-#endif
+ }
+- /* Check for required instruction set support on x86 (unnecessary on x64). */
+-#if LJ_TARGET_X86
+- if (!(flags & JIT_F_SSE2))
+- luaL_error(L, "CPU with SSE2 required");
+-#endif
++ /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
++
+ #elif LJ_TARGET_ARM
+-#if LJ_HASJIT
++
+ int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
+ #if LJ_TARGET_LINUX
+ if (ver < 70) { /* Runtime ARM CPU detection. */
+ struct utsname ut;
+ uname(&ut);
+ if (strncmp(ut.machine, "armv", 4) == 0) {
+- if (ut.machine[4] >= '7')
+- ver = 70;
+- else if (ut.machine[4] == '6')
+- ver = 60;
++ if (ut.machine[4] >= '8') ver = 80;
++ else if (ut.machine[4] == '7') ver = 70;
++ else if (ut.machine[4] == '6') ver = 60;
+ }
+ }
+ #endif
+@@ -700,20 +685,22 @@ static uint32_t jit_cpudetect(lua_State *L)
+ ver >= 61 ? JIT_F_ARMV6T2_ :
+ ver >= 60 ? JIT_F_ARMV6_ : 0;
+ flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
+-#endif
++
+ #elif LJ_TARGET_ARM64
++
+ /* No optional CPU features to detect (for now). */
++
+ #elif LJ_TARGET_PPC
+-#if LJ_HASJIT
++
+ #if LJ_ARCH_SQRT
+ flags |= JIT_F_SQRT;
+ #endif
+ #if LJ_ARCH_ROUND
+ flags |= JIT_F_ROUND;
+ #endif
+-#endif
++
+ #elif LJ_TARGET_MIPS
+-#if LJ_HASJIT
++
+ /* Compile-time MIPS CPU detection. */
+ #if LJ_ARCH_VERSION >= 20
+ flags |= JIT_F_MIPSXXR2;
+@@ -731,31 +718,28 @@ static uint32_t jit_cpudetect(lua_State *L)
+ if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
+ }
+ #endif
+-#endif
++
+ #else
+ #error "Missing CPU detection for this architecture"
+ #endif
+- UNUSED(L);
+ return flags;
+ }
+
+ /* Initialize JIT compiler. */
+ static void jit_init(lua_State *L)
+ {
+- uint32_t flags = jit_cpudetect(L);
+-#if LJ_HASJIT
+ jit_State *J = L2J(L);
+- J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
++ J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
+ memcpy(J->param, jit_param_default, sizeof(J->param));
+ lj_dispatch_update(G(L));
+-#else
+- UNUSED(flags);
+-#endif
+ }
++#endif
+
+ LUALIB_API int luaopen_jit(lua_State *L)
+ {
++#if LJ_HASJIT
+ jit_init(L);
++#endif
+ lua_pushliteral(L, LJ_OS_NAME);
+ lua_pushliteral(L, LJ_ARCH_NAME);
+ lua_pushinteger(L, LUAJIT_VERSION_NUM);
+diff --git a/src/lib_math.c b/src/lib_math.c
+index ef9dda2d..e9a0b597 100644
+--- a/src/lib_math.c
++++ b/src/lib_math.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Math library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include <math.h>
+@@ -15,6 +15,7 @@
+ #include "lj_obj.h"
+ #include "lj_lib.h"
+ #include "lj_vm.h"
++#include "lj_prng.h"
+
+ /* ------------------------------------------------------------------------ */
+
+@@ -33,19 +34,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT)
+ lj_lib_checknum(L, 1);
+ return FFH_RETRY;
+ }
+-LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10)
+-LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP)
+-LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN)
+-LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS)
+-LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
+-LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
+-LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
+-LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
+-LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh)
+-LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh)
+-LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
++LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10)
++LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp)
++LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin)
++LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos)
++LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan)
++LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin)
++LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos)
++LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan)
++LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh)
++LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh)
++LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh)
+ LJLIB_ASM_(math_frexp)
+-LJLIB_ASM_(math_modf) LJLIB_REC(.)
++LJLIB_ASM_(math_modf)
+
+ LJLIB_ASM(math_log) LJLIB_REC(math_log)
+ {
+@@ -105,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge)
+ ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
+ */
+
+-/* PRNG state. */
+-struct RandomState {
+- uint64_t gen[4]; /* State of the 4 LFSR generators. */
+- int valid; /* State is valid. */
+-};
+-
+ /* Union needed for bit-pattern conversion between uint64_t and double. */
+ typedef union { uint64_t u64; double d; } U64double;
+
+-/* Update generator i and compute a running xor of all states. */
+-#define TW223_GEN(i, k, q, s) \
+- z = rs->gen[i]; \
+- z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
+- r ^= z; rs->gen[i] = z;
+-
+-/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
+-LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
+-{
+- uint64_t z, r = 0;
+- TW223_GEN(0, 63, 31, 18)
+- TW223_GEN(1, 58, 19, 28)
+- TW223_GEN(2, 55, 24, 7)
+- TW223_GEN(3, 47, 21, 8)
+- return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
+-}
+-
+-/* PRNG initialization function. */
+-static void random_init(RandomState *rs, double d)
++/* PRNG seeding function. */
++static void random_seed(PRNGState *rs, double d)
+ {
+ uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
+ int i;
+@@ -141,24 +119,22 @@ static void random_init(RandomState *rs, double d)
+ uint32_t m = 1u << (r&255);
+ r >>= 8;
+ u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
+- if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
+- rs->gen[i] = u.u64;
++ if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */
++ rs->u[i] = u.u64;
+ }
+- rs->valid = 1;
+ for (i = 0; i < 10; i++)
+- lj_math_random_step(rs);
++ (void)lj_prng_u64(rs);
+ }
+
+ /* PRNG extract function. */
+-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
++LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
+ LJLIB_CF(math_random) LJLIB_REC(.)
+ {
+ int n = (int)(L->top - L->base);
+- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
++ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ U64double u;
+ double d;
+- if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
+- u.u64 = lj_math_random_step(rs);
++ u.u64 = lj_prng_u64d(rs);
+ d = u.d - 1.0;
+ if (n > 0) {
+ #if LJ_DUALNUM
+@@ -203,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.)
+ }
+
+ /* PRNG seed function. */
+-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
++LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
+ LJLIB_CF(math_randomseed)
+ {
+- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+- random_init(rs, lj_lib_checknum(L, 1));
++ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
++ random_seed(rs, lj_lib_checknum(L, 1));
+ return 0;
+ }
+
+@@ -217,9 +193,8 @@ LJLIB_CF(math_randomseed)
+
+ LUALIB_API int luaopen_math(lua_State *L)
+ {
+- RandomState *rs;
+- rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
+- rs->valid = 0; /* Use lazy initialization to save some time on startup. */
++ PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState));
++ lj_prng_seed_fixed(rs);
+ LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
+ return 1;
+ }
+diff --git a/src/lib_os.c b/src/lib_os.c
+index 9e78d49a..f19b831c 100644
+--- a/src/lib_os.c
++++ b/src/lib_os.c
+@@ -1,6 +1,6 @@
+ /*
+ ** OS library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -205,12 +205,12 @@ LJLIB_CF(os_date)
+ setboolfield(L, "isdst", stm->tm_isdst);
+ } else if (*s) {
+ SBuf *sb = &G(L)->tmpbuf;
+- MSize sz = 0;
++ MSize sz = 0, retry = 4;
+ const char *q;
+ for (q = s; *q; q++)
+ sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
+ setsbufL(sb, L);
+- for (;;) {
++ while (retry--) { /* Limit growth for invalid format or empty result. */
+ char *buf = lj_buf_need(sb, sz);
+ size_t len = strftime(buf, sbufsz(sb), s, stm);
+ if (len) {
+diff --git a/src/lib_package.c b/src/lib_package.c
+index 6fac43ec..2068a098 100644
+--- a/src/lib_package.c
++++ b/src/lib_package.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Package library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
+ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
+ #endif
+
++#if LJ_TARGET_UWP
++void *LJ_WIN_LOADLIBA(const char *path)
++{
++ DWORD err = GetLastError();
++ wchar_t wpath[256];
++ HANDLE lib = NULL;
++ if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
++ lib = LoadPackagedLibrary(wpath, 0);
++ }
++ SetLastError(err);
++ return lib;
++}
++#endif
++
+ #undef setprogdir
+
+ static void setprogdir(lua_State *L)
+@@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib)
+
+ static void *ll_load(lua_State *L, const char *path, int gl)
+ {
+- HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
++ HINSTANCE lib = LJ_WIN_LOADLIBA(path);
+ if (lib == NULL) pusherror(L);
+ UNUSED(gl);
+ return lib;
+@@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+ return f;
+ }
+
++#if LJ_TARGET_UWP
++EXTERN_C IMAGE_DOS_HEADER __ImageBase;
++#endif
++
+ static const char *ll_bcsym(void *lib, const char *sym)
+ {
+ if (lib) {
+ return (const char *)GetProcAddress((HINSTANCE)lib, sym);
+ } else {
++#if LJ_TARGET_UWP
++ return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
++#else
+ HINSTANCE h = GetModuleHandleA(NULL);
+ const char *p = (const char *)GetProcAddress(h, sym);
+ if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ (const char *)ll_bcsym, &h))
+ p = (const char *)GetProcAddress(h, sym);
+ return p;
++#endif
+ }
+ }
+
+@@ -215,7 +237,12 @@ static const char *mksymname(lua_State *L, const char *modname,
+
+ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
+ {
+- void **reg = ll_register(L, path);
++ void **reg;
++ if (strlen(path) >= 4096) {
++ lua_pushliteral(L, "path too long");
++ return PACKAGE_ERR_LIB;
++ }
++ reg = ll_register(L, path);
+ if (*reg == NULL) *reg = ll_load(L, path, (*name == '*'));
+ if (*reg == NULL) {
+ return PACKAGE_ERR_LIB; /* Unable to load library. */
+@@ -233,7 +260,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
+ const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
+ lua_pop(L, 1);
+ if (bcdata) {
+- if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
++ if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+ return PACKAGE_ERR_LOAD;
+ return 0;
+ }
+@@ -390,7 +417,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
+ if (lua_isnil(L, -1)) { /* Not found? */
+ const char *bcname = mksymname(L, name, SYMPREFIX_BC);
+ const char *bcdata = ll_bcsym(NULL, bcname);
+- if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
++ if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+ lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
+ }
+ return 1;
+@@ -398,7 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
+
+ /* ------------------------------------------------------------------------ */
+
+-#define sentinel ((void *)0x4004)
++#define KEY_SENTINEL (U64x(80000000,00000000)|'s')
+
+ static int lj_cf_package_require(lua_State *L)
+ {
+@@ -408,7 +435,7 @@ static int lj_cf_package_require(lua_State *L)
+ lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+ lua_getfield(L, 2, name);
+ if (lua_toboolean(L, -1)) { /* is it there? */
+- if (lua_touserdata(L, -1) == sentinel) /* check loops */
++ if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */
+ luaL_error(L, "loop or previous error loading module " LUA_QS, name);
+ return 1; /* package is already loaded */
+ }
+@@ -431,14 +458,14 @@ static int lj_cf_package_require(lua_State *L)
+ else
+ lua_pop(L, 1);
+ }
+- lua_pushlightuserdata(L, sentinel);
++ (L->top++)->u64 = KEY_SENTINEL;
+ lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
+ lua_pushstring(L, name); /* pass name as argument to module */
+ lua_call(L, 1, 1); /* run loaded module */
+ if (!lua_isnil(L, -1)) /* non-nil return? */
+ lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
+ lua_getfield(L, 2, name);
+- if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */
++ if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */
+ lua_pushboolean(L, 1); /* use true as result */
+ lua_pushvalue(L, -1); /* extra copy to be returned */
+ lua_setfield(L, 2, name); /* _LOADED[name] = true */
+diff --git a/src/lib_string.c b/src/lib_string.c
+index 76b0730a..75d855d6 100644
+--- a/src/lib_string.c
++++ b/src/lib_string.c
+@@ -1,6 +1,6 @@
+ /*
+ ** String library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -136,7 +136,7 @@ LJLIB_CF(string_dump)
+ /* ------------------------------------------------------------------------ */
+
+ /* macro to `unsign' a character */
+-#define uchar(c) ((unsigned char)(c))
++#define uchar(c) ((unsigned char)(c))
+
+ #define CAP_UNFINISHED (-1)
+ #define CAP_POSITION (-2)
+@@ -640,89 +640,14 @@ LJLIB_CF(string_gsub)
+
+ /* ------------------------------------------------------------------------ */
+
+-/* Emulate tostring() inline. */
+-static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
+-{
+- TValue *o = L->base+arg-1;
+- cTValue *mo;
+- lua_assert(o < L->top); /* Caller already checks for existence. */
+- if (LJ_LIKELY(tvisstr(o)))
+- return strV(o);
+- if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+- copyTV(L, L->top++, mo);
+- copyTV(L, L->top++, o);
+- lua_call(L, 1, 1);
+- copyTV(L, L->base+arg-1, --L->top);
+- return NULL; /* Buffer may be overwritten, retry. */
+- }
+- return lj_strfmt_obj(L, o);
+-}
+-
+ LJLIB_CF(string_format) LJLIB_REC(.)
+ {
+- int arg, top = (int)(L->top - L->base);
+- GCstr *fmt;
+- SBuf *sb;
+- FormatState fs;
+- SFormat sf;
+ int retry = 0;
+-again:
+- arg = 1;
+- sb = lj_buf_tmp_(L);
+- fmt = lj_lib_checkstr(L, arg);
+- lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+- while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+- if (sf == STRFMT_LIT) {
+- lj_buf_putmem(sb, fs.str, fs.len);
+- } else if (sf == STRFMT_ERR) {
+- lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
+- } else {
+- if (++arg > top)
+- luaL_argerror(L, arg, lj_obj_typename[0]);
+- switch (STRFMT_TYPE(sf)) {
+- case STRFMT_INT:
+- if (tvisint(L->base+arg-1)) {
+- int32_t k = intV(L->base+arg-1);
+- if (sf == STRFMT_INT)
+- lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
+- else
+- lj_strfmt_putfxint(sb, sf, k);
+- } else {
+- lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+- }
+- break;
+- case STRFMT_UINT:
+- if (tvisint(L->base+arg-1))
+- lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
+- else
+- lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
+- break;
+- case STRFMT_NUM:
+- lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
+- break;
+- case STRFMT_STR: {
+- GCstr *str = string_fmt_tostring(L, arg, retry);
+- if (str == NULL)
+- retry = 1;
+- else if ((sf & STRFMT_T_QUOTED))
+- lj_strfmt_putquoted(sb, str); /* No formatting. */
+- else
+- lj_strfmt_putfstr(sb, sf, str);
+- break;
+- }
+- case STRFMT_CHAR:
+- lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+- break;
+- case STRFMT_PTR: /* No formatting. */
+- lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
+- break;
+- default:
+- lua_assert(0);
+- break;
+- }
+- }
+- }
+- if (retry++ == 1) goto again;
++ SBuf *sb;
++ do {
++ sb = lj_buf_tmp_(L);
++ retry = lj_strfmt_putarg(L, sb, 1, -retry);
++ } while (retry > 0);
+ setstrV(L, L->top-1, lj_buf_str(L, sb));
+ lj_gc_check(L);
+ return 1;
+@@ -743,6 +668,9 @@ LUALIB_API int luaopen_string(lua_State *L)
+ setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
+ settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
+ mt->nomm = (uint8_t)(~(1u<<MM_index));
++#if LJ_HASBUFFER
++ lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
++#endif
+ return 1;
+ }
+
+diff --git a/src/lib_table.c b/src/lib_table.c
+index 0450f1f6..0214bb40 100644
+--- a/src/lib_table.c
++++ b/src/lib_table.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Table library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -159,7 +159,7 @@ LJLIB_CF(table_concat) LJLIB_REC(.)
+ SBuf *sb = lj_buf_tmp_(L);
+ SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
+ if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
+- int32_t idx = (int32_t)(intptr_t)sbufP(sb);
++ int32_t idx = (int32_t)(intptr_t)sb->w;
+ cTValue *o = lj_tab_getint(t, idx);
+ lj_err_callerv(L, LJ_ERR_TABCAT,
+ lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
+diff --git a/src/lj.supp b/src/lj.supp
+deleted file mode 100644
+index 217f7c89..00000000
+--- a/src/lj.supp
++++ /dev/null
+@@ -1,41 +0,0 @@
+-# Valgrind suppression file for LuaJIT 2.0.
+-{
+- Optimized string compare
+- Memcheck:Addr4
+- fun:lj_str_cmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr1
+- fun:lj_str_cmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr4
+- fun:lj_str_new
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr1
+- fun:lj_str_new
+-}
+-{
+- Optimized string compare
+- Memcheck:Cond
+- fun:lj_str_new
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr4
+- fun:str_fastcmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr1
+- fun:str_fastcmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Cond
+- fun:str_fastcmp
+-}
+diff --git a/src/lj_alloc.c b/src/lj_alloc.c
+index 95d15d04..165203fa 100644
+--- a/src/lj_alloc.c
++++ b/src/lj_alloc.c
+@@ -6,7 +6,7 @@
+ **
+ ** This is a version (aka dlmalloc) of malloc/free/realloc written by
+ ** Doug Lea and released to the public domain, as explained at
+-** http://creativecommons.org/licenses/publicdomain.
++** https://creativecommons.org/licenses/publicdomain.
+ **
+ ** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee)
+ **
+@@ -16,8 +16,8 @@
+ ** If you want to use dlmalloc in another project, you should get
+ ** the original from: ftp://gee.cs.oswego.edu/pub/misc/
+ ** For thread-safe derivatives, take a look at:
+-** - ptmalloc: http://www.malloc.de/
+-** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/
++** - ptmalloc: https://www.malloc.de/
++** - nedmalloc: https://www.nedprod.com/programs/portable/nedmalloc/
+ */
+
+ #define lj_alloc_c
+@@ -31,6 +31,7 @@
+ #include "lj_def.h"
+ #include "lj_arch.h"
+ #include "lj_alloc.h"
++#include "lj_prng.h"
+
+ #ifndef LUAJIT_USE_SYSMALLOC
+
+@@ -123,7 +124,7 @@
+
+ #if LJ_ALLOC_NTAVM
+ /* Undocumented, but hey, that's what we all love so much about Windows. */
+-typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
++typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits,
+ size_t *size, ULONG alloctype, ULONG prot);
+ static PNTAVM ntavm;
+
+@@ -140,7 +141,7 @@ static void init_mmap(void)
+ #define INIT_MMAP() init_mmap()
+
+ /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
+-static void *CALL_MMAP(size_t size)
++static void *mmap_plain(size_t size)
+ {
+ DWORD olderr = GetLastError();
+ void *ptr = NULL;
+@@ -151,7 +152,7 @@ static void *CALL_MMAP(size_t size)
+ }
+
+ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+-static void *DIRECT_MMAP(size_t size)
++static void *direct_mmap(size_t size)
+ {
+ DWORD olderr = GetLastError();
+ void *ptr = NULL;
+@@ -164,26 +165,29 @@ static void *DIRECT_MMAP(size_t size)
+ #else
+
+ /* Win32 MMAP via VirtualAlloc */
+-static void *CALL_MMAP(size_t size)
++static void *mmap_plain(size_t size)
+ {
+ DWORD olderr = GetLastError();
+- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
++ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ SetLastError(olderr);
+ return ptr ? ptr : MFAIL;
+ }
+
+ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+-static void *DIRECT_MMAP(size_t size)
++static void *direct_mmap(size_t size)
+ {
+ DWORD olderr = GetLastError();
+- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+- PAGE_READWRITE);
++ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
++ PAGE_READWRITE);
+ SetLastError(olderr);
+ return ptr ? ptr : MFAIL;
+ }
+
+ #endif
+
++#define CALL_MMAP(prng, size) mmap_plain(size)
++#define DIRECT_MMAP(prng, size) direct_mmap(size)
++
+ /* This function supports releasing coalesed segments */
+ static int CALL_MUNMAP(void *ptr, size_t size)
+ {
+@@ -226,36 +230,17 @@ static int CALL_MUNMAP(void *ptr, size_t size)
+
+ #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
+
+-/* No point in a giant ifdef mess. Just try to open /dev/urandom.
+-** It doesn't really matter if this fails, since we get some ASLR bits from
+-** every unsuitable allocation, too. And we prefer linear allocation, anyway.
+-*/
+-#include <fcntl.h>
+-#include <unistd.h>
+-
+-static uintptr_t mmap_probe_seed(void)
+-{
+- uintptr_t val;
+- int fd = open("/dev/urandom", O_RDONLY);
+- if (fd != -1) {
+- int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
+- (void)close(fd);
+- if (ok) return val;
+- }
+- return 1; /* Punt. */
+-}
+-
+-static void *mmap_probe(size_t size)
++static void *mmap_probe(PRNGState *rs, size_t size)
+ {
+ /* Hint for next allocation. Doesn't need to be thread-safe. */
+ static uintptr_t hint_addr = 0;
+- static uintptr_t hint_prng = 0;
+ int olderr = errno;
+ int retry;
+ for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
+ void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
+ uintptr_t addr = (uintptr_t)p;
+- if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) {
++ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
++ ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
+ /* We got a suitable address. Bump the hint address. */
+ hint_addr = addr + size;
+ errno = olderr;
+@@ -280,15 +265,8 @@ static void *mmap_probe(size_t size)
+ }
+ }
+ /* Finally, try pseudo-random probing. */
+- if (LJ_UNLIKELY(hint_prng == 0)) {
+- hint_prng = mmap_probe_seed();
+- }
+- /* The unsuitable address we got has some ASLR PRNG bits. */
+- hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
+- do { /* The PRNG itself is very weak, but see above. */
+- hint_prng = hint_prng * 1103515245 + 12345;
+- hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
+- hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
++ do {
++ hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE);
+ } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
+ }
+ errno = olderr;
+@@ -299,18 +277,22 @@ static void *mmap_probe(size_t size)
+
+ #if LJ_ALLOC_MMAP32
+
+-#if defined(__sun__)
++#if LJ_TARGET_SOLARIS
+ #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
+ #else
+ #define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
+ #endif
+
++#if LJ_ALLOC_MMAP_PROBE
++static void *mmap_map32(PRNGState *rs, size_t size)
++#else
+ static void *mmap_map32(size_t size)
++#endif
+ {
+ #if LJ_ALLOC_MMAP_PROBE
+ static int fallback = 0;
+ if (fallback)
+- return mmap_probe(size);
++ return mmap_probe(rs, size);
+ #endif
+ {
+ int olderr = errno;
+@@ -320,7 +302,7 @@ static void *mmap_map32(size_t size)
+ #if LJ_ALLOC_MMAP_PROBE
+ if (ptr == MFAIL) {
+ fallback = 1;
+- return mmap_probe(size);
++ return mmap_probe(rs, size);
+ }
+ #endif
+ return ptr;
+@@ -330,20 +312,25 @@ static void *mmap_map32(size_t size)
+ #endif
+
+ #if LJ_ALLOC_MMAP32
+-#define CALL_MMAP(size) mmap_map32(size)
++#if LJ_ALLOC_MMAP_PROBE
++#define CALL_MMAP(prng, size) mmap_map32(prng, size)
++#else
++#define CALL_MMAP(prng, size) mmap_map32(size)
++#endif
+ #elif LJ_ALLOC_MMAP_PROBE
+-#define CALL_MMAP(size) mmap_probe(size)
++#define CALL_MMAP(prng, size) mmap_probe(prng, size)
+ #else
+-static void *CALL_MMAP(size_t size)
++static void *mmap_plain(size_t size)
+ {
+ int olderr = errno;
+ void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
+ errno = olderr;
+ return ptr;
+ }
++#define CALL_MMAP(prng, size) mmap_plain(size)
+ #endif
+
+-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
++#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+
+ #include <sys/resource.h>
+
+@@ -378,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
+ #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
+ #define CALL_MREMAP_NOMOVE 0
+ #define CALL_MREMAP_MAYMOVE 1
+-#if LJ_64 && !LJ_GC64
++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
+ #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
+ #else
+ #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
+@@ -393,7 +380,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
+ #endif
+
+ #ifndef DIRECT_MMAP
+-#define DIRECT_MMAP(s) CALL_MMAP(s)
++#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s)
+ #endif
+
+ #ifndef CALL_MREMAP
+@@ -552,6 +539,7 @@ struct malloc_state {
+ mchunkptr smallbins[(NSMALLBINS+1)*2];
+ tbinptr treebins[NTREEBINS];
+ msegment seg;
++ PRNGState *prng;
+ };
+
+ typedef struct malloc_state *mstate;
+@@ -609,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss)
+ noncontiguous segments are added.
+ */
+ #define TOP_FOOT_SIZE\
+- (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
++ (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+ /* ---------------------------- Indexing Bins ---------------------------- */
+
+@@ -834,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss)
+
+ /* ----------------------- Direct-mmapping chunks ----------------------- */
+
+-static void *direct_alloc(size_t nb)
++static void *direct_alloc(mstate m, size_t nb)
+ {
+ size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */
+- char *mm = (char *)(DIRECT_MMAP(mmsize));
++ char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize));
+ if (mm != CMFAIL) {
+ size_t offset = align_offset(chunk2mem(mm));
+ size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
+@@ -850,6 +838,7 @@ static void *direct_alloc(size_t nb)
+ return chunk2mem(p);
+ }
+ }
++ UNUSED(m);
+ return NULL;
+ }
+
+@@ -998,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb)
+
+ /* Directly map large chunks */
+ if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
+- void *mem = direct_alloc(nb);
++ void *mem = direct_alloc(m, nb);
+ if (mem != 0)
+ return mem;
+ }
+@@ -1007,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb)
+ size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
+ size_t rsize = granularity_align(req);
+ if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
+- char *mp = (char *)(CALL_MMAP(rsize));
++ char *mp = (char *)(CALL_MMAP(m->prng, rsize));
+ if (mp != CMFAIL) {
+ tbase = mp;
+ tsize = rsize;
+@@ -1234,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb)
+
+ /* ----------------------------------------------------------------------- */
+
+-void *lj_alloc_create(void)
++void *lj_alloc_create(PRNGState *rs)
+ {
+ size_t tsize = DEFAULT_GRANULARITY;
+ char *tbase;
+ INIT_MMAP();
+- tbase = (char *)(CALL_MMAP(tsize));
++ UNUSED(rs);
++ tbase = (char *)(CALL_MMAP(rs, tsize));
+ if (tbase != CMFAIL) {
+ size_t msize = pad_request(sizeof(struct malloc_state));
+ mchunkptr mn;
+@@ -1258,6 +1248,12 @@ void *lj_alloc_create(void)
+ return NULL;
+ }
+
++void lj_alloc_setprng(void *msp, PRNGState *rs)
++{
++ mstate ms = (mstate)msp;
++ ms->prng = rs;
++}
++
+ void lj_alloc_destroy(void *msp)
+ {
+ mstate ms = (mstate)msp;
+diff --git a/src/lj_alloc.h b/src/lj_alloc.h
+index f87a7cf3..669f50b7 100644
+--- a/src/lj_alloc.h
++++ b/src/lj_alloc.h
+@@ -9,7 +9,8 @@
+ #include "lj_def.h"
+
+ #ifndef LUAJIT_USE_SYSMALLOC
+-LJ_FUNC void *lj_alloc_create(void);
++LJ_FUNC void *lj_alloc_create(PRNGState *rs);
++LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs);
+ LJ_FUNC void lj_alloc_destroy(void *msp);
+ LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
+ #endif
+diff --git a/src/lj_api.c b/src/lj_api.c
+index d17a5754..8c60c058 100644
+--- a/src/lj_api.c
++++ b/src/lj_api.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Public Lua/C API.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -28,8 +28,8 @@
+
+ /* -- Common helper functions --------------------------------------------- */
+
+-#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base))
+-#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L))
++#define lj_checkapi_slot(idx) \
++ lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx))
+
+ static TValue *index2adr(lua_State *L, int idx)
+ {
+@@ -37,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx)
+ TValue *o = L->base + (idx - 1);
+ return o < L->top ? o : niltv(L);
+ } else if (idx > LUA_REGISTRYINDEX) {
+- api_check(L, idx != 0 && -idx <= L->top - L->base);
++ lj_checkapi(idx != 0 && -idx <= L->top - L->base,
++ "bad stack slot %d", idx);
+ return L->top + idx;
+ } else if (idx == LUA_GLOBALSINDEX) {
+ TValue *o = &G(L)->tmptv;
+@@ -47,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx)
+ return registry(L);
+ } else {
+ GCfunc *fn = curr_func(L);
+- api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn));
++ lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn),
++ "calling frame is not a C function");
+ if (idx == LUA_ENVIRONINDEX) {
+ TValue *o = &G(L)->tmptv;
+ settabV(L, o, tabref(fn->c.env));
+@@ -59,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx)
+ }
+ }
+
+-static TValue *stkindex2adr(lua_State *L, int idx)
++static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx)
++{
++ TValue *o = index2adr(L, idx);
++ lj_checkapi(o != niltv(L), "invalid stack slot %d", idx);
++ return o;
++}
++
++static TValue *index2adr_stack(lua_State *L, int idx)
+ {
+ if (idx > 0) {
+ TValue *o = L->base + (idx - 1);
++ if (o < L->top) {
++ return o;
++ } else {
++ lj_checkapi(0, "invalid stack slot %d", idx);
++ return niltv(L);
++ }
+ return o < L->top ? o : niltv(L);
+ } else {
+- api_check(L, idx != 0 && -idx <= L->top - L->base);
++ lj_checkapi(idx != 0 && -idx <= L->top - L->base,
++ "invalid stack slot %d", idx);
+ return L->top + idx;
+ }
+ }
+@@ -99,17 +115,17 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
+ lj_err_callerv(L, LJ_ERR_STKOVM, msg);
+ }
+
+-LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
++LUA_API void lua_xmove(lua_State *L, lua_State *to, int n)
+ {
+ TValue *f, *t;
+- if (from == to) return;
+- api_checknelems(from, n);
+- api_check(from, G(from) == G(to));
++ if (L == to) return;
++ lj_checkapi_slot(n);
++ lj_checkapi(G(L) == G(to), "move across global states");
+ lj_state_checkstack(to, (MSize)n);
+- f = from->top;
++ f = L->top;
+ t = to->top = to->top + n;
+ while (--n >= 0) copyTV(to, --t, --f);
+- from->top = f;
++ L->top = f;
+ }
+
+ LUA_API const lua_Number *lua_version(lua_State *L)
+@@ -129,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L)
+ LUA_API void lua_settop(lua_State *L, int idx)
+ {
+ if (idx >= 0) {
+- api_check(L, idx <= tvref(L->maxstack) - L->base);
++ lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx);
+ if (L->base + idx > L->top) {
+ if (L->base + idx >= tvref(L->maxstack))
+ lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
+@@ -138,23 +154,21 @@ LUA_API void lua_settop(lua_State *L, int idx)
+ L->top = L->base + idx;
+ }
+ } else {
+- api_check(L, -(idx+1) <= (L->top - L->base));
++ lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx);
+ L->top += idx+1; /* Shrinks top (idx < 0). */
+ }
+ }
+
+ LUA_API void lua_remove(lua_State *L, int idx)
+ {
+- TValue *p = stkindex2adr(L, idx);
+- api_checkvalidindex(L, p);
++ TValue *p = index2adr_stack(L, idx);
+ while (++p < L->top) copyTV(L, p-1, p);
+ L->top--;
+ }
+
+ LUA_API void lua_insert(lua_State *L, int idx)
+ {
+- TValue *q, *p = stkindex2adr(L, idx);
+- api_checkvalidindex(L, p);
++ TValue *q, *p = index2adr_stack(L, idx);
+ for (q = L->top; q > p; q--) copyTV(L, q, q-1);
+ copyTV(L, p, L->top);
+ }
+@@ -162,19 +176,18 @@ LUA_API void lua_insert(lua_State *L, int idx)
+ static void copy_slot(lua_State *L, TValue *f, int idx)
+ {
+ if (idx == LUA_GLOBALSINDEX) {
+- api_check(L, tvistab(f));
++ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
+ /* NOBARRIER: A thread (i.e. L) is never black. */
+ setgcref(L->env, obj2gco(tabV(f)));
+ } else if (idx == LUA_ENVIRONINDEX) {
+ GCfunc *fn = curr_func(L);
+ if (fn->c.gct != ~LJ_TFUNC)
+ lj_err_msg(L, LJ_ERR_NOENV);
+- api_check(L, tvistab(f));
++ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
+ setgcref(fn->c.env, obj2gco(tabV(f)));
+ lj_gc_barrier(L, fn, f);
+ } else {
+- TValue *o = index2adr(L, idx);
+- api_checkvalidindex(L, o);
++ TValue *o = index2adr_check(L, idx);
+ copyTV(L, o, f);
+ if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
+ lj_gc_barrier(L, curr_func(L), f);
+@@ -183,7 +196,7 @@ static void copy_slot(lua_State *L, TValue *f, int idx)
+
+ LUA_API void lua_replace(lua_State *L, int idx)
+ {
+- api_checknelems(L, 1);
++ lj_checkapi_slot(1);
+ copy_slot(L, L->top - 1, idx);
+ L->top--;
+ }
+@@ -219,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx)
+ #else
+ int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u);
+ #endif
+- lua_assert(tt != LUA_TNIL || tvisnil(o));
++ lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion");
+ return tt;
+ }
+ }
+@@ -595,7 +608,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx)
+ if (tvisudata(o))
+ return uddata(udataV(o));
+ else if (tvislightud(o))
+- return lightudV(o);
++ return lightudV(G(L), o);
+ else
+ return NULL;
+ }
+@@ -608,7 +621,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
+
+ LUA_API const void *lua_topointer(lua_State *L, int idx)
+ {
+- return lj_obj_ptr(index2adr(L, idx));
++ return lj_obj_ptr(G(L), index2adr(L, idx));
+ }
+
+ /* -- Stack setters (object creation) ------------------------------------- */
+@@ -677,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
+ {
+ GCfunc *fn;
+ lj_gc_check(L);
+- api_checknelems(L, n);
++ lj_checkapi_slot(n);
+ fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
+ fn->c.f = f;
+ L->top -= n;
+ while (n--)
+ copyTV(L, &fn->c.upvalue[n], L->top+n);
+ setfuncV(L, L->top, fn);
+- lua_assert(iswhite(obj2gco(fn)));
++ lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white");
+ incr_top(L);
+ }
+
+@@ -696,7 +709,10 @@ LUA_API void lua_pushboolean(lua_State *L, int b)
+
+ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
+ {
+- setlightudV(L->top, checklightudptr(L, p));
++#if LJ_64
++ p = lj_lightud_intern(L, p);
++#endif
++ setrawlightudV(L->top, p);
+ incr_top(L);
+ }
+
+@@ -754,7 +770,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size)
+
+ LUA_API void lua_concat(lua_State *L, int n)
+ {
+- api_checknelems(L, n);
++ lj_checkapi_slot(n);
+ if (n >= 2) {
+ n--;
+ do {
+@@ -780,9 +796,8 @@ LUA_API void lua_concat(lua_State *L, int n)
+
+ LUA_API void lua_gettable(lua_State *L, int idx)
+ {
+- cTValue *v, *t = index2adr(L, idx);
+- api_checkvalidindex(L, t);
+- v = lj_meta_tget(L, t, L->top-1);
++ cTValue *t = index2adr_check(L, idx);
++ cTValue *v = lj_meta_tget(L, t, L->top-1);
+ if (v == NULL) {
+ L->top += 2;
+ lj_vm_call(L, L->top-2, 1+1);
+@@ -794,9 +809,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
+
+ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
+ {
+- cTValue *v, *t = index2adr(L, idx);
++ cTValue *v, *t = index2adr_check(L, idx);
+ TValue key;
+- api_checkvalidindex(L, t);
+ setstrV(L, &key, lj_str_newz(L, k));
+ v = lj_meta_tget(L, t, &key);
+ if (v == NULL) {
+@@ -812,14 +826,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
+ LUA_API void lua_rawget(lua_State *L, int idx)
+ {
+ cTValue *t = index2adr(L, idx);
+- api_check(L, tvistab(t));
++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
+ copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
+ }
+
+ LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
+ {
+ cTValue *v, *t = index2adr(L, idx);
+- api_check(L, tvistab(t));
++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
+ v = lj_tab_getint(tabV(t), n);
+ if (v) {
+ copyTV(L, L->top, v);
+@@ -861,8 +875,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
+
+ LUA_API void lua_getfenv(lua_State *L, int idx)
+ {
+- cTValue *o = index2adr(L, idx);
+- api_checkvalidindex(L, o);
++ cTValue *o = index2adr_check(L, idx);
+ if (tvisfunc(o)) {
+ settabV(L, L->top, tabref(funcV(o)->c.env));
+ } else if (tvisudata(o)) {
+@@ -879,12 +892,14 @@ LUA_API int lua_next(lua_State *L, int idx)
+ {
+ cTValue *t = index2adr(L, idx);
+ int more;
+- api_check(L, tvistab(t));
+- more = lj_tab_next(L, tabV(t), L->top-1);
+- if (more) {
++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
++ more = lj_tab_next(tabV(t), L->top-1, L->top-1);
++ if (more > 0) {
+ incr_top(L); /* Return new key and value slot. */
+- } else { /* End of traversal. */
++ } else if (!more) { /* End of traversal. */
+ L->top--; /* Remove key slot. */
++ } else {
++ lj_err_msg(L, LJ_ERR_NEXTIDX);
+ }
+ return more;
+ }
+@@ -892,7 +907,8 @@ LUA_API int lua_next(lua_State *L, int idx)
+ LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n)
+ {
+ TValue *val;
+- const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val);
++ GCobj *o;
++ const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val, &o);
+ if (name) {
+ copyTV(L, L->top, val);
+ incr_top(L);
+@@ -904,7 +920,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n)
+ {
+ GCfunc *fn = funcV(index2adr(L, idx));
+ n--;
+- api_check(L, (uint32_t)n < fn->l.nupvalues);
++ lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n);
+ return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
+ (void *)&fn->c.upvalue[n];
+ }
+@@ -914,8 +930,10 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
+ GCfunc *fn1 = funcV(index2adr(L, idx1));
+ GCfunc *fn2 = funcV(index2adr(L, idx2));
+ n1--; n2--;
+- api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues);
+- api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues);
++ lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1);
++ lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2);
++ lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1);
++ lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1);
+ setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]);
+ lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
+ }
+@@ -944,9 +962,8 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+ LUA_API void lua_settable(lua_State *L, int idx)
+ {
+ TValue *o;
+- cTValue *t = index2adr(L, idx);
+- api_checknelems(L, 2);
+- api_checkvalidindex(L, t);
++ cTValue *t = index2adr_check(L, idx);
++ lj_checkapi_slot(2);
+ o = lj_meta_tset(L, t, L->top-2);
+ if (o) {
+ /* NOBARRIER: lj_meta_tset ensures the table is not black. */
+@@ -965,9 +982,8 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
+ {
+ TValue *o;
+ TValue key;
+- cTValue *t = index2adr(L, idx);
+- api_checknelems(L, 1);
+- api_checkvalidindex(L, t);
++ cTValue *t = index2adr_check(L, idx);
++ lj_checkapi_slot(1);
+ setstrV(L, &key, lj_str_newz(L, k));
+ o = lj_meta_tset(L, t, &key);
+ if (o) {
+@@ -986,7 +1002,7 @@ LUA_API void lua_rawset(lua_State *L, int idx)
+ {
+ GCtab *t = tabV(index2adr(L, idx));
+ TValue *dst, *key;
+- api_checknelems(L, 2);
++ lj_checkapi_slot(2);
+ key = L->top-2;
+ dst = lj_tab_set(L, t, key);
+ copyTV(L, dst, key+1);
+@@ -998,7 +1014,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n)
+ {
+ GCtab *t = tabV(index2adr(L, idx));
+ TValue *dst, *src;
+- api_checknelems(L, 1);
++ lj_checkapi_slot(1);
+ dst = lj_tab_setint(L, t, n);
+ src = L->top-1;
+ copyTV(L, dst, src);
+@@ -1010,13 +1026,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
+ {
+ global_State *g;
+ GCtab *mt;
+- cTValue *o = index2adr(L, idx);
+- api_checknelems(L, 1);
+- api_checkvalidindex(L, o);
++ cTValue *o = index2adr_check(L, idx);
++ lj_checkapi_slot(1);
+ if (tvisnil(L->top-1)) {
+ mt = NULL;
+ } else {
+- api_check(L, tvistab(L->top-1));
++ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
+ mt = tabV(L->top-1);
+ }
+ g = G(L);
+@@ -1053,11 +1068,10 @@ LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
+
+ LUA_API int lua_setfenv(lua_State *L, int idx)
+ {
+- cTValue *o = index2adr(L, idx);
++ cTValue *o = index2adr_check(L, idx);
+ GCtab *t;
+- api_checknelems(L, 1);
+- api_checkvalidindex(L, o);
+- api_check(L, tvistab(L->top-1));
++ lj_checkapi_slot(1);
++ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
+ t = tabV(L->top-1);
+ if (tvisfunc(o)) {
+ setgcref(funcV(o)->c.env, obj2gco(t));
+@@ -1078,13 +1092,14 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
+ {
+ cTValue *f = index2adr(L, idx);
+ TValue *val;
++ GCobj *o;
+ const char *name;
+- api_checknelems(L, 1);
+- name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val);
++ lj_checkapi_slot(1);
++ name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o);
+ if (name) {
+ L->top--;
+ copyTV(L, val, L->top);
+- lj_gc_barrier(L, funcV(f), L->top);
++ lj_gc_barrier(L, o, L->top);
+ }
+ return name;
+ }
+@@ -1106,8 +1121,9 @@ static TValue *api_call_base(lua_State *L, int nargs)
+
+ LUA_API void lua_call(lua_State *L, int nargs, int nresults)
+ {
+- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
+- api_checknelems(L, nargs+1);
++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
++ "thread called in wrong state %d", L->status);
++ lj_checkapi_slot(nargs+1);
+ lj_vm_call(L, api_call_base(L, nargs), nresults+1);
+ }
+
+@@ -1117,13 +1133,13 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
+ uint8_t oldh = hook_save(g);
+ ptrdiff_t ef;
+ int status;
+- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
+- api_checknelems(L, nargs+1);
++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
++ "thread called in wrong state %d", L->status);
++ lj_checkapi_slot(nargs+1);
+ if (errfunc == 0) {
+ ef = 0;
+ } else {
+- cTValue *o = stkindex2adr(L, errfunc);
+- api_checkvalidindex(L, o);
++ cTValue *o = index2adr_stack(L, errfunc);
+ ef = savestack(L, o);
+ }
+ status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
+@@ -1138,7 +1154,10 @@ static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
+ fn->c.f = func;
+ setfuncV(L, top++, fn);
+ if (LJ_FR2) setnilV(top++);
+- setlightudV(top++, checklightudptr(L, ud));
++#if LJ_64
++ ud = lj_lightud_intern(L, ud);
++#endif
++ setrawlightudV(top++, ud);
+ cframe_nres(L->cframe) = 1+0; /* Zero results. */
+ L->top = top;
+ return top-1; /* Now call the newly allocated C function. */
+@@ -1149,7 +1168,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
+ global_State *g = G(L);
+ uint8_t oldh = hook_save(g);
+ int status;
+- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
++ "thread called in wrong state %d", L->status);
+ status = lj_vm_cpcall(L, func, ud, cpcall);
+ if (status) hook_restore(g, oldh);
+ return status;
+@@ -1198,11 +1218,12 @@ LUA_API int lua_yield(lua_State *L, int nresults)
+ setcont(top, lj_cont_hook);
+ if (LJ_FR2) top++;
+ setframe_pc(top, cframe_pc(cf)-1);
+- if (LJ_FR2) top++;
++ top++;
+ setframe_gc(top, obj2gco(L), LJ_TTHREAD);
++ if (LJ_FR2) top++;
+ setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+ L->top = L->base = top+1;
+-#if LJ_TARGET_X64
++#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS
+ lj_err_throw(L, LUA_YIELD);
+ #else
+ L->cframe = NULL;
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index c8d7138e..ae999467 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Target architecture selection.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_ARCH_H
+@@ -8,6 +8,8 @@
+
+ #include "lua.h"
+
++/* -- Target definitions -------------------------------------------------- */
++
+ /* Target endianess. */
+ #define LUAJIT_LE 0
+ #define LUAJIT_BE 1
+@@ -38,6 +40,14 @@
+ #define LUAJIT_OS_BSD 4
+ #define LUAJIT_OS_POSIX 5
+
++/* Number mode. */
++#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
++#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
++#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
++#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
++
++/* -- Target detection ---------------------------------------------------- */
++
+ /* Select native target if no target defined. */
+ #ifndef LUAJIT_TARGET
+
+@@ -69,12 +79,16 @@
+ #elif defined(__linux__)
+ #define LUAJIT_OS LUAJIT_OS_LINUX
+ #elif defined(__MACH__) && defined(__APPLE__)
++#include "TargetConditionals.h"
+ #define LUAJIT_OS LUAJIT_OS_OSX
+ #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+ defined(__NetBSD__) || defined(__OpenBSD__) || \
+ defined(__DragonFly__)) && !defined(__ORBIS__)
+ #define LUAJIT_OS LUAJIT_OS_BSD
+-#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__)
++#elif (defined(__sun__) && defined(__svr4__))
++#define LJ_TARGET_SOLARIS 1
++#define LUAJIT_OS LUAJIT_OS_POSIX
++#elif defined(__HAIKU__)
+ #define LUAJIT_OS LUAJIT_OS_POSIX
+ #elif defined(__CYGWIN__)
+ #define LJ_TARGET_CYGWIN 1
+@@ -103,10 +117,16 @@
+ #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
+ #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
+ #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
+-#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
++#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
+ #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
+ #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
+
++#if TARGET_OS_IPHONE
++#define LJ_TARGET_IOS 1
++#else
++#define LJ_TARGET_IOS 0
++#endif
++
+ #ifdef __CELLOS_LV2__
+ #define LJ_TARGET_PS3 1
+ #define LJ_TARGET_CONSOLE 1
+@@ -135,10 +155,14 @@
+ #define LJ_TARGET_GC64 1
+ #endif
+
+-#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
+-#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
+-#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
+-#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
++#ifdef _UWP
++#define LJ_TARGET_UWP 1
++#if LUAJIT_TARGET == LUAJIT_ARCH_X64
++#define LJ_TARGET_GC64 1
++#endif
++#endif
++
++/* -- Arch-specific settings ---------------------------------------------- */
+
+ /* Set target architecture properties. */
+ #if LUAJIT_TARGET == LUAJIT_ARCH_X86
+@@ -146,14 +170,10 @@
+ #define LJ_ARCH_NAME "x86"
+ #define LJ_ARCH_BITS 32
+ #define LJ_ARCH_ENDIAN LUAJIT_LE
+-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+-#define LJ_ABI_WIN 1
+-#else
+-#define LJ_ABI_WIN 0
+-#endif
+ #define LJ_TARGET_X86 1
+ #define LJ_TARGET_X86ORX64 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 8
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNALIGNED 1
+@@ -164,21 +184,19 @@
+ #define LJ_ARCH_NAME "x64"
+ #define LJ_ARCH_BITS 64
+ #define LJ_ARCH_ENDIAN LUAJIT_LE
+-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+-#define LJ_ABI_WIN 1
+-#else
+-#define LJ_ABI_WIN 0
+-#endif
+ #define LJ_TARGET_X64 1
+ #define LJ_TARGET_X86ORX64 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 16
+ #define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNALIGNED 1
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
+-#ifdef LUAJIT_ENABLE_GC64
++#ifndef LUAJIT_DISABLE_GC64
+ #define LJ_TARGET_GC64 1
++#elif LJ_TARGET_OSX
++#error "macOS requires GC64 -- don't disable it"
+ #endif
+
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
+@@ -195,19 +213,20 @@
+ #define LJ_ABI_EABI 1
+ #define LJ_TARGET_ARM 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 14
+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
+ #define LJ_TARGET_MASKSHIFT 0
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
+-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
++#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
+ #define LJ_ARCH_VERSION 80
+-#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
++#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
+ #define LJ_ARCH_VERSION 70
+ #elif __ARM_ARCH_6T2__
+ #define LJ_ARCH_VERSION 61
+-#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
++#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
+ #define LJ_ARCH_VERSION 60
+ #else
+ #define LJ_ARCH_VERSION 50
+@@ -225,6 +244,7 @@
+ #endif
+ #define LJ_TARGET_ARM64 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 30
+ #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+@@ -254,23 +274,43 @@
+ #else
+ #define LJ_ARCH_BITS 32
+ #define LJ_ARCH_NAME "ppc"
++
++#if !defined(LJ_ARCH_HASFPU)
++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
++#define LJ_ARCH_HASFPU 0
++#else
++#define LJ_ARCH_HASFPU 1
++#endif
++#endif
++
++#if !defined(LJ_ABI_SOFTFP)
++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
++#define LJ_ABI_SOFTFP 1
++#else
++#define LJ_ABI_SOFTFP 0
++#endif
++#endif
++#endif
++
++#if LJ_ABI_SOFTFP
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
++#else
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
+ #endif
+
+ #define LJ_TARGET_PPC 1
+ #define LJ_TARGET_EHRETREG 3
++#define LJ_TARGET_EHRAREG 65
+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
+ #define LJ_TARGET_MASKSHIFT 0
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
+-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
+
+ #if LJ_TARGET_CONSOLE
+ #define LJ_ARCH_PPC32ON64 1
+ #define LJ_ARCH_NOFFI 1
+ #elif LJ_ARCH_BITS == 64
+-#define LJ_ARCH_PPC64 1
+-#define LJ_TARGET_GC64 1
+-#define LJ_ARCH_NOJIT 1 /* NYI */
++#error "No support for PPC64"
+ #endif
+
+ #if _ARCH_PWR7
+@@ -302,18 +342,38 @@
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
+
+ #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
++#if __mips_isa_rev >= 6
++#define LJ_TARGET_MIPSR6 1
++#define LJ_TARGET_UNALIGNED 1
++#endif
+ #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips32r6el"
++#else
+ #define LJ_ARCH_NAME "mipsel"
++#endif
++#else
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips64r6el"
+ #else
+ #define LJ_ARCH_NAME "mips64el"
+ #endif
++#endif
+ #define LJ_ARCH_ENDIAN LUAJIT_LE
+ #else
+ #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips32r6"
++#else
+ #define LJ_ARCH_NAME "mips"
++#endif
++#else
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips64r6"
+ #else
+ #define LJ_ARCH_NAME "mips64"
+ #endif
++#endif
+ #define LJ_ARCH_ENDIAN LUAJIT_BE
+ #endif
+
+@@ -337,22 +397,22 @@
+ #define LJ_ARCH_BITS 32
+ #define LJ_TARGET_MIPS32 1
+ #else
+-#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU
+-#define LJ_ARCH_NOJIT 1 /* NYI */
+-#endif
+ #define LJ_ARCH_BITS 64
+ #define LJ_TARGET_MIPS64 1
+ #define LJ_TARGET_GC64 1
+ #endif
+ #define LJ_TARGET_MIPS 1
+ #define LJ_TARGET_EHRETREG 4
++#define LJ_TARGET_EHRAREG 31
+ #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
+-#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_VERSION 60
++#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
+ #define LJ_ARCH_VERSION 20
+ #else
+ #define LJ_ARCH_VERSION 10
+@@ -362,9 +422,7 @@
+ #error "No target architecture defined"
+ #endif
+
+-#ifndef LJ_PAGESIZE
+-#define LJ_PAGESIZE 4096
+-#endif
++/* -- Checks for requirements --------------------------------------------- */
+
+ /* Check for minimum required compiler versions. */
+ #if defined(__GNUC__)
+@@ -418,29 +476,30 @@
+ #error "No support for ILP32 model on ARM64"
+ #endif
+ #elif LJ_TARGET_PPC
+-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+-#error "No support for PowerPC CPUs without double-precision FPU"
+-#endif
+-#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
++#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
+ #error "No support for little-endian PPC32"
+ #endif
+-#if LJ_ARCH_PPC64
+-#error "No support for PowerPC 64 bit mode (yet)"
+-#endif
+-#ifdef __NO_FPRS__
++#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
+ #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
+ #endif
+ #elif LJ_TARGET_MIPS32
+ #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
+ #error "Only o32 ABI supported for MIPS32"
+ #endif
++#if LJ_TARGET_MIPSR6
++/* Not that useful, since most available r6 CPUs are 64 bit. */
++#error "No support for MIPS32R6"
++#endif
+ #elif LJ_TARGET_MIPS64
+ #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
++/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
+ #error "Only n64 ABI supported for MIPS64"
+ #endif
+ #endif
+ #endif
+
++/* -- Derived defines ----------------------------------------------------- */
++
+ /* Enable or disable the dual-number mode for the VM. */
+ #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \
+ (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1)
+@@ -490,6 +549,13 @@
+ #define LJ_HASFFI 1
+ #endif
+
++/* Disable or enable the string buffer extension. */
++#if defined(LUAJIT_DISABLE_BUFFER)
++#define LJ_HASBUFFER 0
++#else
++#define LJ_HASBUFFER 1
++#endif
++
+ #if defined(LUAJIT_DISABLE_PROFILE)
+ #define LJ_HASPROFILE 0
+ #elif LJ_TARGET_POSIX
+@@ -512,6 +578,7 @@
+ #define LJ_ABI_SOFTFP 0
+ #endif
+ #define LJ_SOFTFP (!LJ_ARCH_HASFPU)
++#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
+
+ #if LJ_ARCH_ENDIAN == LUAJIT_BE
+ #define LJ_LE 0
+@@ -537,26 +604,52 @@
+ #define LJ_TARGET_UNALIGNED 0
+ #endif
+
++#ifndef LJ_PAGESIZE
++#define LJ_PAGESIZE 4096
++#endif
++
+ /* Various workarounds for embedded operating systems or weak C runtimes. */
+ #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
+ #define LUAJIT_NO_LOG2
+ #endif
+-#if defined(__symbian__) || LJ_TARGET_WINDOWS
+-#define LUAJIT_NO_EXP2
+-#endif
+ #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
+ #define LJ_NO_SYSTEM 1
+ #endif
+
+-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
+-/* NYI: no support for compact unwind specification, yet. */
+-#define LUAJIT_NO_UNWIND 1
++#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
++#define LJ_ABI_WIN 1
++#else
++#define LJ_ABI_WIN 0
+ #endif
+
+-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
++#if LJ_TARGET_WINDOWS
++#if LJ_TARGET_UWP
++#define LJ_WIN_VALLOC VirtualAllocFromApp
++#define LJ_WIN_VPROTECT VirtualProtectFromApp
++extern void *LJ_WIN_LOADLIBA(const char *path);
++#else
++#define LJ_WIN_VALLOC VirtualAlloc
++#define LJ_WIN_VPROTECT VirtualProtect
++#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
++#endif
++#endif
++
++#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
+ #define LJ_NO_UNWIND 1
+ #endif
+
++#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
++#define LJ_UNWIND_EXT 1
++#else
++#define LJ_UNWIND_EXT 0
++#endif
++
++#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
++#define LJ_UNWIND_JIT 1
++#else
++#define LJ_UNWIND_JIT 0
++#endif
++
+ /* Compatibility with Lua 5.1 vs. 5.2. */
+ #ifdef LUAJIT_ENABLE_LUA52COMPAT
+ #define LJ_52 1
+@@ -564,4 +657,46 @@
+ #define LJ_52 0
+ #endif
+
++/* -- VM security --------------------------------------------------------- */
++
++/* Don't make any changes here. Instead build with:
++** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value"
++**
++** Important note to distro maintainers: DO NOT change the defaults for a
++** regular distro build -- neither upwards, nor downwards!
++** These build-time configurable security flags are intended for embedders
++** who may have specific needs wrt. security vs. performance.
++*/
++
++/* Security defaults. */
++#ifndef LUAJIT_SECURITY_PRNG
++/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */
++#define LUAJIT_SECURITY_PRNG 1
++#endif
++
++#ifndef LUAJIT_SECURITY_STRHASH
++/* String hash: 0 = sparse only, 1 = sparse + dense. */
++#define LUAJIT_SECURITY_STRHASH 1
++#endif
++
++#ifndef LUAJIT_SECURITY_STRID
++/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */
++#define LUAJIT_SECURITY_STRID 1
++#endif
++
++#ifndef LUAJIT_SECURITY_MCODE
++/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */
++#define LUAJIT_SECURITY_MCODE 1
++#endif
++
++#define LJ_SECURITY_MODE \
++ ( 0u \
++ | ((LUAJIT_SECURITY_PRNG & 3) << 0) \
++ | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \
++ | ((LUAJIT_SECURITY_STRID & 3) << 4) \
++ | ((LUAJIT_SECURITY_MCODE & 3) << 6) \
++ )
++#define LJ_SECURITY_MODESTRING \
++ "\004prng\007strhash\005strid\005mcode"
++
+ #endif
+diff --git a/src/lj_asm.c b/src/lj_asm.c
+index c2cf5a95..5968c5e3 100644
+--- a/src/lj_asm.c
++++ b/src/lj_asm.c
+@@ -1,6 +1,6 @@
+ /*
+ ** IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_asm_c
+@@ -11,6 +11,7 @@
+ #if LJ_HASJIT
+
+ #include "lj_gc.h"
++#include "lj_buf.h"
+ #include "lj_str.h"
+ #include "lj_tab.h"
+ #include "lj_frame.h"
+@@ -22,7 +23,6 @@
+ #include "lj_ircall.h"
+ #include "lj_iropt.h"
+ #include "lj_mcode.h"
+-#include "lj_iropt.h"
+ #include "lj_trace.h"
+ #include "lj_snap.h"
+ #include "lj_asm.h"
+@@ -72,6 +72,8 @@ typedef struct ASMState {
+ IRRef snaprename; /* Rename highwater mark for snapshot check. */
+ SnapNo snapno; /* Current snapshot number. */
+ SnapNo loopsnapno; /* Loop snapshot number. */
++ int snapalloc; /* Current snapshot needs allocation. */
++ BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */
+
+ IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
+ IRRef sectref; /* Section base reference (loopref or 0). */
+@@ -85,6 +87,7 @@ typedef struct ASMState {
+
+ MCode *mcbot; /* Bottom of reserved MCode. */
+ MCode *mctop; /* Top of generated MCode. */
++ MCode *mctoporig; /* Original top of generated MCode. */
+ MCode *mcloop; /* Pointer to loop MCode (or NULL). */
+ MCode *invmcp; /* Points to invertible loop branch (or NULL). */
+ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
+@@ -97,6 +100,12 @@ typedef struct ASMState {
+ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */
+ } ASMState;
+
++#ifdef LUA_USE_ASSERT
++#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
++#else
++#define lj_assertA(c, ...) ((void)as)
++#endif
++
+ #define IR(ref) (&as->ir[(ref)])
+
+ #define ASMREF_TMP1 REF_TRUE /* Temp. register. */
+@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
+ #ifdef LUA_USE_ASSERT
+ if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
+ IRIns *ir = IR(as->curins+1);
+- fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
+- as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
+- lua_assert(0);
++ lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
++ as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
+ }
+ #endif
+ if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
+@@ -244,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
+ *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
+ } else {
+ *p++ = '?';
+- lua_assert(0);
++ lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
+ }
+ } else if (e[1] == 'f' || e[1] == 'i') {
+ IRRef ref;
+@@ -262,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
+ } else if (e[1] == 'x') {
+ p += sprintf(p, "%08x", va_arg(argp, int32_t));
+ } else {
+- lua_assert(0);
++ lj_assertA(0, "bad debug format code");
+ }
+ fmt = e+2;
+ }
+@@ -321,7 +329,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ Reg r;
+ if (ra_iskref(ref)) {
+ r = ra_krefreg(ref);
+- lua_assert(!rset_test(as->freeset, r));
++ lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
+ ra_free(as, r);
+ ra_modified(as, r);
+ #if LJ_64
+@@ -333,12 +341,14 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ }
+ ir = IR(ref);
+ r = ir->r;
+- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
++ lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
++ lj_assertA(!ra_hasspill(ir->s),
++ "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
+ ra_free(as, r);
+ ra_modified(as, r);
+ ir->r = RID_INIT; /* Do not keep any hint. */
+ RA_DBGX((as, "remat $i $r", ir, r));
+-#if !LJ_SOFTFP
++#if !LJ_SOFTFP32
+ if (ir->o == IR_KNUM) {
+ emit_loadk64(as, r, ir);
+ } else
+@@ -347,7 +357,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
+ emit_getgl(as, r, jit_base);
+ } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
+- lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */
++ /* REF_NIL stores ASMREF_L register. */
++ lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
+ emit_getgl(as, r, cur_L);
+ #if LJ_64
+ } else if (ir->o == IR_KINT64) {
+@@ -360,8 +371,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ #endif
+ #endif
+ } else {
+- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
++ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
++ "rematk of bad IR op %d", ir->o);
+ emit_loadi(as, r, ir->i);
+ }
+ return r;
+@@ -371,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ static int32_t ra_spill(ASMState *as, IRIns *ir)
+ {
+ int32_t slot = ir->s;
+- lua_assert(ir >= as->ir + REF_TRUE);
++ lj_assertA(ir >= as->ir + REF_TRUE,
++ "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
+ if (!ra_hasspill(slot)) {
+ if (irt_is64(ir->t)) {
+ slot = as->evenspill;
+@@ -396,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
+ {
+ IRIns *ir = IR(ref);
+ Reg r = ir->r;
+- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
++ lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
++ lj_assertA(!ra_hasspill(ir->s),
++ "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
+ ra_free(as, r);
+ ra_modified(as, r);
+ ir->r = RID_INIT;
+@@ -412,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
+ IRIns *ir = IR(ref);
+ int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */
+ Reg r = ir->r;
+- lua_assert(ra_hasreg(r));
++ lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
+ ra_sethint(ir->r, r); /* Keep hint. */
+ ra_free(as, r);
+ if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
+@@ -441,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
+ {
+ IRRef ref;
+ RegCost cost = ~(RegCost)0;
+- lua_assert(allow != RSET_EMPTY);
++ lj_assertA(allow != RSET_EMPTY, "evict from empty set");
+ if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
+ GPRDEF(MINCOST)
+ } else {
+ FPRDEF(MINCOST)
+ }
+ ref = regcost_ref(cost);
+- lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
++ lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
++ "evict of out-of-range IR %04d", ref - REF_BIAS);
+ /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
+ if (!irref_isk(ref) && (as->weakset & allow)) {
+ IRIns *ir = IR(ref);
+@@ -606,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
+ IRIns *ir = IR(ref);
+ RegSet pick = as->freeset & allow;
+ Reg r;
+- lua_assert(ra_noreg(ir->r));
++ lj_assertA(ra_noreg(ir->r),
++ "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
+ if (pick) {
+ /* First check register hint from propagation or PHI. */
+ if (ra_hashint(ir->r)) {
+@@ -670,8 +687,10 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
+ IRIns *ir = IR(ref);
+ ir->r = (uint8_t)up;
+ as->cost[down] = 0;
+- lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
+- lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
++ lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
++ "rename between GPR/FPR %d and %d", down, up);
++ lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
++ lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
+ ra_free(as, down); /* 'down' is free ... */
+ ra_modified(as, down);
+ rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
+@@ -679,7 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
+ RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
+ emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
+ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
+- ra_addrename(as, down, ref, as->snapno);
++ /*
++ ** The rename is effective at the subsequent (already emitted) exit
++ ** branch. This is for the current snapshot (as->snapno). Except if we
++ ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
++ ** then it belongs to the next snapshot.
++ ** See also the discussion at asm_snap_checkrename().
++ */
++ ra_addrename(as, down, ref, as->snapno + as->snapalloc);
+ }
+ }
+
+@@ -712,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
+ {
+ Reg dest = ra_dest(as, ir, RID2RSET(r));
+ if (dest != r) {
+- lua_assert(rset_test(as->freeset, r));
++ lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
+ ra_modified(as, r);
+ emit_movrr(as, ir, dest, r);
+ }
+@@ -745,8 +771,9 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
+ #endif
+ #endif
+ } else if (ir->o != IR_KPRI) {
+- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
++ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
++ "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
+ emit_loadi(as, dest, ir->i);
+ return;
+ }
+@@ -791,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
+ }
+ #endif
+
+-#if !LJ_64
+ /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
+ static void ra_destpair(ASMState *as, IRIns *ir)
+ {
+ Reg destlo = ir->r, desthi = (ir+1)->r;
++ IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
+ /* First spill unrelated refs blocking the destination registers. */
+ if (!rset_test(as->freeset, RID_RETLO) &&
+ destlo != RID_RETLO && desthi != RID_RETLO)
+@@ -819,29 +846,28 @@ static void ra_destpair(ASMState *as, IRIns *ir)
+ /* Check for conflicts and shuffle the registers as needed. */
+ if (destlo == RID_RETHI) {
+ if (desthi == RID_RETLO) {
+-#if LJ_TARGET_X86
+- *--as->mcp = XI_XCHGa + RID_RETHI;
++#if LJ_TARGET_X86ORX64
++ *--as->mcp = REX_64IR(irx, XI_XCHGa + RID_RETHI);
+ #else
+- emit_movrr(as, ir, RID_RETHI, RID_TMP);
+- emit_movrr(as, ir, RID_RETLO, RID_RETHI);
+- emit_movrr(as, ir, RID_TMP, RID_RETLO);
++ emit_movrr(as, irx, RID_RETHI, RID_TMP);
++ emit_movrr(as, irx, RID_RETLO, RID_RETHI);
++ emit_movrr(as, irx, RID_TMP, RID_RETLO);
+ #endif
+ } else {
+- emit_movrr(as, ir, RID_RETHI, RID_RETLO);
+- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
++ emit_movrr(as, irx, RID_RETHI, RID_RETLO);
++ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
+ }
+ } else if (desthi == RID_RETLO) {
+- emit_movrr(as, ir, RID_RETLO, RID_RETHI);
+- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
++ emit_movrr(as, irx, RID_RETLO, RID_RETHI);
++ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
+ } else {
+- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
++ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
++ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
+ }
+ /* Restore spill slots (if any). */
+ if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
+ if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
+ }
+-#endif
+
+ /* -- Snapshot handling --------- ----------------------------------------- */
+
+@@ -876,7 +902,10 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
+ static void asm_snap_alloc1(ASMState *as, IRRef ref)
+ {
+ IRIns *ir = IR(ref);
+- if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) {
++ if (!irref_isk(ref) && ir->r != RID_SUNK) {
++ bloomset(as->snapfilt1, ref);
++ bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
++ if (ra_used(ir)) return;
+ if (ir->r == RID_SINK) {
+ ir->r = RID_SUNK;
+ #if LJ_HASFFI
+@@ -888,11 +917,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
+ #endif
+ { /* Allocate stored values for TNEW, TDUP and CNEW. */
+ IRIns *irs;
+- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
++ lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
++ "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
+ for (irs = IR(as->snapref-1); irs > ir; irs--)
+ if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
+- lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+- irs->o == IR_FSTORE || irs->o == IR_XSTORE);
++ lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
++ irs->o == IR_FSTORE || irs->o == IR_XSTORE,
++ "sunk store IR %04d has bad op %d",
++ (int)(irs - as->ir) - REF_BIAS, irs->o);
+ asm_snap_alloc1(as, irs->op2);
+ if (LJ_32 && (irs+1)->o == IR_HIOP)
+ asm_snap_alloc1(as, (irs+1)->op2);
+@@ -928,18 +960,21 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
+ }
+
+ /* Allocate refs escaping to a snapshot. */
+-static void asm_snap_alloc(ASMState *as)
++static void asm_snap_alloc(ASMState *as, int snapno)
+ {
+- SnapShot *snap = &as->T->snap[as->snapno];
++ SnapShot *snap = &as->T->snap[snapno];
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
+ MSize n, nent = snap->nent;
++ as->snapfilt1 = as->snapfilt2 = 0;
+ for (n = 0; n < nent; n++) {
+ SnapEntry sn = map[n];
+ IRRef ref = snap_ref(sn);
+ if (!irref_isk(ref)) {
+ asm_snap_alloc1(as, ref);
+ if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
+- lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
++ lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
++ "snap %d[%d] points to bad SOFTFP IR %04d",
++ snapno, n, ref - REF_BIAS);
+ asm_snap_alloc1(as, ref+1);
+ }
+ }
+@@ -955,35 +990,26 @@ static void asm_snap_alloc(ASMState *as)
+ */
+ static int asm_snap_checkrename(ASMState *as, IRRef ren)
+ {
+- SnapShot *snap = &as->T->snap[as->snapno];
+- SnapEntry *map = &as->T->snapmap[snap->mapofs];
+- MSize n, nent = snap->nent;
+- for (n = 0; n < nent; n++) {
+- SnapEntry sn = map[n];
+- IRRef ref = snap_ref(sn);
+- if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) {
+- IRIns *ir = IR(ref);
+- ra_spill(as, ir); /* Register renamed, so force a spill slot. */
+- RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
+- return 1; /* Found. */
+- }
++ if (bloomtest(as->snapfilt1, ren) &&
++ bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) {
++ IRIns *ir = IR(ren);
++ ra_spill(as, ir); /* Register renamed, so force a spill slot. */
++ RA_DBGX((as, "snaprensp $f $s", ren, ir->s));
++ return 1; /* Found. */
+ }
+ return 0; /* Not found. */
+ }
+
+-/* Prepare snapshot for next guard instruction. */
++/* Prepare snapshot for next guard or throwing instruction. */
+ static void asm_snap_prep(ASMState *as)
+ {
+- if (as->curins < as->snapref) {
+- do {
+- if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */
+- as->snapno--;
+- as->snapref = as->T->snap[as->snapno].ref;
+- } while (as->curins < as->snapref);
+- asm_snap_alloc(as);
++ if (as->snapalloc) {
++ /* Alloc on first invocation for each snapshot. */
++ as->snapalloc = 0;
++ asm_snap_alloc(as, as->snapno);
+ as->snaprename = as->T->nins;
+ } else {
+- /* Process any renames above the highwater mark. */
++ /* Check any renames above the highwater mark. */
+ for (; as->snaprename < as->T->nins; as->snaprename++) {
+ IRIns *ir = &as->T->ir[as->snaprename];
+ if (asm_snap_checkrename(as, ir->op1))
+@@ -992,6 +1018,35 @@ static void asm_snap_prep(ASMState *as)
+ }
+ }
+
++/* Move to previous snapshot when we cross the current snapshot ref. */
++static void asm_snap_prev(ASMState *as)
++{
++ if (as->curins < as->snapref) {
++ uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);
++ if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
++ do {
++ if (as->snapno == 0) return;
++ as->snapno--;
++ as->snapref = as->T->snap[as->snapno].ref;
++ as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */
++ } while (as->curins < as->snapref); /* May have no ins inbetween. */
++ as->snapalloc = 1;
++ }
++}
++
++/* Fixup snapshot mcode offsets. */
++static void asm_snap_fixup_mcofs(ASMState *as)
++{
++ uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
++ SnapShot *snap = as->T->snap;
++ SnapNo i;
++ for (i = as->T->nsnap-1; i > 0; i--) {
++ /* Compute offset from mcode start and store in correct snapshot. */
++ snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
++ }
++ snap[0].mcofs = 0;
++}
++
+ /* -- Miscellaneous helpers ----------------------------------------------- */
+
+ /* Calculate stack adjustment. */
+@@ -1003,21 +1058,26 @@ static int32_t asm_stack_adjust(ASMState *as)
+ }
+
+ /* Must match with hash*() in lj_tab.c. */
+-static uint32_t ir_khash(IRIns *ir)
++static uint32_t ir_khash(ASMState *as, IRIns *ir)
+ {
+ uint32_t lo, hi;
++ UNUSED(as);
+ if (irt_isstr(ir->t)) {
+- return ir_kstr(ir)->hash;
++ return ir_kstr(ir)->sid;
+ } else if (irt_isnum(ir->t)) {
+ lo = ir_knum(ir)->u32.lo;
+ hi = ir_knum(ir)->u32.hi << 1;
+ } else if (irt_ispri(ir->t)) {
+- lua_assert(!irt_isnil(ir->t));
++ lj_assertA(!irt_isnil(ir->t), "hash of nil key");
+ return irt_type(ir->t)-IRT_FALSE;
+ } else {
+- lua_assert(irt_isgcv(ir->t));
++ lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
+ lo = u32ptr(ir_kgc(ir));
++#if LJ_GC64
++ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
++#else
+ hi = lo + HASH_BIAS;
++#endif
+ }
+ return hashrot(lo, hi);
+ }
+@@ -1031,6 +1091,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
+ IRRef args[3];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* const char *str */
+ args[2] = ir->op2; /* size_t len */
+@@ -1043,6 +1104,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
+ IRRef args[2];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* uint32_t ahsize */
+ as->gcsteps++;
+@@ -1055,6 +1117,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
+ IRRef args[2];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* const GCtab *kt */
+ as->gcsteps++;
+@@ -1080,28 +1143,43 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
+
+ /* -- Buffer operations --------------------------------------------------- */
+
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb);
++#endif
+
+ static void asm_bufhdr(ASMState *as, IRIns *ir)
+ {
+ Reg sb = ra_dest(as, ir, RSET_GPR);
+- if ((ir->op2 & IRBUFHDR_APPEND)) {
++ switch (ir->op2) {
++ case IRBUFHDR_RESET: {
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irbp;
++ irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */
++ emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
++ emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
++ break;
++ }
++ case IRBUFHDR_APPEND: {
+ /* Rematerialize const buffer pointer instead of likely spill. */
+ IRIns *irp = IR(ir->op1);
+ if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+ (irp == ir-2 && !ra_used(ir-1)))) {
+- while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
++ while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
+ irp = IR(irp->op1);
+ if (irref_isk(irp->op1)) {
+ ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+ ir = irp;
+ }
+ }
+- } else {
+- Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+- /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+- emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+- emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
++ break;
++ }
++#if LJ_HASBUFFER
++ case IRBUFHDR_WRITE:
++ asm_bufhdr_write(as, sb);
++ break;
++#endif
++ default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
+ }
+ #if LJ_TARGET_X86ORX64
+ ra_left(as, sb, ir->op1);
+@@ -1115,15 +1193,16 @@ static void asm_bufput(ASMState *as, IRIns *ir)
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+ IRRef args[3];
+ IRIns *irs;
+- int kchar = -1;
++ int kchar = -129;
+ args[0] = ir->op1; /* SBuf * */
+ args[1] = ir->op2; /* GCstr * */
+ irs = IR(ir->op2);
+- lua_assert(irt_isstr(irs->t));
++ lj_assertA(irt_isstr(irs->t),
++ "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
+ if (irs->o == IR_KGC) {
+ GCstr *s = ir_kstr(irs);
+ if (s->len == 1) { /* Optimize put of single-char string constant. */
+- kchar = strdata(s)[0];
++ kchar = (int8_t)strdata(s)[0]; /* Signed! */
+ args[1] = ASMREF_TMP1; /* int, truncated to char */
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+ }
+@@ -1133,7 +1212,8 @@ static void asm_bufput(ASMState *as, IRIns *ir)
+ args[1] = ASMREF_TMP1; /* TValue * */
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+ } else {
+- lua_assert(irt_isinteger(IR(irs->op1)->t));
++ lj_assertA(irt_isinteger(IR(irs->op1)->t),
++ "TOSTR of non-numeric IR %04d", irs->op1);
+ args[1] = irs->op1; /* int */
+ if (irs->op2 == IRTOSTR_INT)
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+@@ -1150,8 +1230,8 @@ static void asm_bufput(ASMState *as, IRIns *ir)
+ asm_gencall(as, ci, args);
+ if (args[1] == ASMREF_TMP1) {
+ Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+- if (kchar == -1)
+- asm_tvptr(as, tmp, irs->op1);
++ if (kchar == -129)
++ asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
+ else
+ ra_allockreg(as, kchar, tmp);
+ }
+@@ -1173,6 +1253,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci;
+ IRRef args[2];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L;
+ as->gcsteps++;
+ if (ir->op2 == IRTOSTR_NUM) {
+@@ -1188,7 +1269,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
+ asm_setupresult(as, ir, ci); /* GCstr * */
+ asm_gencall(as, ci, args);
+ if (ir->op2 == IRTOSTR_NUM)
+- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
++ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
+ }
+
+ #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+@@ -1198,7 +1279,8 @@ static void asm_conv64(ASMState *as, IRIns *ir)
+ IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+ IRCallID id;
+ IRRef args[2];
+- lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
++ lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
++ "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
+ args[LJ_BE] = (ir-1)->op1;
+ args[LJ_LE] = ir->op1;
+ if (st == IRT_NUM || st == IRT_FLOAT) {
+@@ -1228,12 +1310,19 @@ static void asm_newref(ASMState *as, IRIns *ir)
+ IRRef args[3];
+ if (ir->r == RID_SINK)
+ return;
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* GCtab *t */
+ args[2] = ASMREF_TMP1; /* cTValue *key */
+ asm_setupresult(as, ir, ci); /* TValue * */
+ asm_gencall(as, ci, args);
+- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
++ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
++}
++
++static void asm_tmpref(ASMState *as, IRIns *ir)
++{
++ Reg r = ra_dest(as, ir, RSET_GPR);
++ asm_tvptr(as, r, ir->op1, ir->op2);
+ }
+
+ static void asm_lref(ASMState *as, IRIns *ir)
+@@ -1253,15 +1342,16 @@ static void asm_collectargs(ASMState *as, IRIns *ir,
+ const CCallInfo *ci, IRRef *args)
+ {
+ uint32_t n = CCI_XNARGS(ci);
+- lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
++ /* Account for split args. */
++ lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
+ if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+ while (n-- > 1) {
+ ir = IR(ir->op1);
+- lua_assert(ir->o == IR_CARG);
++ lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
+ args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+ }
+ args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+- lua_assert(IR(ir->op1)->o != IR_CARG);
++ lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
+ }
+
+ /* Reconstruct CCallInfo flags for CALLX*. */
+@@ -1305,32 +1395,6 @@ static void asm_call(ASMState *as, IRIns *ir)
+ asm_gencall(as, ci, args);
+ }
+
+-#if !LJ_SOFTFP
+-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+-{
+- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+- IRRef args[2];
+- args[0] = lref;
+- args[1] = rref;
+- asm_setupresult(as, ir, ci);
+- asm_gencall(as, ci, args);
+-}
+-
+-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+-{
+- IRIns *irp = IR(ir->op1);
+- if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+- IRIns *irpp = IR(irp->op1);
+- if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+- irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+- asm_fppow(as, ir, irpp->op1, irp->op2);
+- return 1;
+- }
+- }
+- return 0;
+-}
+-#endif
+-
+ /* -- PHI and loop handling ----------------------------------------------- */
+
+ /* Break a PHI cycle by renaming to a free register (evict if needed). */
+@@ -1601,6 +1665,68 @@ static void asm_loop(ASMState *as)
+ #error "Missing assembler for target CPU"
+ #endif
+
++/* -- Common instruction helpers ------------------------------------------ */
++
++#if !LJ_SOFTFP32
++#if !LJ_TARGET_X86ORX64
++#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
++#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
++#endif
++
++static void asm_pow(ASMState *as, IRIns *ir)
++{
++#if LJ_64 && LJ_HASFFI
++ if (!irt_isnum(ir->t))
++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
++ IRCALL_lj_carith_powu64);
++ else
++#endif
++ if (irt_isnum(IR(ir->op2)->t))
++ asm_callid(as, ir, IRCALL_pow);
++ else
++ asm_fppowi(as, ir);
++}
++
++static void asm_div(ASMState *as, IRIns *ir)
++{
++#if LJ_64 && LJ_HASFFI
++ if (!irt_isnum(ir->t))
++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
++ IRCALL_lj_carith_divu64);
++ else
++#endif
++ asm_fpdiv(as, ir);
++}
++#endif
++
++static void asm_mod(ASMState *as, IRIns *ir)
++{
++#if LJ_64 && LJ_HASFFI
++ if (!irt_isint(ir->t))
++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
++ IRCALL_lj_carith_modu64);
++ else
++#endif
++ asm_callid(as, ir, IRCALL_lj_vm_modi);
++}
++
++static void asm_fuseequal(ASMState *as, IRIns *ir)
++{
++ /* Fuse HREF + EQ/NE. */
++ if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
++ as->curins--;
++ asm_href(as, ir-1, (IROp)ir->o);
++ } else {
++ asm_equal(as, ir);
++ }
++}
++
++static void asm_alen(ASMState *as, IRIns *ir)
++{
++ asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
++ IRCALL_lj_tab_len_hint);
++}
++
+ /* -- Instruction dispatch ------------------------------------------------ */
+
+ /* Assemble a single instruction. */
+@@ -1609,7 +1735,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ switch ((IROp)ir->o) {
+ /* Miscellaneous ops. */
+ case IR_LOOP: asm_loop(as); break;
+- case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
++ case IR_NOP: case IR_XBAR:
++ lj_assertA(!ra_used(ir),
++ "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
++ break;
+ case IR_USE:
+ ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+ case IR_PHI: asm_phi(as, ir); break;
+@@ -1623,14 +1752,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_ABC:
+ asm_comp(as, ir);
+ break;
+- case IR_EQ: case IR_NE:
+- if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+- as->curins--;
+- asm_href(as, ir-1, (IROp)ir->o);
+- } else {
+- asm_equal(as, ir);
+- }
+- break;
++ case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
+
+ case IR_RETF: asm_retf(as, ir); break;
+
+@@ -1652,16 +1774,17 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_MUL: asm_mul(as, ir); break;
+ case IR_MOD: asm_mod(as, ir); break;
+ case IR_NEG: asm_neg(as, ir); break;
+-#if LJ_SOFTFP
++#if LJ_SOFTFP32
+ case IR_DIV: case IR_POW: case IR_ABS:
+- case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+- lua_assert(0); /* Unused for LJ_SOFTFP. */
++ case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
++ /* Unused for LJ_SOFTFP32. */
++ lj_assertA(0, "IR %04d with unused op %d",
++ (int)(ir - as->ir) - REF_BIAS, ir->o);
+ break;
+ #else
+ case IR_DIV: asm_div(as, ir); break;
+ case IR_POW: asm_pow(as, ir); break;
+ case IR_ABS: asm_abs(as, ir); break;
+- case IR_ATAN2: asm_atan2(as, ir); break;
+ case IR_LDEXP: asm_ldexp(as, ir); break;
+ case IR_FPMATH: asm_fpmath(as, ir); break;
+ case IR_TOBIT: asm_tobit(as, ir); break;
+@@ -1681,6 +1804,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_NEWREF: asm_newref(as, ir); break;
+ case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+ case IR_FREF: asm_fref(as, ir); break;
++ case IR_TMPREF: asm_tmpref(as, ir); break;
+ case IR_STRREF: asm_strref(as, ir); break;
+ case IR_LREF: asm_lref(as, ir); break;
+
+@@ -1691,6 +1815,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_FLOAD: asm_fload(as, ir); break;
+ case IR_XLOAD: asm_xload(as, ir); break;
+ case IR_SLOAD: asm_sload(as, ir); break;
++ case IR_ALEN: asm_alen(as, ir); break;
+
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+ case IR_FSTORE: asm_fstore(as, ir); break;
+@@ -1700,7 +1825,14 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+ case IR_TNEW: asm_tnew(as, ir); break;
+ case IR_TDUP: asm_tdup(as, ir); break;
+- case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
++ case IR_CNEW: case IR_CNEWI:
++#if LJ_HASFFI
++ asm_cnew(as, ir);
++#else
++ lj_assertA(0, "IR %04d with unused op %d",
++ (int)(ir - as->ir) - REF_BIAS, ir->o);
++#endif
++ break;
+
+ /* Buffer operations. */
+ case IR_BUFHDR: asm_bufhdr(as, ir); break;
+@@ -1767,8 +1899,7 @@ static void asm_head_side(ASMState *as)
+
+ if (as->snapno && as->topslot > as->parent->topslot) {
+ /* Force snap #0 alloc to prevent register overwrite in stack check. */
+- as->snapno = 0;
+- asm_snap_alloc(as);
++ asm_snap_alloc(as, 0);
+ }
+ allow = asm_head_side_base(as, irp, allow);
+
+@@ -1776,8 +1907,10 @@ static void asm_head_side(ASMState *as)
+ for (i = as->stopins; i > REF_BASE; i--) {
+ IRIns *ir = IR(i);
+ RegSP rs;
+- lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+- (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
++ lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
++ (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
++ "IR %04d has bad parent op %d",
++ (int)(ir - as->ir) - REF_BIAS, ir->o);
+ rs = as->parentmap[i - REF_FIRST];
+ if (ra_hasreg(ir->r)) {
+ rset_clear(allow, ir->r);
+@@ -2005,12 +2138,16 @@ static void asm_setup_regsp(ASMState *as)
+ #endif
+
+ ra_setup(as);
++#if LJ_TARGET_ARM64
++ ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
++#endif
+
+ /* Clear reg/sp for constants. */
+ for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
+ ir->prev = REGSP_INIT;
+ if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+ #if LJ_GC64
++ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+ ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
+ #else
+ /* Make life easier for backends by putting address of constant in i. */
+@@ -2026,6 +2163,7 @@ static void asm_setup_regsp(ASMState *as)
+ as->snaprename = nins;
+ as->snapref = nins;
+ as->snapno = T->nsnap;
++ as->snapalloc = 0;
+
+ as->stopins = REF_BASE;
+ as->orignins = nins;
+@@ -2035,7 +2173,7 @@ static void asm_setup_regsp(ASMState *as)
+ ir = IR(REF_FIRST);
+ if (as->parent) {
+ uint16_t *p;
+- lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
++ lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
+ if (lastir - ir > LJ_MAX_JSLOTS)
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+ as->stopins = (IRRef)((lastir-1) - as->ir);
+@@ -2074,6 +2212,10 @@ static void asm_setup_regsp(ASMState *as)
+ ir->prev = (uint16_t)REGSP_HINT((rload & 15));
+ rload = lj_ror(rload, 4);
+ continue;
++ case IR_TMPREF:
++ if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
++ as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */
++ break;
+ #endif
+ case IR_CALLXS: {
+ CCallInfo ci;
+@@ -2083,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as)
+ as->modset |= RSET_SCRATCH;
+ continue;
+ }
+- case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
++ case IR_CALLL:
++ /* lj_vm_next needs two TValues on the stack. */
++#if LJ_TARGET_X64 && LJ_ABI_WIN
+ if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
++ as->evenspill = SPS_FIRST + 4;
++#else
+ if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
++ as->evenspill = 4;
++#endif
++ /* fallthrough */
++ case IR_CALLN: case IR_CALLA: case IR_CALLS: {
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+ ir->prev = asm_setup_call_slots(as, ir, ci);
+ if (inloop)
+@@ -2091,7 +2243,6 @@ static void asm_setup_regsp(ASMState *as)
+ (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
+ continue;
+ }
+-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
+ case IR_HIOP:
+ switch ((ir-1)->o) {
+ #if LJ_SOFTFP && LJ_TARGET_ARM
+@@ -2102,15 +2253,15 @@ static void asm_setup_regsp(ASMState *as)
+ }
+ break;
+ #endif
+-#if !LJ_SOFTFP && LJ_NEED_FP64
++#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
+ case IR_CONV:
+ if (irt_isfp((ir-1)->t)) {
+ ir->prev = REGSP_HINT(RID_FPRET);
+ continue;
+ }
+- /* fallthrough */
+ #endif
+- case IR_CALLN: case IR_CALLXS:
++ /* fallthrough */
++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ #if LJ_SOFTFP
+ case IR_MIN: case IR_MAX:
+ #endif
+@@ -2121,12 +2272,11 @@ static void asm_setup_regsp(ASMState *as)
+ break;
+ }
+ break;
+-#endif
+ #if LJ_SOFTFP
+ case IR_MIN: case IR_MAX:
+ if ((ir+1)->o != IR_HIOP) break;
+- /* fallthrough */
+ #endif
++ /* fallthrough */
+ /* C calls evict all scratch regs and return results in RID_RET. */
+ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
+ if (REGARG_NUMGPR < 3 && as->evenspill < 3)
+@@ -2137,9 +2287,12 @@ static void asm_setup_regsp(ASMState *as)
+ if (ir->op2 != REF_NIL && as->evenspill < 4)
+ as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
+ }
++ /* fallthrough */
+ #else
++ /* fallthrough */
+ case IR_CNEW:
+ #endif
++ /* fallthrough */
+ case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+ case IR_BUFSTR:
+ ir->prev = REGSP_HINT(RID_RET);
+@@ -2151,35 +2304,45 @@ static void asm_setup_regsp(ASMState *as)
+ as->modset = RSET_SCRATCH;
+ break;
+ #if !LJ_SOFTFP
+- case IR_ATAN2:
+-#if LJ_TARGET_X86
+- if (as->evenspill < 4) /* Leave room to call atan2(). */
+- as->evenspill = 4;
+-#endif
+ #if !LJ_TARGET_X86ORX64
+ case IR_LDEXP:
+ #endif
+ #endif
++ /* fallthrough */
+ case IR_POW:
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+ if (inloop)
+ as->modset |= RSET_SCRATCH;
+ #if LJ_TARGET_X86
++ if (irt_isnum(IR(ir->op2)->t)) {
++ if (as->evenspill < 4) /* Leave room to call pow(). */
++ as->evenspill = 4;
++ }
+ break;
+ #else
+ ir->prev = REGSP_HINT(RID_FPRET);
+ continue;
+ #endif
+ }
+- /* fallthrough for integer POW */
++ /* fallthrough */ /* for integer POW */
+ case IR_DIV: case IR_MOD:
+- if (!irt_isnum(ir->t)) {
++ if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
++ ir->prev = REGSP_HINT(RID_RET);
++ if (inloop)
++ as->modset |= (RSET_SCRATCH & RSET_GPR);
++ continue;
++ }
++ break;
++#if LJ_64 && LJ_SOFTFP
++ case IR_ADD: case IR_SUB: case IR_MUL:
++ if (irt_isnum(ir->t)) {
+ ir->prev = REGSP_HINT(RID_RET);
+ if (inloop)
+ as->modset |= (RSET_SCRATCH & RSET_GPR);
+ continue;
+ }
+ break;
++#endif
+ case IR_FPMATH:
+ #if LJ_TARGET_X86ORX64
+ if (ir->op2 <= IRFPM_TRUNC) {
+@@ -2190,9 +2353,6 @@ static void asm_setup_regsp(ASMState *as)
+ continue;
+ }
+ break;
+- } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+- if (as->evenspill < 4) /* Leave room to call pow(). */
+- as->evenspill = 4;
+ }
+ #endif
+ if (inloop)
+@@ -2208,6 +2368,7 @@ static void asm_setup_regsp(ASMState *as)
+ case IR_BSHL: case IR_BSHR: case IR_BSAR:
+ if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
+ break;
++ /* fallthrough */
+ case IR_BROL: case IR_BROR:
+ if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
+ IR(ir->op2)->r = REGSP_HINT(RID_ECX);
+@@ -2252,7 +2413,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ {
+ ASMState as_;
+ ASMState *as = &as_;
+- MCode *origtop;
+
+ /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+ {
+@@ -2267,7 +2427,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ /* Ensure an initialized instruction beyond the last one for HIOP checks. */
+ /* This also allows one RENAME to be added without reallocating curfinal. */
+ as->orignins = lj_ir_nextins(J);
+- J->cur.ir[as->orignins].o = IR_NOP;
++ lj_ir_nop(&J->cur.ir[as->orignins]);
+
+ /* Setup initial state. Copy some fields to reduce indirections. */
+ as->J = J;
+@@ -2280,7 +2440,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ as->parent = J->parent ? traceref(J, J->parent) : NULL;
+
+ /* Reserve MCode memory. */
+- as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot);
++ as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
+ as->mcp = as->mctop;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ asm_setup_target(as);
+@@ -2338,7 +2498,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ /* Assemble a trace in linear backwards order. */
+ for (as->curins--; as->curins > as->stopins; as->curins--) {
+ IRIns *ir = IR(as->curins);
+- lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */
++ /* 64 bit types handled by SPLIT for 32 bit archs. */
++ lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
++ "IR %04d has unsplit 64 bit type",
++ (int)(ir - as->ir) - REF_BIAS);
++ asm_snap_prev(as);
+ if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
+ continue; /* Dead-code elimination can be soooo easy. */
+ if (irt_isguard(ir->t))
+@@ -2368,10 +2532,13 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ asm_phi_fixup(as);
+
+ if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
+- lua_assert(J->curfinal->nk == T->nk);
++ lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
+ memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+ (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
+ T->nins = J->curfinal->nins;
++ /* Fill mcofs of any unprocessed snapshots. */
++ as->curins = REF_FIRST;
++ asm_snap_prev(as);
+ break; /* Done. */
+ }
+
+@@ -2390,13 +2557,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ /* Set trace entry point before fixing up tail to allow link to self. */
+ T->mcode = as->mcp;
+ T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
+- if (!as->loopref)
++ if (as->loopref)
++ asm_loop_tail_fixup(as);
++ else
+ asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
+ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
++ asm_snap_fixup_mcofs(as);
+ #if LJ_TARGET_MCODE_FIXUP
+ asm_mcode_fixup(T->mcode, T->szmcode);
+ #endif
+- lj_mcode_sync(T->mcode, origtop);
++ lj_mcode_sync(T->mcode, as->mctoporig);
+ }
+
+ #undef IR
+diff --git a/src/lj_asm.h b/src/lj_asm.h
+index 2819481b..624da844 100644
+--- a/src/lj_asm.h
++++ b/src/lj_asm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_ASM_H
+diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
+index 37bfa40f..cc608c0d 100644
+--- a/src/lj_asm_arm.h
++++ b/src/lj_asm_arm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** ARM IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Register allocator extensions --------------------------------------- */
+@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)
+ }
+ }
+ }
+- lua_assert(rset_test(RSET_GPREVEN, r));
++ lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
+ ra_modified(as, r);
+ ra_modified(as, r+1);
+ RA_DBGX((as, "scratchpair $r $r", r, r+1));
+@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+ *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */
+ return ra_allock(as, (ofs & ~255), allow);
+ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = 0;
++ return RID_SP;
+ }
+ }
+ *ofsp = 0;
+@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
+ return;
+ }
+ } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
+- lua_assert(ofs == 0);
++ lj_assertA(ofs == 0, "bad usage");
+ ofs = (int32_t)sizeof(GCstr);
+ if (irref_isk(ir->op2)) {
+ ofs += IR(ir->op2)->i;
+@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
+ if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
+ if (irt_isnum(ir->t)) {
+- lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */
++ lj_assertA(rset_test(as->freeset, gpr+1),
++ "reg %d not free", gpr+1); /* Ditto. */
+ emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
+ gpr += 2;
+ } else {
+@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ #endif
+ {
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
+ if (ref) ra_leftov(as, gpr, ref);
+ gpr++;
+ } else {
+@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+- lua_assert(!irt_ispri(ir->t));
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
+ if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
+@@ -495,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
+ emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
+ }
+
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb)
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
++ if ((as->flags & JIT_F_ARMV6T2)) {
++ emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
++ } else {
++ emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
++ emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
++ }
++ emit_lso(as, ARMI_LDR, RID_TMP,
++ ra_allock(as, (addr & ~4095),
++ rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
++ (addr & 4095));
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
++}
++#endif
++
+ /* -- Type conversions ---------------------------------------------------- */
+
+ #if !LJ_SOFTFP
+@@ -530,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ #endif
+ IRRef lref = ir->op1;
+ /* 64 bit integer conversions are handled by SPLIT. */
+- lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));
++ lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
++ "IR %04d has unsplit 64 bit type",
++ (int)(ir - as->ir) - REF_BIAS);
+ #if LJ_SOFTFP
+ /* FP conversions are handled by SPLIT. */
+- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
++ "IR %04d has FP type",
++ (int)(ir - as->ir) - REF_BIAS);
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+ #else
+- lua_assert(irt_type(ir->t) != st);
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) { /* FP to FP conversion. */
+@@ -553,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ } else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+@@ -572,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ if ((as->flags & JIT_F_ARMV6)) {
+ ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
+ st == IRT_U8 ? ARMI_UXTB :
+@@ -658,35 +693,55 @@ static void asm_strto(ASMState *as, IRIns *ir)
+ /* -- Memory references --------------------------------------------------- */
+
+ /* Get pointer to TValue. */
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+ {
+- IRIns *ir = IR(ref);
+- if (irt_isnum(ir->t)) {
+- if (irref_isk(ref)) {
+- /* Use the number constant itself as a TValue. */
+- ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+- } else {
++ if ((mode & IRTMPREF_IN1)) {
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if ((mode & IRTMPREF_OUT1)) {
++#if LJ_SOFTFP
++ lj_assertA(irref_isk(ref), "unsplit FP op");
++ emit_dm(as, ARMI_MOV, dest, RID_SP);
++ emit_lso(as, ARMI_STR,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
++ RID_SP, 0);
++ emit_lso(as, ARMI_STR,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
++ RID_SP, 4);
++#else
++ Reg src = ra_alloc1(as, ref, RSET_FPR);
++ emit_dm(as, ARMI_MOV, dest, RID_SP);
++ emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
++#endif
++ } else if (irref_isk(ref)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
++ } else {
+ #if LJ_SOFTFP
+- lua_assert(0);
++ lj_assertA(0, "unsplit FP op");
+ #else
+- /* Otherwise force a spill and use the spill slot. */
+- emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
++ /* Otherwise force a spill and use the spill slot. */
++ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+ #endif
++ }
++ } else {
++ /* Otherwise use [sp] and [sp+4] to hold the TValue.
++ ** This assumes the following call has max. 4 args.
++ */
++ Reg type;
++ emit_dm(as, ARMI_MOV, dest, RID_SP);
++ if (!irt_ispri(ir->t)) {
++ Reg src = ra_alloc1(as, ref, RSET_GPR);
++ emit_lso(as, ARMI_STR, src, RID_SP, 0);
++ }
++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
++ type = ra_alloc1(as, ref+1, RSET_GPR);
++ else
++ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
++ emit_lso(as, ARMI_STR, type, RID_SP, 4);
+ }
+ } else {
+- /* Otherwise use [sp] and [sp+4] to hold the TValue. */
+- RegSet allow = rset_exclude(RSET_GPR, dest);
+- Reg type;
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+- if (!irt_ispri(ir->t)) {
+- Reg src = ra_alloc1(as, ref, allow);
+- emit_lso(as, ARMI_STR, src, RID_SP, 0);
+- }
+- if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+- type = ra_alloc1(as, ref+1, allow);
+- else
+- type = ra_allock(as, irt_toitype(ir->t), allow);
+- emit_lso(as, ARMI_STR, type, RID_SP, 4);
+ }
+ }
+
+@@ -811,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
+
+ /* Load main position relative to tab->node into dest. */
+- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
++ khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ } else {
+ emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
+ emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
+- if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */
++ if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */
+ emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+- emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash));
++ emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ } else if (irref_isk(refkey)) {
+ emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
+@@ -867,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg key = RID_NONE, type = RID_TMP, idx = node;
+ RegSet allow = rset_exclude(RSET_GPR, node);
+- lua_assert(ofs % sizeof(Node) == 0);
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (ofs > 4095) {
+ idx = dest;
+ rset_clear(allow, dest);
+@@ -934,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
+ static void asm_fref(ASMState *as, IRIns *ir)
+ {
+ UNUSED(as); UNUSED(ir);
+- lua_assert(!ra_used(ir));
++ lj_assertA(!ra_used(ir), "unfused FREF");
+ }
+
+ static void asm_strref(ASMState *as, IRIns *ir)
+@@ -971,39 +1026,43 @@ static void asm_strref(ASMState *as, IRIns *ir)
+
+ /* -- Loads and stores ---------------------------------------------------- */
+
+-static ARMIns asm_fxloadins(IRIns *ir)
++static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: return ARMI_LDRSB;
+ case IRT_U8: return ARMI_LDRB;
+ case IRT_I16: return ARMI_LDRSH;
+ case IRT_U16: return ARMI_LDRH;
+- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;
+- case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;
++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
+ default: return ARMI_LDR;
+ }
+ }
+
+-static ARMIns asm_fxstoreins(IRIns *ir)
++static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: case IRT_U8: return ARMI_STRB;
+ case IRT_I16: case IRT_U16: return ARMI_STRH;
+- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;
+- case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;
++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
+ default: return ARMI_STR;
+ }
+ }
+
+ static void asm_fload(ASMState *as, IRIns *ir)
+ {
+- if (ir->op1 == REF_NIL) {
+- lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ ARMIns ai = asm_fxloadins(as, ir);
++ Reg idx;
++ int32_t ofs;
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
++ idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
++ ofs = 0;
+ } else {
+- Reg dest = ra_dest(as, ir, RSET_GPR);
+- Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+- ARMIns ai = asm_fxloadins(ir);
+- int32_t ofs;
++ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+@@ -1012,11 +1071,11 @@ static void asm_fload(ASMState *as, IRIns *ir)
+ }
+ }
+ ofs = field_ofs[ir->op2];
+- if ((ai & 0x04000000))
+- emit_lso(as, ai, dest, idx, ofs);
+- else
+- emit_lsox(as, ai, dest, idx, ofs);
+ }
++ if ((ai & 0x04000000))
++ emit_lso(as, ai, dest, idx, ofs);
++ else
++ emit_lsox(as, ai, dest, idx, ofs);
+ }
+
+ static void asm_fstore(ASMState *as, IRIns *ir)
+@@ -1026,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+- ARMIns ai = asm_fxstoreins(ir);
++ ARMIns ai = asm_fxstoreins(as, ir);
+ if ((ai & 0x04000000))
+ emit_lso(as, ai, src, idx, ofs);
+ else
+@@ -1038,8 +1097,8 @@ static void asm_xload(ASMState *as, IRIns *ir)
+ {
+ Reg dest = ra_dest(as, ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
+ }
+
+ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+@@ -1047,7 +1106,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+ if (ir->r != RID_SINK) {
+ Reg src = ra_alloc1(as, ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
+ rset_exclude(RSET_GPR, src), ofs);
+ }
+ }
+@@ -1066,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ rset_clear(allow, type);
+ }
+ if (ra_used(ir)) {
+- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+- irt_isint(ir->t) || irt_isaddr(ir->t));
++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ }
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
+ (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+ if (!hiop || type == RID_NONE) {
+ rset_clear(allow, idx);
+ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+@@ -1133,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+ Reg dest = RID_NONE, type = RID_NONE, base;
+ RegSet allow = RSET_GPR;
+- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
+ #if LJ_SOFTFP
+- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+@@ -1152,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ Reg tmp = RID_NONE;
+ if ((ir->op2 & IRSLOAD_CONVERT))
+ tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
+- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+- irt_isint(ir->t) || irt_isaddr(ir->t));
++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad SLOAD type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ base = ra_alloc1(as, REF_BASE, allow);
+@@ -1218,7 +1283,8 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ IRRef args[4];
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+ RegSet drop = RSET_SCRATCH;
+- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ if (ra_hasreg(ir->r))
+@@ -1230,10 +1296,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ /* Initialize immutable cdata object. */
+ if (ir->o == IR_CNEWI) {
+ int32_t ofs = sizeof(GCcdata);
+- lua_assert(sz == 4 || sz == 8);
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ if (sz == 8) {
+ ofs += 4; ir++;
+- lua_assert(ir->o == IR_HIOP);
++ lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
+ }
+ for (;;) {
+ Reg r = ra_alloc1(as, ir->op2, allow);
+@@ -1268,8 +1334,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+ ra_releasetmp(as, ASMREF_TMP1));
+ }
+-#else
+-#define asm_cnew(as, ir) ((void)0)
+ #endif
+
+ /* -- Write barriers ------------------------------------------------------ */
+@@ -1301,7 +1365,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
+ MCLabel l_end;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+- lua_assert(IR(ir->op1)->o == IR_UREFC);
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+@@ -1364,8 +1428,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id)
+
+ static void asm_fpmath(ASMState *as, IRIns *ir)
+ {
+- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+- return;
+ if (ir->op2 <= IRFPM_TRUNC)
+ asm_callround(as, ir, ir->op2);
+ else if (ir->op2 == IRFPM_SQRT)
+@@ -1412,14 +1474,29 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
+ emit_dn(as, ai^m, dest, left);
+ }
+
+-static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
++/* Try to drop cmp r, #0. */
++static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai)
+ {
+- if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
++ if (as->flagmcp == as->mcp) {
++ uint32_t cc = (as->mcp[1] >> 28);
+ as->flagmcp = NULL;
+- as->mcp++;
+- ai |= ARMI_S;
++ if (cc <= CC_NE) {
++ as->mcp++;
++ ai |= ARMI_S;
++ } else if (cc == CC_GE) {
++ *++as->mcp ^= ((CC_GE^CC_PL) << 28);
++ ai |= ARMI_S;
++ } else if (cc == CC_LT) {
++ *++as->mcp ^= ((CC_LT^CC_MI) << 28);
++ ai |= ARMI_S;
++ } /* else: other conds don't work in general. */
+ }
+- asm_intop(as, ir, ai);
++ return ai;
++}
++
++static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
++{
++ asm_intop(as, ir, asm_drop_cmp0(as, ai));
+ }
+
+ static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
+@@ -1492,15 +1569,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ #define asm_mulov(as, ir) asm_mul(as, ir)
+
+ #if !LJ_SOFTFP
+-#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
+-#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
++#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
+ #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
+-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
+ #endif
+
+-#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
+-
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
+ #if !LJ_SOFTFP
+@@ -1514,20 +1586,7 @@ static void asm_neg(ASMState *as, IRIns *ir)
+
+ static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
+ {
+- if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
+- uint32_t cc = (as->mcp[1] >> 28);
+- as->flagmcp = NULL;
+- if (cc <= CC_NE) {
+- as->mcp++;
+- ai |= ARMI_S;
+- } else if (cc == CC_GE) {
+- *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+- ai |= ARMI_S;
+- } else if (cc == CC_LT) {
+- *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+- ai |= ARMI_S;
+- } /* else: other conds don't work with bit ops. */
+- }
++ ai = asm_drop_cmp0(as, ai);
+ if (ir->op2 == 0) {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+@@ -1582,7 +1641,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
+ #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
+ #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
+ #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
+-#define asm_brol(as, ir) lua_assert(0)
++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
+ static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
+ {
+@@ -1657,8 +1716,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
+ asm_intmin_max(as, ir, cc);
+ }
+
+-#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
+-#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
++#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
++#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
+
+ /* -- Comparisons --------------------------------------------------------- */
+
+@@ -1733,7 +1792,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
+ Reg left;
+ uint32_t m;
+ int cmpprev0 = 0;
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
++ "bad comparison data type %d", irt_type(ir->t));
+ if (asm_swapops(as, lref, rref)) {
+ Reg tmp = lref; lref = rref; rref = tmp;
+ if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
+@@ -1825,15 +1885,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
+ }
+ #endif
+
+-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
++/* -- Split register ops -------------------------------------------------- */
+
+-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
++/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
+ static void asm_hiop(ASMState *as, IRIns *ir)
+ {
+-#if LJ_HASFFI || LJ_SOFTFP
+ /* HIOP is marked as a store because it needs its own DCE logic. */
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
++#if LJ_HASFFI || LJ_SOFTFP
+ if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */
+ as->curins--; /* Always skip the loword comparison. */
+ #if LJ_SOFTFP
+@@ -1850,7 +1910,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+ as->curins--; /* Always skip the loword min/max. */
+ if (uselo || usehi)
+- asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
++ asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
+ return;
+ #elif LJ_HASFFI
+ } else if ((ir-1)->o == IR_CONV) {
+@@ -1864,6 +1924,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ asm_xstore_(as, ir, 4);
+ return;
+ }
++#endif
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
+ switch ((ir-1)->o) {
+ #if LJ_HASFFI
+@@ -1882,6 +1943,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ asm_intneg(as, ir, ARMI_RSC);
+ asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
+ break;
++ case IR_CNEWI:
++ /* Nothing to do here. Handled by lo op itself. */
++ break;
+ #endif
+ #if LJ_SOFTFP
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+@@ -1889,24 +1953,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ if (!uselo)
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
+ break;
++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
++ /* Nothing to do here. Handled by lo op itself. */
++ break;
+ #endif
+- case IR_CALLN:
+- case IR_CALLS:
+- case IR_CALLXS:
++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+-#if LJ_SOFTFP
+- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+-#endif
+- case IR_CNEWI:
+- /* Nothing to do here. Handled by lo op itself. */
+- break;
+- default: lua_assert(0); break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+-#else
+- UNUSED(as); UNUSED(ir); lua_assert(0);
+-#endif
+ }
+
+ /* -- Profiling ----------------------------------------------------------- */
+@@ -1930,7 +1986,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
+ if (irp) {
+ if (!ra_hasspill(irp->s)) {
+ pbase = irp->r;
+- lua_assert(ra_hasreg(pbase));
++ lj_assertA(ra_hasreg(pbase), "base reg lost");
+ } else if (allow) {
+ pbase = rset_pickbot(allow);
+ } else {
+@@ -1942,7 +1998,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
+ }
+ emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
+ k = emit_isk12(0, (int32_t)(8*topslot));
+- lua_assert(k);
++ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
+ emit_n(as, ARMI_CMP^k, RID_TMP);
+ emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
+ emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
+@@ -1979,7 +2035,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ #if LJ_SOFTFP
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+ Reg tmp;
+- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
++ /* LJ_SOFTFP: must be a number constant. */
++ lj_assertA(irref_isk(ref), "unsplit FP op");
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
+ rset_exclude(RSET_GPREVEN, RID_BASE));
+ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
+@@ -1993,7 +2050,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ } else {
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+ Reg type;
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "restore of IR type %d", irt_type(ir->t));
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
+ emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
+@@ -2006,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ } else if ((sn & SNAP_SOFTFPNUM)) {
+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
+ #endif
++ } else if ((sn & SNAP_KEYINDEX)) {
++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
+ } else {
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
+ }
+@@ -2013,11 +2073,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ }
+ checkmclim(as);
+ }
+- lua_assert(map + nent == flinks);
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+ }
+
+ /* -- GC handling --------------------------------------------------------- */
+
++/* Marker to prevent patching the GC check exit. */
++#define ARM_NOPATCH_GC_CHECK (ARMI_BIC|ARMI_K12)
++
+ /* Check GC threshold and do one or more GC steps. */
+ static void asm_gc_check(ASMState *as)
+ {
+@@ -2029,6 +2092,7 @@ static void asm_gc_check(ASMState *as)
+ l_end = emit_label(as);
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */
++ *--as->mcp = ARM_NOPATCH_GC_CHECK;
+ emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ASMREF_TMP2; /* MSize steps */
+@@ -2063,6 +2127,12 @@ static void asm_loop_fixup(ASMState *as)
+ }
+ }
+
++/* Fixup the tail of the loop. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ UNUSED(as); /* Nothing to do. */
++}
++
+ /* -- Head of trace ------------------------------------------------------- */
+
+ /* Reload L register from g->cur_L. */
+@@ -2099,7 +2169,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+ rset_clear(allow, ra_dest(as, ir, allow));
+ } else {
+ Reg r = irp->r;
+- lua_assert(ra_hasreg(r));
++ lj_assertA(ra_hasreg(r), "base reg lost");
+ rset_clear(allow, r);
+ if (r != ir->r && !rset_test(as->freeset, r))
+ ra_restore(as, regcost_ref(as->cost[r]));
+@@ -2121,7 +2191,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+ } else {
+ /* Patch stack adjustment. */
+ uint32_t k = emit_isk12(ARMI_ADD, spadj);
+- lua_assert(k);
++ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
+ p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ }
+ /* Patch exit branch. */
+@@ -2197,13 +2267,14 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ /* Look for bl_cc exitstub, replace with b_cc target. */
+ uint32_t ins = *p;
+ if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u &&
+- ((ins ^ (px-p)) & 0x00ffffffu) == 0) {
++ ((ins ^ (px-p)) & 0x00ffffffu) == 0 &&
++ p[-1] != ARM_NOPATCH_GC_CHECK) {
+ *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu);
+ cend = p+1;
+ if (!cstart) cstart = p;
+ }
+ }
+- lua_assert(cstart != NULL);
++ lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
+ lj_mcode_sync(cstart, cend);
+ lj_mcode_patch(J, mcarea, 1);
+ }
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 8fd92e76..67c53ee2 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -1,6 +1,6 @@
+ /*
+ ** ARM64 IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+ ** Sponsored by Cisco Systems, Inc.
+@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+ asm_mclimit(as);
+ /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+ for (i = nexits-1; (int32_t)i >= 0; i--)
+- *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
+- *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
++ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
++ *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
+ mxp--;
+- *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
+- *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
++ *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
++ *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
+ as->mctop = mxp;
+ }
+
+@@ -77,7 +77,7 @@ static void asm_guardcc(ASMState *as, A64CC cc)
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+- *p = A64I_B | ((target-p) & 0x03ffffffu);
++ *p = A64I_B | A64F_S26(target-p);
+ emit_cond_branch(as, cc^1, p-1);
+ return;
+ }
+@@ -91,7 +91,7 @@ static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+- *p = A64I_B | ((target-p) & 0x03ffffffu);
++ *p = A64I_B | A64F_S26(target-p);
+ emit_tnb(as, ai^0x01000000u, r, bit, p-1);
+ return;
+ }
+@@ -105,7 +105,7 @@ static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+- *p = A64I_B | ((target-p) & 0x03ffffffu);
++ *p = A64I_B | A64F_S26(target-p);
+ emit_cnb(as, ai^0x01000000u, r, p-1);
+ return;
+ }
+@@ -198,6 +198,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+ return RID_GL;
+ }
+ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
++ return RID_GL;
+ }
+ }
+ *ofsp = 0;
+@@ -213,7 +216,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
+ return A64F_M(ir->r);
+ } else if (irref_isk(ref)) {
+ uint32_t m;
+- int64_t k = get_k64val(ir);
++ int64_t k = get_k64val(as, ref);
+ if ((ai & 0x1f000000) == 0x0a000000)
+ m = emit_isk13(k, irt_is64(ir->t));
+ else
+@@ -295,8 +298,10 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
+ } else if (asm_isk32(as, ir->op1, &ofs)) {
+ ref = ir->op2;
+ } else {
+- Reg rn = ra_alloc1(as, ir->op1, allow);
+- IRIns *irr = IR(ir->op2);
++ Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
++ Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
++ Reg rn = ra_alloc1(as, refv, allow);
++ IRIns *irr = IR(refk);
+ uint32_t m;
+ if (irr+1 == ir && !ra_used(irr) &&
+ irr->o == IR_ADD && irref_isk(irr->op2)) {
+@@ -307,7 +312,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
+ goto skipopm;
+ }
+ }
+- m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
++ m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
+ ofs = sizeof(GCstr);
+ skipopm:
+ emit_lso(as, ai, rd, rd, ofs);
+@@ -352,9 +357,9 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+ static int asm_fuseandshift(ASMState *as, IRIns *ir)
+ {
+ IRIns *irl = IR(ir->op1);
+- lua_assert(ir->o == IR_BAND);
++ lj_assertA(ir->o == IR_BAND, "bad usage");
+ if (canfuse(as, irl) && irref_isk(ir->op2)) {
+- uint64_t mask = get_k64val(IR(ir->op2));
++ uint64_t mask = get_k64val(as, ir->op2);
+ if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
+ int32_t shmask = irt_is64(irl->t) ? 63 : 31;
+ int32_t shift = (IR(irl->op2)->i & shmask);
+@@ -382,7 +387,7 @@ static int asm_fuseandshift(ASMState *as, IRIns *ir)
+ static int asm_fuseorshift(ASMState *as, IRIns *ir)
+ {
+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+- lua_assert(ir->o == IR_BOR);
++ lj_assertA(ir->o == IR_BOR, "bad usage");
+ if (canfuse(as, irl) && canfuse(as, irr) &&
+ ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
+ (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
+@@ -426,7 +431,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ if (ref) {
+ if (irt_isfp(ir->t)) {
+ if (fpr <= REGARG_LASTFPR) {
+- lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, fpr),
++ "reg %d not free", fpr); /* Must have been evicted. */
+ ra_leftov(as, fpr, ref);
+ fpr++;
+ } else {
+@@ -436,7 +442,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ }
+ } else {
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
+ ra_leftov(as, gpr, ref);
+ gpr++;
+ } else {
+@@ -453,11 +460,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ {
+ RegSet drop = RSET_SCRATCH;
++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
++ if (hiop && ra_hasreg((ir+1)->r))
++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+- lua_assert(!irt_ispri(ir->t));
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (irt_isfp(ir->t)) {
+ if (ci->flags & CCI_CASTU64) {
+ Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
+@@ -466,6 +476,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ } else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
++ } else if (hiop) {
++ ra_destpair(as, ir);
+ } else {
+ ra_destreg(as, ir, RID_RET);
+ }
+@@ -515,6 +527,21 @@ static void asm_retf(ASMState *as, IRIns *ir)
+ emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
+ }
+
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb)
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
++ emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);
++ emit_getgl(as, RID_TMP, cur_L);
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
++}
++#endif
++
+ /* -- Type conversions ---------------------------------------------------- */
+
+ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+@@ -544,7 +571,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+ IRRef lref = ir->op1;
+- lua_assert(irt_type(ir->t) != st);
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) { /* FP to FP conversion. */
+@@ -564,7 +591,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ } else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+@@ -584,7 +612,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
+ st == IRT_U8 ? A64I_UXTBw :
+ st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ emit_dn(as, ai, dest, left);
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+@@ -597,7 +625,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ emit_dn(as, A64I_SXTW, dest, left);
+ }
+ } else {
+- if (st64) {
++ if (st64 && !(ir->op2 & IRCONV_NONE)) {
+ /* This is either a 32 bit reg/reg mov which zeroes the hiword
+ ** or a load of the loword from a 64 bit address.
+ */
+@@ -648,7 +676,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+ {
+ RegSet allow = rset_exclude(RSET_GPR, base);
+ IRIns *ir = IR(ref);
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "store of IR type %d", irt_type(ir->t));
+ if (irref_isk(ref)) {
+ TValue k;
+ lj_ir_kvalue(as->J->L, &k, ir);
+@@ -669,22 +698,23 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+ }
+
+ /* Get pointer to TValue. */
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+ {
+- IRIns *ir = IR(ref);
+- if (irt_isnum(ir->t)) {
+- if (irref_isk(ref)) {
+- /* Use the number constant itself as a TValue. */
+- ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
++ if ((mode & IRTMPREF_IN1)) {
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
++ return;
++ }
++ emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
+ } else {
+- /* Otherwise force a spill and use the spill slot. */
+- emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
++ asm_tvstore64(as, dest, 0, ref);
+ }
+- } else {
+- /* Otherwise use g->tmptv to hold the TValue. */
+- asm_tvstore64(as, dest, 0, ref);
+- ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
+ }
++ /* g->tmptv holds the TValue(s). */
++ emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
+ }
+
+ static void asm_aref(ASMState *as, IRIns *ir)
+@@ -722,6 +752,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ Reg dest = ra_dest(as, ir, allow);
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+ Reg key = 0, tmp = RID_TMP;
++ Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
+ IRRef refkey = ir->op2;
+ IRIns *irkey = IR(refkey);
+ int isk = irref_isk(ir->op2);
+@@ -751,6 +782,28 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ }
+ }
+
++ /* Allocate constants early. */
++ if (irt_isnum(kt)) {
++ if (!isk) {
++ tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
++ ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
++ rset_clear(allow, tisnum);
++ }
++ } else if (irt_isaddr(kt)) {
++ if (isk) {
++ int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
++ scr = ra_allock(as, kk, allow);
++ } else {
++ scr = ra_scratch(as, allow);
++ }
++ rset_clear(allow, scr);
++ } else {
++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
++ type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
++ scr = ra_scratch(as, rset_clear(allow, type));
++ rset_clear(allow, scr);
++ }
++
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
+ l_end = emit_label(as);
+ as->invmcp = NULL;
+@@ -780,9 +833,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_nm(as, A64I_CMPx, key, tmp);
+ emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+ } else {
+- Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
+- Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
+- rset_clear(allow, tisnum);
+ emit_nm(as, A64I_FCMPd, key, ftmp);
+ emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
+ emit_cond_branch(as, CC_LO, l_next);
+@@ -790,36 +840,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
+ }
+ } else if (irt_isaddr(kt)) {
+- Reg scr;
+ if (isk) {
+- int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+- scr = ra_allock(as, kk, allow);
+ emit_nm(as, A64I_CMPx, scr, tmp);
+ emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+ } else {
+- scr = ra_scratch(as, allow);
+ emit_nm(as, A64I_CMPx, tmp, scr);
+ emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
+ }
+- rset_clear(allow, scr);
+ } else {
+- Reg type, scr;
+- lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+- type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
+- scr = ra_scratch(as, rset_clear(allow, type));
+- rset_clear(allow, scr);
+- emit_nm(as, A64I_CMPw, scr, type);
++ emit_nm(as, A64I_CMPx, scr, type);
+ emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
+ }
+
+ *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
+ if (!isk && irt_isaddr(kt)) {
+- Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
++ type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
+ emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
+ rset_clear(allow, type);
+ }
+ /* Load main position relative to tab->node into dest. */
+- khash = isk ? ir_khash(irkey) : 1;
++ khash = isk ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
+ } else {
+@@ -831,9 +871,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
+ emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+ } else if (irt_isstr(kt)) {
+- /* Fetch of str->hash is cheaper than ra_allock. */
++ /* Fetch of str->sid is cheaper than ra_allock. */
+ emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+- emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
++ emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
+ emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+ } else { /* Must match with hash*() in lj_tab.c. */
+ emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+@@ -869,14 +909,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+ int bigofs = !emit_checkofs(A64I_LDRx, ofs);
+- RegSet allow = RSET_GPR;
+ Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+- Reg node = ra_alloc1(as, ir->op1, allow);
+- Reg key = ra_scratch(as, rset_clear(allow, node));
+- Reg idx = node;
++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
++ Reg key, idx = node;
++ RegSet allow = rset_exclude(RSET_GPR, node);
+ uint64_t k;
+- lua_assert(ofs % sizeof(Node) == 0);
+- rset_clear(allow, key);
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (bigofs) {
+ idx = dest;
+ rset_clear(allow, dest);
+@@ -892,7 +930,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ } else {
+ k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
+ }
+- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow));
++ key = ra_scratch(as, allow);
++ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
+ emit_lso(as, A64I_LDRx, key, idx, kofs);
+ if (bigofs)
+ emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+@@ -925,7 +964,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
+ static void asm_fref(ASMState *as, IRIns *ir)
+ {
+ UNUSED(as); UNUSED(ir);
+- lua_assert(!ra_used(ir));
++ lj_assertA(!ra_used(ir), "unfused FREF");
+ }
+
+ static void asm_strref(ASMState *as, IRIns *ir)
+@@ -977,7 +1016,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
+ Reg idx;
+ A64Ins ai = asm_fxloadins(ir);
+ int32_t ofs;
+- if (ir->op1 == REF_NIL) {
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = RID_GL;
+ ofs = (ir->op2 << 2) - GG_OFS(g);
+ } else {
+@@ -1008,7 +1047,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
+ static void asm_xload(ASMState *as, IRIns *ir)
+ {
+ Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+ }
+
+@@ -1026,8 +1065,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ Reg idx, tmp, type;
+ int32_t ofs = 0;
+ RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+- lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+- irt_isint(ir->t));
++ lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
++ irt_isint(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ if (ra_used(ir)) {
+ Reg dest = ra_dest(as, ir, allow);
+ tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
+@@ -1043,10 +1083,12 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ }
+ type = ra_scratch(as, rset_clear(gpr, tmp));
+ idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+ /* Always do the type check, even if the load result is unused. */
+ asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
+ if (irt_type(ir->t) >= IRT_NUM) {
+- lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
++ lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+ ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
+ } else if (irt_isaddr(ir->t)) {
+@@ -1056,7 +1098,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+ } else {
+ emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+- ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
++ ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
+ }
+ if (ofs & FUSE_REG)
+ emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+@@ -1111,8 +1153,10 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ IRType1 t = ir->t;
+ Reg dest = RID_NONE, base;
+ RegSet allow = RSET_GPR;
+- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+ dest = ra_scratch(as, RSET_FPR);
+ asm_tointg(as, ir, dest);
+@@ -1121,7 +1165,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ Reg tmp = RID_NONE;
+ if ((ir->op2 & IRSLOAD_CONVERT))
+ tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
+- lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
++ lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
++ "bad SLOAD type %d", irt_type(t));
+ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+ base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
+ if (irt_isaddr(t)) {
+@@ -1161,7 +1206,8 @@ dotypecheck:
+ /* Need type check, even if the load result is unused. */
+ asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
+ if (irt_type(t) >= IRT_NUM) {
+- lua_assert(irt_isinteger(t) || irt_isnum(t));
++ lj_assertA(irt_isinteger(t) || irt_isnum(t),
++ "bad SLOAD type %d", irt_type(t));
+ emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+ ra_allock(as, LJ_TISNUM << 15, allow), tmp);
+ } else if (irt_isnil(t)) {
+@@ -1196,7 +1242,8 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+ IRRef args[4];
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ asm_setupresult(as, ir, ci); /* GCcdata * */
+@@ -1204,7 +1251,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ if (ir->o == IR_CNEWI) {
+ int32_t ofs = sizeof(GCcdata);
+ Reg r = ra_alloc1(as, ir->op2, allow);
+- lua_assert(sz == 4 || sz == 8);
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+@@ -1231,8 +1278,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+ ra_releasetmp(as, ASMREF_TMP1));
+ }
+-#else
+-#define asm_cnew(as, ir) ((void)0)
+ #endif
+
+ /* -- Write barriers ------------------------------------------------------ */
+@@ -1241,17 +1286,13 @@ static void asm_tbar(ASMState *as, IRIns *ir)
+ {
+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+- Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
+- rset_exclude(rset_exclude(RSET_GPR, tab), link));
+ Reg mark = RID_TMP;
+ MCLabel l_end = emit_label(as);
+ emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
+ emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+- emit_lso(as, A64I_STRx, tab, gr,
+- (int32_t)offsetof(global_State, gc.grayagain));
++ emit_setgl(as, tab, gc.grayagain);
+ emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
+- emit_lso(as, A64I_LDRx, link, gr,
+- (int32_t)offsetof(global_State, gc.grayagain));
++ emit_getgl(as, link, gc.grayagain);
+ emit_cond_branch(as, CC_EQ, l_end);
+ emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
+ emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+@@ -1265,13 +1306,13 @@ static void asm_obar(ASMState *as, IRIns *ir)
+ RegSet allow = RSET_GPR;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+- lua_assert(IR(ir->op1)->o == IR_UREFC);
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ir->op1; /* TValue *tv */
+ asm_gencall(as, ci, args);
+- ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
++ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
+ obj = IR(ir->op1)->r;
+ tmp = ra_scratch(as, rset_exclude(allow, obj));
+ emit_cond_branch(as, CC_EQ, l_end);
+@@ -1309,8 +1350,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
+ } else if (fpm <= IRFPM_TRUNC) {
+ asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
+ fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
+- } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
+- return;
+ } else {
+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+ }
+@@ -1417,46 +1456,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ asm_intmul(as, ir);
+ }
+
+-static void asm_div(ASMState *as, IRIns *ir)
+-{
+-#if LJ_HASFFI
+- if (!irt_isnum(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+- IRCALL_lj_carith_divu64);
+- else
+-#endif
+- asm_fparith(as, ir, A64I_FDIVd);
+-}
+-
+-static void asm_pow(ASMState *as, IRIns *ir)
+-{
+-#if LJ_HASFFI
+- if (!irt_isnum(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+- IRCALL_lj_carith_powu64);
+- else
+-#endif
+- asm_callid(as, ir, IRCALL_lj_vm_powi);
+-}
+-
+ #define asm_addov(as, ir) asm_add(as, ir)
+ #define asm_subov(as, ir) asm_sub(as, ir)
+ #define asm_mulov(as, ir) asm_mul(as, ir)
+
++#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd)
+ #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
+-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
+-
+-static void asm_mod(ASMState *as, IRIns *ir)
+-{
+-#if LJ_HASFFI
+- if (!irt_isint(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+- IRCALL_lj_carith_modu64);
+- else
+-#endif
+- asm_callid(as, ir, IRCALL_lj_vm_modi);
+-}
+
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
+@@ -1571,7 +1576,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
+ #define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
+ #define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
+ #define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
+-#define asm_brol(as, ir) lua_assert(0)
++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
+ static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
+ {
+@@ -1587,7 +1592,7 @@ static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = ((left >> 8) & 31); left &= 31;
+- emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right);
++ emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left);
+ emit_nm(as, A64I_FCMPd, left, right);
+ }
+
+@@ -1599,8 +1604,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
+ asm_intmin_max(as, ir, cc);
+ }
+
+-#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
+-#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
++#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL)
++#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE)
+
+ /* -- Comparisons --------------------------------------------------------- */
+
+@@ -1652,15 +1657,16 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
+ Reg left;
+ uint32_t m;
+ int cmpprev0 = 0;
+- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
+- irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
++ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
++ irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
++ "bad comparison data type %d", irt_type(ir->t));
+ if (asm_swapops(as, lref, rref)) {
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
+ else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
+ }
+ oldcc = cc;
+- if (irref_isk(rref) && get_k64val(IR(rref)) == 0) {
++ if (irref_isk(rref) && get_k64val(as, rref) == 0) {
+ IRIns *irl = IR(lref);
+ if (cc == CC_GE) cc = CC_PL;
+ else if (cc == CC_LT) cc = CC_MI;
+@@ -1675,7 +1681,7 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
+ Reg tmp = blref; blref = brref; brref = tmp;
+ }
+ if (irref_isk(brref)) {
+- uint64_t k = get_k64val(IR(brref));
++ uint64_t k = get_k64val(as, brref);
+ if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
+ asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
+ ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
+@@ -1719,12 +1725,25 @@ static void asm_comp(ASMState *as, IRIns *ir)
+
+ #define asm_equal(as, ir) asm_comp(as, ir)
+
+-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
++/* -- Split register ops -------------------------------------------------- */
+
+-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
++/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */
+ static void asm_hiop(ASMState *as, IRIns *ir)
+ {
+- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */
++ /* HIOP is marked as a store because it needs its own DCE logic. */
++ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
++ switch ((ir-1)->o) {
++ case IR_CALLN:
++ case IR_CALLL:
++ case IR_CALLS:
++ case IR_CALLXS:
++ if (!uselo)
++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
++ break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
++ }
+ }
+
+ /* -- Profiling ----------------------------------------------------------- */
+@@ -1732,7 +1751,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ static void asm_prof(ASMState *as, IRIns *ir)
+ {
+ uint32_t k = emit_isk13(HOOK_PROFILE, 0);
+- lua_assert(k != 0);
++ lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13");
+ UNUSED(ir);
+ asm_guardcc(as, CC_NE);
+ emit_n(as, A64I_TSTw^k, RID_TMP);
+@@ -1750,7 +1769,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
+ if (irp) {
+ if (!ra_hasspill(irp->s)) {
+ pbase = irp->r;
+- lua_assert(ra_hasreg(pbase));
++ lj_assertA(ra_hasreg(pbase), "base reg lost");
+ } else if (allow) {
+ pbase = rset_pickbot(allow);
+ } else {
+@@ -1762,7 +1781,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
+ }
+ emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
+ k = emit_isk12((8*topslot));
+- lua_assert(k);
++ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
+ emit_n(as, A64I_CMPx^k, RID_TMP);
+ emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
+ emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
+@@ -1795,7 +1814,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ IRIns *ir = IR(ref);
+ if ((sn & SNAP_NORESTORE))
+ continue;
+- if (irt_isnum(ir->t)) {
++ if ((sn & SNAP_KEYINDEX)) {
++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
++ Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) :
++ ra_alloc1(as, ref, allow);
++ rset_clear(allow, r);
++ emit_lso(as, A64I_STRw, r, RID_BASE, ofs);
++ emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4);
++ } else if (irt_isnum(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
+ } else {
+@@ -1803,36 +1829,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ }
+ checkmclim(as);
+ }
+- lua_assert(map + nent == flinks);
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+ }
+
+ /* -- GC handling --------------------------------------------------------- */
+
++/* Marker to prevent patching the GC check exit. */
++#define ARM64_NOPATCH_GC_CHECK \
++ (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
++
+ /* Check GC threshold and do one or more GC steps. */
+ static void asm_gc_check(ASMState *as)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+ IRRef args[2];
+ MCLabel l_end;
+- Reg tmp1, tmp2;
++ Reg tmp2;
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+ asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
++ *--as->mcp = ARM64_NOPATCH_GC_CHECK;
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ASMREF_TMP2; /* MSize steps */
+ asm_gencall(as, ci, args);
+- tmp1 = ra_releasetmp(as, ASMREF_TMP1);
++ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
+ tmp2 = ra_releasetmp(as, ASMREF_TMP2);
+ emit_loadi(as, tmp2, as->gcsteps);
+ /* Jump around GC step if GC total < GC threshold. */
+ emit_cond_branch(as, CC_LS, l_end);
+ emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
+- emit_lso(as, A64I_LDRx, tmp2, tmp1,
+- (int32_t)offsetof(global_State, gc.threshold));
+- emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
+- (int32_t)offsetof(global_State, gc.total));
+- ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
++ emit_getgl(as, tmp2, gc.threshold);
++ emit_getgl(as, RID_TMP, gc.total);
+ as->gcsteps = 0;
+ checkmclim(as);
+ }
+@@ -1851,10 +1879,16 @@ static void asm_loop_fixup(ASMState *as)
+ p[-2] |= ((uint32_t)delta & mask) << 5;
+ } else {
+ ptrdiff_t delta = target - (p - 1);
+- p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu);
++ p[-1] = A64I_B | A64F_S26(delta);
+ }
+ }
+
++/* Fixup the tail of the loop. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ UNUSED(as); /* Nothing to do. */
++}
++
+ /* -- Head of trace ------------------------------------------------------- */
+
+ /* Reload L register from g->cur_L. */
+@@ -1891,7 +1925,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+ rset_clear(allow, ra_dest(as, ir, allow));
+ } else {
+ Reg r = irp->r;
+- lua_assert(ra_hasreg(r));
++ lj_assertA(ra_hasreg(r), "base reg lost");
+ rset_clear(allow, r);
+ if (r != ir->r && !rset_test(as->freeset, r))
+ ra_restore(as, regcost_ref(as->cost[r]));
+@@ -1915,12 +1949,12 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+ } else {
+ /* Patch stack adjustment. */
+ uint32_t k = emit_isk12(spadj);
+- lua_assert(k);
++ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
+ p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
+ }
+ /* Patch exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+- p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu);
++ p[-1] = A64I_B | A64F_S26((target-p)+1);
+ }
+
+ /* Prepare tail of code. */
+@@ -1983,40 +2017,54 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ {
+ MCode *p = T->mcode;
+ MCode *pe = (MCode *)((char *)p + T->szmcode);
+- MCode *cstart = NULL, *cend = p;
++ MCode *cstart = NULL;
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
+ MCode *px = exitstub_trace_addr(T, exitno);
++ int patchlong = 1;
++ /* Note: this assumes a trace exit is only ever patched once. */
+ for (; p < pe; p++) {
+ /* Look for exitstub branch, replace with branch to target. */
++ ptrdiff_t delta = target - p;
+ MCode ins = A64I_LE(*p);
+ if ((ins & 0xff000000u) == 0x54000000u &&
+ ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+- /* Patch bcc exitstub. */
+- *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
+- cend = p+1;
+- if (!cstart) cstart = p;
++ /* Patch bcc, if within range. */
++ if (A64F_S_OK(delta, 19)) {
++ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
++ if (!cstart) cstart = p;
++ }
+ } else if ((ins & 0xfc000000u) == 0x14000000u &&
+ ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
+- /* Patch b exitstub. */
+- *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
+- cend = p+1;
++ /* Patch b. */
++ lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
++ *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
+ if (!cstart) cstart = p;
+ } else if ((ins & 0x7e000000u) == 0x34000000u &&
+ ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+- /* Patch cbz/cbnz exitstub. */
+- *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
+- cend = p+1;
+- if (!cstart) cstart = p;
++ /* Patch cbz/cbnz, if within range. */
++ if (p[-1] == ARM64_NOPATCH_GC_CHECK) {
++ patchlong = 0;
++ } else if (A64F_S_OK(delta, 19)) {
++ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
++ if (!cstart) cstart = p;
++ }
+ } else if ((ins & 0x7e000000u) == 0x36000000u &&
+ ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
+- /* Patch tbz/tbnz exitstub. */
+- *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
+- cend = p+1;
+- if (!cstart) cstart = p;
++ /* Patch tbz/tbnz, if within range. */
++ if (A64F_S_OK(delta, 14)) {
++ *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
++ if (!cstart) cstart = p;
++ }
+ }
+ }
+- lua_assert(cstart != NULL);
+- lj_mcode_sync(cstart, cend);
++ /* Always patch long-range branch in exit stub itself. Except, if we can't. */
++ if (patchlong) {
++ ptrdiff_t delta = target - px;
++ lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
++ *px = A64I_B | A64F_S26(delta);
++ if (!cstart) cstart = px;
++ }
++ if (cstart) lj_mcode_sync(cstart, px+1);
+ lj_mcode_patch(J, mcarea, 1);
+ }
+
+diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
+index affe7d89..1497ad9c 100644
+--- a/src/lj_asm_mips.h
++++ b/src/lj_asm_mips.h
+@@ -1,6 +1,6 @@
+ /*
+ ** MIPS IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Register allocator extensions --------------------------------------- */
+@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
+ {
+ Reg r = IR(ref)->r;
+ if (ra_noreg(r)) {
+- if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0)
++ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
+ return RID_ZERO;
+ r = ra_allocref(as, ref, allow);
+ } else {
+@@ -64,18 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+ /* Setup spare long-range jump slots per mcarea. */
+ static void asm_sparejump_setup(ASMState *as)
+ {
+- MCode *mxp = as->mcbot;
+- /* Assumes sizeof(MCLink) == 8. */
+- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) {
+- lua_assert(MIPSI_NOP == 0);
+- memset(mxp+2, 0, MIPS_SPAREJUMP*8);
+- mxp += MIPS_SPAREJUMP*2;
+- lua_assert(mxp < as->mctop);
+- lj_mcode_sync(as->mcbot, mxp);
+- lj_mcode_commitbot(as->J, mxp);
+- as->mcbot = mxp;
+- as->mclim = as->mcbot + MCLIM_REDZONE;
++ MCode *mxp = as->mctop;
++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
++ mxp -= MIPS_SPAREJUMP*2;
++ lj_assertA(MIPSI_NOP == 0, "bad NOP");
++ memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
++ as->mctop = mxp;
++ }
++}
++
++static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
++{
++ MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
++ int slot = MIPS_SPAREJUMP;
++ while (slot--) {
++ mxp -= 2;
++ if (*mxp == tjump) {
++ return mxp;
++ } else if (*mxp == MIPSI_NOP) {
++ *mxp = tjump;
++ return mxp;
++ }
+ }
++ return NULL;
+ }
+
+ /* Setup exit stub after the end of each trace. */
+@@ -85,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as)
+ /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
+ *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
+ *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
+- lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0);
++ lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0,
++ "branch target out of range");
+ *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
+ as->mctop = mxp;
+ }
+@@ -102,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
+ as->invmcp = NULL;
+ as->loopinv = 1;
+ as->mcp = p+1;
++#if !LJ_TARGET_MIPSR6
+ mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */
++#else
++ mi = mi ^ ((mi>>28) == 1 ? 0x04000000u :
++ (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */
++#endif
+ target = p; /* Patch target later in asm_loop_fixup. */
+ }
+ emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
+@@ -176,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
+ return ra_allock(as, ofs-(int16_t)ofs, allow);
+ }
+ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
++ return RID_JGL;
+ }
+ }
+ *ofsp = 0;
+@@ -191,20 +211,20 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
+ if (ra_noreg(ir->r) && canfuse(as, ir)) {
+ if (ir->o == IR_ADD) {
+ intptr_t ofs2;
+- if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)),
++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
+ checki16(ofs2))) {
+ ref = ir->op1;
+ ofs = (int32_t)ofs2;
+ }
+ } else if (ir->o == IR_STRREF) {
+ intptr_t ofs2 = 65536;
+- lua_assert(ofs == 0);
++ lj_assertA(ofs == 0, "bad usage");
+ ofs = (int32_t)sizeof(GCstr);
+ if (irref_isk(ir->op2)) {
+- ofs2 = ofs + get_kval(IR(ir->op2));
++ ofs2 = ofs + get_kval(as, ir->op2);
+ ref = ir->op1;
+ } else if (irref_isk(ir->op1)) {
+- ofs2 = ofs + get_kval(IR(ir->op1));
++ ofs2 = ofs + get_kval(as, ir->op1);
+ ref = ir->op2;
+ }
+ if (!checki16(ofs2)) {
+@@ -248,7 +268,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ #if !LJ_SOFTFP
+ if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
+ !(ci->flags & CCI_VARARG)) {
+- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
++ lj_assertA(rset_test(as->freeset, fpr),
++ "reg %d not free", fpr); /* Already evicted. */
+ ra_leftov(as, fpr, ref);
+ fpr += LJ_32 ? 2 : 1;
+ gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1;
+@@ -260,7 +281,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ #endif
+ if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Already evicted. */
+ #if !LJ_SOFTFP
+ if (irt_isfp(ir->t)) {
+ RegSet of = as->freeset;
+@@ -273,7 +295,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ #if LJ_32
+ emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
+ emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
+- lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */
++ lj_assertA(rset_test(as->freeset, gpr+1),
++ "reg %d not free", gpr+1); /* Already evicted. */
+ gpr += 2;
+ #else
+ emit_tg(as, MIPSI_DMFC1, gpr, r);
+@@ -291,7 +314,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ {
+ ra_leftov(as, gpr, ref);
+ gpr++;
+-#if LJ_64
++#if LJ_64 && !LJ_SOFTFP
+ fpr++;
+ #endif
+ }
+@@ -302,7 +325,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ emit_spstore(as, ir, r, ofs);
+ ofs += irt_isnum(ir->t) ? 8 : 4;
+ #else
+- emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0));
++ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0));
+ ofs += 8;
+ #endif
+ }
+@@ -313,7 +336,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ #endif
+ if (gpr <= REGARG_LASTGPR) {
+ gpr++;
+-#if LJ_64
++#if LJ_64 && !LJ_SOFTFP
+ fpr++;
+ #endif
+ } else {
+@@ -328,22 +351,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ {
+ RegSet drop = RSET_SCRATCH;
+-#if LJ_32
+ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+-#endif
+ #if !LJ_SOFTFP
+ if ((ci->flags & CCI_NOFPRCLOBBER))
+ drop &= ~RSET_FPR;
+ #endif
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+-#if LJ_32
+ if (hiop && ra_hasreg((ir+1)->r))
+ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
+-#endif
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+- lua_assert(!irt_ispri(ir->t));
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
+ if ((ci->flags & CCI_CASTU64)) {
+ int32_t ofs = sps_scale(ir->s);
+@@ -369,10 +388,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ } else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
+-#if LJ_32
+ } else if (hiop) {
+ ra_destpair(as, ir);
+-#endif
+ } else {
+ ra_destreg(as, ir, RID_RET);
+ }
+@@ -391,7 +408,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
+ func = ir->op2; irf = IR(func);
+ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+ if (irref_isk(func)) { /* Call to constant address. */
+- ci.func = (ASMFunction)(void *)get_kval(irf);
++ ci.func = (ASMFunction)(void *)get_kval(as, func);
+ } else { /* Need specific register for indirect calls. */
+ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
+ MCode *p = as->mcp;
+@@ -411,7 +428,11 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
+ {
+ /* The modified regs must match with the *.dasc implementation. */
+ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
+- RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR);
++ RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
++#if LJ_TARGET_MIPSR6
++ |RID2RSET(RID_F21)
++#endif
++ ;
+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
+ ra_evictset(as, drop);
+ ra_destreg(as, ir, RID_FPRET);
+@@ -438,6 +459,27 @@ static void asm_retf(ASMState *as, IRIns *ir)
+ emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
+ }
+
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb)
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
++ if ((as->flags & JIT_F_MIPSXXR2)) {
++ emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp,
++ lj_fls(SBUF_MASK_FLAG), 0);
++ } else {
++ emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
++ emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
++ }
++ emit_getgl(as, RID_TMP, cur_L);
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
++}
++#endif
++
+ /* -- Type conversions ---------------------------------------------------- */
+
+ #if !LJ_SOFTFP
+@@ -445,8 +487,13 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+ {
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
++#if !LJ_TARGET_MIPSR6
+ asm_guard(as, MIPSI_BC1F, 0, 0);
+ emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
++#else
++ asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31));
++ emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left);
++#endif
+ emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
+ emit_tg(as, MIPSI_MFC1, dest, tmp);
+ emit_fg(as, MIPSI_CVT_W_D, tmp, left);
+@@ -462,12 +509,36 @@ static void asm_tobit(ASMState *as, IRIns *ir)
+ emit_tg(as, MIPSI_MFC1, dest, tmp);
+ emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
+ }
++#elif LJ_64 /* && LJ_SOFTFP */
++static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
++{
++ /* The modified regs must match with the *.dasc implementation. */
++ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
++ RID2RSET(RID_R1)|RID2RSET(RID_R12);
++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
++ ra_evictset(as, drop);
++ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
++ ra_destreg(as, ir, RID_RET);
++ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO);
++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0);
++ if (r == RID_NONE)
++ ra_leftov(as, REGARG_FIRSTGPR, ir->op1);
++ else if (r != REGARG_FIRSTGPR)
++ emit_move(as, REGARG_FIRSTGPR, r);
++}
++
++static void asm_tobit(ASMState *as, IRIns *ir)
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ emit_dta(as, MIPSI_SLL, dest, dest, 0);
++ asm_callid(as, ir, IRCALL_lj_vm_tobit);
++}
+ #endif
+
+ static void asm_conv(ASMState *as, IRIns *ir)
+ {
+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+-#if !LJ_SOFTFP
++#if !LJ_SOFTFP32
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+ #endif
+ #if LJ_64
+@@ -475,15 +546,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ #endif
+ IRRef lref = ir->op1;
+ #if LJ_32
+- lua_assert(!(irt_isint64(ir->t) ||
+- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
++ /* 64 bit integer conversions are handled by SPLIT. */
++ lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
++ "IR %04d has unsplit 64 bit type",
++ (int)(ir - as->ir) - REF_BIAS);
+ #endif
+-#if LJ_32 && LJ_SOFTFP
++#if LJ_SOFTFP32
+ /* FP conversions are handled by SPLIT. */
+- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
++ "IR %04d has FP type",
++ (int)(ir - as->ir) - REF_BIAS);
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+ #else
+- lua_assert(irt_type(ir->t) != st);
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
++#if !LJ_SOFTFP
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) { /* FP to FP conversion. */
+@@ -541,7 +617,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+@@ -575,8 +652,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ (void *)&as->J->k64[LJ_K64_M2P64],
+ rset_exclude(RSET_GPR, dest));
+ emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
+- emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
+- emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
++#if !LJ_TARGET_MIPSR6
++ emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
++ emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
++#else
++ emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
++ emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
++#endif
+ emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
+ (void *)&as->J->k64[LJ_K64_2P63],
+ rset_exclude(RSET_GPR, dest));
+@@ -587,8 +669,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ (void *)&as->J->k32[LJ_K32_M2P64],
+ rset_exclude(RSET_GPR, dest));
+ emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
+- emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
+- emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
++#if !LJ_TARGET_MIPSR6
++ emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
++ emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
++#else
++ emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
++ emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
++#endif
+ emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
+ (void *)&as->J->k32[LJ_K32_2P63],
+ rset_exclude(RSET_GPR, dest));
+@@ -609,12 +696,49 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ }
+ }
+ } else
++#else
++ if (irt_isfp(ir->t)) {
++#if LJ_64 && LJ_HASFFI
++ if (stfp) { /* FP to FP conversion. */
++ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d :
++ IRCALL_softfp_d2f);
++ } else { /* Integer to FP conversion. */
++ IRCallID cid = ((IRT_IS64 >> st) & 1) ?
++ (irt_isnum(ir->t) ?
++ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) :
++ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) :
++ (irt_isnum(ir->t) ?
++ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) :
++ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f));
++ asm_callid(as, ir, cid);
++ }
++#else
++ asm_callid(as, ir, IRCALL_softfp_i2d);
++#endif
++ } else if (stfp) { /* FP to integer conversion. */
++ if (irt_isguard(ir->t)) {
++ /* Checked conversions are only supported from number to int. */
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
++ asm_tointg(as, ir, RID_NONE);
++ } else {
++ IRCallID cid = irt_is64(ir->t) ?
++ ((st == IRT_NUM) ?
++ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
++ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
++ ((st == IRT_NUM) ?
++ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
++ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
++ asm_callid(as, ir, cid);
++ }
++ } else
++#endif
+ #endif
+ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ if ((ir->op2 & IRCONV_SEXT)) {
+ if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
+ emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
+@@ -645,7 +769,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ }
+ }
+ } else {
+- if (st64) {
++ if (st64 && !(ir->op2 & IRCONV_NONE)) {
+ /* This is either a 32 bit reg/reg mov which zeroes the hiword
+ ** or a load of the loword from a 64 bit address.
+ */
+@@ -666,7 +790,7 @@ static void asm_strto(ASMState *as, IRIns *ir)
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+ IRRef args[2];
+ int32_t ofs = 0;
+-#if LJ_SOFTFP
++#if LJ_SOFTFP32
+ ra_evictset(as, RSET_SCRATCH);
+ if (ra_used(ir)) {
+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+@@ -711,7 +835,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+ {
+ RegSet allow = rset_exclude(RSET_GPR, base);
+ IRIns *ir = IR(ref);
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "store of IR type %d", irt_type(ir->t));
+ if (irref_isk(ref)) {
+ TValue k;
+ lj_ir_kvalue(as->J->L, &k, ir);
+@@ -732,34 +857,63 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+ #endif
+
+ /* Get pointer to TValue. */
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+ {
+- IRIns *ir = IR(ref);
+- if (irt_isnum(ir->t)) {
+- if (irref_isk(ref)) /* Use the number constant itself as a TValue. */
+- ra_allockreg(as, igcptr(ir_knum(ir)), dest);
+- else /* Otherwise force a spill and use the spill slot. */
+- emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
+- } else {
+- /* Otherwise use g->tmptv to hold the TValue. */
++ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
++ if ((mode & IRTMPREF_IN1)) {
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if ((mode & IRTMPREF_OUT1)) {
++#if LJ_SOFTFP
++ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
++#if LJ_64
++ emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64);
++#else
++ lj_assertA(irref_isk(ref), "unsplit FP op");
++ emit_setgl(as,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
++ tmptv.u32.lo);
++ emit_setgl(as,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
++ tmptv.u32.hi);
++#endif
++#else
++ Reg src = ra_alloc1(as, ref, RSET_FPR);
++ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
++ emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs);
++#endif
++ } else if (irref_isk(ref)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, igcptr(ir_knum(ir)), dest);
++ } else {
++#if LJ_SOFTFP32
++ lj_assertA(0, "unsplit FP op");
++#else
++ /* Otherwise force a spill and use the spill slot. */
++ emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
++#endif
++ }
++ } else {
++ /* Otherwise use g->tmptv to hold the TValue. */
+ #if LJ_32
+- RegSet allow = rset_exclude(RSET_GPR, dest);
+- Reg type;
+- emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768));
+- if (!irt_ispri(ir->t)) {
+- Reg src = ra_alloc1(as, ref, allow);
+- emit_setgl(as, src, tmptv.gcr);
+- }
+- if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+- type = ra_alloc1(as, ref+1, allow);
+- else
+- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+- emit_setgl(as, type, tmptv.it);
++ Reg type;
++ emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs);
++ if (!irt_ispri(ir->t)) {
++ Reg src = ra_alloc1(as, ref, RSET_GPR);
++ emit_setgl(as, src, tmptv.gcr);
++ }
++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
++ type = ra_alloc1(as, ref+1, RSET_GPR);
++ else
++ type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR);
++ emit_setgl(as, type, tmptv.it);
+ #else
+- asm_tvstore64(as, dest, 0, ref);
+- emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL,
+- (int32_t)(offsetof(global_State, tmptv)-32768));
++ asm_tvstore64(as, dest, 0, ref);
++ emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs);
+ #endif
++ }
++ } else {
++ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+ }
+ }
+
+@@ -780,8 +934,12 @@ static void asm_aref(ASMState *as, IRIns *ir)
+ }
+ base = ra_alloc1(as, ir->op1, RSET_GPR);
+ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
++#if !LJ_TARGET_MIPSR6
+ emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base);
+ emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
++#else
++ emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base);
++#endif
+ }
+
+ /* Inlined hash lookup. Specialized for key type and for const keys.
+@@ -799,6 +957,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ Reg dest = ra_dest(as, ir, allow);
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
++#if LJ_64
++ Reg cmp64 = RID_NONE;
++#endif
+ IRRef refkey = ir->op2;
+ IRIns *irkey = IR(refkey);
+ int isk = irref_isk(refkey);
+@@ -807,11 +968,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ MCLabel l_end, l_loop, l_next;
+
+ rset_clear(allow, tab);
+-#if LJ_32 && LJ_SOFTFP
+- if (!isk) {
+- key = ra_alloc1(as, refkey, allow);
+- rset_clear(allow, key);
+- if (irkey[1].o == IR_HIOP) {
++ if (!LJ_SOFTFP && irt_isnum(kt)) {
++ key = ra_alloc1(as, refkey, RSET_FPR);
++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
++ } else {
++ if (!irt_ispri(kt)) {
++ key = ra_alloc1(as, refkey, allow);
++ rset_clear(allow, key);
++ }
++#if LJ_32
++ if (LJ_SOFTFP && irkey[1].o == IR_HIOP) {
+ if (ra_hasreg((irkey+1)->r)) {
+ type = tmpnum = (irkey+1)->r;
+ tmp1 = ra_scratch(as, allow);
+@@ -822,25 +988,33 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ }
+ rset_clear(allow, tmpnum);
+ } else {
+- type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
++ type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
+ rset_clear(allow, type);
+ }
+- }
+-#else
+- if (irt_isnum(kt)) {
+- key = ra_alloc1(as, refkey, RSET_FPR);
+- tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+- } else if (!irt_ispri(kt)) {
+- key = ra_alloc1(as, refkey, allow);
+- rset_clear(allow, key);
+-#if LJ_32
+- type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
+- rset_clear(allow, type);
+ #endif
+ }
+-#endif
+ tmp2 = ra_scratch(as, allow);
+ rset_clear(allow, tmp2);
++#if LJ_64
++ if (LJ_SOFTFP || !irt_isnum(kt)) {
++ /* Allocate cmp64 register used for 64-bit comparisons */
++ if (LJ_SOFTFP && irt_isnum(kt)) {
++ cmp64 = key;
++ } else if (!isk && irt_isaddr(kt)) {
++ cmp64 = tmp2;
++ } else {
++ int64_t k;
++ if (isk && irt_isaddr(kt)) {
++ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
++ } else {
++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
++ k = ~((int64_t)~irt_toitype(kt) << 47);
++ }
++ cmp64 = ra_allock(as, k, allow);
++ rset_clear(allow, cmp64);
++ }
++ }
++#endif
+
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
+ l_end = emit_label(as);
+@@ -861,8 +1035,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ l_end = asm_exitstub_addr(as);
+ }
+ if (!LJ_SOFTFP && irt_isnum(kt)) {
++#if !LJ_TARGET_MIPSR6
+ emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
+ emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
++#else
++ emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end);
++ emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key);
++#endif
+ *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
+ emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
+ emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
+@@ -883,21 +1062,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
+ emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
+- } else if (irt_isaddr(kt)) {
+- Reg refk = tmp2;
+- if (isk) {
+- int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+- refk = ra_allock(as, k, allow);
+- rset_clear(allow, refk);
+- }
+- emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end);
+- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
+ } else {
+- Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
+- rset_clear(allow, pri);
+- lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+- emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end);
+- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
++ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end);
++ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
+ }
+ *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+ if (!isk && irt_isaddr(kt)) {
+@@ -908,7 +1075,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ #endif
+
+ /* Load main position relative to tab->node into dest. */
+- khash = isk ? ir_khash(irkey) : 1;
++ khash = isk ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
+ } else {
+@@ -916,7 +1083,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ if (isk)
+ tmphash = ra_allock(as, khash, allow);
+ emit_dst(as, MIPSI_AADDU, dest, dest, tmp1);
+- lua_assert(sizeof(Node) == 24);
++ lj_assertA(sizeof(Node) == 24, "bad Node size");
+ emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
+ emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
+ emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
+@@ -926,7 +1093,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ if (isk) {
+ /* Nothing to do. */
+ } else if (irt_isstr(kt)) {
+- emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
++ emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid));
+ } else { /* Must match with hash*() in lj_tab.c. */
+ emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
+ emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
+@@ -961,7 +1128,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
+ if (irt_isnum(kt)) {
+ emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
+- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
++ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0);
+ emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
+ #if !LJ_SOFTFP
+ emit_tg(as, MIPSI_DMFC1, tmp1, key);
+@@ -994,7 +1161,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ Reg key = ra_scratch(as, allow);
+ int64_t k;
+ #endif
+- lua_assert(ofs % sizeof(Node) == 0);
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (ofs > 32736) {
+ idx = dest;
+ rset_clear(allow, dest);
+@@ -1023,7 +1190,7 @@ nolo:
+ emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
+ #else
+ if (irt_ispri(irkey->t)) {
+- lua_assert(!irt_isnil(irkey->t));
++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
+ k = ~((int64_t)~irt_toitype(irkey->t) << 47);
+ } else if (irt_isnum(irkey->t)) {
+ k = (int64_t)ir_knum(irkey)->u64;
+@@ -1062,7 +1229,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
+ static void asm_fref(ASMState *as, IRIns *ir)
+ {
+ UNUSED(as); UNUSED(ir);
+- lua_assert(!ra_used(ir));
++ lj_assertA(!ra_used(ir), "unfused FREF");
+ }
+
+ static void asm_strref(ASMState *as, IRIns *ir)
+@@ -1117,26 +1284,36 @@ static void asm_strref(ASMState *as, IRIns *ir)
+
+ /* -- Loads and stores ---------------------------------------------------- */
+
+-static MIPSIns asm_fxloadins(IRIns *ir)
++static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: return MIPSI_LB;
+ case IRT_U8: return MIPSI_LBU;
+ case IRT_I16: return MIPSI_LH;
+ case IRT_U16: return MIPSI_LHU;
+- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1;
++ case IRT_NUM:
++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
++ if (!LJ_SOFTFP) return MIPSI_LDC1;
++ /* fallthrough */
+ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
++ /* fallthrough */
+ default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
+ }
+ }
+
+-static MIPSIns asm_fxstoreins(IRIns *ir)
++static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: case IRT_U8: return MIPSI_SB;
+ case IRT_I16: case IRT_U16: return MIPSI_SH;
+- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1;
++ case IRT_NUM:
++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
++ if (!LJ_SOFTFP) return MIPSI_SDC1;
++ /* fallthrough */
+ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
++ /* fallthrough */
+ default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
+ }
+ }
+@@ -1144,10 +1321,10 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
+ static void asm_fload(ASMState *as, IRIns *ir)
+ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+- MIPSIns mi = asm_fxloadins(ir);
++ MIPSIns mi = asm_fxloadins(as, ir);
+ Reg idx;
+ int32_t ofs;
+- if (ir->op1 == REF_NIL) {
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = RID_JGL;
+ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
+ } else {
+@@ -1161,7 +1338,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
+ }
+ ofs = field_ofs[ir->op2];
+ }
+- lua_assert(!irt_isfp(ir->t));
++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
+ emit_tsi(as, mi, dest, idx, ofs);
+ }
+
+@@ -1172,8 +1349,8 @@ static void asm_fstore(ASMState *as, IRIns *ir)
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+- MIPSIns mi = asm_fxstoreins(ir);
+- lua_assert(!irt_isfp(ir->t));
++ MIPSIns mi = asm_fxstoreins(as, ir);
++ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
+ emit_tsi(as, mi, src, idx, ofs);
+ }
+ }
+@@ -1182,8 +1359,9 @@ static void asm_xload(ASMState *as, IRIns *ir)
+ {
+ Reg dest = ra_dest(as, ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
++ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
++ "unaligned XLOAD");
++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
+ }
+
+ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+@@ -1191,7 +1369,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+ if (ir->r != RID_SINK) {
+ Reg src = ra_alloc1z(as, ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
+ rset_exclude(RSET_GPR, src), ofs);
+ }
+ }
+@@ -1200,7 +1378,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+
+ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ {
+- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
++ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
+ Reg dest = RID_NONE, type = RID_TMP, idx;
+ RegSet allow = RSET_GPR;
+ int32_t ofs = 0;
+@@ -1213,8 +1391,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ }
+ }
+ if (ra_used(ir)) {
+- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+- irt_isint(ir->t) || irt_isaddr(ir->t));
++ lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ #if LJ_64
+@@ -1225,6 +1404,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ #endif
+ }
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+ rset_clear(allow, idx);
+ if (irt_isnum(t)) {
+ asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
+@@ -1262,10 +1442,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
+ int32_t ofs = 0;
+ if (ir->r == RID_SINK)
+ return;
+- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+- src = ra_alloc1(as, ir->op2, RSET_FPR);
++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
++ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+- emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
++ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs);
+ } else {
+ #if LJ_32
+ if (!irt_ispri(ir->t)) {
+@@ -1313,45 +1493,64 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ IRType1 t = ir->t;
+ #if LJ_32
+ int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
++ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
+ if (hiop)
+ t.irt = IRT_NUM;
+ #else
+ int32_t ofs = 8*((int32_t)ir->op1-2);
+ #endif
+- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+-#if LJ_32 && LJ_SOFTFP
+- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
++#if LJ_SOFTFP32
++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+ }
+ #else
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+- dest = ra_scratch(as, RSET_FPR);
++ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR);
+ asm_tointg(as, ir, dest);
+ t.irt = IRT_NUM; /* Continue with a regular number type check. */
+ } else
+ #endif
+ if (ra_used(ir)) {
+- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+- irt_isint(ir->t) || irt_isaddr(ir->t));
++ lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad SLOAD type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ base = ra_alloc1(as, REF_BASE, allow);
+ rset_clear(allow, base);
+- if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
++ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) {
+ if (irt_isint(t)) {
+- Reg tmp = ra_scratch(as, RSET_FPR);
++ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
++#if LJ_SOFTFP
++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
++ ra_destreg(as, ir, RID_RET);
++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0);
++ if (tmp != REGARG_FIRSTGPR)
++ emit_move(as, REGARG_FIRSTGPR, tmp);
++#else
+ emit_tg(as, MIPSI_MFC1, dest, tmp);
+ emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
++#endif
+ dest = tmp;
+ t.irt = IRT_NUM; /* Check for original type. */
+ } else {
+ Reg tmp = ra_scratch(as, RSET_GPR);
++#if LJ_SOFTFP
++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
++ ra_destreg(as, ir, RID_RET);
++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0);
++ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0);
++#else
+ emit_fg(as, MIPSI_CVT_D_W, dest, dest);
+ emit_tg(as, MIPSI_MTC1, tmp, dest);
++#endif
+ dest = tmp;
+ t.irt = IRT_INT; /* Check for original type. */
+ }
+@@ -1400,7 +1599,7 @@ dotypecheck:
+ if (irt_isnum(t)) {
+ asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
+ emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
+- if (ra_hasreg(dest))
++ if (!LJ_SOFTFP && ra_hasreg(dest))
+ emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
+ } else {
+ asm_guard(as, MIPSI_BNE, RID_TMP,
+@@ -1410,7 +1609,7 @@ dotypecheck:
+ }
+ emit_tsi(as, MIPSI_LD, type, base, ofs);
+ } else if (ra_hasreg(dest)) {
+- if (irt_isnum(t))
++ if (!LJ_SOFTFP && irt_isnum(t))
+ emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
+ else
+ emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
+@@ -1431,7 +1630,8 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+ IRRef args[4];
+ RegSet drop = RSET_SCRATCH;
+- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ if (ra_hasreg(ir->r))
+@@ -1447,7 +1647,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ int32_t ofs = sizeof(GCcdata);
+ if (sz == 8) {
+ ofs += 4;
+- lua_assert((ir+1)->o == IR_HIOP);
++ lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
+ if (LJ_LE) ir++;
+ }
+ for (;;) {
+@@ -1458,10 +1658,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ ofs -= 4; if (LJ_BE) ir++; else ir--;
+ }
+ #else
+- emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow),
++ emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow),
+ RID_RET, sizeof(GCcdata));
+ #endif
+- lua_assert(sz == 4 || sz == 8);
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+@@ -1484,8 +1684,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+ ra_releasetmp(as, ASMREF_TMP1));
+ }
+-#else
+-#define asm_cnew(as, ir) ((void)0)
+ #endif
+
+ /* -- Write barriers ------------------------------------------------------ */
+@@ -1513,7 +1711,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
+ MCLabel l_end;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+- lua_assert(IR(ir->op1)->o == IR_UREFC);
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+@@ -1549,33 +1747,46 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+ emit_fg(as, mi, dest, left);
+ }
++#endif
+
++#if !LJ_SOFTFP32
+ static void asm_fpmath(ASMState *as, IRIns *ir)
+ {
+- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+- return;
++#if !LJ_SOFTFP
+ if (ir->op2 <= IRFPM_TRUNC)
+ asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
+ else if (ir->op2 == IRFPM_SQRT)
+ asm_fpunary(as, ir, MIPSI_SQRT_D);
+ else
++#endif
+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
+ }
+ #endif
+
++#if !LJ_SOFTFP
++#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D)
++#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D)
++#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D)
++#elif LJ_64 /* && LJ_SOFTFP */
++#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add)
++#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub)
++#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul)
++#endif
++
+ static void asm_add(ASMState *as, IRIns *ir)
+ {
+ IRType1 t = ir->t;
+-#if !LJ_SOFTFP
++#if !LJ_SOFTFP32
+ if (irt_isnum(t)) {
+- asm_fparith(as, ir, MIPSI_ADD_D);
++ asm_fpadd(as, ir);
+ } else
+ #endif
+ {
++ /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ if (irref_isk(ir->op2)) {
+- intptr_t k = get_kval(IR(ir->op2));
++ intptr_t k = get_kval(as, ir->op2);
+ if (checki16(k)) {
+ emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest,
+ left, k);
+@@ -1590,9 +1801,9 @@ static void asm_add(ASMState *as, IRIns *ir)
+
+ static void asm_sub(ASMState *as, IRIns *ir)
+ {
+-#if !LJ_SOFTFP
++#if !LJ_SOFTFP32
+ if (irt_isnum(ir->t)) {
+- asm_fparith(as, ir, MIPSI_SUB_D);
++ asm_fpsub(as, ir);
+ } else
+ #endif
+ {
+@@ -1606,9 +1817,9 @@ static void asm_sub(ASMState *as, IRIns *ir)
+
+ static void asm_mul(ASMState *as, IRIns *ir)
+ {
+-#if !LJ_SOFTFP
++#if !LJ_SOFTFP32
+ if (irt_isnum(ir->t)) {
+- asm_fparith(as, ir, MIPSI_MUL_D);
++ asm_fpmul(as, ir);
+ } else
+ #endif
+ {
+@@ -1616,46 +1827,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+ right = (left >> 8); left &= 255;
+ if (LJ_64 && irt_is64(ir->t)) {
++#if !LJ_TARGET_MIPSR6
+ emit_dst(as, MIPSI_MFLO, dest, 0, 0);
+ emit_dst(as, MIPSI_DMULT, 0, left, right);
++#else
++ emit_dst(as, MIPSI_DMUL, dest, left, right);
++#endif
+ } else {
+ emit_dst(as, MIPSI_MUL, dest, left, right);
+ }
+ }
+ }
+
+-static void asm_mod(ASMState *as, IRIns *ir)
++#if !LJ_SOFTFP32
++static void asm_fpdiv(ASMState *as, IRIns *ir)
+ {
+-#if LJ_64 && LJ_HASFFI
+- if (!irt_isint(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+- IRCALL_lj_carith_modu64);
+- else
+-#endif
+- asm_callid(as, ir, IRCALL_lj_vm_modi);
+-}
+-
+ #if !LJ_SOFTFP
+-static void asm_pow(ASMState *as, IRIns *ir)
+-{
+-#if LJ_64 && LJ_HASFFI
+- if (!irt_isnum(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+- IRCALL_lj_carith_powu64);
+- else
+-#endif
+- asm_callid(as, ir, IRCALL_lj_vm_powi);
+-}
+-
+-static void asm_div(ASMState *as, IRIns *ir)
+-{
+-#if LJ_64 && LJ_HASFFI
+- if (!irt_isnum(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+- IRCALL_lj_carith_divu64);
+- else
+-#endif
+ asm_fparith(as, ir, MIPSI_DIV_D);
++#else
++ asm_callid(as, ir, IRCALL_softfp_div);
++#endif
+ }
+ #endif
+
+@@ -1665,6 +1856,13 @@ static void asm_neg(ASMState *as, IRIns *ir)
+ if (irt_isnum(ir->t)) {
+ asm_fpunary(as, ir, MIPSI_NEG_D);
+ } else
++#elif LJ_64 /* && LJ_SOFTFP */
++ if (irt_isnum(ir->t)) {
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
++ emit_dst(as, MIPSI_XOR, dest, left,
++ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest)));
++ } else
+ #endif
+ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+@@ -1674,14 +1872,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
+ }
+ }
+
++#if !LJ_SOFTFP
+ #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
+-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
++#elif LJ_64 /* && LJ_SOFTFP */
++static void asm_abs(ASMState *as, IRIns *ir)
++{
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
++ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0);
++}
++#endif
+
+ static void asm_arithov(ASMState *as, IRIns *ir)
+ {
++ /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
+ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
+- lua_assert(!irt_is64(ir->t));
++ lj_assertA(!irt_is64(ir->t), "bad usage");
+ if (irref_isk(ir->op2)) {
+ int k = IR(ir->op2)->i;
+ if (ir->o == IR_SUBOV) k = -k;
+@@ -1724,9 +1930,14 @@ static void asm_mulov(ASMState *as, IRIns *ir)
+ right), dest));
+ asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
+ emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
++#if !LJ_TARGET_MIPSR6
+ emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
+ emit_dst(as, MIPSI_MFLO, dest, 0, 0);
+ emit_dst(as, MIPSI_MULT, 0, left, right);
++#else
++ emit_dst(as, MIPSI_MUL, dest, left, right);
++ emit_dst(as, MIPSI_MUH, tmp, left, right);
++#endif
+ }
+
+ #if LJ_32 && LJ_HASFFI
+@@ -1863,7 +2074,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ if (irref_isk(ir->op2)) {
+- intptr_t k = get_kval(IR(ir->op2));
++ intptr_t k = get_kval(as, ir->op2);
+ if (checku16(k)) {
+ emit_tsi(as, mik, dest, left, k);
+ return;
+@@ -1896,7 +2107,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
+ #define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
+ #define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
+ #define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
+-#define asm_brol(as, ir) lua_assert(0)
++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
+ static void asm_bror(ASMState *as, IRIns *ir)
+ {
+@@ -1919,15 +2130,21 @@ static void asm_bror(ASMState *as, IRIns *ir)
+ }
+ }
+
+-#if LJ_32 && LJ_SOFTFP
++#if LJ_SOFTFP
+ static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+ {
+ CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
++#if LJ_64
++ IRRef args[2];
++ args[0] = ir->op1;
++ args[1] = ir->op2;
++#else
+ IRRef args[4];
+ args[0^LJ_BE] = ir->op1;
+ args[1^LJ_BE] = (ir+1)->op1;
+ args[2^LJ_BE] = ir->op2;
+ args[3^LJ_BE] = (ir+1)->op2;
++#endif
+ asm_setupresult(as, ir, &ci);
+ emit_call(as, (void *)ci.func, 0);
+ ci.func = NULL;
+@@ -1937,29 +2154,52 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+
+ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+ {
+- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
++#if LJ_SOFTFP
++ asm_sfpmin_max(as, ir);
++#else
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = (left >> 8); left &= 255;
++#if !LJ_TARGET_MIPSR6
+ if (dest == left) {
+- emit_fg(as, MIPSI_MOVT_D, dest, right);
++ emit_fg(as, MIPSI_MOVF_D, dest, right);
+ } else {
+- emit_fg(as, MIPSI_MOVF_D, dest, left);
++ emit_fg(as, MIPSI_MOVT_D, dest, left);
+ if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
+ }
+- emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
++ emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right);
++#else
++ emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right);
++#endif
++#endif
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+ right = (left >> 8); left &= 255;
+- if (dest == left) {
+- emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
++ if (left == right) {
++ if (dest != left) emit_move(as, dest, left);
+ } else {
+- emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
+- if (dest != right) emit_move(as, dest, right);
++#if !LJ_TARGET_MIPSR6
++ if (dest == left) {
++ emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
++ } else {
++ emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
++ if (dest != right) emit_move(as, dest, right);
++ }
++#else
++ emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
++ if (dest != right) {
++ emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP);
++ emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP);
++ } else {
++ emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP);
++ emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP);
++ }
++#endif
++ emit_dst(as, MIPSI_SLT, RID_TMP,
++ ismax ? left : right, ismax ? right : left);
+ }
+- emit_dst(as, MIPSI_SLT, RID_TMP,
+- ismax ? left : right, ismax ? right : left);
+ }
+ }
+
+@@ -1968,18 +2208,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+
+ /* -- Comparisons --------------------------------------------------------- */
+
+-#if LJ_32 && LJ_SOFTFP
++#if LJ_SOFTFP
+ /* SFP comparisons. */
+ static void asm_sfpcomp(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+ RegSet drop = RSET_SCRATCH;
+ Reg r;
++#if LJ_64
++ IRRef args[2];
++ args[0] = ir->op1;
++ args[1] = ir->op2;
++#else
+ IRRef args[4];
+ args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
+ args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
++#endif
+
+- for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) {
+ if (!rset_test(as->freeset, r) &&
+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
+ rset_clear(drop, r);
+@@ -2033,21 +2279,33 @@ static void asm_comp(ASMState *as, IRIns *ir)
+ {
+ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
+ IROp op = ir->o;
+- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
++#if LJ_SOFTFP
++ asm_sfpcomp(as, ir);
++#else
++#if !LJ_TARGET_MIPSR6
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = (left >> 8); left &= 255;
+ asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
+ emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
++#else
++ Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR);
++ right = (left >> 8); left &= 255;
++ tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
++ asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
++ emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right);
++#endif
++#endif
+ } else {
+ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (op == IR_ABC) op = IR_UGT;
+- if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) {
++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) {
+ MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
+ ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
+ asm_guard(as, mi, left, 0);
+ } else {
+ if (irref_isk(ir->op2)) {
+- intptr_t k = get_kval(IR(ir->op2));
++ intptr_t k = get_kval(as, ir->op2);
+ if ((op&2)) k++;
+ if (checki16(k)) {
+ asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
+@@ -2069,9 +2327,17 @@ static void asm_equal(ASMState *as, IRIns *ir)
+ Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
+ RSET_FPR : RSET_GPR);
+ right = (left >> 8); left &= 255;
+- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
++ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
++#if LJ_SOFTFP
++ asm_sfpcomp(as, ir);
++#elif !LJ_TARGET_MIPSR6
+ asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
+ emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
++#else
++ Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
++ asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
++ emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right);
++#endif
+ } else {
+ asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
+ }
+@@ -2114,15 +2380,15 @@ static void asm_comp64eq(ASMState *as, IRIns *ir)
+ }
+ #endif
+
+-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
++/* -- Split register ops -------------------------------------------------- */
+
+-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
++/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
+ static void asm_hiop(ASMState *as, IRIns *ir)
+ {
+-#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
+ /* HIOP is marked as a store because it needs its own DCE logic. */
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
++#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
+ if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
+ as->curins--; /* Always skip the CONV. */
+ #if LJ_HASFFI && !LJ_SOFTFP
+@@ -2169,37 +2435,33 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ }
+ return;
+ }
++#endif
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
+ switch ((ir-1)->o) {
+-#if LJ_HASFFI
++#if LJ_32 && LJ_HASFFI
+ case IR_ADD: as->curins--; asm_add64(as, ir); break;
+ case IR_SUB: as->curins--; asm_sub64(as, ir); break;
+ case IR_NEG: as->curins--; asm_neg64(as, ir); break;
++ case IR_CNEWI:
++ /* Nothing to do here. Handled by lo op itself. */
++ break;
+ #endif
+-#if LJ_SOFTFP
++#if LJ_32 && LJ_SOFTFP
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+ case IR_STRTO:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
+ break;
++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
++ /* Nothing to do here. Handled by lo op itself. */
++ break;
+ #endif
+- case IR_CALLN:
+- case IR_CALLS:
+- case IR_CALLXS:
++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+-#if LJ_SOFTFP
+- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+-#endif
+- case IR_CNEWI:
+- /* Nothing to do here. Handled by lo op itself. */
+- break;
+- default: lua_assert(0); break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+-#else
+- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */
+-#endif
+ }
+
+ /* -- Profiling ----------------------------------------------------------- */
+@@ -2264,15 +2526,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ if ((sn & SNAP_NORESTORE))
+ continue;
+ if (irt_isnum(ir->t)) {
+-#if LJ_SOFTFP
++#if LJ_SOFTFP32
+ Reg tmp;
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
++ /* LJ_SOFTFP: must be a number constant. */
++ lj_assertA(irref_isk(ref), "unsplit FP op");
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
+ emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
+ emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
++#elif LJ_SOFTFP /* && LJ_64 */
++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
++ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs);
+ #else
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
+@@ -2281,7 +2547,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ #if LJ_32
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+ Reg type;
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "restore of IR type %d", irt_type(ir->t));
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, allow);
+ rset_clear(allow, src);
+@@ -2294,6 +2561,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ } else if ((sn & SNAP_SOFTFPNUM)) {
+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
+ #endif
++ } else if ((sn & SNAP_KEYINDEX)) {
++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
+ } else {
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ }
+@@ -2304,11 +2573,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ }
+ checkmclim(as);
+ }
+- lua_assert(map + nent == flinks);
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+ }
+
+ /* -- GC handling --------------------------------------------------------- */
+
++/* Marker to prevent patching the GC check exit. */
++#define MIPS_NOPATCH_GC_CHECK MIPSI_OR
++
+ /* Check GC threshold and do one or more GC steps. */
+ static void asm_gc_check(ASMState *as)
+ {
+@@ -2324,6 +2596,7 @@ static void asm_gc_check(ASMState *as)
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ASMREF_TMP2; /* MSize steps */
+ asm_gencall(as, ci, args);
++ l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */
+ emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
+ tmp = ra_releasetmp(as, ASMREF_TMP2);
+ emit_loadi(as, tmp, as->gcsteps);
+@@ -2352,6 +2625,12 @@ static void asm_loop_fixup(ASMState *as)
+ }
+ }
+
++/* Fixup the tail of the loop. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ if (as->loopinv) as->mctop--;
++}
++
+ /* -- Head of trace ------------------------------------------------------- */
+
+ /* Coalesce BASE register for a root trace. */
+@@ -2359,7 +2638,6 @@ static void asm_head_root_base(ASMState *as)
+ {
+ IRIns *ir = IR(REF_BASE);
+ Reg r = ir->r;
+- if (as->loopinv) as->mctop--;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+@@ -2374,7 +2652,6 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+ {
+ IRIns *ir = IR(REF_BASE);
+ Reg r = ir->r;
+- if (as->loopinv) as->mctop--;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+@@ -2466,32 +2743,39 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
+ for (p++; p < pe; p++) {
+ if (*p == exitload) { /* Look for load of exit number. */
+- if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */
++ /* Look for exitstub branch. Yes, this covers all used branch variants. */
++ if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
++ ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
++ (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
++#if !LJ_TARGET_MIPSR6
++ (p[-1] & 0xffe00000u) == MIPSI_BC1F
++#else
++ (p[-1] & 0xff600000u) == MIPSI_BC1EQZ
++#endif
++ ) && p[-2] != MIPS_NOPATCH_GC_CHECK) {
+ ptrdiff_t delta = target - p;
+ if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
+ patchbranch:
+ p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
+ *p = MIPSI_NOP; /* Replace the load of the exit number. */
+- cstop = p;
++ cstop = p+1;
+ if (!cstart) cstart = p-1;
+ } else { /* Branch out of range. Use spare jump slot in mcarea. */
+- int i;
+- for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) {
+- if (mcarea[i] == tjump) {
+- delta = mcarea+i - p;
+- goto patchbranch;
+- } else if (mcarea[i] == MIPSI_NOP) {
+- mcarea[i] = tjump;
+- cstart = mcarea+i;
+- delta = mcarea+i - p;
++ MCode *mcjump = asm_sparejump_use(mcarea, tjump);
++ if (mcjump) {
++ lj_mcode_sync(mcjump, mcjump+1);
++ delta = mcjump - p;
++ if (((delta + 0x8000) >> 16) == 0) {
+ goto patchbranch;
++ } else {
++ lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
+ }
+ }
+ /* Ignore jump slot overflow. Child trace is simply not attached. */
+ }
+ } else if (p+1 == pe) {
+ /* Patch NOP after code for inverted loop branch. Use of J is ok. */
+- lua_assert(p[1] == MIPSI_NOP);
++ lj_assertJ(p[1] == MIPSI_NOP, "expected NOP");
+ p[1] = tjump;
+ *p = MIPSI_NOP; /* Replace the load of the exit number. */
+ cstop = p+2;
+diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
+index 6daa861b..ac5d88ce 100644
+--- a/src/lj_asm_ppc.h
++++ b/src/lj_asm_ppc.h
+@@ -1,6 +1,6 @@
+ /*
+ ** PPC IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Register allocator extensions --------------------------------------- */
+@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
+ return ra_allock(as, ofs-(int16_t)ofs, allow);
+ }
+ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
++ return RID_JGL;
+ }
+ }
+ *ofsp = 0;
+@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
+ return;
+ }
+ } else if (ir->o == IR_STRREF) {
+- lua_assert(ofs == 0);
++ lj_assertA(ofs == 0, "bad usage");
+ ofs = (int32_t)sizeof(GCstr);
+ if (irref_isk(ir->op2)) {
+ ofs += IR(ir->op2)->i;
+@@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
+ emit_tab(as, pi, rt, left, right);
+ }
+
++#if !LJ_SOFTFP
+ /* Fuse to multiply-add/sub instruction. */
+ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
+ {
+@@ -245,6 +249,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
+ }
+ return 0;
+ }
++#endif
+
+ /* -- Calls --------------------------------------------------------------- */
+
+@@ -253,16 +258,21 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ {
+ uint32_t n, nargs = CCI_XNARGS(ci);
+ int32_t ofs = 8;
+- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
++ Reg gpr = REGARG_FIRSTGPR;
++#if !LJ_SOFTFP
++ Reg fpr = REGARG_FIRSTFPR;
++#endif
+ if ((void *)ci->func)
+ emit_call(as, (void *)ci->func);
+ for (n = 0; n < nargs; n++) { /* Setup args. */
+ IRRef ref = args[n];
+ if (ref) {
+ IRIns *ir = IR(ref);
++#if !LJ_SOFTFP
+ if (irt_isfp(ir->t)) {
+ if (fpr <= REGARG_LASTFPR) {
+- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
++ lj_assertA(rset_test(as->freeset, fpr),
++ "reg %d not free", fpr); /* Already evicted. */
+ ra_leftov(as, fpr, ref);
+ fpr++;
+ } else {
+@@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ emit_spstore(as, ir, r, ofs);
+ ofs += irt_isnum(ir->t) ? 8 : 4;
+ }
+- } else {
++ } else
++#endif
++ {
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Already evicted. */
+ ra_leftov(as, gpr, ref);
+ gpr++;
+ } else {
+@@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ }
+ checkmclim(as);
+ }
++#if !LJ_SOFTFP
+ if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
+ emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
++#endif
+ }
+
+ /* Setup result reg/sp for call. Evict scratch regs. */
+@@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ {
+ RegSet drop = RSET_SCRATCH;
+ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
++#if !LJ_SOFTFP
+ if ((ci->flags & CCI_NOFPRCLOBBER))
+ drop &= ~RSET_FPR;
++#endif
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+ if (hiop && ra_hasreg((ir+1)->r))
+ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+- lua_assert(!irt_ispri(ir->t));
+- if (irt_isfp(ir->t)) {
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
++ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
+ if ((ci->flags & CCI_CASTU64)) {
+ /* Use spill slot or temp slots. */
+ int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
+@@ -323,10 +340,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ } else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
+-#if LJ_32
+ } else if (hiop) {
+ ra_destpair(as, ir);
+-#endif
+ } else {
+ ra_destreg(as, ir, RID_RET);
+ }
+@@ -375,8 +390,24 @@ static void asm_retf(ASMState *as, IRIns *ir)
+ emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
+ }
+
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb)
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
++ emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
++ emit_getgl(as, RID_TMP, cur_L);
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
++}
++#endif
++
+ /* -- Type conversions ---------------------------------------------------- */
+
++#if !LJ_SOFTFP
+ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+ {
+ RegSet allow = RSET_FPR;
+@@ -409,15 +440,27 @@ static void asm_tobit(ASMState *as, IRIns *ir)
+ emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
+ emit_fab(as, PPCI_FADD, tmp, left, right);
+ }
++#endif
+
+ static void asm_conv(ASMState *as, IRIns *ir)
+ {
+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
++#if !LJ_SOFTFP
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
++#endif
+ IRRef lref = ir->op1;
+- lua_assert(irt_type(ir->t) != st);
+- lua_assert(!(irt_isint64(ir->t) ||
+- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
++ /* 64 bit integer conversions are handled by SPLIT. */
++ lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
++ "IR %04d has unsplit 64 bit type",
++ (int)(ir - as->ir) - REF_BIAS);
++#if LJ_SOFTFP
++ /* FP conversions are handled by SPLIT. */
++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
++ "IR %04d has FP type",
++ (int)(ir - as->ir) - REF_BIAS);
++ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
++#else
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) { /* FP to FP conversion. */
+@@ -446,7 +489,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+@@ -476,11 +520,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ emit_fb(as, PPCI_FCTIWZ, tmp, left);
+ }
+ }
+- } else {
++ } else
++#endif
++ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ if ((ir->op2 & IRCONV_SEXT))
+ emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
+ else
+@@ -496,42 +542,95 @@ static void asm_strto(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+ IRRef args[2];
+- int32_t ofs;
++ int32_t ofs = SPOFS_TMP;
++#if LJ_SOFTFP
++ ra_evictset(as, RSET_SCRATCH);
++ if (ra_used(ir)) {
++ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
++ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
++ int i;
++ for (i = 0; i < 2; i++) {
++ Reg r = (ir+i)->r;
++ if (ra_hasreg(r)) {
++ ra_free(as, r);
++ ra_modified(as, r);
++ emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
++ }
++ }
++ ofs = sps_scale(ir->s & ~1);
++ } else {
++ Reg rhi = ra_dest(as, ir+1, RSET_GPR);
++ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
++ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
++ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
++ }
++ }
++#else
+ RegSet drop = RSET_SCRATCH;
+ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
+ ra_evictset(as, drop);
++ if (ir->s) ofs = sps_scale(ir->s);
++#endif
+ asm_guardcc(as, CC_EQ);
+ emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
+ args[0] = ir->op1; /* GCstr *str */
+ args[1] = ASMREF_TMP1; /* TValue *n */
+ asm_gencall(as, ci, args);
+ /* Store the result to the spill slot or temp slots. */
+- ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
+ emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
+ }
+
+ /* -- Memory references --------------------------------------------------- */
+
+ /* Get pointer to TValue. */
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+ {
+- IRIns *ir = IR(ref);
+- if (irt_isnum(ir->t)) {
+- if (irref_isk(ref)) /* Use the number constant itself as a TValue. */
+- ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+- else /* Otherwise force a spill and use the spill slot. */
+- emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
+- } else {
+- /* Otherwise use g->tmptv to hold the TValue. */
+- RegSet allow = rset_exclude(RSET_GPR, dest);
+- Reg type;
+- emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
+- if (!irt_ispri(ir->t)) {
+- Reg src = ra_alloc1(as, ref, allow);
+- emit_setgl(as, src, tmptv.gcr);
++ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
++ if ((mode & IRTMPREF_IN1)) {
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if ((mode & IRTMPREF_OUT1)) {
++#if LJ_SOFTFP
++ lj_assertA(irref_isk(ref), "unsplit FP op");
++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
++ emit_setgl(as,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
++ tmptv.u32.lo);
++ emit_setgl(as,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
++ tmptv.u32.hi);
++#else
++ Reg src = ra_alloc1(as, ref, RSET_FPR);
++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
++ emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
++#endif
++ } else if (irref_isk(ref)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
++ } else {
++#if LJ_SOFTFP
++ lj_assertA(0, "unsplit FP op");
++#else
++ /* Otherwise force a spill and use the spill slot. */
++ emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
++#endif
++ }
++ } else {
++ /* Otherwise use g->tmptv to hold the TValue. */
++ Reg type;
++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
++ if (!irt_ispri(ir->t)) {
++ Reg src = ra_alloc1(as, ref, RSET_GPR);
++ emit_setgl(as, src, tmptv.gcr);
++ }
++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
++ type = ra_alloc1(as, ref+1, RSET_GPR);
++ else
++ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
++ emit_setgl(as, type, tmptv.it);
+ }
+- type = ra_allock(as, irt_toitype(ir->t), allow);
+- emit_setgl(as, type, tmptv.it);
++ } else {
++ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+ }
+ }
+
+@@ -574,11 +673,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ Reg tisnum = RID_NONE, tmpnum = RID_NONE;
+ IRRef refkey = ir->op2;
+ IRIns *irkey = IR(refkey);
++ int isk = irref_isk(refkey);
+ IRType1 kt = irkey->t;
+ uint32_t khash;
+ MCLabel l_end, l_loop, l_next;
+
+ rset_clear(allow, tab);
++#if LJ_SOFTFP
++ if (!isk) {
++ key = ra_alloc1(as, refkey, allow);
++ rset_clear(allow, key);
++ if (irkey[1].o == IR_HIOP) {
++ if (ra_hasreg((irkey+1)->r)) {
++ tmpnum = (irkey+1)->r;
++ ra_noweak(as, tmpnum);
++ } else {
++ tmpnum = ra_allocref(as, refkey+1, allow);
++ }
++ rset_clear(allow, tmpnum);
++ }
++ }
++#else
+ if (irt_isnum(kt)) {
+ key = ra_alloc1(as, refkey, RSET_FPR);
+ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+@@ -588,6 +703,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ }
++#endif
+ tmp2 = ra_scratch(as, allow);
+ rset_clear(allow, tmp2);
+
+@@ -610,7 +726,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ asm_guardcc(as, CC_EQ);
+ else
+ emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
+- if (irt_isnum(kt)) {
++ if (!LJ_SOFTFP && irt_isnum(kt)) {
+ emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
+ emit_condbranch(as, PPCI_BC, CC_GE, l_next);
+ emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
+@@ -620,7 +736,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_ab(as, PPCI_CMPW, tmp2, key);
+ emit_condbranch(as, PPCI_BC, CC_NE, l_next);
+ }
+- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
++ if (LJ_SOFTFP && ra_hasreg(tmpnum))
++ emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
++ else
++ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+ if (!irt_ispri(kt))
+ emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
+ }
+@@ -629,35 +748,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ (((char *)as->mcp-(char *)l_loop) & 0xffffu);
+
+ /* Load main position relative to tab->node into dest. */
+- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
++ khash = isk ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
+ } else {
+ Reg tmphash = tmp1;
+- if (irref_isk(refkey))
++ if (isk)
+ tmphash = ra_allock(as, khash, allow);
+ emit_tab(as, PPCI_ADD, dest, dest, tmp1);
+ emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
+ emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
+ emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
+- if (irref_isk(refkey)) {
++ if (isk) {
+ /* Nothing to do. */
+ } else if (irt_isstr(kt)) {
+- emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
++ emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid));
+ } else { /* Must match with hash*() in lj_tab.c. */
+ emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
+ emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
+ emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
+ emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
+ emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
+- if (irt_isnum(kt)) {
++ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
++#if LJ_SOFTFP
++ emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
++ emit_rotlwi(as, dest, tmp1, HASH_ROT1);
++ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
++#else
+ int32_t ofs = ra_spill(as, irkey);
+ emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
+ emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+ emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
+ emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
+ emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
++#endif
+ } else {
+ emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+ emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+@@ -678,7 +803,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg key = RID_NONE, type = RID_TMP, idx = node;
+ RegSet allow = rset_exclude(RSET_GPR, node);
+- lua_assert(ofs % sizeof(Node) == 0);
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (ofs > 32736) {
+ idx = dest;
+ rset_clear(allow, dest);
+@@ -737,7 +862,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
+ static void asm_fref(ASMState *as, IRIns *ir)
+ {
+ UNUSED(as); UNUSED(ir);
+- lua_assert(!ra_used(ir));
++ lj_assertA(!ra_used(ir), "unfused FREF");
+ }
+
+ static void asm_strref(ASMState *as, IRIns *ir)
+@@ -777,26 +902,28 @@ static void asm_strref(ASMState *as, IRIns *ir)
+
+ /* -- Loads and stores ---------------------------------------------------- */
+
+-static PPCIns asm_fxloadins(IRIns *ir)
++static PPCIns asm_fxloadins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */
+ case IRT_U8: return PPCI_LBZ;
+ case IRT_I16: return PPCI_LHA;
+ case IRT_U16: return PPCI_LHZ;
+- case IRT_NUM: return PPCI_LFD;
+- case IRT_FLOAT: return PPCI_LFS;
++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD;
++ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
+ default: return PPCI_LWZ;
+ }
+ }
+
+-static PPCIns asm_fxstoreins(IRIns *ir)
++static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: case IRT_U8: return PPCI_STB;
+ case IRT_I16: case IRT_U16: return PPCI_STH;
+- case IRT_NUM: return PPCI_STFD;
+- case IRT_FLOAT: return PPCI_STFS;
++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD;
++ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
+ default: return PPCI_STW;
+ }
+ }
+@@ -804,12 +931,12 @@ static PPCIns asm_fxstoreins(IRIns *ir)
+ static void asm_fload(ASMState *as, IRIns *ir)
+ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+- PPCIns pi = asm_fxloadins(ir);
++ PPCIns pi = asm_fxloadins(as, ir);
+ Reg idx;
+ int32_t ofs;
+- if (ir->op1 == REF_NIL) {
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = RID_JGL;
+- ofs = (ir->op2 << 2) - 32768;
++ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
+ } else {
+ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+@@ -821,7 +948,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
+ }
+ ofs = field_ofs[ir->op2];
+ }
+- lua_assert(!irt_isi8(ir->t));
++ lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8");
+ emit_tai(as, pi, dest, idx, ofs);
+ }
+
+@@ -832,18 +959,19 @@ static void asm_fstore(ASMState *as, IRIns *ir)
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+- PPCIns pi = asm_fxstoreins(ir);
++ PPCIns pi = asm_fxstoreins(as, ir);
+ emit_tai(as, pi, src, idx, ofs);
+ }
+ }
+
+ static void asm_xload(ASMState *as, IRIns *ir)
+ {
+- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
++ Reg dest = ra_dest(as, ir,
++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+ if (irt_isi8(ir->t))
+ emit_as(as, PPCI_EXTSB, dest, dest);
+- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
+ }
+
+ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+@@ -857,8 +985,9 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+ Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
+ asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
+ } else {
+- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
++ Reg src = ra_alloc1(as, ir->op2,
++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
+ rset_exclude(RSET_GPR, src), ofs);
+ }
+ }
+@@ -871,24 +1000,39 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
+ RegSet allow = RSET_GPR;
+ int32_t ofs = AHUREF_LSX;
++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
++ t.irt = IRT_NUM;
++ if (ra_used(ir+1)) {
++ type = ra_dest(as, ir+1, allow);
++ rset_clear(allow, type);
++ }
++ ofs = 0;
++ }
+ if (ra_used(ir)) {
+- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+- if (!irt_isnum(t)) ofs = 0;
+- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad load type %d", irt_type(ir->t));
++ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ }
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
++ if (ir->o == IR_VLOAD) {
++ ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
++ ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
++ }
+ if (irt_isnum(t)) {
+ Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
+ asm_guardcc(as, CC_GE);
+ emit_ab(as, PPCI_CMPLW, type, tisnum);
+ if (ra_hasreg(dest)) {
+- if (ofs == AHUREF_LSX) {
++ if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
+ tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
+ (idx&255)), (idx>>8)));
+ emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
+ } else {
+- emit_fai(as, PPCI_LFD, dest, idx, ofs);
++ emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
++ ofs+4*LJ_SOFTFP);
+ }
+ }
+ } else {
+@@ -911,7 +1055,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
+ int32_t ofs = AHUREF_LSX;
+ if (ir->r == RID_SINK)
+ return;
+- if (irt_isnum(ir->t)) {
++ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+ src = ra_alloc1(as, ir->op2, RSET_FPR);
+ } else {
+ if (!irt_ispri(ir->t)) {
+@@ -919,11 +1063,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
+ rset_clear(allow, src);
+ ofs = 0;
+ }
+- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
++ type = ra_alloc1(as, (ir+1)->op2, allow);
++ else
++ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ rset_clear(allow, type);
+ }
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+- if (irt_isnum(ir->t)) {
++ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+ if (ofs == AHUREF_LSX) {
+ emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
+ emit_slwi(as, RID_TMP, (idx>>8), 3);
+@@ -948,21 +1095,39 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ IRType1 t = ir->t;
+ Reg dest = RID_NONE, type = RID_NONE, base;
+ RegSet allow = RSET_GPR;
+- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+- lua_assert(LJ_DUALNUM ||
+- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
++ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
++ if (hiop)
++ t.irt = IRT_NUM;
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
++ lj_assertA(LJ_DUALNUM ||
++ !irt_isint(t) ||
++ (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
++ "bad SLOAD type");
++#if LJ_SOFTFP
++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
++ if (hiop && ra_used(ir+1)) {
++ type = ra_dest(as, ir+1, allow);
++ rset_clear(allow, type);
++ }
++#else
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+ dest = ra_scratch(as, RSET_FPR);
+ asm_tointg(as, ir, dest);
+ t.irt = IRT_NUM; /* Continue with a regular number type check. */
+- } else if (ra_used(ir)) {
+- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
++ } else
++#endif
++ if (ra_used(ir)) {
++ lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
++ "bad SLOAD type %d", irt_type(ir->t));
++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ base = ra_alloc1(as, REF_BASE, allow);
+ rset_clear(allow, base);
+- if ((ir->op2 & IRSLOAD_CONVERT)) {
++ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
+ if (irt_isint(t)) {
+ emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
+ dest = ra_scratch(as, RSET_FPR);
+@@ -994,10 +1159,13 @@ dotypecheck:
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+ Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
+ asm_guardcc(as, CC_GE);
+- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
++#if !LJ_SOFTFP
+ type = RID_TMP;
++#endif
++ emit_ab(as, PPCI_CMPLW, type, tisnum);
+ }
+- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
++ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
++ base, ofs-(LJ_SOFTFP?0:4));
+ } else {
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+ asm_guardcc(as, CC_NE);
+@@ -1021,7 +1189,8 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+ IRRef args[4];
+ RegSet drop = RSET_SCRATCH;
+- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ if (ra_hasreg(ir->r))
+@@ -1034,10 +1203,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ if (ir->o == IR_CNEWI) {
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+ int32_t ofs = sizeof(GCcdata);
+- lua_assert(sz == 4 || sz == 8);
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ if (sz == 8) {
+ ofs += 4;
+- lua_assert((ir+1)->o == IR_HIOP);
++ lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
+ }
+ for (;;) {
+ Reg r = ra_alloc1(as, ir->op2, allow);
+@@ -1068,8 +1237,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+ ra_releasetmp(as, ASMREF_TMP1));
+ }
+-#else
+-#define asm_cnew(as, ir) ((void)0)
+ #endif
+
+ /* -- Write barriers ------------------------------------------------------ */
+@@ -1083,7 +1250,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
+ emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
+ emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
+ emit_setgl(as, tab, gc.grayagain);
+- lua_assert(LJ_GC_BLACK == 0x04);
++ lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK");
+ emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */
+ emit_getgl(as, link, gc.grayagain);
+ emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
+@@ -1098,7 +1265,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
+ MCLabel l_end;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+- lua_assert(IR(ir->op1)->o == IR_UREFC);
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+@@ -1119,6 +1286,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
+
+ /* -- Arithmetic and logic operations ------------------------------------- */
+
++#if !LJ_SOFTFP
+ static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
+ {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+@@ -1139,20 +1307,22 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
+
+ static void asm_fpmath(ASMState *as, IRIns *ir)
+ {
+- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+- return;
+ if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+ asm_fpunary(as, ir, PPCI_FSQRT);
+ else
+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
+ }
++#endif
+
+ static void asm_add(ASMState *as, IRIns *ir)
+ {
++#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
+ asm_fparith(as, ir, PPCI_FADD);
+- } else {
++ } else
++#endif
++ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ PPCIns pi;
+@@ -1191,10 +1361,13 @@ static void asm_add(ASMState *as, IRIns *ir)
+
+ static void asm_sub(ASMState *as, IRIns *ir)
+ {
++#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
+ asm_fparith(as, ir, PPCI_FSUB);
+- } else {
++ } else
++#endif
++ {
+ PPCIns pi = PPCI_SUBF;
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left, right;
+@@ -1220,9 +1393,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
+
+ static void asm_mul(ASMState *as, IRIns *ir)
+ {
++#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ asm_fparith(as, ir, PPCI_FMUL);
+- } else {
++ } else
++#endif
++ {
+ PPCIns pi = PPCI_MULLW;
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+@@ -1244,15 +1420,16 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ }
+ }
+
+-#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
+-#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
+-#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
++#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV)
+
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
++#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ asm_fpunary(as, ir, PPCI_FNEG);
+- } else {
++ } else
++#endif
++ {
+ Reg dest, left;
+ PPCIns pi = PPCI_NEG;
+ if (as->flagmcp == as->mcp) {
+@@ -1267,8 +1444,6 @@ static void asm_neg(ASMState *as, IRIns *ir)
+ }
+
+ #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
+-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
+
+ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
+ {
+@@ -1561,11 +1736,42 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+ #define asm_brol(as, ir) \
+ asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+ PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
+-#define asm_bror(as, ir) lua_assert(0)
++#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR")
++
++#if LJ_SOFTFP
++static void asm_sfpmin_max(ASMState *as, IRIns *ir)
++{
++ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
++ IRRef args[4];
++ MCLabel l_right, l_end;
++ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
++ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
++ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
++ PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
++ righthi = (lefthi >> 8); lefthi &= 255;
++ rightlo = (leftlo >> 8); leftlo &= 255;
++ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
++ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
++ l_end = emit_label(as);
++ if (desthi != righthi) emit_mr(as, desthi, righthi);
++ if (destlo != rightlo) emit_mr(as, destlo, rightlo);
++ l_right = emit_label(as);
++ if (l_end != l_right) emit_jmp(as, l_end);
++ if (desthi != lefthi) emit_mr(as, desthi, lefthi);
++ if (destlo != leftlo) emit_mr(as, destlo, leftlo);
++ if (l_right == as->mcp+1) {
++ cond ^= 4; l_right = l_end; ++as->mcp;
++ }
++ emit_condbranch(as, PPCI_BC, cond, l_right);
++ ra_evictset(as, RSET_SCRATCH);
++ emit_cmpi(as, RID_RET, 1);
++ asm_gencall(as, &ci, args);
++}
++#endif
+
+ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+ {
+- if (irt_isnum(ir->t)) {
++ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg tmp = dest;
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+@@ -1573,9 +1779,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+ if (tmp == left || tmp == right)
+ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
+ dest), left), right));
+- emit_facb(as, PPCI_FSEL, dest, tmp,
+- ismax ? left : right, ismax ? right : left);
+- emit_fab(as, PPCI_FSUB, tmp, left, right);
++ emit_facb(as, PPCI_FSEL, dest, tmp, left, right);
++ emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left);
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg tmp1 = RID_TMP, tmp2 = dest;
+@@ -1653,7 +1858,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
+ static void asm_comp(ASMState *as, IRIns *ir)
+ {
+ PPCCC cc = asm_compmap[ir->o];
+- if (irt_isnum(ir->t)) {
++ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = (left >> 8); left &= 255;
+ asm_guardcc(as, (cc >> 4));
+@@ -1674,6 +1879,44 @@ static void asm_comp(ASMState *as, IRIns *ir)
+
+ #define asm_equal(as, ir) asm_comp(as, ir)
+
++#if LJ_SOFTFP
++/* SFP comparisons: call IRCALL_softfp_cmp, then guard on its result in RID_RET. */
++static void asm_sfpcomp(ASMState *as, IRIns *ir)
++{
++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
++ RegSet drop = RSET_SCRATCH;
++ Reg r;
++ IRRef args[4];
++ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; /* Left operand from ir and ir+1 (presumably lo/hi word). */
++ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; /* Right operand; XOR with LJ_BE swaps the slots of each pair. */
++
++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
++ if (!rset_test(as->freeset, r) &&
++ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
++ rset_clear(drop, r); /* Reg is in use for exactly this argument ref: keep it out of the evict set. */
++ }
++ ra_evictset(as, drop);
++ asm_setupresult(as, ir, ci);
++ switch ((IROp)ir->o) { /* NOTE(review): two cases below have no break; looks deliberate -- confirm. */
++ case IR_ULT:
++ asm_guardcc(as, CC_EQ);
++ emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* No break: continues into IR_ULE. */
++ case IR_ULE:
++ asm_guardcc(as, CC_EQ);
++ emit_ai(as, PPCI_CMPWI, RID_RET, 1);
++ break;
++ case IR_GE: case IR_GT:
++ asm_guardcc(as, CC_EQ);
++ emit_ai(as, PPCI_CMPWI, RID_RET, 2); /* No break: continues into default. */
++ default:
++ asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); /* Condition code taken from the low 4 bits of the map entry. */
++ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
++ break;
++ }
++ asm_gencall(as, ci, args);
++}
++#endif
++
+ #if LJ_HASFFI
+ /* 64 bit integer comparisons. */
+ static void asm_comp64(ASMState *as, IRIns *ir)
+@@ -1698,24 +1941,41 @@ static void asm_comp64(ASMState *as, IRIns *ir)
+ }
+ #endif
+
+-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
++/* -- Split register ops -------------------------------------------------- */
+
+-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
++/* Hiword op of a split 32/32 bit op. Previous op must be the loword op. */
+ static void asm_hiop(ASMState *as, IRIns *ir)
+ {
+-#if LJ_HASFFI
+ /* HIOP is marked as a store because it needs its own DCE logic. */
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
++#if LJ_HASFFI || LJ_SOFTFP
+ if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
+ as->curins--; /* Always skip the CONV. */
++#if LJ_HASFFI && !LJ_SOFTFP
+ if (usehi || uselo)
+ asm_conv64(as, ir);
+ return;
++#endif
+ } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
+ as->curins--; /* Always skip the loword comparison. */
++#if LJ_SOFTFP
++ if (!irt_isint(ir->t)) {
++ asm_sfpcomp(as, ir-1);
++ return;
++ }
++#endif
++#if LJ_HASFFI
+ asm_comp64(as, ir);
++#endif
++ return;
++#if LJ_SOFTFP
++ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
++ as->curins--; /* Always skip the loword min/max. */
++ if (uselo || usehi)
++ asm_sfpmin_max(as, ir-1);
+ return;
++#endif
+ } else if ((ir-1)->o == IR_XSTORE) {
+ as->curins--; /* Handle both stores here. */
+ if ((ir-1)->r != RID_SINK) {
+@@ -1724,24 +1984,33 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ }
+ return;
+ }
++#endif
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
+ switch ((ir-1)->o) {
++#if LJ_HASFFI
+ case IR_ADD: as->curins--; asm_add64(as, ir); break;
+ case IR_SUB: as->curins--; asm_sub64(as, ir); break;
+ case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+- case IR_CALLN:
+- case IR_CALLXS:
++ case IR_CNEWI:
++ /* Nothing to do here. Handled by lo op itself. */
++ break;
++#endif
++#if LJ_SOFTFP
++ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
++ case IR_STRTO:
+ if (!uselo)
+- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
++ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
+ break;
+- case IR_CNEWI:
++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+ /* Nothing to do here. Handled by lo op itself. */
+ break;
+- default: lua_assert(0); break;
+- }
+-#else
+- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */
+ #endif
++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
++ if (!uselo)
++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
++ break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
++ }
+ }
+
+ /* -- Profiling ----------------------------------------------------------- */
+@@ -1797,12 +2066,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ if ((sn & SNAP_NORESTORE))
+ continue;
+ if (irt_isnum(ir->t)) {
++#if LJ_SOFTFP
++ Reg tmp;
++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
++ /* LJ_SOFTFP: must be a number constant. */
++ lj_assertA(irref_isk(ref), "unsplit FP op");
++ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
++ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
++ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
++ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
++ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
++#else
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
++#endif
+ } else {
+ Reg type;
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
irt_isinteger(ir->t));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
irt_isinteger(ir->t),
++ "restore of IR type %d", irt_type(ir->t));
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, allow);
+ rset_clear(allow, src);
+@@ -1811,6 +2093,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+ if (s == 0) continue; /* Do not overwrite link to previous frame. */
+ type = ra_allock(as, (int32_t)(*flinks--), allow);
++#if LJ_SOFTFP
++ } else if ((sn & SNAP_SOFTFPNUM)) {
++ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
++#endif
++ } else if ((sn & SNAP_KEYINDEX)) {
++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
+ } else {
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ }
+@@ -1818,11 +2106,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ }
+ checkmclim(as);
+ }
+- lua_assert(map + nent == flinks);
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+ }
+
+ /* -- GC handling --------------------------------------------------------- */
+
++/* Marker to prevent patching the GC check exit. */
++#define PPC_NOPATCH_GC_CHECK PPCI_ORIS
++
+ /* Check GC threshold and do one or more GC steps. */
+ static void asm_gc_check(ASMState *as)
+ {
+@@ -1834,6 +2125,7 @@ static void asm_gc_check(ASMState *as)
+ l_end = emit_label(as);
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */
++ *--as->mcp = PPC_NOPATCH_GC_CHECK;
+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ASMREF_TMP2; /* MSize steps */
+@@ -1865,6 +2157,12 @@ static void asm_loop_fixup(ASMState *as)
+ }
+ }
+
++/* Fixup the tail of the loop. Intentionally empty here: no code is emitted. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ UNUSED(as); /* Nothing to do; only marks the parameter as unused. */
++}
++
+ /* -- Head of trace ------------------------------------------------------- */
+
+ /* Coalesce BASE register for a root trace. */
+@@ -1916,7 +2214,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+ as->mctop = p;
+ } else {
+ /* Patch stack adjustment. */
+- lua_assert(checki16(CFRAME_SIZE+spadj));
++ lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
+ p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
+ p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
+ }
+@@ -1947,14 +2245,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+ asm_collectargs(as, ir, ci, args);
+ for (i = 0; i < nargs; i++)
+- if (args[i] && irt_isfp(IR(args[i])->t)) {
++ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
+ if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
+ } else {
+ if (ngpr > 0) ngpr--; else nslots++;
+ }
+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
+ as->evenspill = nslots;
+- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
++ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
++ REGSP_HINT(RID_RET);
+ }
+
+ static void asm_setup_target(ASMState *as)
+@@ -1972,7 +2271,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ MCode *px = exitstub_trace_addr(T, exitno);
+ MCode *cstart = NULL;
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
+- int clearso = 0;
++ int clearso = 0, patchlong = 1;
+ for (; p < pe; p++) {
+ /* Look for exitstub branch, try to replace with branch to target. */
+ uint32_t ins = *p;
+@@ -1984,7 +2283,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ delta -= sizeof(MCode);
+ }
+ /* Many, but not all short-range branches can be patched directly. */
+- if (((delta + 0x8000) >> 16) == 0) {
++ if (p[-1] == PPC_NOPATCH_GC_CHECK) {
++ patchlong = 0;
++ } else if (((delta + 0x8000) >> 16) == 0) {
+ *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) |
+ ((delta & 0x8000) * (PPCF_Y/0x8000));
+ if (!cstart) cstart = p;
+@@ -1992,14 +2293,17 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ } else if ((ins & 0xfc000000u) == PPCI_B &&
+ ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
+ ptrdiff_t delta = (char *)target - (char *)p;
+- lua_assert(((delta + 0x02000000) >> 26) == 0);
++ lj_assertJ(((delta + 0x02000000) >> 26) == 0,
++ "branch target out of range");
+ *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
+ if (!cstart) cstart = p;
+ }
+ }
+- { /* Always patch long-range branch in exit stub itself. */
++ /* Always patch long-range branch in exit stub itself. Except, if we can't. */
++ if (patchlong) {
+ ptrdiff_t delta = (char *)target - (char *)px - clearso;
+- lua_assert(((delta + 0x02000000) >> 26) == 0);
++ lj_assertJ(((delta + 0x02000000) >> 26) == 0,
++ "branch target out of range");
+ *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
+ }
+ if (!cstart) cstart = px;
+diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
+index 3e189b1d..5eb18365 100644
+--- a/src/lj_asm_x86.h
++++ b/src/lj_asm_x86.h
+@@ -1,6 +1,6 @@
+ /*
+ ** x86/x64 IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Guard handling ------------------------------------------------------ */
+@@ -31,7 +31,7 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
+ #endif
+ /* Jump to exit handler which fills in the ExitState. */
+ *mxp++ = XI_JMP; mxp += 4;
+- *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
++ *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler);
+ /* Commit the code for this group (even if assembly fails later on). */
+ lj_mcode_commitbot(as->J, mxp);
+ as->mcbot = mxp;
+@@ -60,7 +60,7 @@ static void asm_guardcc(ASMState *as, int cc)
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+- *(int32_t *)(p+1) = jmprel(p+5, target);
++ *(int32_t *)(p+1) = jmprel(as->J, p+5, target);
+ target = p;
+ cc ^= 1;
+ if (as->realign) {
+@@ -131,7 +131,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
+ as->mrm.ofs = 0;
+ if (irb->o == IR_FLOAD) {
+ IRIns *ira = IR(irb->op1);
+- lua_assert(irb->op2 == IRFL_TAB_ARRAY);
++ lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
+ /* We can avoid the FLOAD of t->array for colocated arrays. */
+ if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
+ !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
+@@ -150,7 +150,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
+ static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
+ {
+ IRIns *irx;
+- lua_assert(ir->o == IR_AREF);
++ lj_assertA(ir->o == IR_AREF, "expected AREF");
+ as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow);
+ irx = IR(ir->op2);
+ if (irref_isk(ir->op2)) {
+@@ -216,9 +216,17 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
+ #endif
+ }
+ break;
++ case IR_TMPREF:
++#if LJ_GC64
++ as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv);
++ as->mrm.base = RID_DISPATCH;
++ as->mrm.idx = RID_NONE;
++#else
++ as->mrm.ofs = igcptr(&J2G(as->J)->tmptv);
++ as->mrm.base = as->mrm.idx = RID_NONE;
++#endif
++ return;
+ default:
+- lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO
||
+- ir->o == IR_KKPTR);
+ break;
+ }
+ }
+@@ -230,9 +238,10 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
+ /* Fuse FLOAD/FREF reference into memory operand. */
+ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
+ {
+- lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
++ lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF,
++ "bad IR op %d", ir->o);
+ as->mrm.idx = RID_NONE;
+- if (ir->op1 == REF_NIL) {
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ #if LJ_GC64
+ as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch);
+ as->mrm.base = RID_DISPATCH;
+@@ -271,7 +280,7 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
+ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
+ {
+ IRIns *irr;
+- lua_assert(ir->o == IR_STRREF);
++ lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o);
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ as->mrm.scale = XM_SCALE1;
+ as->mrm.ofs = sizeof(GCstr);
+@@ -378,15 +387,17 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
+ checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
+ as->mrm.ofs = (int32_t)mcpofs(as, k);
+ as->mrm.base = RID_RIP;
+- } else {
++ } else { /* Intern 64 bit constant at bottom of mcode. */
+ if (ir->i) {
+- lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
++ lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
++ "bad interned 64 bit constant");
+ } else {
+ while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+ *(uint64_t*)as->mcbot = *k;
+ ir->i = (int32_t)(as->mctop - as->mcbot);
+ as->mcbot += 8;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
++ lj_mcode_commitbot(as->J, as->mcbot);
+ }
+ as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
+ as->mrm.base = RID_RIP;
+@@ -419,12 +430,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
+ }
+ if (ir->o == IR_KNUM) {
+ RegSet avail = as->freeset & ~as->modset & RSET_FPR;
+- lua_assert(allow != RSET_EMPTY);
++ lj_assertA(allow != RSET_EMPTY, "no register allowed");
+ if (!(avail & (avail-1))) /* Fuse if less than two regs available. */
+ return asm_fuseloadk64(as, ir);
+ } else if (ref == REF_BASE || ir->o == IR_KINT64) {
+ RegSet avail = as->freeset & ~as->modset & RSET_GPR;
+- lua_assert(allow != RSET_EMPTY);
++ lj_assertA(allow != RSET_EMPTY, "no register allowed");
+ if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
+ if (ref == REF_BASE) {
+ #if LJ_GC64
+@@ -476,6 +487,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
+ }
+ } else if (ir->o == IR_VLOAD && !(LJ_GC64 &&
irt_isaddr(ir->t))) {
+ asm_fuseahuref(as, ir->op1, xallow);
++ as->mrm.ofs += 8 * ir->op2;
+ return RID_MRM;
+ }
+ }
+@@ -605,7 +617,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ #endif
+ emit_loadi(as, r, ir->i);
+ } else {
+- lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
++ /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, r), "reg %d not free", r);
+ if (ra_hasreg(ir->r)) {
+ ra_noweak(as, ir->r);
+ emit_movrr(as, ir, r, ir->r);
+@@ -614,7 +627,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ }
+ }
+ } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */
+- lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k.
*/
++ lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)),
++ "unexpected float constant");
+ if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
+ /* Split stores for unaligned FP consts. */
+ emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
+@@ -645,7 +659,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ {
+ RegSet drop = RSET_SCRATCH;
+- int hiop = (LJ_32 && (ir+1)->o == IR_HIOP &&
!irt_isnil((ir+1)->t));
++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+ if ((ci->flags & CCI_NOFPRCLOBBER))
+ drop &= ~RSET_FPR;
+ if (ra_hasreg(ir->r))
+@@ -685,12 +699,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
+ }
+ #endif
+-#if LJ_32
+ } else if (hiop) {
+ ra_destpair(as, ir);
+-#endif
+ } else {
+- lua_assert(!irt_ispri(ir->t));
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ ra_destreg(as, ir, RID_RET);
+ }
+ } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags &
CCI_CASTU64)) {
+@@ -775,6 +787,21 @@ static void asm_retf(ASMState *as, IRIns *ir)
+ #endif
+ }
+
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb) /* Emit a load/AND/OR/store sequence on the SBuf L field addressed by sb. */
++{ /* NOTE(review): emitters presumably run in reverse of execution order (load, AND, OR, store) -- confirm. */
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); /* Scratch GPR distinct from sb. */
++ IRIns irgc; /* Dummy instruction passed to the load/store emitters; only .ot is set. */
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); /* Store tmp to the L field. */
++ emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L); /* OR tmp with cur_L (presumably the global-state field). */
++ emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG); /* AND tmp with SBUF_MASK_FLAG. */
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); /* Load the L field into tmp. */
++}
++#endif
++
+ /* -- Type conversions ---------------------------------------------------- */
+
+ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+@@ -809,8 +836,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64));
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+ IRRef lref = ir->op1;
+- lua_assert(irt_type(ir->t) != st);
+- lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT.
*/
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
++ lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)),
++ "IR %04d has unsplit 64 bit type",
++ (int)(ir - as->ir) - REF_BIAS);
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) { /* FP to FP conversion. */
+@@ -846,7 +875,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+@@ -881,7 +911,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
+ RegSet allow = RSET_GPR;
+ x86Op op;
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV
EXT");
+ if (st == IRT_I8) {
+ op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX;
+ } else if (st == IRT_U8) {
+@@ -915,7 +945,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ }
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+- if (st64) {
++ if (st64 && !(ir->op2 & IRCONV_NONE)) {
+ Reg left = asm_fuseload(as, lref, RSET_GPR);
+ /* This is either a 32 bit reg/reg mov which zeroes the hiword
+ ** or a load of the loword from a 64 bit address.
+@@ -952,7 +982,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
+ emit_sjcc(as, CC_NS, l_end);
+ emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
+ } else {
+- lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
++ lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for
CONV");
+ }
+ emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
+ /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
+@@ -966,8 +996,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
+ IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+ IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+ Reg lo, hi;
+- lua_assert(st == IRT_NUM || st == IRT_FLOAT);
+- lua_assert(dt == IRT_I64 || dt == IRT_U64);
++ lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
++ lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
+ hi = ra_dest(as, ir, RSET_GPR);
+ lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
+ if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
+@@ -1041,47 +1071,48 @@ static void asm_strto(ASMState *as, IRIns *ir)
+ /* -- Memory references --------------------------------------------------- */
+
+ /* Get pointer to TValue. */
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+ {
+- IRIns *ir = IR(ref);
+- if (irt_isnum(ir->t)) {
+- /* For numbers use the constant itself or a spill slot as a TValue. */
+- if (irref_isk(ref))
+- emit_loada(as, dest, ir_knum(ir));
+- else
+- emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
+- } else {
+- /* Otherwise use g->tmptv to hold the TValue. */
+-#if LJ_GC64
+- if (irref_isk(ref)) {
+- TValue k;
+- lj_ir_kvalue(as->J->L, &k, ir);
+- emit_movmroi(as, dest, 4, k.u32.hi);
+- emit_movmroi(as, dest, 0, k.u32.lo);
++ if ((mode & IRTMPREF_IN1)) {
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
++ /* Use the number constant itself as a TValue. */
++ emit_loada(as, dest, ir_knum(ir));
++ return;
++ }
++ emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0);
+ } else {
+- /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+- Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+- if (irt_is64(ir->t)) {
+- emit_u32(as, irt_toitype(ir->t) << 15);
+- emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
++#if LJ_GC64
++ if (irref_isk(ref)) {
++ TValue k;
++ lj_ir_kvalue(as->J->L, &k, ir);
++ emit_movmroi(as, dest, 4, k.u32.hi);
++ emit_movmroi(as, dest, 0, k.u32.lo);
+ } else {
+- /* Currently, no caller passes integers that might end up here. */
+- emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
++ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
++ if (irt_is64(ir->t)) {
++ emit_u32(as, irt_toitype(ir->t) << 15);
++ emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
++ } else {
++ emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
++ }
++ emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+ }
+- emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+- }
+ #else
+- if (!irref_isk(ref)) {
+- Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+- emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+- } else if (!irt_ispri(ir->t)) {
+- emit_movmroi(as, dest, 0, ir->i);
+- }
+- if (!(LJ_64 && irt_islightud(ir->t)))
+- emit_movmroi(as, dest, 4, irt_toitype(ir->t));
++ if (!irref_isk(ref)) {
++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
++ emit_movtomro(as, REX_64IR(ir, src), dest, 0);
++ } else if (!irt_ispri(ir->t)) {
++ emit_movmroi(as, dest, 0, ir->i);
++ }
++ if (!(LJ_64 && irt_islightud(ir->t)))
++ emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+ #endif
+- emit_loada(as, dest, &J2G(as->J)->tmptv);
++ }
+ }
++ emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the
TValue(s). */
+ }
+
+ static void asm_aref(ASMState *as, IRIns *ir)
+@@ -1179,13 +1210,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
+ }
+ } else {
+- lua_assert(irt_ispri(kt) && !irt_isnil(kt));
++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+ emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
+ emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
+ #else
+ } else {
+ if (!irt_ispri(kt)) {
+- lua_assert(irt_isaddr(kt));
++ lj_assertA(irt_isaddr(kt), "bad HREF key type");
+ if (isk)
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
+ ptr2addr(ir_kgc(irkey)));
+@@ -1193,7 +1224,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
+ emit_sjcc(as, CC_NE, l_next);
+ }
+- lua_assert(!irt_isnil(kt));
++ lj_assertA(!irt_isnil(kt), "bad HREF key type");
+ emit_i8(as, irt_toitype(kt));
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+ #endif
+@@ -1208,23 +1239,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ #endif
+
+ /* Load main position relative to tab->node into dest. */
+- khash = isk ? ir_khash(irkey) : 1;
++ khash = isk ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
+ } else {
+ emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
+- if ((as->flags & JIT_F_PREFER_IMUL)) {
+- emit_i8(as, sizeof(Node));
+- emit_rr(as, XO_IMULi8, dest, dest);
+- } else {
+- emit_shifti(as, XOg_SHL, dest, 3);
+- emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
+- }
++ emit_shifti(as, XOg_SHL, dest, 3);
++ emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
+ if (isk) {
+ emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
+ } else if (irt_isstr(kt)) {
+- emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
++ emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid));
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
+ } else { /* Must match with hashrot() in lj_tab.c. */
+ emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
+@@ -1275,10 +1301,10 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ #if !LJ_64
+ MCLabel l_exit;
+ #endif
+- lua_assert(ofs % sizeof(Node) == 0);
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (ra_hasreg(dest)) {
+ if (ofs != 0) {
+- if (dest == node && !(as->flags & JIT_F_LEA_AGU))
++ if (dest == node)
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
+ else
+ emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
+@@ -1292,7 +1318,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
+ emit_rmro(as, XO_CMP, key|REX_64, node,
+ ofs + (int32_t)offsetof(Node, key.u64));
+- lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
++ lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t),
++ "bad HREFK key type");
+ /* Assumes -0.0 is already canonicalized to +0.0. */
+ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
+ #if LJ_GC64
+@@ -1303,7 +1330,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
+ #endif
+ } else {
+- lua_assert(!irt_isnil(irkey->t));
++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
+ #if LJ_GC64
+ emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
+ emit_rmro(as, XO_ARITHi, XOg_CMP, node,
+@@ -1327,13 +1354,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ (int32_t)ir_knum(irkey)->u32.hi);
+ } else {
+ if (!irt_ispri(irkey->t)) {
+- lua_assert(irt_isgcv(irkey->t));
++ lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type");
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
+ ofs + (int32_t)offsetof(Node, key.gcr),
+ ptr2addr(ir_kgc(irkey)));
+ emit_sjcc(as, CC_NE, l_exit);
+ }
+- lua_assert(!irt_isnil(irkey->t));
++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
+ emit_i8(as, irt_toitype(irkey->t));
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
+ ofs + (int32_t)offsetof(Node, key.it));
+@@ -1406,7 +1433,8 @@ static void asm_fxload(ASMState *as, IRIns *ir)
+ if (LJ_64 && irt_is64(ir->t))
+ dest |= REX_64;
+ else
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
++ "unsplit 64 bit load");
+ xo = XO_MOV;
+ break;
+ }
+@@ -1451,13 +1479,16 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
+ case IRT_NUM: xo = XO_MOVSDto; break;
+ case IRT_FLOAT: xo = XO_MOVSSto; break;
+ #if LJ_64 && !LJ_GC64
+- case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
++ case IRT_LIGHTUD:
++ /* NYI: mask 64 bit lightuserdata. */
++ lj_assertA(0, "store of lightuserdata");
+ #endif
+ default:
+ if (LJ_64 && irt_is64(ir->t))
+ src |= REX_64;
+ else
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
++ "unsplit 64 bit store");
+ xo = XO_MOVto;
+ break;
+ }
+@@ -1471,8 +1502,8 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
+ emit_i8(as, k);
+ emit_mrm(as, XO_MOVmib, 0, RID_MRM);
+ } else {
+- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
+- irt_isaddr(ir->t));
++ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
++ irt_isaddr(ir->t), "bad store type");
+ emit_i32(as, k);
+ emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
+ }
+@@ -1507,13 +1538,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ #if LJ_GC64
+ Reg tmp = RID_NONE;
+ #endif
+- lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+- (LJ_DUALNUM && irt_isint(ir->t)));
++ lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
++ (LJ_DUALNUM && irt_isint(ir->t)),
++ "bad load type %d", irt_type(ir->t));
+ #if LJ_64 && !LJ_GC64
+ if (irt_islightud(ir->t)) {
+ Reg dest = asm_load_lightud64(as, ir, 1);
+ if (ra_hasreg(dest)) {
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
++ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
+ emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+ }
+ return;
+@@ -1523,6 +1556,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+ Reg dest = ra_dest(as, ir, allow);
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
++ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
+ #if LJ_GC64
+ if (irt_isaddr(ir->t)) {
+ emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+@@ -1550,12 +1584,14 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ }
+ #endif
+ asm_fuseahuref(as, ir->op1, gpr);
++ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
+ }
+ /* Always do the type check, even if the load result is unused. */
+ as->mrm.ofs += 4;
+ asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
+ if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
+- lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
++ lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ #if LJ_GC64
+ emit_u32(as, LJ_TISNUM << 15);
+ #else
+@@ -1637,13 +1673,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
+ #endif
+ emit_mrm(as, XO_MOVto, src, RID_MRM);
+ } else if (!irt_ispri(irr->t)) {
+- lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM &&
irt_isinteger(ir->t)));
++ lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM &&
irt_isinteger(ir->t)),
++ "bad store type");
+ emit_i32(as, irr->i);
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+ }
+ as->mrm.ofs += 4;
+ #if LJ_GC64
+- lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
++ lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store
type");
+ emit_i32(as, LJ_TNUMX << 15);
+ #else
+ emit_i32(as, (int32_t)irt_toitype(ir->t));
+@@ -1658,10 +1695,14 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+ IRType1 t = ir->t;
+ Reg base;
+- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+- lua_assert(LJ_DUALNUM ||
+- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
++ lj_assertA(LJ_DUALNUM ||
++ !irt_isint(t) ||
++ (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
++ "bad SLOAD type");
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) &&
irt_isint(t)) {
+ Reg left = ra_scratch(as, RSET_FPR);
+ asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
+@@ -1681,7 +1722,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR;
+ Reg dest = ra_dest(as, ir, allow);
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
+- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
++ lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
++ "bad SLOAD type %d", irt_type(t));
+ if ((ir->op2 & IRSLOAD_CONVERT)) {
+ t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
+ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
+@@ -1727,7 +1769,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ /* Need type check, even if the load result is unused. */
+ asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
+ if (LJ_64 && irt_type(t) >= IRT_NUM) {
+- lua_assert(irt_isinteger(t) || irt_isnum(t));
++ lj_assertA(irt_isinteger(t) || irt_isnum(t),
++ "bad SLOAD type %d", irt_type(t));
+ #if LJ_GC64
+ emit_u32(as, LJ_TISNUM << 15);
+ #else
+@@ -1758,7 +1801,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ emit_i8(as, irt_toitype(t));
+ emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+ emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+- emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4);
++ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs);
+ #else
+ } else {
+ emit_i8(as, irt_toitype(t));
+@@ -1779,7 +1822,8 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ CTInfo info = lj_ctype_info(cts, id, &sz);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+ IRRef args[4];
+- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ asm_setupresult(as, ir, ci); /* GCcdata * */
+@@ -1809,7 +1853,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ int32_t ofs = sizeof(GCcdata);
+ if (sz == 8) {
+ ofs += 4; ir++;
+- lua_assert(ir->o == IR_HIOP);
++ lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP");
+ }
+ do {
+ if (irref_isk(ir->op2)) {
+@@ -1823,7 +1867,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ ofs -= 4; ir--;
+ } while (1);
+ #endif
+- lua_assert(sz == 4 || sz == 8);
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+@@ -1847,8 +1891,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
+ }
+-#else
+-#define asm_cnew(as, ir) ((void)0)
+ #endif
+
+ /* -- Write barriers ------------------------------------------------------ */
+@@ -1875,7 +1917,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
+ MCLabel l_end;
+ Reg obj;
+ /* No need for other object barriers (yet). */
+- lua_assert(IR(ir->op1)->o == IR_UREFC);
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+@@ -1954,15 +1996,11 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
+ fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
+ ra_left(as, RID_XMM0, ir->op1);
+ }
+- } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
+- /* Rejoined to pow(). */
+ } else {
+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+ }
+ }
+
+-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+-
+ static void asm_ldexp(ASMState *as, IRIns *ir)
+ {
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
+@@ -1992,22 +2030,11 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
+ ra_left(as, RID_EAX, ir->op2);
+ }
+
+-static void asm_pow(ASMState *as, IRIns *ir)
+-{
+-#if LJ_64 && LJ_HASFFI
+- if (!irt_isnum(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+- IRCALL_lj_carith_powu64);
+- else
+-#endif
+- asm_fppowi(as, ir);
+-}
+-
+ static int asm_swapops(ASMState *as, IRIns *ir)
+ {
+ IRIns *irl = IR(ir->op1);
+ IRIns *irr = IR(ir->op2);
+- lua_assert(ra_noreg(irr->r));
++ lj_assertA(ra_noreg(irr->r), "bad usage");
+ if (!irm_iscomm(lj_ir_mode[ir->o]))
+ return 0; /* Can't swap non-commutative operations. */
+ if (irref_isk(ir->op2))
+@@ -2060,8 +2087,9 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
+ int32_t k = 0;
+ if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */
+ MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
+- if ((p[1] & 15) < 14) {
+- if ((p[1] & 15) >= 12) p[1] -= 4; /* L <->S, NL <-> NS */
++ MCode *q = p[0] == 0x0f ? p+1 : p;
++ if ((*q & 15) < 14) {
++ if ((*q & 15) >= 12) *q -= 4; /* L <->S, NL <-> NS */
+ as->flagmcp = NULL;
+ as->mcp = p;
+ } /* else: cannot transform LE/NLE to cc without use of OF. */
+@@ -2178,8 +2206,7 @@ static void asm_add(ASMState *as, IRIns *ir)
+ {
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_ADDSD);
+- else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
+- irt_is64(ir->t) || !asm_lea(as, ir))
++ else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir))
+ asm_intarith(as, ir, XOg_ADD);
+ }
+
+@@ -2199,27 +2226,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ asm_intarith(as, ir, XOg_X_IMUL);
+ }
+
+-static void asm_div(ASMState *as, IRIns *ir)
+-{
+-#if LJ_64 && LJ_HASFFI
+- if (!irt_isnum(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+- IRCALL_lj_carith_divu64);
+- else
+-#endif
+- asm_fparith(as, ir, XO_DIVSD);
+-}
+-
+-static void asm_mod(ASMState *as, IRIns *ir)
+-{
+-#if LJ_64 && LJ_HASFFI
+- if (!irt_isint(ir->t))
+- asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+- IRCALL_lj_carith_modu64);
+- else
+-#endif
+- asm_callid(as, ir, IRCALL_lj_vm_modi);
+-}
++#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD)
+
+ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
+ {
+@@ -2319,7 +2326,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
+ dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
+ if (dest == RID_ECX) {
+ dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
+- emit_rr(as, XO_MOV, RID_ECX, dest);
++ emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest);
+ }
+ right = irr->r;
+ if (ra_noreg(right))
+@@ -2417,8 +2424,9 @@ static void asm_comp(ASMState *as, IRIns *ir)
+ IROp leftop = (IROp)(IR(lref)->o);
+ Reg r64 = REX_64IR(ir, 0);
+ int32_t imm = 0;
+- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
+- irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
++ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
++ irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
++ "bad comparison data type %d", irt_type(ir->t));
+ /* Swap constants (only for ABC) and fusable loads to the right. */
+ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
+ if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */
+@@ -2500,7 +2508,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
+ /* Use test r,r instead of cmp r,0. */
+ x86Op xo = XO_TEST;
+ if (irt_isu8(ir->t)) {
+- lua_assert(ir->o == IR_EQ || ir->o == IR_NE);
++ lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage");
+ xo = XO_TESTb;
+ if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) {
+ if (LJ_64) {
+@@ -2602,15 +2610,15 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
+ }
+ #endif
+
+-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
++/* -- Split register ops -------------------------------------------------- */
+
+-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
++/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
+ static void asm_hiop(ASMState *as, IRIns *ir)
+ {
+-#if LJ_32 && LJ_HASFFI
+ /* HIOP is marked as a store because it needs its own DCE logic. */
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
++#if LJ_32 && LJ_HASFFI
+ if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
+ as->curins--; /* Always skip the CONV. */
+ if (usehi || uselo)
+@@ -2624,8 +2632,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ asm_fxstore(as, ir);
+ return;
+ }
++#endif
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
+ switch ((ir-1)->o) {
++#if LJ_32 && LJ_HASFFI
+ case IR_ADD:
+ as->flagmcp = NULL;
+ as->curins--;
+@@ -2648,19 +2658,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ asm_neg_not(as, ir-1, XOg_NEG);
+ break;
+ }
+- case IR_CALLN:
+- case IR_CALLXS:
+- if (!uselo)
+- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+- break;
+ case IR_CNEWI:
+ /* Nothing to do here. Handled by CNEWI itself. */
+ break;
+- default: lua_assert(0); break;
+- }
+-#else
+- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */
+ #endif
++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
++ if (!uselo)
++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
++ break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
++ }
+ }
+
+ /* -- Profiling ----------------------------------------------------------- */
+@@ -2721,12 +2728,21 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ IRIns *ir = IR(ref);
+ if ((sn & SNAP_NORESTORE))
+ continue;
+- if (irt_isnum(ir->t)) {
++ if ((sn & SNAP_KEYINDEX)) {
++ emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX);
++ if (irref_isk(ref)) {
++ emit_movmroi(as, RID_BASE, ofs, ir->i);
++ } else {
++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
++ emit_movtomro(as, src, RID_BASE, ofs);
++ }
++ } else if (irt_isnum(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
+ } else {
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+- (LJ_DUALNUM && irt_isinteger(ir->t)));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
++ (LJ_DUALNUM && irt_isinteger(ir->t)),
++ "restore of IR type %d", irt_type(ir->t));
+ if (!irref_isk(ref)) {
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+ #if LJ_GC64
+@@ -2771,7 +2787,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ }
+ checkmclim(as);
+ }
+- lua_assert(map + nent == flinks);
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+ }
+
+ /* -- GC handling --------------------------------------------------------- */
+@@ -2815,16 +2831,16 @@ static void asm_loop_fixup(ASMState *as)
+ MCode *target = as->mcp;
+ if (as->realign) { /* Realigned loops use short jumps. */
+ as->realign = NULL; /* Stop another retry. */
+- lua_assert(((intptr_t)target & 15) == 0);
++ lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed");
+ if (as->loopinv) { /* Inverted loop branch? */
+ p -= 5;
+ p[0] = XI_JMP;
+- lua_assert(target - p >= -128);
++ lj_assertA(target - p >= -128, "loop realign failed");
+ p[-1] = (MCode)(target - p); /* Patch sjcc. */
+ if (as->loopinv == 2)
+ p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */
+ } else {
+- lua_assert(target - p >= -128);
++ lj_assertA(target - p >= -128, "loop realign failed");
+ p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */
+ p[-2] = XI_JMPs;
+ }
+@@ -2853,6 +2869,12 @@ static void asm_loop_fixup(ASMState *as)
+ }
+ }
+
++/* Fixup the tail of the loop. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ UNUSED(as); /* Nothing to do. */
++}
++
+ /* -- Head of trace ------------------------------------------------------- */
+
+ /* Coalesce BASE register for a root trace. */
+@@ -2901,7 +2923,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+ MCode *target, *q;
+ int32_t spadj = as->T->spadjust;
+ if (spadj == 0) {
+- p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
++ p -= LJ_64 ? 7 : 6;
+ } else {
+ MCode *p1;
+ /* Patch stack adjustment. */
+@@ -2913,24 +2935,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+ p1 = p-9;
+ *(int32_t *)p1 = spadj;
+ }
+- if ((as->flags & JIT_F_LEA_AGU)) {
+-#if LJ_64
+- p1[-4] = 0x48;
+-#endif
+- p1[-3] = (MCode)XI_LEA;
+- p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
+- p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
+- } else {
+ #if LJ_64
+- p1[-3] = 0x48;
++ p1[-3] = 0x48;
+ #endif
+- p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
+- p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
+- }
++ p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
++ p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
+ }
+ /* Patch exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+- *(int32_t *)(p-4) = jmprel(p, target);
++ *(int32_t *)(p-4) = jmprel(as->J, p, target);
+ p[-5] = XI_JMP;
+ /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
+ for (q = as->mctop-1; q >= p; q--)
+@@ -2957,7 +2970,7 @@ static void asm_tail_prep(ASMState *as)
+ as->invmcp = as->mcp = p;
+ } else {
+ /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
+- as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0));
++ as->mcp = p - (LJ_64 ? 7 : 6);
+ as->invmcp = NULL;
+ }
+ }
+@@ -3097,23 +3110,30 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ MSize len = T->szmcode;
+ MCode *px = exitstub_addr(J, exitno) - 6;
+ MCode *pe = p+len-6;
++ MCode *pgc = NULL;
+ #if LJ_GC64
+ uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
+ #else
+ uint32_t statei = u32ptr(&J2G(J)->vmstate);
+ #endif
+ if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
+- *(int32_t *)(p+len-4) = jmprel(p+len, target);
++ *(int32_t *)(p+len-4) = jmprel(J, p+len, target);
+ /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
+ for (; p < pe; p += asm_x86_inslen(p)) {
+ intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
+ if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
+ break;
+ }
+- lua_assert(p < pe);
+- for (; p < pe; p += asm_x86_inslen(p))
+- if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
+- *(int32_t *)(p+2) = jmprel(p+6, target);
++ lj_assertJ(p < pe, "instruction length decoder failed");
++ for (; p < pe; p += asm_x86_inslen(p)) {
++ if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px &&
++ p != pgc) {
++ *(int32_t *)(p+2) = jmprel(J, p+6, target);
++ } else if (*p == XI_CALL &&
++ (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
++ pgc = p+7; /* Do not patch GC check exit. */
++ }
++ }
+ lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
+ lj_mcode_patch(J, mcarea, 1);
+ }
+diff --git a/src/lj_assert.c b/src/lj_assert.c
+new file mode 100644
+index 00000000..35a63ce3
+--- /dev/null
++++ b/src/lj_assert.c
+@@ -0,0 +1,28 @@
++/*
++** Internal assertions.
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#define lj_assert_c
++#define LUA_CORE
++
++#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
++
++#include <stdio.h>
++
++#include "lj_obj.h"
++
++void lj_assert_fail(global_State *g, const char *file, int line,
++ const char *func, const char *fmt, ...)
++{
++ va_list argp;
++ va_start(argp, fmt);
++ fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func);
++ vfprintf(stderr, fmt, argp);
++ fputc('\n', stderr);
++ va_end(argp);
++ UNUSED(g); /* May be NULL. TODO: optionally dump state. */
++ abort();
++}
++
++#endif
+diff --git a/src/lj_bc.c b/src/lj_bc.c
+index a597692c..16c22dc3 100644
+--- a/src/lj_bc.c
++++ b/src/lj_bc.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Bytecode instruction modes.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_bc_c
+diff --git a/src/lj_bc.h b/src/lj_bc.h
+index 69a45f28..ad517b6b 100644
+--- a/src/lj_bc.h
++++ b/src/lj_bc.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Bytecode instruction format.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_BC_H
+diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
+index fdfc6ec0..d968d3f4 100644
+--- a/src/lj_bcdump.h
++++ b/src/lj_bcdump.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Bytecode dump definitions.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_BCDUMP_H
+diff --git a/src/lj_bcread.c b/src/lj_bcread.c
+index 48c5e7c7..298e6c45 100644
+--- a/src/lj_bcread.c
++++ b/src/lj_bcread.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Bytecode reader.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_bcread_c
+@@ -47,17 +47,17 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
+ /* Refill buffer. */
+ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
+ {
+- lua_assert(len != 0);
++ lj_assertLS(len != 0, "empty refill");
+ if (len > LJ_MAX_BUF || ls->c < 0)
+ bcread_error(ls, LJ_ERR_BCBAD);
+ do {
+ const char *buf;
+ size_t sz;
+- char *p = sbufB(&ls->sb);
++ char *p = ls->sb.b;
+ MSize n = (MSize)(ls->pe - ls->p);
+ if (n) { /* Copy remainder to buffer. */
+ if (sbuflen(&ls->sb)) { /* Move down in buffer. */
+- lua_assert(ls->pe == sbufP(&ls->sb));
++ lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
+ if (ls->p != p) memmove(p, ls->p, n);
+ } else { /* Copy from buffer provided by reader. */
+ p = lj_buf_need(&ls->sb, len);
+@@ -66,38 +66,39 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
+ ls->p = p;
+ ls->pe = p + n;
+ }
+- setsbufP(&ls->sb, p + n);
++ ls->sb.w = p + n;
+ buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
+ if (buf == NULL || sz == 0) { /* EOF? */
+ if (need) bcread_error(ls, LJ_ERR_BCBAD);
+ ls->c = -1; /* Only bad if we get called again. */
+ break;
+ }
++ if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
+ if (n) { /* Append to buffer. */
+ n += (MSize)sz;
+ p = lj_buf_need(&ls->sb, n < len ? len : n);
+- memcpy(sbufP(&ls->sb), buf, sz);
+- setsbufP(&ls->sb, p + n);
++ memcpy(ls->sb.w, buf, sz);
++ ls->sb.w = p + n;
+ ls->p = p;
+ ls->pe = p + n;
+ } else { /* Return buffer provided by reader. */
+ ls->p = buf;
+ ls->pe = buf + sz;
+ }
+- } while (ls->p + len > ls->pe);
++ } while ((MSize)(ls->pe - ls->p) < len);
+ }
+
+ /* Need a certain number of bytes. */
+ static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
+ {
+- if (LJ_UNLIKELY(ls->p + len > ls->pe))
++ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
+ bcread_fill(ls, len, 1);
+ }
+
+ /* Want to read up to a certain number of bytes, but may need less. */
+ static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
+ {
+- if (LJ_UNLIKELY(ls->p + len > ls->pe))
++ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
+ bcread_fill(ls, len, 0);
+ }
+
+@@ -106,7 +107,7 @@ static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
+ {
+ uint8_t *p = (uint8_t *)ls->p;
+ ls->p += len;
+- lua_assert(ls->p <= ls->pe);
++ lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
+ return p;
+ }
+
+@@ -119,7 +120,7 @@ static void bcread_block(LexState *ls, void *q, MSize len)
+ /* Read byte from buffer. */
+ static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
+ {
+- lua_assert(ls->p < ls->pe);
++ lj_assertLS(ls->p < ls->pe, "buffer read overflow");
+ return (uint32_t)(uint8_t)*ls->p++;
+ }
+
+@@ -127,7 +128,7 @@ static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
+ static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
+ {
+ uint32_t v = lj_buf_ruleb128(&ls->p);
+- lua_assert(ls->p <= ls->pe);
++ lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
+ return v;
+ }
+
+@@ -144,7 +145,7 @@ static uint32_t bcread_uleb128_33(LexState *ls)
+ } while (*p++ >= 0x80);
+ }
+ ls->p = (char *)p;
+- lua_assert(ls->p <= ls->pe);
++ lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
+ return v;
+ }
+
+@@ -191,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o)
+ o->u32.lo = bcread_uleb128(ls);
+ o->u32.hi = bcread_uleb128(ls);
+ } else {
+- lua_assert(tp <= BCDUMP_KTAB_TRUE);
++ lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
+ setpriV(o, ~tp);
+ }
+ }
+@@ -213,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls)
+ for (i = 0; i < nhash; i++) {
+ TValue key;
+ bcread_ktabk(ls, &key);
+- lua_assert(!tvisnil(&key));
++ lj_assertLS(!tvisnil(&key), "nil key");
+ bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
+ }
+ }
+@@ -250,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
+ #endif
+ } else {
+ lua_State *L = ls->L;
+- lua_assert(tp == BCDUMP_KGC_CHILD);
++ lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp);
+ if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
+ bcread_error(ls, LJ_ERR_BCBAD);
+ L->top--;
+@@ -398,11 +399,7 @@ static int bcread_header(LexState *ls)
+ if ((flags & BCDUMP_F_FFI)) {
+ #if LJ_HASFFI
+ lua_State *L = ls->L;
+- if (!ctype_ctsG(G(L))) {
+- ptrdiff_t oldtop = savestack(L, L->top);
+- luaopen_ffi(L); /* Load FFI library on-demand. */
+- L->top = restorestack(L, oldtop);
+- }
++ ctype_loadffi(L);
+ #else
+ return 0;
+ #endif
+@@ -421,7 +418,7 @@ static int bcread_header(LexState *ls)
+ GCproto *lj_bcread(LexState *ls)
+ {
+ lua_State *L = ls->L;
+- lua_assert(ls->c == BCDUMP_HEAD1);
++ lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header");
+ bcread_savetop(L, ls, L->top);
+ lj_buf_reset(&ls->sb);
+ /* Check for a valid bytecode dump header. */
+@@ -447,8 +444,7 @@ GCproto *lj_bcread(LexState *ls)
+ setprotoV(L, L->top, pt);
+ incr_top(L);
+ }
+- if ((int32_t)(2*(uint32_t)(ls->pe - ls->p)) > 0 ||
+- L->top-1 != bcread_oldtop(L, ls))
++ if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
+ bcread_error(ls, LJ_ERR_BCBAD);
+ /* Pop off last prototype. */
+ L->top--;
+diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
+index 5e05caea..c5c042e0 100644
+--- a/src/lj_bcwrite.c
++++ b/src/lj_bcwrite.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Bytecode writer.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_bcwrite_c
+@@ -29,8 +29,17 @@ typedef struct BCWriteCtx {
+ void *wdata; /* Writer callback data. */
+ int strip; /* Strip debug info. */
+ int status; /* Status from writer callback. */
++#ifdef LUA_USE_ASSERT
++ global_State *g;
++#endif
+ } BCWriteCtx;
+
++#ifdef LUA_USE_ASSERT
++#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__)
++#else
++#define lj_assertBCW(c, ...) ((void)ctx)
++#endif
++
+ /* -- Bytecode writer ----------------------------------------------------- */
+
+ /* Write a single constant key/value of a template table. */
+@@ -53,7 +62,7 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
+ if (num == (lua_Number)k) { /* -0 is never a constant. */
+ *p++ = BCDUMP_KTAB_INT;
+ p = lj_strfmt_wuleb128(p, k);
+- setsbufP(&ctx->sb, p);
++ ctx->sb.w = p;
+ return;
+ }
+ }
+@@ -61,10 +70,10 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
+ p = lj_strfmt_wuleb128(p, o->u32.lo);
+ p = lj_strfmt_wuleb128(p, o->u32.hi);
+ } else {
+- lua_assert(tvispri(o));
++ lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
+ *p++ = BCDUMP_KTAB_NIL+~itype(o);
+ }
+- setsbufP(&ctx->sb, p);
++ ctx->sb.w = p;
+ }
+
+ /* Write a template table. */
+@@ -88,7 +97,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
+ /* Write number of array slots and hash slots. */
+ p = lj_strfmt_wuleb128(p, narray);
+ p = lj_strfmt_wuleb128(p, nhash);
+- setsbufP(&ctx->sb, p);
++ ctx->sb.w = p;
+ if (narray) { /* Write array entries (may contain nil). */
+ MSize i;
+ TValue *o = tvref(t->array);
+@@ -121,7 +130,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
+ tp = BCDUMP_KGC_STR + gco2str(o)->len;
+ need = 5+gco2str(o)->len;
+ } else if (o->gch.gct == ~LJ_TPROTO) {
+- lua_assert((pt->flags & PROTO_CHILD));
++ lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child");
+ tp = BCDUMP_KGC_CHILD;
+ #if LJ_HASFFI
+ } else if (o->gch.gct == ~LJ_TCDATA) {
+@@ -132,12 +141,14 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
+ } else if (id == CTID_UINT64) {
+ tp = BCDUMP_KGC_U64;
+ } else {
+- lua_assert(id == CTID_COMPLEX_DOUBLE);
++ lj_assertBCW(id == CTID_COMPLEX_DOUBLE,
++ "bad cdata constant CTID %d", id);
+ tp = BCDUMP_KGC_COMPLEX;
+ }
+ #endif
+ } else {
+- lua_assert(o->gch.gct == ~LJ_TTAB);
++ lj_assertBCW(o->gch.gct == ~LJ_TTAB,
++ "bad constant GC type %d", o->gch.gct);
+ tp = BCDUMP_KGC_TAB;
+ need = 1+2*5;
+ }
+@@ -161,7 +172,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
+ }
+ #endif
+ }
+- setsbufP(&ctx->sb, p);
++ ctx->sb.w = p;
+ }
+ }
+
+@@ -195,7 +206,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
+ p = lj_strfmt_wuleb128(p, o->u32.hi);
+ }
+ }
+- setsbufP(&ctx->sb, p);
++ ctx->sb.w = p;
+ }
+
+ /* Write bytecode instructions. */
+@@ -219,10 +230,7 @@ static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
+ q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
+ } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
+ BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
+- BCIns ins = traceref(J, rd)->startins;
+- q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
+- q[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);
+- q[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);
++ memcpy(q, &traceref(J, rd)->startins, 4);
+ }
+ }
+ }
+@@ -273,7 +281,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
+ /* Write bytecode instructions and upvalue refs. */
+ p = bcwrite_bytecode(ctx, p, pt);
+ p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
+- setsbufP(&ctx->sb, p);
++ ctx->sb.w = p;
+
+ /* Write constants. */
+ bcwrite_kgc(ctx, pt);
+@@ -283,16 +291,16 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
+ if (sizedbg) {
+ p = lj_buf_more(&ctx->sb, sizedbg);
+ p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
+- setsbufP(&ctx->sb, p);
++ ctx->sb.w = p;
+ }
+
+ /* Pass buffer to writer function. */
+ if (ctx->status == 0) {
+ MSize n = sbuflen(&ctx->sb) - 5;
+ MSize nn = (lj_fls(n)+8)*9 >> 6;
+- char *q = sbufB(&ctx->sb) + (5 - nn);
++ char *q = ctx->sb.b + (5 - nn);
+ p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
+- lua_assert(p == sbufB(&ctx->sb) + 5);
++ lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write");
+ ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
+ }
+ }
+@@ -316,8 +324,8 @@ static void bcwrite_header(BCWriteCtx *ctx)
+ p = lj_strfmt_wuleb128(p, len);
+ p = lj_buf_wmem(p, name, len);
+ }
+- ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
+- (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
++ ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b,
++ (MSize)(p - ctx->sb.b), ctx->wdata);
+ }
+
+ /* Write footer of bytecode dump. */
+@@ -352,6 +360,9 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
+ ctx.wdata = data;
+ ctx.strip = strip;
+ ctx.status = 0;
++#ifdef LUA_USE_ASSERT
++ ctx.g = G(L);
++#endif
+ lj_buf_init(L, &ctx.sb);
+ status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
+ if (status == 0) status = ctx.status;
+diff --git a/src/lj_buf.c b/src/lj_buf.c
+index 0dfe7f98..d31bd99e 100644
+--- a/src/lj_buf.c
++++ b/src/lj_buf.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Buffer handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_buf_c
+@@ -20,44 +20,85 @@ static void buf_grow(SBuf *sb, MSize sz)
+ {
+ MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
+ char *b;
++ GCSize flag;
+ if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
+ while (nsz < sz) nsz += nsz;
+- b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
+- setmref(sb->b, b);
+- setmref(sb->p, b + len);
+- setmref(sb->e, b + nsz);
++ flag = sbufflag(sb);
++ if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */
++ lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW");
++ b = (char *)lj_mem_new(sbufL(sb), nsz);
++ setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW);
++ setgcrefnull(sbufX(sb)->cowref);
++ memcpy(b, sb->b, osz);
++ } else {
++ b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz);
++ }
++ if ((flag & SBUF_FLAG_EXT)) {
++ sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */
++ }
++ /* Adjust buffer pointers. */
++ sb->b = b;
++ sb->w = b + len;
++ sb->e = b + nsz;
++ if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */
++ SBuf *bsb = mref(sbufX(sb)->bsb, SBuf);
++ bsb->b = b;
++ bsb->w = b + len;
++ bsb->e = b + nsz;
++ }
+ }
+
+ LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
+ {
+- lua_assert(sz > sbufsz(sb));
++ lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow");
+ if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
+ lj_err_mem(sbufL(sb));
+ buf_grow(sb, sz);
+- return sbufB(sb);
++ return sb->b;
+ }
+
+ LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
+ {
+- MSize len = sbuflen(sb);
+- lua_assert(sz > sbufleft(sb));
+- if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+- lj_err_mem(sbufL(sb));
+- buf_grow(sb, len + sz);
+- return sbufP(sb);
++ if (sbufisext(sb)) {
++ SBufExt *sbx = (SBufExt *)sb;
++ MSize len = sbufxlen(sbx);
++ if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
++ lj_err_mem(sbufL(sbx));
++ if (len + sz > sbufsz(sbx)) { /* Must grow. */
++ buf_grow((SBuf *)sbx, len + sz);
++ } else if (sbufxslack(sbx) < (sbufsz(sbx) >> 3)) {
++ /* Also grow to avoid excessive compactions, if slack < size/8. */
++ buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */
++ return sbx->w;
++ }
++ if (sbx->r != sbx->b) { /* Compact by moving down. */
++ memmove(sbx->b, sbx->r, len);
++ sbx->r = sbx->b;
++ sbx->w = sbx->b + len;
++ lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact");
++ }
++ } else {
++ MSize len = sbuflen(sb);
++ lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
++ if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
++ lj_err_mem(sbufL(sb));
++ buf_grow(sb, len + sz);
++ }
++ return sb->w;
+ }
+
+ void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
+ {
+- char *b = sbufB(sb);
+- MSize osz = (MSize)(sbufE(sb) - b);
++ char *b = sb->b;
++ MSize osz = (MSize)(sb->e - b);
+ if (osz > 2*LJ_MIN_SBUF) {
+- MSize n = (MSize)(sbufP(sb) - b);
++ MSize n = (MSize)(sb->w - b);
+ b = lj_mem_realloc(L, b, osz, (osz >> 1));
+- setmref(sb->b, b);
+- setmref(sb->p, b + n);
+- setmref(sb->e, b + (osz >> 1));
++ sb->b = b;
++ sb->w = b + n;
++ sb->e = b + (osz >> 1);
+ }
++ lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");
+ }
+
+ char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
+@@ -67,30 +108,62 @@ char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
+ return lj_buf_need(sb, sz);
+ }
+
++#if LJ_HASBUFFER && LJ_HASJIT
++void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref)
++{
++ lua_State *L = sbufL(sbx);
++ lj_bufx_free(L, sbx);
++ lj_bufx_set_cow(L, sbx, p, len);
++ setgcref(sbx->cowref, ref);
++ lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref);
++}
++
++#if LJ_HASFFI
++MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz)
++{
++ lj_buf_more((SBuf *)sbx, sz);
++ return sbufleft(sbx);
++}
++#endif
++#endif
++
+ /* -- Low-level buffer put operations ------------------------------------- */
+
+ SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
+ {
+- char *p = lj_buf_more(sb, len);
+- p = lj_buf_wmem(p, q, len);
+- setsbufP(sb, p);
++ char *w = lj_buf_more(sb, len);
++ w = lj_buf_wmem(w, q, len);
++ sb->w = w;
+ return sb;
+ }
+
+-SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
++#if LJ_HASJIT || LJ_HASFFI
++static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c)
+ {
+- char *p = lj_buf_more(sb, 1);
+- *p++ = (char)c;
+- setsbufP(sb, p);
++ char *w = lj_buf_more2(sb, 1);
++ *w++ = (char)c;
++ sb->w = w;
+ return sb;
+ }
+
++SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
++{
++ char *w = sb->w;
++ if (LJ_LIKELY(w < sb->e)) {
++ *w++ = (char)c;
++ sb->w = w;
++ return sb;
++ }
++ return lj_buf_putchar2(sb, c);
++}
++#endif
++
+ SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
+ {
+ MSize len = s->len;
+- char *p = lj_buf_more(sb, len);
+- p = lj_buf_wmem(p, strdata(s), len);
+- setsbufP(sb, p);
++ char *w = lj_buf_more(sb, len);
++ w = lj_buf_wmem(w, strdata(s), len);
++ sb->w = w;
+ return sb;
+ }
+
+@@ -99,47 +172,47 @@ SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
+ SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
+ {
+ MSize len = s->len;
+- char *p = lj_buf_more(sb, len), *e = p+len;
++ char *w = lj_buf_more(sb, len), *e = w+len;
+ const char *q = strdata(s)+len-1;
+- while (p < e)
+- *p++ = *q--;
+- setsbufP(sb, p);
++ while (w < e)
++ *w++ = *q--;
++ sb->w = w;
+ return sb;
+ }
+
+ SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
+ {
+ MSize len = s->len;
+- char *p = lj_buf_more(sb, len), *e = p+len;
++ char *w = lj_buf_more(sb, len), *e = w+len;
+ const char *q = strdata(s);
+- for (; p < e; p++, q++) {
++ for (; w < e; w++, q++) {
+ uint32_t c = *(unsigned char *)q;
+ #if LJ_TARGET_PPC
+- *p = c + ((c >= 'A' && c <= 'Z') << 5);
++ *w = c + ((c >= 'A' && c <= 'Z') << 5);
+ #else
+ if (c >= 'A' && c <= 'Z') c += 0x20;
+- *p = c;
++ *w = c;
+ #endif
+ }
+- setsbufP(sb, p);
++ sb->w = w;
+ return sb;
+ }
+
+ SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
+ {
+ MSize len = s->len;
+- char *p = lj_buf_more(sb, len), *e = p+len;
++ char *w = lj_buf_more(sb, len), *e = w+len;
+ const char *q = strdata(s);
+- for (; p < e; p++, q++) {
++ for (; w < e; w++, q++) {
+ uint32_t c = *(unsigned char *)q;
+ #if LJ_TARGET_PPC
+- *p = c - ((c >= 'a' && c <= 'z') << 5);
++ *w = c - ((c >= 'a' && c <= 'z') << 5);
+ #else
+ if (c >= 'a' && c <= 'z') c -= 0x20;
+- *p = c;
++ *w = c;
+ #endif
+ }
+- setsbufP(sb, p);
++ sb->w = w;
+ return sb;
+ }
+
+@@ -148,21 +221,21 @@ SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
+ MSize len = s->len;
+ if (rep > 0 && len) {
+ uint64_t tlen = (uint64_t)rep * len;
+- char *p;
++ char *w;
+ if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
+ lj_err_mem(sbufL(sb));
+- p = lj_buf_more(sb, (MSize)tlen);
++ w = lj_buf_more(sb, (MSize)tlen);
+ if (len == 1) { /* Optimize a common case. */
+ uint32_t c = strdata(s)[0];
+- do { *p++ = c; } while (--rep > 0);
++ do { *w++ = c; } while (--rep > 0);
+ } else {
+ const char *e = strdata(s) + len;
+ do {
+ const char *q = strdata(s);
+- do { *p++ = *q++; } while (q < e);
++ do { *w++ = *q++; } while (q < e);
+ } while (--rep > 0);
+ }
+- setsbufP(sb, p);
++ sb->w = w;
+ }
+ return sb;
+ }
+@@ -173,27 +246,27 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
+ if (i <= e) {
+ for (;;) {
+ cTValue *o = lj_tab_getint(t, i);
+- char *p;
++ char *w;
+ if (!o) {
+ badtype: /* Error: bad element type. */
+- setsbufP(sb, (void *)(intptr_t)i); /* Store failing index. */
++ sb->w = (char *)(intptr_t)i; /* Store failing index. */
+ return NULL;
+ } else if (tvisstr(o)) {
+ MSize len = strV(o)->len;
+- p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
++ w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
+ } else if (tvisint(o)) {
+- p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
++ w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
+ } else if (tvisnum(o)) {
+- p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
++ w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
+ } else {
+ goto badtype;
+ }
+ if (i++ == e) {
+- setsbufP(sb, p);
++ sb->w = w;
+ break;
+ }
+- if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
+- setsbufP(sb, p);
++ if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen);
++ sb->w = w;
+ }
+ }
+ return sb;
+@@ -203,7 +276,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
+
+ GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
+ {
+- return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
++ return lj_str_new(sbufL(sb), sb->b, sbuflen(sb));
+ }
+
+ /* Concatenate two strings. */
+@@ -219,14 +292,14 @@ GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
+ /* Read ULEB128 from buffer. */
+ uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
+ {
+- const uint8_t *p = (const uint8_t *)*pp;
+- uint32_t v = *p++;
++ const uint8_t *w = (const uint8_t *)*pp;
++ uint32_t v = *w++;
+ if (LJ_UNLIKELY(v >= 0x80)) {
+ int sh = 0;
+ v &= 0x7f;
+- do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
++ do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80);
+ }
+- *pp = (const char *)p;
++ *pp = (const char *)w;
+ return v;
+ }
+
+diff --git a/src/lj_buf.h b/src/lj_buf.h
+index a4051694..e2ac922e 100644
+--- a/src/lj_buf.h
++++ b/src/lj_buf.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Buffer handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_BUF_H
+@@ -10,16 +10,60 @@
+ #include "lj_gc.h"
+ #include "lj_str.h"
+
+-/* Resizable string buffers. Struct definition in lj_obj.h. */
+-#define sbufB(sb) (mref((sb)->b, char))
+-#define sbufP(sb) (mref((sb)->p, char))
+-#define sbufE(sb) (mref((sb)->e, char))
+-#define sbufL(sb) (mref((sb)->L, lua_State))
+-#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb))))
+-#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb))))
+-#define sbufleft(sb) ((MSize)(sbufE((sb)) - sbufP((sb))))
+-#define setsbufP(sb, q) (setmref((sb)->p, (q)))
+-#define setsbufL(sb, l) (setmref((sb)->L, (l)))
++/* Resizable string buffers. */
++
++/* The SBuf struct definition is in lj_obj.h:
++** char *w; Write pointer.
++** char *e; End pointer.
++** char *b; Base pointer.
++** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB.
++*/
++
++/* Extended string buffer. */
++typedef struct SBufExt {
++ SBufHeader;
++ union {
++ GCRef cowref; /* Copy-on-write object reference. */
++ MRef bsb; /* Borrowed string buffer. */
++ };
++ char *r; /* Read pointer. */
++ GCRef dict_str; /* Serialization string dictionary table. */
++ GCRef dict_mt; /* Serialization metatable dictionary table. */
++ int depth; /* Remaining recursion depth. */
++} SBufExt;
++
++#define sbufsz(sb) ((MSize)((sb)->e - (sb)->b))
++#define sbuflen(sb) ((MSize)((sb)->w - (sb)->b))
++#define sbufleft(sb) ((MSize)((sb)->e - (sb)->w))
++#define sbufxlen(sbx) ((MSize)((sbx)->w - (sbx)->r))
++#define sbufxslack(sbx) ((MSize)((sbx)->r - (sbx)->b))
++
++#define SBUF_MASK_FLAG (7)
++#define SBUF_MASK_L (~(GCSize)SBUF_MASK_FLAG)
++#define SBUF_FLAG_EXT 1 /* Extended string buffer. */
++#define SBUF_FLAG_COW 2 /* Copy-on-write buffer. */
++#define SBUF_FLAG_BORROW 4 /* Borrowed string buffer. */
++
++#define sbufL(sb) \
++ ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L))
++#define setsbufL(sb, l) (setmref((sb)->L, (l)))
++#define setsbufXL(sb, l, flag) \
++ (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag)))
++#define setsbufXL_(sb, l) \
++ (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG)))
++
++#define sbufflag(sb) (mrefu((sb)->L))
++#define sbufisext(sb) (sbufflag((sb)) & SBUF_FLAG_EXT)
++#define sbufiscow(sb) (sbufflag((sb)) & SBUF_FLAG_COW)
++#define sbufisborrow(sb) (sbufflag((sb)) & SBUF_FLAG_BORROW)
++#define sbufiscoworborrow(sb) (sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
++#define sbufX(sb) \
++ (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
++#define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag)))
++
++#define tvisbuf(o) \
++ (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER)
++#define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o))))
+
+ /* Buffer management */
+ LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
+@@ -30,12 +74,12 @@ LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
+ static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
+ {
+ setsbufL(sb, L);
+- setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
++ sb->w = sb->e = sb->b = NULL;
+ }
+
+ static LJ_AINLINE void lj_buf_reset(SBuf *sb)
+ {
+- setmrefr(sb->p, sb->b);
++ sb->w = sb->b;
+ }
+
+ static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
+@@ -48,26 +92,77 @@ static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
+
+ static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
+ {
+- lj_mem_free(g, sbufB(sb), sbufsz(sb));
++ lj_assertG(!sbufisext(sb), "bad free of SBufExt");
++ lj_mem_free(g, sb->b, sbufsz(sb));
+ }
+
+ static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
+ {
+ if (LJ_UNLIKELY(sz > sbufsz(sb)))
+ return lj_buf_need2(sb, sz);
+- return sbufB(sb);
++ return sb->b;
+ }
+
+ static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
+ {
+ if (LJ_UNLIKELY(sz > sbufleft(sb)))
+ return lj_buf_more2(sb, sz);
+- return sbufP(sb);
++ return sb->w;
++}
++
++/* Extended buffer management */
++static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx)
++{
++ memset(sbx, 0, sizeof(SBufExt));
++ setsbufXL(sbx, L, SBUF_FLAG_EXT);
++}
++
++static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
++{
++ setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW);
++ setmref(sbx->bsb, sb);
++ sbx->r = sbx->w = sbx->b = sb->b;
++ sbx->e = sb->e;
++}
++
++static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx,
++ const char *p, MSize len)
++{
++ setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW);
++ sbx->r = sbx->b = (char *)p;
++ sbx->w = sbx->e = (char *)p + len;
++}
++
++static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
++{
++ if (sbufiscow(sbx)) {
++ setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW));
++ setgcrefnull(sbx->cowref);
++ sbx->b = sbx->e = NULL;
++ }
++ sbx->r = sbx->w = sbx->b;
+ }
+
++static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
++{
++ if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
++ setsbufXL(sbx, L, SBUF_FLAG_EXT);
++ setgcrefnull(sbx->cowref);
++ sbx->r = sbx->w = sbx->b = sbx->e = NULL;
++}
++
++#if LJ_HASBUFFER && LJ_HASJIT
++LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o);
++#if LJ_HASFFI
++LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz);
++#endif
++#endif
++
+ /* Low-level buffer put operations */
+ LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
++#if LJ_HASJIT || LJ_HASFFI
+ LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
++#endif
+ LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
+
+ static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
+@@ -77,9 +172,9 @@ static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
+
+ static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
+ {
+- char *p = lj_buf_more(sb, 1);
+- *p++ = (char)c;
+- setsbufP(sb, p);
++ char *w = lj_buf_more(sb, 1);
++ *w++ = (char)c;
++ sb->w = w;
+ }
+
+ /* High-level buffer put operations */
+@@ -97,7 +192,7 @@ LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
+
+ static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
+ {
+- return lj_str_new(L, sbufB(sb), sbuflen(sb));
++ return lj_str_new(L, sb->b, sbuflen(sb));
+ }
+
+ #endif
+diff --git a/src/lj_carith.c b/src/lj_carith.c
+index 218abd26..dc745a37 100644
+--- a/src/lj_carith.c
++++ b/src/lj_carith.c
+@@ -1,6 +1,6 @@
+ /*
+ ** C data arithmetic.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -122,7 +122,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
+ setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2));
+ return 1;
+ } else {
+- lua_assert(mm == MM_le);
++ lj_assertL(mm == MM_le, "bad metamethod %d", mm);
+ setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2));
+ return 1;
+ }
+@@ -208,7 +208,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
+ *up = lj_carith_powu64(u0, u1);
+ break;
+ case MM_unm: *up = (uint64_t)-(int64_t)u0; break;
+- default: lua_assert(0); break;
++ default:
++ lj_assertL(0, "bad metamethod %d", mm);
++ break;
+ }
+ lj_gc_check(L);
+ return 1;
+@@ -265,7 +267,7 @@ int lj_carith_op(lua_State *L, MMS mm)
+ {
+ CTState *cts = ctype_cts(L);
+ CDArith ca;
+- if (carith_checkarg(L, cts, &ca)) {
++ if (carith_checkarg(L, cts, &ca) && mm != MM_len && mm != MM_concat) {
+ if (carith_int64(L, cts, &ca, mm) || carith_ptr(L, cts, &ca, mm)) {
+ copyTV(L, &G(L)->tmptv2, L->top-1); /* Remember for trace recorder. */
+ return 1;
+@@ -301,7 +303,9 @@ uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
+ case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
+ case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
+ case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
+- default: lua_assert(0); break;
++ default:
++ lj_assertX(0, "bad shift op %d", op);
++ break;
+ }
+ return x;
+ }
+@@ -347,7 +351,6 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
+ }
+ }
+
+-
+ /* -- 64 bit integer arithmetic helpers ----------------------------------- */
+
+ #if LJ_32 && LJ_HASJIT
+diff --git a/src/lj_carith.h b/src/lj_carith.h
+index 67d976bf..2fa5c657 100644
+--- a/src/lj_carith.h
++++ b/src/lj_carith.h
+@@ -1,6 +1,6 @@
+ /*
+ ** C data arithmetic.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CARITH_H
+diff --git a/src/lj_ccall.c b/src/lj_ccall.c
+index 5c252e5b..3c029823 100644
+--- a/src/lj_ccall.c
++++ b/src/lj_ccall.c
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI C call handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -334,20 +334,21 @@
+ isfp = sz == 2*sizeof(float) ? 2 : 1;
+
+ #define CCALL_HANDLE_REGARG \
+- if (LJ_TARGET_IOS && isva) { \
++ if (LJ_TARGET_OSX && isva) { \
+ /* IOS: All variadic arguments are on the stack. */ \
+ } else if (isfp) { /* Try to pass argument in FPRs. */ \
+- int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \
++ int n2 = ctype_isvector(d->info) ? 1 : \
++ isfp == 1 ? n : (d->size >> (4-isfp)); \
+ if (nfpr + n2 <= CCALL_NARG_FPR) { \
+ dp = &cc->fpr[nfpr]; \
+ nfpr += n2; \
+ goto done; \
+ } else { \
+ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
+- if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
++ if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
+ } \
+ } else { /* Try to pass argument in GPRs. */ \
+- if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
++ if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr + n <= maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+@@ -355,7 +356,7 @@
+ goto done; \
+ } else { \
+ ngpr = maxgpr; /* Prevent reordering. */ \
+- if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
++ if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
+ } \
+ }
+
+@@ -387,6 +388,25 @@
+ #define CCALL_HANDLE_COMPLEXARG \
+ /* Pass complex by value in 2 or 4 GPRs. */
+
++#define CCALL_HANDLE_GPR \
++ /* Try to pass argument in GPRs. */ \
++ if (n > 1) { \
++ /* int64_t or complex (float). */ \
++ lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \
++ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
++ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
++ else if (ngpr + n > maxgpr) \
++ ngpr = maxgpr; /* Prevent reordering. */ \
++ } \
++ if (ngpr + n <= maxgpr) { \
++ dp = &cc->gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ } \
++
++#if LJ_ABI_SOFTFP
++#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
++#else
+ #define CCALL_HANDLE_REGARG \
+ if (isfp) { /* Try to pass argument in FPRs. */ \
+ if (nfpr + 1 <= CCALL_NARG_FPR) { \
+@@ -395,24 +415,16 @@
+ d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
+ goto done; \
+ } \
+- } else { /* Try to pass argument in GPRs. */ \
+- if (n > 1) { \
+- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
+- if (ctype_isinteger(d->info)) \
+- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
+- else if (ngpr + n > maxgpr) \
+- ngpr = maxgpr; /* Prevent reordering. */ \
+- } \
+- if (ngpr + n <= maxgpr) { \
+- dp = &cc->gpr[ngpr]; \
+- ngpr += n; \
+- goto done; \
+- } \
++ } else { \
++ CCALL_HANDLE_GPR \
+ }
++#endif
+
++#if !LJ_ABI_SOFTFP
+ #define CCALL_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
++#endif
+
+ #elif LJ_TARGET_MIPS32
+ /* -- MIPS o32 calling conventions ---------------------------------------- */
+@@ -631,7 +643,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
+ ccall_classify_struct(cts, ct, rcl, ofs);
+ } else {
+ int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT;
+- lua_assert(ctype_hassize(ct->info));
++ lj_assertCTS(ctype_hassize(ct->info),
++ "classify ctype %08x without size", ct->info);
+ if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */
+ rcl[(ofs >= 8)] |= cl;
+ }
+@@ -656,12 +669,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
+ }
+
+ /* Try to split up a small struct into registers. */
+-static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl)
++static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl)
+ {
+ MSize ngpr = cc->ngpr, nfpr = cc->nfpr;
+ uint32_t i;
++ UNUSED(cts);
+ for (i = 0; i < 2; i++) {
+- lua_assert(!(rcl[i] & CCALL_RCL_MEM));
++ lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg");
+ if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */
+ if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */
+ cc->gpr[ngpr++] = dp[i];
+@@ -682,7 +696,8 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
+ dp[0] = dp[1] = 0;
+ /* Convert to temp. struct. */
+ lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
+- if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */
++ if (ccall_struct_reg(cc, cts, dp, rcl)) {
++ /* Register overflow? Pass on stack. */
+ MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
+ if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
+ cc->nsp = nsp + n;
+@@ -838,7 +853,8 @@ noth: /* Not a homogeneous float/double aggregate. */
+ return 0; /* Struct is in GPRs. */
+ }
+
+-void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft)
++static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
++ int ft)
+ {
+ if (LJ_ABI_SOFTFP ? ft :
+ ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
+@@ -978,7 +994,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
+ if (fid) { /* Get argument type from field. */
+ CType *ctf = ctype_get(cts, fid);
+ fid = ctf->sib;
+- lua_assert(ctype_isfield(ctf->info));
++ lj_assertL(ctype_isfield(ctf->info), "field expected");
+ did = ctype_cid(ctf->info);
+ } else {
+ if (!(ct->info & CTF_VARARG))
+@@ -1080,7 +1096,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
+ }
+ if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
+
+-#if LJ_TARGET_X64 || LJ_TARGET_PPC
++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
+ cc->nfpr = nfpr; /* Required for vararg functions. */
+ #endif
+ cc->nsp = nsp;
+@@ -1126,7 +1142,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
+ CCALL_HANDLE_RET
+ #endif
+ /* No reference types end up here, so there's no need for the CTypeID. */
+- lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)));
++ lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)),
++ "unexpected reference ctype");
+ return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp);
+ }
+
+@@ -1150,7 +1167,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd)
+ lj_vm_ffi_call(&cc);
+ if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */
+ TValue tv;
+- setlightudV(&tv, (void *)cc.func);
++ tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000);
+ setboolV(lj_tab_set(L, cts->miscmap, &tv), 1);
+ }
+ ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */
+diff --git a/src/lj_ccall.h b/src/lj_ccall.h
+index 59f66481..aae5777b 100644
+--- a/src/lj_ccall.h
++++ b/src/lj_ccall.h
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI C call handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CCALL_H
+@@ -86,9 +86,9 @@ typedef union FPRArg {
+ #elif LJ_TARGET_PPC
+
+ #define CCALL_NARG_GPR 8
+-#define CCALL_NARG_FPR 8
++#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
+ #define CCALL_NRET_GPR 4 /* For complex double. */
+-#define CCALL_NRET_FPR 1
++#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
+ #define CCALL_SPS_EXTRA 4
+ #define CCALL_SPS_FREE 0
+
+diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
+index 846827b1..80d738c6 100644
+--- a/src/lj_ccallback.c
++++ b/src/lj_ccallback.c
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI C callback handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -107,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p)
+ /* Initialize machine code for callback function pointers. */
+ #if LJ_OS_NOJIT
+ /* Disabled callback support. */
+-#define callback_mcode_init(g, p) UNUSED(p)
++#define callback_mcode_init(g, p) (p)
+ #elif LJ_TARGET_X86ORX64
+-static void callback_mcode_init(global_State *g, uint8_t *page)
++static void *callback_mcode_init(global_State *g, uint8_t *page)
+ {
+ uint8_t *p = page;
+ uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback;
+@@ -143,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
+ *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
+ }
+ }
+- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
++ return p;
+ }
+ #elif LJ_TARGET_ARM
+-static void callback_mcode_init(global_State *g, uint32_t *page)
++static void *callback_mcode_init(global_State *g, uint32_t *page)
+ {
+ uint32_t *p = page;
+ void *target = (void *)lj_vm_ffi_callback;
+@@ -165,10 +165,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
+ *p = ARMI_B | ((page-p-2) & 0x00ffffffu);
+ p++;
+ }
+- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
++ return p;
+ }
+ #elif LJ_TARGET_ARM64
+-static void callback_mcode_init(global_State *g, uint32_t *page)
++static void *callback_mcode_init(global_State *g, uint32_t *page)
+ {
+ uint32_t *p = page;
+ void *target = (void *)lj_vm_ffi_callback;
+@@ -185,10 +185,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
+ *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
+ p++;
+ }
+- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
++ return p;
+ }
+ #elif LJ_TARGET_PPC
+-static void callback_mcode_init(global_State *g, uint32_t *page)
++static void *callback_mcode_init(global_State *g, uint32_t *page)
+ {
+ uint32_t *p = page;
+ void *target = (void *)lj_vm_ffi_callback;
+@@ -204,10 +204,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
+ *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
+ p++;
+ }
+- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
++ return p;
+ }
+ #elif LJ_TARGET_MIPS
+-static void callback_mcode_init(global_State *g, uint32_t *page)
++static void *callback_mcode_init(global_State *g, uint32_t *page)
+ {
+ uint32_t *p = page;
+ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
+@@ -236,11 +236,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
+ p++;
+ *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot;
+ }
+- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
++ return p;
+ }
+ #else
+ /* Missing support for this architecture. */
+-#define callback_mcode_init(g, p) UNUSED(p)
++#define callback_mcode_init(g, p) (p)
+ #endif
+
+ /* -- Machine code management --------------------------------------------- */
+@@ -256,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
+ #ifndef MAP_ANONYMOUS
+ #define MAP_ANONYMOUS MAP_ANON
+ #endif
++#ifdef PROT_MPROTECT
++#define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC))
++#else
++#define CCPROT_CREATE 0
++#endif
+
+ #endif
+
+@@ -263,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
+ static void callback_mcode_new(CTState *cts)
+ {
+ size_t sz = (size_t)CALLBACK_MCODE_SIZE;
+- void *p;
++ void *p, *pe;
+ if (CALLBACK_MAX_SLOT == 0)
+ lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+ #if LJ_TARGET_WINDOWS
+- p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
++ p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ if (!p)
+ lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+ #elif LJ_TARGET_POSIX
+- p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS,
++ p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS,
+ -1, 0);
+ if (p == MAP_FAILED)
+ lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
+@@ -280,12 +285,15 @@ static void callback_mcode_new(CTState *cts)
+ p = lj_mem_new(cts->L, sz);
+ #endif
+ cts->cb.mcode = p;
+- callback_mcode_init(cts->g, p);
++ pe = callback_mcode_init(cts->g, p);
++ UNUSED(pe);
++ lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz,
++ "miscalculated CALLBACK_MAX_SLOT");
+ lj_mcode_sync(p, (char *)p + sz);
+ #if LJ_TARGET_WINDOWS
+ {
+ DWORD oprot;
+- VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot);
++ LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
+ }
+ #elif LJ_TARGET_POSIX
+ mprotect(p, sz, (PROT_READ|PROT_EXEC));
+@@ -406,7 +414,7 @@ void lj_ccallback_mcode_free(CTState *cts)
+ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
+ } \
+ } else { \
+- if (!LJ_TARGET_IOS && n > 1) \
++ if (!LJ_TARGET_OSX && n > 1) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+@@ -419,6 +427,24 @@ void lj_ccallback_mcode_free(CTState *cts)
+
+ #elif LJ_TARGET_PPC
+
++#define CALLBACK_HANDLE_GPR \
++ if (n > 1) { \
++ lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
++ ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \
++ "bad GPR type"); \
++ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
++ } \
++ if (ngpr + n <= maxgpr) { \
++ sp = &cts->cb.gpr[ngpr]; \
++ ngpr += n; \
++ goto done; \
++ }
++
++#if LJ_ABI_SOFTFP
++#define CALLBACK_HANDLE_REGARG \
++ CALLBACK_HANDLE_GPR \
++ UNUSED(isfp);
++#else
+ #define CALLBACK_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr + 1 <= CCALL_NARG_FPR) { \
+@@ -427,20 +453,15 @@ void lj_ccallback_mcode_free(CTState *cts)
+ goto done; \
+ } \
+ } else { /* Try to pass argument in GPRs. */ \
+- if (n > 1) { \
+- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
+- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
+- } \
+- if (ngpr + n <= maxgpr) { \
+- sp = &cts->cb.gpr[ngpr]; \
+- ngpr += n; \
+- goto done; \
+- } \
++ CALLBACK_HANDLE_GPR \
+ }
++#endif
+
++#if !LJ_ABI_SOFTFP
+ #define CALLBACK_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
++#endif
+
+ #elif LJ_TARGET_MIPS32
+
+@@ -533,13 +554,13 @@ static void callback_conv_args(CTState *cts, lua_State *L)
+ if (LJ_FR2) {
+ (o++)->u64 = LJ_CONT_FFI_CALLBACK;
+ (o++)->u64 = rid;
+- o++;
+ } else {
+ o->u32.lo = LJ_CONT_FFI_CALLBACK;
+ o->u32.hi = rid;
+ o++;
+ }
+ setframe_gc(o, obj2gco(fn), fntp);
++ if (LJ_FR2) o++;
+ setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
+ L->top = L->base = ++o;
+ if (!ct)
+@@ -567,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
+ CTSize sz;
+ int isfp;
+ MSize n;
+- lua_assert(ctype_isfield(ctf->info));
++ lj_assertCTS(ctype_isfield(ctf->info), "field expected");
+ cta = ctype_rawchild(cts, ctf);
+ isfp = ctype_isfp(cta->info);
+ sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
+@@ -659,7 +680,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
+ {
+ lua_State *L = cts->L;
+ global_State *g = cts->g;
+- lua_assert(L != NULL);
++ lj_assertG(L != NULL, "uninitialized cts->L in callback");
+ if (tvref(g->jit_base)) {
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
+ if (g->panic) g->panic(L);
+@@ -744,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct)
+ CType *ctf = ctype_get(cts, fid);
+ if (!ctype_isattrib(ctf->info)) {
+ CType *cta;
+- lua_assert(ctype_isfield(ctf->info));
++ lj_assertCTS(ctype_isfield(ctf->info), "field expected");
+ cta = ctype_rawchild(cts, ctf);
+ if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) ||
+ (ctype_isnum(cta->info) && cta->size <= 8)) ||
+diff --git a/src/lj_ccallback.h b/src/lj_ccallback.h
+index a8cdad38..9506ce42 100644
+--- a/src/lj_ccallback.h
++++ b/src/lj_ccallback.h
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI C callback handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CCALLBACK_H
+diff --git a/src/lj_cconv.c b/src/lj_cconv.c
+index 13b8230d..613f66e2 100644
+--- a/src/lj_cconv.c
++++ b/src/lj_cconv.c
+@@ -1,6 +1,6 @@
+ /*
+ ** C type conversions.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -8,6 +8,7 @@
+ #if LJ_HASFFI
+
+ #include "lj_err.h"
++#include "lj_buf.h"
+ #include "lj_tab.h"
+ #include "lj_ctype.h"
+ #include "lj_cdata.h"
+@@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
+ CTInfo dinfo = d->info, sinfo = s->info;
+ void *tmpptr;
+
+- lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo));
+- lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo));
++ lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo),
++ "unresolved enum");
++ lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo),
++ "unstripped attribute");
+
+ if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT)
+ goto err_conv;
+
+ /* Some basic sanity checks. */
+- lua_assert(!ctype_isnum(dinfo) || dsize > 0);
+- lua_assert(!ctype_isnum(sinfo) || ssize > 0);
+- lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4);
+- lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4);
+- lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize);
+- lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize);
++ lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type");
++ lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type");
++ lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4,
++ "bad size for bool type");
++ lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4,
++ "bad size for bool type");
++ lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize,
++ "bad size for integer type");
++ lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize,
++ "bad size for integer type");
+
+ switch (cconv_idx2(dinfo, sinfo)) {
+ /* Destination is a bool. */
+@@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
+ if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s)
+ goto err_conv; /* Must be exact same type. */
+ copyval: /* Copy value. */
+- lua_assert(dsize == ssize);
++ lj_assertCTS(dsize == ssize, "value copy with different sizes");
+ memcpy(dp, sp, dsize);
+ break;
+
+@@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
+ lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s,
+ (uint8_t *)&o->n, sp, 0);
+ /* Numbers are NOT canonicalized here! Beware of uninitialized data. */
+- lua_assert(tvisnum(o));
++ lj_assertCTS(tvisnum(o), "non-canonical NaN passed");
+ }
+ } else {
+ uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0);
+@@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
+ CTSize sz;
+ copyval: /* Copy value. */
+ sz = s->size;
+- lua_assert(sz != CTSIZE_INVALID);
++ lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size");
+ /* Attributes are stripped, qualifiers are kept (but mostly ignored). */
+ cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz);
+ setcdataV(cts->L, o, cd);
+@@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
+ CTInfo info = s->info;
+ CTSize pos, bsz;
+ uint32_t val;
+- lua_assert(ctype_isbitfield(info));
++ lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
+ /* NYI: packed bitfields may cause misaligned reads. */
+ switch (ctype_bitcsz(info)) {
+ case 4: val = *(uint32_t *)sp; break;
+ case 2: val = *(uint16_t *)sp; break;
+ case 1: val = *(uint8_t *)sp; break;
+- default: lua_assert(0); val = 0; break;
++ default:
++ lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
++ val = 0;
++ break;
+ }
+ /* Check if a packed bitfield crosses a container boundary. */
+ pos = ctype_bitpos(info);
+ bsz = ctype_bitbsz(info);
+- lua_assert(pos < 8*ctype_bitcsz(info));
+- lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info));
++ lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
++ lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
+ if (pos + bsz > 8*ctype_bitcsz(info))
+ lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
+ if (!(info & CTF_BOOL)) {
+@@ -449,7 +459,7 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
+ }
+ } else {
+ uint32_t b = (val >> pos) & 1;
+- lua_assert(bsz == 1);
++ lj_assertCTS(bsz == 1, "bad bool bitfield size");
+ setboolV(o, b);
+ setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */
+ }
+@@ -553,13 +563,15 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
+ sid = cdataV(o)->ctypeid;
+ s = ctype_get(cts, sid);
+ if (ctype_isref(s->info)) { /* Resolve reference for value. */
+- lua_assert(s->size == CTSIZE_PTR);
++ lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
+ sp = *(void **)sp;
+ sid = ctype_cid(s->info);
+ }
+ s = ctype_raw(cts, sid);
+ if (ctype_isfunc(s->info)) {
++ CTypeID did = ctype_typeid(cts, d);
+ sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
++ d = ctype_get(cts, did); /* cts->tab may have been reallocated. */
+ } else {
+ if (ctype_isenum(s->info)) s = ctype_child(cts, s);
+ goto doconv;
+@@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
+ CType *cct = lj_ctype_getfield(cts, d, str, &ofs);
+ if (!cct || !ctype_isconstval(cct->info))
+ goto err_conv;
+- lua_assert(d->size == 4);
++ lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */
+ sp = (uint8_t *)&cct->size;
+ sid = ctype_cid(cct->info);
+ } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */
+@@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
+ tmpptr = uddata(ud);
+ if (ud->udtype == UDTYPE_IO_FILE)
+ tmpptr = *(void **)tmpptr;
++ else if (ud->udtype == UDTYPE_BUFFER)
++ tmpptr = ((SBufExt *)tmpptr)->r;
+ } else if (tvislightud(o)) {
+- tmpptr = lightudV(o);
++ tmpptr = lightudV(cts->g, o);
+ } else if (tvisfunc(o)) {
+ void *p = lj_ccallback_new(cts, d, funcV(o));
+ if (p) {
+@@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
+ CTInfo info = d->info;
+ CTSize pos, bsz;
+ uint32_t val, mask;
+- lua_assert(ctype_isbitfield(info));
++ lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
+ if ((info & CTF_BOOL)) {
+ uint8_t tmpbool;
+- lua_assert(ctype_bitbsz(info) == 1);
++ lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size");
+ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0);
+ val = tmpbool;
+ } else {
+@@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
+ }
+ pos = ctype_bitpos(info);
+ bsz = ctype_bitbsz(info);
+- lua_assert(pos < 8*ctype_bitcsz(info));
+- lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info));
++ lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
++ lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
+ /* Check if a packed bitfield crosses a container boundary. */
+ if (pos + bsz > 8*ctype_bitcsz(info))
+ lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
+@@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
+ case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break;
+ case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break;
+ case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break;
+- default: lua_assert(0); break;
++ default:
++ lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
++ break;
+ }
+ }
+
+diff --git a/src/lj_cconv.h b/src/lj_cconv.h
+index 0a0b66c9..cd927328 100644
+--- a/src/lj_cconv.h
++++ b/src/lj_cconv.h
+@@ -1,6 +1,6 @@
+ /*
+ ** C type conversions.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CCONV_H
+@@ -27,13 +27,14 @@ enum {
+ static LJ_AINLINE uint32_t cconv_idx(CTInfo info)
+ {
+ uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */
+- lua_assert(ctype_type(info) <= CT_MAYCONVERT);
++ lj_assertX(ctype_type(info) <= CT_MAYCONVERT,
++ "cannot convert ctype %08x", info);
+ #if LJ_64
+ idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u);
+ #else
+ idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u);
+ #endif
+- lua_assert(idx < 8);
++ lj_assertX(idx < 8, "cannot convert ctype %08x", info);
+ return idx;
+ }
+
+diff --git a/src/lj_cdata.c b/src/lj_cdata.c
+index 68e16d76..ffc31078 100644
+--- a/src/lj_cdata.c
++++ b/src/lj_cdata.c
+@@ -1,6 +1,6 @@
+ /*
+ ** C data management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -35,7 +35,7 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
+ uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
+ uintptr_t almask = (1u << align) - 1u;
+ GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
+- lua_assert((char *)cd - p < 65536);
++ lj_assertL((char *)cd - p < 65536, "excessive cdata alignment");
+ cdatav(cd)->offset = (uint16_t)((char *)cd - p);
+ cdatav(cd)->extra = extra;
+ cdatav(cd)->len = sz;
+@@ -76,8 +76,8 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
+ } else if (LJ_LIKELY(!cdataisv(cd))) {
+ CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid);
+ CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR;
+- lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) ||
+- ctype_isextern(ct->info));
++ lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) ||
++ ctype_isextern(ct->info), "free of ctype without a size");
+ lj_mem_free(g, cd, sizeof(GCcdata) + sz);
+ } else {
+ lj_mem_free(g, memcdatav(cd), sizecdatav(cd));
+@@ -115,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp,
+
+ /* Resolve reference for cdata object. */
+ if (ctype_isref(ct->info)) {
+- lua_assert(ct->size == CTSIZE_PTR);
++ lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized");
+ p = *(uint8_t **)p;
+ ct = ctype_child(cts, ct);
+ }
+@@ -126,7 +126,8 @@ collect_attrib:
+ if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size;
+ ct = ctype_child(cts, ct);
+ }
+- lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */
++ /* Interning rejects refs to refs. */
++ lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref");
+
+ if (tvisint(key)) {
+ idx = (ptrdiff_t)intV(key);
+@@ -212,7 +213,8 @@ collect_attrib:
+ static void cdata_getconst(CTState *cts, TValue *o, CType *ct)
+ {
+ CType *ctt = ctype_child(cts, ct);
+- lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4);
++ lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
++ "only 32 bit const supported"); /* NYI */
+ /* Constants are already zero-extended/sign-extended to 32 bits. */
+ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
+ setnumV(o, (lua_Number)(uint32_t)ct->size);
+@@ -233,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp)
+ }
+
+ /* Get child type of pointer/array/field. */
+- lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info));
++ lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info),
++ "pointer or field expected");
+ sid = ctype_cid(s->info);
+ s = ctype_get(cts, sid);
+
+ /* Resolve reference for field. */
+ if (ctype_isref(s->info)) {
+- lua_assert(s->size == CTSIZE_PTR);
++ lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
+ sp = *(uint8_t **)sp;
+ sid = ctype_cid(s->info);
+ s = ctype_get(cts, sid);
+@@ -266,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
+ }
+
+ /* Get child type of pointer/array/field. */
+- lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info));
++ lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info),
++ "pointer or field expected");
+ d = ctype_child(cts, d);
+
+ /* Resolve reference for field. */
+ if (ctype_isref(d->info)) {
+- lua_assert(d->size == CTSIZE_PTR);
++ lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized");
+ dp = *(uint8_t **)dp;
+ d = ctype_child(cts, d);
+ }
+@@ -286,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
+ d = ctype_child(cts, d);
+ }
+
+- lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info));
++ lj_assertCTS(ctype_hassize(d->info), "store to ctype without size");
++ lj_assertCTS(!ctype_isvoid(d->info), "store to void type");
+
+ if (((d->info|qual) & CTF_CONST)) {
+ err_const:
+diff --git a/src/lj_cdata.h b/src/lj_cdata.h
+index 5bb0f5dc..b93bec86 100644
+--- a/src/lj_cdata.h
++++ b/src/lj_cdata.h
+@@ -1,6 +1,6 @@
+ /*
+ ** C data management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CDATA_H
+@@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz)
+ if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
+ return ((void *)(uintptr_t)*(uint32_t *)p);
+ } else {
+- lua_assert(sz == CTSIZE_PTR);
++ lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
+ return *(void **)p;
+ }
+ }
+@@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v)
+ if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
+ *(uint32_t *)p = (uint32_t)(uintptr_t)v;
+ } else {
+- lua_assert(sz == CTSIZE_PTR);
++ lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
+ *(void **)p = (void *)v;
+ }
+ }
+@@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz)
+ GCcdata *cd;
+ #ifdef LUA_USE_ASSERT
+ CType *ct = ctype_raw(cts, id);
+- lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz);
++ lj_assertCTS((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz,
++ "inconsistent size of fixed-size cdata alloc");
+ #endif
+ cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz);
+ cd->gct = ~LJ_TCDATA;
+diff --git a/src/lj_clib.c b/src/lj_clib.c
+index 61426590..d8636a48 100644
+--- a/src/lj_clib.c
++++ b/src/lj_clib.c
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI C library loader.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -119,12 +119,13 @@ static void *clib_loadlib(lua_State *L, const char *name, int global)
+ RTLD_LAZY | (global?RTLD_GLOBAL:RTLD_LOCAL));
+ if (!h) {
+ const char *e, *err = dlerror();
+- if (*err == '/' && (e = strchr(err, ':')) &&
++ if (err && *err == '/' && (e = strchr(err, ':')) &&
+ (name = clib_resolve_lds(L, strdata(lj_str_new(L, err, e-err))))) {
+ h = dlopen(name, RTLD_LAZY | (global?RTLD_GLOBAL:RTLD_LOCAL));
+ if (h) return h;
+ err = dlerror();
+ }
++ if (!err) err = "dlopen failed";
+ lj_err_callermsg(L, err);
+ }
+ return h;
+@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
+ /* Default libraries. */
+ enum {
+ CLIB_HANDLE_EXE,
++#if !LJ_TARGET_UWP
+ CLIB_HANDLE_DLL,
+ CLIB_HANDLE_CRT,
+ CLIB_HANDLE_KERNEL32,
+ CLIB_HANDLE_USER32,
+ CLIB_HANDLE_GDI32,
++#endif
+ CLIB_HANDLE_MAX
+ };
+
+@@ -208,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name)
+ static void *clib_loadlib(lua_State *L, const char *name, int global)
+ {
+ DWORD oldwerr = GetLastError();
+- void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0);
++ void *h = LJ_WIN_LOADLIBA(clib_extname(L, name));
+ if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
+ SetLastError(oldwerr);
+ UNUSED(global);
+@@ -218,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global)
+ static void clib_unloadlib(CLibrary *cl)
+ {
+ if (cl->handle == CLIB_DEFHANDLE) {
++#if !LJ_TARGET_UWP
+ MSize i;
+ for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) {
+ void *h = clib_def_handle[i];
+@@ -226,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl)
+ FreeLibrary((HINSTANCE)h);
+ }
+ }
++#endif
+ } else if (cl->handle) {
+ FreeLibrary((HINSTANCE)cl->handle);
+ }
+ }
+
++#if LJ_TARGET_UWP
++EXTERN_C IMAGE_DOS_HEADER __ImageBase;
++#endif
++
+ static void *clib_getsym(CLibrary *cl, const char *name)
+ {
+ void *p = NULL;
+@@ -239,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
+ for (i = 0; i < CLIB_HANDLE_MAX; i++) {
+ HINSTANCE h = (HINSTANCE)clib_def_handle[i];
+ if (!(void *)h) { /* Resolve default library handles (once). */
++#if LJ_TARGET_UWP
++ h = (HINSTANCE)&__ImageBase;
++#else
+ switch (i) {
+ case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break;
+ case CLIB_HANDLE_DLL:
+@@ -249,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name)
+ GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ (const char *)&_fmode, &h);
+ break;
+- case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break;
+- case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break;
+- case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break;
++ case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break;
++ case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break;
++ case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break;
+ }
+ if (!h) continue;
++#endif
+ clib_def_handle[i] = (void *)h;
+ }
+ p = (void *)GetProcAddress(h, name);
+@@ -337,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
+ lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name));
+ if (ctype_isconstval(ct->info)) {
+ CType *ctt = ctype_child(cts, ct);
+- lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4);
++ lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
++ "only 32 bit const supported"); /* NYI */
+ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
+ setnumV(tv, (lua_Number)(uint32_t)ct->size);
+ else
+@@ -349,7 +363,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
+ #endif
+ void *p = clib_getsym(cl, sym);
+ GCcdata *cd;
+- lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info));
++ lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info),
++ "unexpected ctype %08x in clib", ct->info);
+ #if LJ_TARGET_X86 && LJ_ABI_WIN
+ /* Retry with decorated name for fastcall/stdcall functions. */
+ if (!p && ctype_isfunc(ct->info)) {
+@@ -372,6 +387,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
+ cd = lj_cdata_new(cts, id, CTSIZE_PTR);
+ *(void **)cdataptr(cd) = p;
+ setcdataV(L, tv, cd);
++ lj_gc_anybarriert(L, cl->cache);
+ }
+ }
+ return tv;
+diff --git a/src/lj_clib.h b/src/lj_clib.h
+index fcc9dac5..848543d5 100644
+--- a/src/lj_clib.h
++++ b/src/lj_clib.h
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI C library loader.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CLIB_H
+diff --git a/src/lj_cparse.c b/src/lj_cparse.c
+index 83cfd112..efe80759 100644
+--- a/src/lj_cparse.c
++++ b/src/lj_cparse.c
+@@ -1,6 +1,6 @@
+ /*
+ ** C declaration parser.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -28,6 +28,30 @@
+ ** If in doubt, please check the input against your favorite C compiler.
+ */
+
++#ifdef LUA_USE_ASSERT
++#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__))
++#else
++#define lj_assertCP(c, ...) ((void)cp)
++#endif
++
++/* -- Miscellaneous ------------------------------------------------------- */
++
++/* Match string against a C literal. */
++#define cp_str_is(str, k) \
++ ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1))
++
++/* Check string against a linear list of matches. */
++int lj_cparse_case(GCstr *str, const char *match)
++{
++ MSize len;
++ int n;
++ for (n = 0; (len = (MSize)*match++); n++, match += len) {
++ if (str->len == len && !memcmp(match, strdata(str), len))
++ return n;
++ }
++ return -1;
++}
++
+ /* -- C lexer ------------------------------------------------------------- */
+
+ /* C lexer token names. */
+@@ -43,7 +67,7 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em);
+
+ static const char *cp_tok2str(CPState *cp, CPToken tok)
+ {
+- lua_assert(tok < CTOK_FIRSTDECL);
++ lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok);
+ if (tok > CTOK_OFS)
+ return ctoknames[tok-CTOK_OFS-1];
+ else if (!lj_char_iscntrl(tok))
+@@ -109,9 +133,9 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
+ tokstr = NULL;
+ } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
+ tok >= CTOK_FIRSTDECL) {
+- if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
++ if (cp->sb.w == cp->sb.b) cp_save(cp, '$');
+ cp_save(cp, '\0');
+- tokstr = sbufB(&cp->sb);
++ tokstr = cp->sb.b;
+ } else {
+ tokstr = cp_tok2str(cp, tok);
+ }
+@@ -151,7 +175,8 @@ static CPToken cp_number(CPState *cp)
+ TValue o;
+ do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
+ cp_save(cp, '\0');
+- fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
++ fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1,
++ &o, STRSCAN_OPT_C);
+ if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
+ else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
+ else if (!(cp->mode & CPARSE_MODE_SKIP))
+@@ -254,7 +279,7 @@ static CPToken cp_string(CPState *cp)
+ return CTOK_STRING;
+ } else {
+ if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
+- cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
++ cp->val.i32 = (int32_t)(char)*cp->sb.b;
+ cp->val.id = CTID_INT32;
+ return CTOK_INTEGER;
+ }
+@@ -373,7 +398,7 @@ static void cp_init(CPState *cp)
+ cp->curpack = 0;
+ cp->packstack[0] = 255;
+ lj_buf_init(cp->L, &cp->sb);
+- lua_assert(cp->p != NULL);
++ lj_assertCP(cp->p != NULL, "uninitialized cp->p");
+ cp_get(cp); /* Read-ahead first char. */
+ cp->tok = 0;
+ cp->tmask = CPNS_DEFAULT;
+@@ -576,28 +601,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
+ k->id = k2.id > k3.id ? k2.id : k3.id;
+ continue;
+ }
++ /* fallthrough */
+ case 1:
+ if (cp_opt(cp, CTOK_OROR)) {
+ cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32;
+ continue;
+ }
++ /* fallthrough */
+ case 2:
+ if (cp_opt(cp, CTOK_ANDAND)) {
+ cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32;
+ continue;
+ }
++ /* fallthrough */
+ case 3:
+ if (cp_opt(cp, '|')) {
+ cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result;
+ }
++ /* fallthrough */
+ case 4:
+ if (cp_opt(cp, '^')) {
+ cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result;
+ }
++ /* fallthrough */
+ case 5:
+ if (cp_opt(cp, '&')) {
+ cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result;
+ }
++ /* fallthrough */
+ case 6:
+ if (cp_opt(cp, CTOK_EQ)) {
+ cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32;
+@@ -606,6 +637,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
+ cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32;
+ continue;
+ }
++ /* fallthrough */
+ case 7:
+ if (cp_opt(cp, '<')) {
+ cp_expr_sub(cp, &k2, 8);
+@@ -640,6 +672,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
+ k->id = CTID_INT32;
+ continue;
+ }
++ /* fallthrough */
+ case 8:
+ if (cp_opt(cp, CTOK_SHL)) {
+ cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32;
+@@ -652,6 +685,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
+ k->u32 = k->u32 >> k2.u32;
+ continue;
+ }
++ /* fallthrough */
+ case 9:
+ if (cp_opt(cp, '+')) {
+ cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32;
+@@ -661,6 +695,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
+ } else if (cp_opt(cp, '-')) {
+ cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result;
+ }
++ /* fallthrough */
+ case 10:
+ if (cp_opt(cp, '*')) {
+ cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result;
+@@ -824,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
+ /* The cid is already part of info for copies of pointers/functions. */
+ idx = ct->next;
+ if (ctype_istypedef(info)) {
+- lua_assert(id == 0);
++ lj_assertCP(id == 0, "typedef not at toplevel");
+ id = ctype_cid(info);
+ /* Always refetch info/size, since struct/enum may have been completed. */
+ cinfo = ctype_get(cp->cts, id)->info;
+ csize = ctype_get(cp->cts, id)->size;
+- lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo));
++ lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo),
++ "typedef of bad type");
+ } else if (ctype_isfunc(info)) { /* Intern function. */
+ CType *fct;
+ CTypeID fid;
+@@ -862,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
+ /* Inherit csize/cinfo from original type. */
+ } else {
+ if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */
+- lua_assert(id == 0);
++ lj_assertCP(id == 0, "number not at toplevel");
+ if (!(info & CTF_BOOL)) {
+ CTSize msize = ctype_msizeP(decl->attr);
+ CTSize vsize = ctype_vsizeP(decl->attr);
+@@ -917,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
+ info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN);
+ info |= (cinfo & CTF_QUAL); /* Inherit qual. */
+ } else {
+- lua_assert(ctype_isvoid(info));
++ lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info);
+ }
+ csize = size;
+ cinfo = info+id;
+@@ -929,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
+
+ /* -- C declaration parser ------------------------------------------------ */
+
+-#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
+-
+ /* Reset declaration state to declaration specifier. */
+ static void cp_decl_reset(CPDecl *decl)
+ {
+@@ -1059,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
+ if (cp->tok == CTOK_IDENT) {
+ GCstr *attrstr = cp->str;
+ cp_next(cp);
+- switch (attrstr->hash) {
+- case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */
++ switch (lj_cparse_case(attrstr,
++ "\007aligned" "\013__aligned__"
++ "\006packed" "\012__packed__"
++ "\004mode" "\010__mode__"
++ "\013vector_size" "\017__vector_size__"
++#if LJ_TARGET_X86
++ "\007regparm" "\013__regparm__"
++ "\005cdecl" "\011__cdecl__"
++ "\010thiscall" "\014__thiscall__"
++ "\010fastcall" "\014__fastcall__"
++ "\007stdcall" "\013__stdcall__"
++ "\012sseregparm" "\016__sseregparm__"
++#endif
++ )) {
++ case 0: case 1: /* aligned */
+ cp_decl_align(cp, decl);
+ break;
+- case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */
++ case 2: case 3: /* packed */
+ decl->attr |= CTFP_PACKED;
+ break;
+- case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */
++ case 4: case 5: /* mode */
+ cp_decl_mode(cp, decl);
+ break;
+- case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */
++ case 6: case 7: /* vector_size */
+ {
+ CTSize vsize = cp_decl_sizeattr(cp);
+ if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize));
+ }
+ break;
+ #if LJ_TARGET_X86
+- case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */
++ case 8: case 9: /* regparm */
+ CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp));
+ decl->fattr |= CTFP_CCONV;
+ break;
+- case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */
++ case 10: case 11: /* cdecl */
+ CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL);
+ decl->fattr |= CTFP_CCONV;
+ break;
+- case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */
++ case 12: case 13: /* thiscall */
+ CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL);
+ decl->fattr |= CTFP_CCONV;
+ break;
+- case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */
++ case 14: case 15: /* fastcall */
+ CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL);
+ decl->fattr |= CTFP_CCONV;
+ break;
+- case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */
++ case 16: case 17: /* stdcall */
+ CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL);
+ decl->fattr |= CTFP_CCONV;
+ break;
+- case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */
++ case 18: case 19: /* sseregparm */
+ decl->fattr |= CTF_SSEREGPARM;
+ decl->fattr |= CTFP_CCONV;
+ break;
+@@ -1128,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl)
+ while (cp->tok == CTOK_IDENT) {
+ GCstr *attrstr = cp->str;
+ cp_next(cp);
+- switch (attrstr->hash) {
+- case H_(bc2395fa,98f267f8): /* align */
++ if (cp_str_is(attrstr, "align")) {
+ cp_decl_align(cp, decl);
+- break;
+- default: /* Ignore all other attributes. */
++ } else { /* Ignore all other attributes. */
+ if (cp_opt(cp, '(')) {
+ while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp);
+ cp_check(cp, ')');
+ }
+- break;
+ }
+ }
+ cp_check(cp, ')');
+@@ -1548,7 +1592,7 @@ end_decl:
+ cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC);
+ sz = sizeof(int);
+ }
+- lua_assert(sz != 0);
++ lj_assertCP(sz != 0, "basic ctype with zero size");
+ info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */
+ info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */
+ cp_push(decl, info, sz);
+@@ -1717,17 +1761,16 @@ static CTypeID cp_decl_abstract(CPState *cp)
+ static void cp_pragma(CPState *cp, BCLine pragmaline)
+ {
+ cp_next(cp);
+- if (cp->tok == CTOK_IDENT &&
+- cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */
++ if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) {
+ cp_next(cp);
+ cp_check(cp, '(');
+ if (cp->tok == CTOK_IDENT) {
+- if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */
++ if (cp_str_is(cp->str, "push")) {
+ if (cp->curpack < CPARSE_MAX_PACKSTACK) {
+ cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
+ cp->curpack++;
+ }
+- } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */
++ } else if (cp_str_is(cp->str, "pop")) {
+ if (cp->curpack > 0) cp->curpack--;
+ } else {
+ cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
+@@ -1776,13 +1819,11 @@ static void cp_decl_multi(CPState *cp)
+ if (tok == CTOK_INTEGER) {
+ cp_line(cp, hashline);
+ continue;
+- } else if (tok == CTOK_IDENT &&
+- cp->str->hash == H_(187aab88,fcb60b42)) { /* line */
++ } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) {
+ if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok);
+ cp_line(cp, hashline);
+ continue;
+- } else if (tok == CTOK_IDENT &&
+- cp->str->hash == H_(f5e6b4f8,1d509107)) { /* pragma */
++ } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) {
+ cp_pragma(cp, hashline);
+ continue;
+ } else {
+@@ -1811,7 +1852,7 @@ static void cp_decl_multi(CPState *cp)
+ /* Treat both static and extern function declarations as extern. */
+ ct = ctype_get(cp->cts, ctypeid);
+ /* We always get new anonymous functions (typedefs are copied). */
+- lua_assert(gcref(ct->name) == NULL);
++ lj_assertCP(gcref(ct->name) == NULL, "unexpected named function");
+ id = ctypeid; /* Just name it. */
+ } else if ((scl & CDF_STATIC)) { /* Accept static constants. */
+ id = cp_decl_constinit(cp, &ct, ctypeid);
+@@ -1853,8 +1894,6 @@ static void cp_decl_single(CPState *cp)
+ if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF);
+ }
+
+-#undef H_
+-
+ /* ------------------------------------------------------------------------ */
+
+ /* Protected callback for C parser. */
+@@ -1870,7 +1909,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud)
+ cp_decl_single(cp);
+ if (cp->param && cp->param != cp->L->top)
+ cp_err(cp, LJ_ERR_FFI_NUMPARAM);
+- lua_assert(cp->depth == 0);
++ lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth");
+ return NULL;
+ }
+
+diff --git a/src/lj_cparse.h b/src/lj_cparse.h
+index bad1060b..fd88a9f4 100644
+--- a/src/lj_cparse.h
++++ b/src/lj_cparse.h
+@@ -1,6 +1,6 @@
+ /*
+ ** C declaration parser.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CPARSE_H
+@@ -60,6 +60,8 @@ typedef struct CPState {
+
+ LJ_FUNC int lj_cparse(CPState *cp);
+
++LJ_FUNC int lj_cparse_case(GCstr *str, const char *match);
++
+ #endif
+
+ #endif
+diff --git a/src/lj_crecord.c b/src/lj_crecord.c
+index e32ae23e..165f95d9 100644
+--- a/src/lj_crecord.c
++++ b/src/lj_crecord.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Trace recorder for C data operations.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_ffrecord_c
+@@ -61,7 +61,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o)
+ static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr)
+ {
+ CTypeID id;
+- lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID);
++ lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID,
++ "expected CTypeID cdata");
+ id = *(CTypeID *)cdataptr(cd);
+ tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT);
+ emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id));
+@@ -77,7 +78,7 @@ static CTypeID argv2ctype(jit_State *J, TRef tr, cTValue *o)
+ /* Specialize to the string containing the C type declaration. */
+ emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, s));
+ cp.L = J->L;
+- cp.cts = ctype_ctsG(J2G(J));
++ cp.cts = ctype_cts(J->L);
+ oldtop = cp.cts->top;
+ cp.srcname = strdata(s);
+ cp.p = strdata(s);
+@@ -212,7 +213,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp,
+ ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0);
+ ml[i].trofs = trofs;
+ i++;
+- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1;
++ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1;
+ if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */
+ rwin = 0;
+ for ( ; j < i; j++) {
+@@ -237,13 +238,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen,
+ if (len > CREC_COPY_MAXLEN) goto fallback;
+ if (ct) {
+ CTState *cts = ctype_ctsG(J2G(J));
+- lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info));
++ lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info),
++ "copy of non-aggregate");
+ if (ctype_isarray(ct->info)) {
+ CType *cct = ctype_rawchild(cts, ct);
+ tp = crec_ct2irt(cts, cct);
+ if (tp == IRT_CDATA) goto rawcopy;
+ step = lj_ir_type_size[tp];
+- lua_assert((len & (step-1)) == 0);
++ lj_assertJ((len & (step-1)) == 0, "copy of fractional size");
+ } else if ((ct->info & CTF_UNION)) {
+ step = (1u << ctype_align(ct->info));
+ goto rawcopy;
+@@ -614,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
+ sp = lj_ir_kptr(J, NULL);
+ } else if (tref_isudata(sp)) {
+ GCudata *ud = udataV(sval);
+- if (ud->udtype == UDTYPE_IO_FILE) {
++ if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) {
+ TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE);
+- emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE));
+- sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE);
++ emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype));
++ sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp,
++ ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE :
++ IRFL_SBUF_R);
+ } else {
+ sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata)));
+ }
+@@ -629,7 +633,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
+ /* Specialize to the name of the enum constant. */
+ emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str));
+ if (cct && ctype_isconstval(cct->info)) {
+- lua_assert(ctype_child(cts, cct)->size == 4);
++ lj_assertJ(ctype_child(cts, cct)->size == 4,
++ "only 32 bit const supported"); /* NYI */
+ svisnz = (void *)(intptr_t)(ofs != 0);
+ sp = lj_ir_kint(J, (int32_t)ofs);
+ sid = ctype_cid(cct->info);
+@@ -643,8 +648,7 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
+ }
+ } else if (tref_islightud(sp)) {
+ #if LJ_64
+- sp = emitir(IRT(IR_BAND, IRT_P64), sp,
+- lj_ir_kint64(J, U64x(00007fff,ffffffff)));
++ lj_trace_err(J, LJ_TRERR_NYICONV);
+ #endif
+ } else { /* NYI: tref_istab(sp). */
+ IRType t;
+@@ -757,7 +761,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info)
+ IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0);
+ TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0);
+ CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz;
+- lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */
++ lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */
+ if (rd->data == 0) { /* __index metamethod. */
+ if ((info & CTF_BOOL)) {
+ tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos))));
+@@ -769,7 +773,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info)
+ tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos));
+ tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift));
+ } else {
+- lua_assert(bsz < 32); /* Full-size fields cannot end up here. */
++ lj_assertJ(bsz < 32, "unexpected full bitfield index");
+ tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos));
+ tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1)));
+ /* We can omit the U32 to NUM conversion, since bsz < 32. */
+@@ -884,7 +888,7 @@ again:
+ crec_index_bf(J, rd, ptr, fct->info);
+ return;
+ } else {
+- lua_assert(ctype_isfield(fct->info));
++ lj_assertJ(ctype_isfield(fct->info), "field expected");
+ sid = ctype_cid(fct->info);
+ }
+ }
+@@ -1022,8 +1026,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
+ crec_ct_tv(J, dc, dp, sp, sval);
+ }
+ } else if (ctype_isstruct(d->info)) {
+- CTypeID fid = d->sib;
++ CTypeID fid;
+ MSize i = 1;
++ if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */
++ fid = d->sib;
++ while (fid) {
++ CType *df = ctype_get(cts, fid);
++ fid = df->sib;
++ if (ctype_isfield(df->info)) {
++ CType *dc;
++ if (!gcref(df->name)) continue; /* Ignore unnamed fields. */
++ dc = ctype_rawchild(cts, df); /* Field type. */
++ if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) ||
++ ctype_isenum(dc->info)))
++ goto special;
++ } else if (!ctype_isconstval(df->info)) {
++ goto special;
++ }
++ }
++ }
++ fid = d->sib;
+ while (fid) {
+ CType *df = ctype_get(cts, fid);
+ fid = df->sib;
+@@ -1048,6 +1070,11 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
+ dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
+ lj_ir_kintp(J, df->size + sizeof(GCcdata)));
+ crec_ct_tv(J, dc, dp, sp, sval);
++ if ((d->info & CTF_UNION)) {
++ if (d->size != dc->size) /* NYI: partial init of union. */
++ lj_trace_err(J, LJ_TRERR_NYICONV);
++ break;
++ }
+ } else if (!ctype_isconstval(df->info)) {
+ /* NYI: init bitfields and sub-structures. */
+ lj_trace_err(J, LJ_TRERR_NYICONV);
+@@ -1111,7 +1138,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
+ if (fid) { /* Get argument type from field. */
+ CType *ctf = ctype_get(cts, fid);
+ fid = ctf->sib;
+- lua_assert(ctype_isfield(ctf->info));
++ lj_assertJ(ctype_isfield(ctf->info), "field expected");
+ did = ctype_cid(ctf->info);
+ } else {
+ if (!(ct->info & CTF_VARARG))
+@@ -1130,7 +1157,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
+ else
+ tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
+ }
+- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) {
++ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) {
+ lj_needsplit(J);
+ }
+ #if LJ_TARGET_X86
+@@ -1209,8 +1236,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
+ TRef tr;
+ TValue tv;
+ /* Check for blacklisted C functions that might call a callback. */
+- setlightudV(&tv,
+- cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4));
++ tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000);
+ if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv)))
+ lj_trace_err(J, LJ_TRERR_BLACKL);
+ if (ctype_isvoid(ctr->info)) {
+@@ -1530,8 +1556,10 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
+ }
+ {
+ TRef tr;
+- if (!(tr = crec_arith_int64(J, sp, s, (MMS)rd->data)) &&
+- !(tr = crec_arith_ptr(J, sp, s, (MMS)rd->data)) &&
++ MMS mm = (MMS)rd->data;
++ if ((mm == MM_len || mm == MM_concat ||
++ (!(tr = crec_arith_int64(J, sp, s, mm)) &&
++ !(tr = crec_arith_ptr(J, sp, s, mm)))) &&
+ !(tr = crec_arith_meta(J, sp, s, cts, rd)))
+ return;
+ J->base[0] = tr;
+@@ -1879,10 +1907,36 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
+ d = ctype_get(cts, CTID_DOUBLE);
+ J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]);
+ } else {
++ /* Specialize to the ctype that couldn't be converted. */
++ argv2cdata(J, J->base[0], &rd->argv[0]);
+ J->base[0] = TREF_NIL;
+ }
+ }
+
++TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o)
++{
++ CTypeID id = argv2cdata(J, tr, o)->ctypeid;
++ if (!(id == CTID_INT64 || id == CTID_UINT64))
++ lj_trace_err(J, LJ_TRERR_BADTYPE);
++ lj_needsplit(J);
++ return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr,
++ IRFL_CDATA_INT64);
++}
++
++#if LJ_HASBUFFER
++TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o)
++{
++ CTState *cts = ctype_ctsG(J2G(J));
++ if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE);
++ return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o);
++}
++
++TRef lj_crecord_topuint8(jit_State *J, TRef tr)
++{
++ return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr);
++}
++#endif
++
+ #undef IR
+ #undef emitir
+ #undef emitconv
+diff --git a/src/lj_crecord.h b/src/lj_crecord.h
+index c165def4..e1a2d9c0 100644
+--- a/src/lj_crecord.h
++++ b/src/lj_crecord.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Trace recorder for C data operations.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CRECORD_H
+@@ -33,6 +33,11 @@ LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
+ LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
+
+ LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
++LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o);
++#if LJ_HASBUFFER
++LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o);
++LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr);
++#endif
+ #endif
+
+ #endif
+diff --git a/src/lj_ctype.c b/src/lj_ctype.c
+index 0ea89c74..6741437c 100644
+--- a/src/lj_ctype.c
++++ b/src/lj_ctype.c
+@@ -1,6 +1,6 @@
+ /*
+ ** C type management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "lj_obj.h"
+@@ -153,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp)
+ {
+ CTypeID id = cts->top;
+ CType *ct;
+- lua_assert(cts->L);
++ lj_assertCTS(cts->L, "uninitialized cts->L");
+ if (LJ_UNLIKELY(id >= cts->sizetab)) {
+ if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV);
+ #ifdef LUAJIT_CTYPE_CHECK_ANCHOR
+@@ -182,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size)
+ {
+ uint32_t h = ct_hashtype(info, size);
+ CTypeID id = cts->hash[h];
+- lua_assert(cts->L);
++ lj_assertCTS(cts->L, "uninitialized cts->L");
+ while (id) {
+ CType *ct = ctype_get(cts, id);
+ if (ct->info == info && ct->size == size)
+@@ -298,9 +298,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem)
+ }
+ ct = ctype_raw(cts, arrid);
+ }
+- lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */
++ lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected");
+ ct = ctype_rawchild(cts, ct); /* Get array element. */
+- lua_assert(ctype_hassize(ct->info));
++ lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size");
+ /* Calculate actual size of VLA and check for overflow. */
+ xsz += (uint64_t)ct->size * nelem;
+ return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID;
+@@ -323,7 +323,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp)
+ } else {
+ if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN);
+ qual |= (info & ~(CTF_ALIGN|CTMASK_CID));
+- lua_assert(ctype_hassize(info) || ctype_isfunc(info));
++ lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info),
++ "ctype without size");
+ *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size;
+ break;
+ }
+@@ -528,7 +529,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id)
+ ctype_appc(ctr, ')');
+ break;
+ default:
+- lua_assert(0);
++ lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info);
+ break;
+ }
+ ct = ctype_get(ctr->cts, ctype_cid(info));
+@@ -582,7 +583,7 @@ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
+ lj_strfmt_putfnum(sb, STRFMT_G14, re.n);
+ if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+');
+ lj_strfmt_putfnum(sb, STRFMT_G14, im.n);
+- lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i');
++ lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i');
+ return lj_buf_str(L, sb);
+ }
+
+diff --git a/src/lj_ctype.h b/src/lj_ctype.h
+index 0c220a88..700250df 100644
+--- a/src/lj_ctype.h
++++ b/src/lj_ctype.h
+@@ -1,6 +1,6 @@
+ /*
+ ** C type management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_CTYPE_H
+@@ -260,6 +260,12 @@ typedef struct CTState {
+
+ #define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */
+
++#ifdef LUA_USE_ASSERT
++#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__))
++#else
++#define lj_assertCTS(c, ...) ((void)cts)
++#endif
++
+ /* -- Predefined types ---------------------------------------------------- */
+
+ /* Target-dependent types. */
+@@ -292,6 +298,7 @@ typedef struct CTState {
+ _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \
+ _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \
+ _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \
++ _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \
+ _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \
+ _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \
+ CTTYDEFP(_) \
+@@ -383,6 +390,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
+ return cts;
+ }
+
++/* Load FFI library on-demand. */
++#define ctype_loadffi(L) \
++ do { \
++ if (!ctype_ctsG(G(L))) { \
++ ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \
++ luaopen_ffi(L); \
++ L->top = (TValue *)(mref(L->stack, char) + oldtop); \
++ } \
++ } while (0)
++
+ /* Save and restore state of C type table. */
+ #define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts)
+ #define LJ_CTYPE_RESTORE(cts) \
+@@ -392,7 +409,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
+ /* Check C type ID for validity when assertions are enabled. */
+ static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id)
+ {
+- lua_assert(id > 0 && id < cts->top); UNUSED(cts);
++ UNUSED(cts);
++ lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id);
+ return id;
+ }
+
+@@ -408,8 +426,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id)
+ /* Get child C type. */
+ static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct)
+ {
+- lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) ||
+- ctype_isbitfield(ct->info))); /* These don't have children. */
++ lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) ||
++ ctype_isbitfield(ct->info)),
++ "ctype %08x has no children", ct->info);
+ return ctype_get(cts, ctype_cid(ct->info));
+ }
+
+diff --git a/src/lj_debug.c b/src/lj_debug.c
+index 959dc289..3dffad90 100644
+--- a/src/lj_debug.c
++++ b/src/lj_debug.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Debugging and introspection.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_debug_c
+@@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
+ const BCIns *ins;
+ GCproto *pt;
+ BCPos pos;
+- lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD);
++ lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD,
++ "function or frame expected");
+ if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */
+ return NO_BCPOS;
+ } else if (nextframe == NULL) { /* Lua function on top. */
+@@ -93,6 +94,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
+ }
+ }
+ ins = cframe_pc(cf);
++ if (!ins) return NO_BCPOS;
+ }
+ }
+ pt = funcproto(fn);
+@@ -100,7 +102,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
+ #if LJ_HASJIT
+ if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */
+ GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
+- lua_assert(bc_isret(bc_op(ins[-1])));
++ lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected");
+ pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
+ }
+ #endif
+@@ -133,7 +135,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
+ BCPos pc = debug_framepc(L, fn, nextframe);
+ if (pc != NO_BCPOS) {
+ GCproto *pt = funcproto(fn);
+- lua_assert(pc <= pt->sizebc);
++ lj_assertL(pc <= pt->sizebc, "PC out of range");
+ return lj_debug_line(pt, pc);
+ }
+ return -1;
+@@ -214,26 +216,29 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
+ const char *lj_debug_uvname(GCproto *pt, uint32_t idx)
+ {
+ const uint8_t *p = proto_uvinfo(pt);
+- lua_assert(idx < pt->sizeuv);
++ lj_assertX(idx < pt->sizeuv, "bad upvalue index");
+ if (!p) return "";
+ if (idx) while (*p++ || --idx) ;
+ return (const char *)p;
+ }
+
+ /* Get name and value of upvalue. */
+-const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp)
++const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, GCobj **op)
+ {
+ if (tvisfunc(o)) {
+ GCfunc *fn = funcV(o);
+ if (isluafunc(fn)) {
+ GCproto *pt = funcproto(fn);
+ if (idx < pt->sizeuv) {
+- *tvp = uvval(&gcref(fn->l.uvptr[idx])->uv);
++ GCobj *uvo = gcref(fn->l.uvptr[idx]);
++ *tvp = uvval(&uvo->uv);
++ *op = uvo;
+ return lj_debug_uvname(pt, idx);
+ }
+ } else {
+ if (idx < fn->c.nupvalues) {
+ *tvp = &fn->c.upvalue[idx];
++ *op = obj2gco(fn);
+ return "";
+ }
+ }
+@@ -429,20 +434,21 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
+ GCfunc *fn;
+ if (*what == '>') {
+ TValue *func = L->top - 1;
+- api_check(L, tvisfunc(func));
++ if (!tvisfunc(func)) return 0;
+ fn = funcV(func);
+ L->top--;
+ what++;
+ } else {
+ uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
+ uint32_t size = (uint32_t)ar->i_ci >> 16;
+- lua_assert(offset != 0);
++ lj_assertL(offset != 0, "bad frame offset");
+ frame = tvref(L->stack) + offset;
+ if (size) nextframe = frame + size;
+- lua_assert(frame <= tvref(L->maxstack) &&
+- (!nextframe || nextframe <= tvref(L->maxstack)));
++ lj_assertL(frame <= tvref(L->maxstack) &&
++ (!nextframe || nextframe <= tvref(L->maxstack)),
++ "broken frame chain");
+ fn = frame_func(frame);
+- lua_assert(fn->c.gct == ~LJ_TFUNC);
++ lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function");
+ }
+ for (; *what; what++) {
+ if (*what == 'S') {
+@@ -642,7 +648,7 @@ void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
+ level += dir;
+ }
+ if (lastlen)
+- setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */
++ sb->w = sb->b + lastlen; /* Zap trailing separator. */
+ }
+ #endif
+
+diff --git a/src/lj_debug.h b/src/lj_debug.h
+index 5917c00b..a6e21701 100644
+--- a/src/lj_debug.h
++++ b/src/lj_debug.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Debugging and introspection.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_DEBUG_H
+@@ -29,7 +29,8 @@ typedef struct lj_Debug {
+ LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size);
+ LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc);
+ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
+-LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
++LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp,
++ GCobj **op);
+ LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
+ BCReg slot, const char **name);
+ LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
+diff --git a/src/lj_def.h b/src/lj_def.h
+index 2d8fff66..3a28026c 100644
+--- a/src/lj_def.h
++++ b/src/lj_def.h
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT common internal definitions.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_DEF_H
+@@ -8,8 +8,8 @@
+
+ #include "lua.h"
+
+-#if defined(_MSC_VER)
+-/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */
++#if defined(_MSC_VER) && (_MSC_VER < 1700)
++/* Old MSVC is stuck in the last century and doesn't have C99's stdint.h. */
+ typedef __int8 int8_t;
+ typedef __int16 int16_t;
+ typedef __int32 int32_t;
+@@ -80,7 +80,6 @@ typedef unsigned int uintptr_t;
+ #define LJ_MIN_SBUF 32 /* Min. string buffer length. */
+ #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
+ #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
+-#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
+
+ /* JIT compiler limits. */
+ #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
+@@ -105,9 +104,10 @@ typedef unsigned int uintptr_t;
+ #define checku16(x) ((x) == (int32_t)(uint16_t)(x))
+ #define checki32(x) ((x) == (int32_t)(x))
+ #define checku32(x) ((x) == (uint32_t)(x))
++#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0)
+ #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
+ #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0)
+-#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1)
++#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1)
+
+ /* Every half-decent C compiler transforms this into a rotate instruction. */
+ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
+@@ -120,7 +120,7 @@ typedef uintptr_t BloomFilter;
+ #define bloomset(b, x) ((b) |= bloombit((x)))
+ #define bloomtest(b, x) ((b) & bloombit((x)))
+
+-#if defined(__GNUC__) || defined(__psp2__)
++#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__)
+
+ #define LJ_NORET __attribute__((noreturn))
+ #define LJ_ALIGN(n) __attribute__((aligned(n)))
+@@ -182,7 +182,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x)
+ {
+ return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32));
+ }
+-#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
++#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__
+ static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
+ {
+ return (uint32_t)__builtin_bswap32((int32_t)x);
+@@ -263,19 +263,19 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
+ return _CountLeadingZeros(x) ^ 31;
+ }
+ #else
+-unsigned char _BitScanForward(uint32_t *, unsigned long);
+-unsigned char _BitScanReverse(uint32_t *, unsigned long);
++unsigned char _BitScanForward(unsigned long *, unsigned long);
++unsigned char _BitScanReverse(unsigned long *, unsigned long);
+ #pragma intrinsic(_BitScanForward)
+ #pragma intrinsic(_BitScanReverse)
+
+ static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
+ {
+- uint32_t r; _BitScanForward(&r, x); return r;
++ unsigned long r; _BitScanForward(&r, x); return (uint32_t)r;
+ }
+
+ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
+ {
+- uint32_t r; _BitScanReverse(&r, x); return r;
++ unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
+ }
+ #endif
+
+@@ -338,14 +338,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
+ #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET
+ #define LJ_ASMF_NORET LJ_ASMF LJ_NORET
+
+-/* Runtime assertions. */
+-#ifdef lua_assert
+-#define check_exp(c, e) (lua_assert(c), (e))
+-#define api_check(l, e) lua_assert(e)
++/* Internal assertions. */
++#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
++#define lj_assert_check(g, c, ...) \
++ ((c) ? (void)0 : \
++ (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0))
++#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
+ #else
+-#define lua_assert(c) ((void)0)
++#define lj_checkapi(c, ...) ((void)L)
++#endif
++
++#ifdef LUA_USE_ASSERT
++#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__)
++#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__)
++#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
++#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__)
++#define check_exp(c, e) (lj_assertX((c), #c), (e))
++#else
++#define lj_assertG_(g, c, ...) ((void)0)
++#define lj_assertG(c, ...) ((void)g)
++#define lj_assertL(c, ...) ((void)L)
++#define lj_assertX(c, ...) ((void)0)
+ #define check_exp(c, e) (e)
+-#define api_check luai_apicheck
+ #endif
+
+ /* Static assertions. */
+@@ -359,4 +373,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
+ extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1])
+ #endif
+
++/* PRNG state. Need this here, details in lj_prng.h. */
++typedef struct PRNGState {
++ uint64_t u[4];
++} PRNGState;
++
+ #endif
+diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
+index 5d6795f8..7b73d3dd 100644
+--- a/src/lj_dispatch.c
++++ b/src/lj_dispatch.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Instruction dispatch handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_dispatch_c
+@@ -68,6 +68,8 @@ void lj_dispatch_init(GG_State *GG)
+ /* The JIT engine is off by default. luaopen_jit() turns it on. */
+ disp[BC_FORL] = disp[BC_IFORL];
+ disp[BC_ITERL] = disp[BC_IITERL];
++ /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */
++ disp[BC_ITERN] = &lj_vm_IITERN;
+ disp[BC_LOOP] = disp[BC_ILOOP];
+ disp[BC_FUNCF] = disp[BC_IFUNCF];
+ disp[BC_FUNCV] = disp[BC_IFUNCV];
+@@ -118,19 +120,21 @@ void lj_dispatch_update(global_State *g)
+ mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
+ if (oldmode != mode) { /* Mode changed? */
+ ASMFunction *disp = G2GG(g)->dispatch;
+- ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv;
++ ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv;
+ g->dispatchmode = mode;
+
+ /* Hotcount if JIT is on, but not while recording. */
+ if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) {
+ f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]);
+ f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]);
++ f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]);
+ f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]);
+ f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]);
+ f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]);
+ } else { /* Otherwise use the non-hotcounting instructions. */
+ f_forl = disp[GG_LEN_DDISP+BC_IFORL];
+ f_iterl = disp[GG_LEN_DDISP+BC_IITERL];
++ f_itern = &lj_vm_IITERN;
+ f_loop = disp[GG_LEN_DDISP+BC_ILOOP];
+ f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]);
+ f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]);
+@@ -138,6 +142,7 @@ void lj_dispatch_update(global_State *g)
+ /* Init static counting instruction dispatch first (may be copied below). */
+ disp[GG_LEN_DDISP+BC_FORL] = f_forl;
+ disp[GG_LEN_DDISP+BC_ITERL] = f_iterl;
++ disp[GG_LEN_DDISP+BC_ITERN] = f_itern;
+ disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
+
+ /* Set dynamic instruction dispatch. */
+@@ -165,6 +170,7 @@ void lj_dispatch_update(global_State *g)
+ /* Otherwise set dynamic counting ins. */
+ disp[BC_FORL] = f_forl;
+ disp[BC_ITERL] = f_iterl;
++ disp[BC_ITERN] = f_itern;
+ disp[BC_LOOP] = f_loop;
+ /* Set dynamic return dispatch. */
+ if ((mode & DISPMODE_RET)) {
+@@ -252,15 +258,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
+ } else {
+ if (!(mode & LUAJIT_MODE_ON))
+ G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
+-#if LJ_TARGET_X86ORX64
+- else if ((G2J(g)->flags & JIT_F_SSE2))
+- G2J(g)->flags |= (uint32_t)JIT_F_ON;
+- else
+- return 0; /* Don't turn on JIT compiler without SSE2 support. */
+-#else
+ else
+ G2J(g)->flags |= (uint32_t)JIT_F_ON;
+-#endif
+ lj_dispatch_update(g);
+ }
+ break;
+@@ -302,7 +301,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
+ if (idx != 0) {
+ cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx;
+ if (tvislightud(tv))
+- g->wrapf = (lua_CFunction)lightudV(tv);
++ g->wrapf = (lua_CFunction)lightudV(g, tv);
+ else
+ return 0; /* Failed. */
+ } else {
+@@ -374,7 +373,7 @@ static void callhook(lua_State *L, int event, BCLine line)
+ hook_enter(g);
+ #endif
+ hookf(L, &ar);
+- lua_assert(hook_active(g));
++ lj_assertG(hook_active(g), "active hook flag removed");
+ setgcref(g->cur_L, obj2gco(L));
+ #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
+ lj_profile_hook_leave(g);
+@@ -422,7 +421,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc)
+ #endif
+ J->L = L;
+ lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
+- lua_assert(L->top - L->base == delta);
++ lj_assertG(L->top - L->base == delta,
++ "unbalanced stack after tracing of instruction");
+ }
+ }
+ #endif
+@@ -482,7 +482,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
+ #endif
+ pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1);
+ lj_trace_hot(J, pc);
+- lua_assert(L->top - L->base == delta);
++ lj_assertG(L->top - L->base == delta,
++ "unbalanced stack after hot call");
+ goto out;
+ } else if (J->state != LJ_TRACE_IDLE &&
+ !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
+@@ -491,7 +492,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
+ #endif
+ /* Record the FUNC* bytecodes, too. */
+ lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
+- lua_assert(L->top - L->base == delta);
++ lj_assertG(L->top - L->base == delta,
++ "unbalanced stack after hot instruction");
+ }
+ #endif
+ if ((g->hookmask & LUA_MASKCALL)) {
+diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
+index 5bda51a2..2331bd42 100644
+--- a/src/lj_dispatch.h
++++ b/src/lj_dispatch.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Instruction dispatch handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_DISPATCH_H
+@@ -31,7 +31,7 @@ extern double __divdf3(double a, double b);
+ #define SFGOTDEF(_)
+ #endif
+ #if LJ_HASJIT
+-#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot)
++#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
+ #else
+ #define JITGOTDEF(_)
+ #endif
+@@ -89,12 +89,20 @@ typedef uint16_t HotCount;
+ typedef struct GG_State {
+ lua_State L; /* Main thread. */
+ global_State g; /* Global state. */
++#if LJ_TARGET_ARM
++ /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
++ uint8_t align1[(16-sizeof(global_State))&15];
++#endif
+ #if LJ_TARGET_MIPS
+ ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */
+ #endif
+ #if LJ_HASJIT
+ jit_State J; /* JIT state. */
+ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
++#if LJ_TARGET_ARM
++ /* Ditto for J. */
++ uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
++#endif
+ #endif
+ ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */
+ BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */
+diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
+index dee8bdcc..615e4c3a 100644
+--- a/src/lj_emit_arm.h
++++ b/src/lj_emit_arm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** ARM instruction emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Constant encoding --------------------------------------------------- */
+@@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm)
+
+ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+ {
+- lua_assert(ofs >= -255 && ofs <= 255);
++ lj_assertA(ofs >= -255 && ofs <= 255,
++ "load/store offset %d out of range", ofs);
+ if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
+ *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) |
+ ((ofs & 0xf0) << 4) | (ofs & 0x0f);
+@@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+
+ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+ {
+- lua_assert(ofs >= -4095 && ofs <= 4095);
++ lj_assertA(ofs >= -4095 && ofs <= 4095,
++ "load/store offset %d out of range", ofs);
+ /* Combine LDR/STR pairs to LDRD/STRD. */
+ if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) &&
+ (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn &&
+@@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+ #if !LJ_SOFTFP
+ static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+ {
+- lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0);
++ lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0,
++ "load/store offset %d out of range", ofs);
+ if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
+ *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2);
+ }
+@@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+- lua_assert(r != d);
++ lj_assertA(r != d, "dest reg not free");
+ if (emit_canremat(ref)) {
+ int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
+ uint32_t k = emit_isk12(ARMI_ADD, delta);
+@@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
+ }
+
+ /* Try to find a two step delta relative to another constant. */
+-static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
++static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
+ {
+ RegSet work = ~as->freeset & RSET_GPR;
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+- lua_assert(r != d);
++ lj_assertA(r != rd, "dest reg %d not free", rd);
+ if (emit_canremat(ref)) {
+ int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
+ if (other) {
+@@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
+ k2 = emit_isk12(0, delta & (255 << sh));
+ k = emit_isk12(0, delta & ~(255 << sh));
+ if (k) {
+- emit_dn(as, ARMI_ADD^k2^inv, d, d);
+- emit_dn(as, ARMI_ADD^k^inv, d, r);
++ emit_dn(as, ARMI_ADD^k2^inv, rd, rd);
++ emit_dn(as, ARMI_ADD^k^inv, rd, r);
+ return 1;
+ }
+ }
+@@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
+ }
+
+ /* Load a 32 bit constant into a GPR. */
+-static void emit_loadi(ASMState *as, Reg r, int32_t i)
++static void emit_loadi(ASMState *as, Reg rd, int32_t i)
+ {
+ uint32_t k = emit_isk12(ARMI_MOV, i);
+- lua_assert(rset_test(as->freeset, r) || r == RID_TMP);
++ lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
++ "dest reg %d not free", rd);
+ if (k) {
+ /* Standard K12 constant. */
+- emit_d(as, ARMI_MOV^k, r);
++ emit_d(as, ARMI_MOV^k, rd);
+ } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
+ /* 16 bit loword constant for ARMv6T2. */
+- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r);
+- } else if (emit_kdelta1(as, r, i)) {
++ emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
++ } else if (emit_kdelta1(as, rd, i)) {
+ /* One step delta relative to another constant. */
+ } else if ((as->flags & JIT_F_ARMV6T2)) {
+ /* 32 bit hiword/loword constant for ARMv6T2. */
+- emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r);
+- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r);
+- } else if (emit_kdelta2(as, r, i)) {
++ emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
++ emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
++ } else if (emit_kdelta2(as, rd, i)) {
+ /* Two step delta relative to another constant. */
+ } else {
+ /* Otherwise construct the constant with up to 4 instructions. */
+@@ -197,15 +201,15 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
+ int32_t m = i & (255 << sh);
+ i &= ~(255 << sh);
+ if (i == 0) {
+- emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r);
++ emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);
+ break;
+ }
+- emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r);
++ emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
+ }
+ }
+ }
+
+-#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
++#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr)))
+
+ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+
+@@ -261,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
+ {
+ MCode *p = as->mcp;
+ ptrdiff_t delta = (target - p) - 1;
+- lua_assert(((delta + 0x00800000) >> 24) == 0);
++ lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range");
+ *--p = ai | ((uint32_t)delta & 0x00ffffffu);
+ as->mcp = p;
+ }
+@@ -289,7 +293,7 @@ static void emit_call(ASMState *as, void *target)
+ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+ {
+ #if LJ_SOFTFP
+- lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+ #else
+ if (dst >= RID_MAX_GPR) {
+ emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
+@@ -313,7 +317,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+ {
+ #if LJ_SOFTFP
+- lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+ #else
+ if (r >= RID_MAX_GPR)
+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
+@@ -326,7 +330,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+ {
+ #if LJ_SOFTFP
+- lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
++ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+ #else
+ if (r >= RID_MAX_GPR)
+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
+diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
+index 6da4c7d4..00086e8a 100644
+--- a/src/lj_emit_arm64.h
++++ b/src/lj_emit_arm64.h
+@@ -1,6 +1,6 @@
+ /*
+ ** ARM64 instruction emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+ ** Sponsored by Cisco Systems, Inc.
+@@ -8,8 +8,9 @@
+
+ /* -- Constant encoding --------------------------------------------------- */
+
+-static uint64_t get_k64val(IRIns *ir)
++static uint64_t get_k64val(ASMState *as, IRRef ref)
+ {
++ IRIns *ir = IR(ref);
+ if (ir->o == IR_KINT64) {
+ return ir_kint64(ir)->u64;
+ } else if (ir->o == IR_KGC) {
+@@ -17,7 +18,8 @@ static uint64_t get_k64val(IRIns *ir)
+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+ return (uint64_t)ir_kptr(ir);
+ } else {
+- lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
++ "bad 64 bit const IR op %d", ir->o);
+ return ir->i; /* Sign-extended. */
+ }
+ }
+@@ -122,7 +124,7 @@ static int emit_checkofs(A64Ins ai, int64_t ofs)
+ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
+ {
+ int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
+- lua_assert(ot);
++ lj_assertA(ot, "load/store offset %d out of range", ofs);
+ /* Combine LDR/STR pairs to LDP/STP. */
+ if ((sc == 2 || sc == 3) &&
+ (!(ai & 0x400000) || rd != rn) &&
+@@ -161,15 +163,15 @@ nopair:
+ /* Try to find an N-step delta relative to other consts with N < lim. */
+ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
+ {
+- RegSet work = ~as->freeset & RSET_GPR;
++ RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
+ if (lim <= 1) return 0; /* Can't beat that. */
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+- lua_assert(r != rd);
++ lj_assertA(r != rd, "dest reg %d not free", rd);
+ if (ref < REF_TRUE) {
+ uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
+- get_k64val(IR(ref));
++ get_k64val(as, ref);
+ int64_t delta = (int64_t)(k - kx);
+ if (delta == 0) {
+ emit_dm(as, A64I_MOVx, rd, r);
+@@ -192,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
+
+ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
+ {
+- uint32_t k13 = emit_isk13(u64, is64);
+- if (k13) { /* Can the constant be represented as a bitmask immediate? */
+- emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
+- } else {
+- int i, zeros = 0, ones = 0, neg;
+- if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
+- /* Count homogeneous 16 bit fragments. */
+- for (i = 0; i < 4; i++) {
+- uint64_t frag = (u64 >> i*16) & 0xffff;
+- zeros += (frag == 0);
+- ones += (frag == 0xffff);
++ int i, zeros = 0, ones = 0, neg;
++ if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
++ /* Count homogeneous 16 bit fragments. */
++ for (i = 0; i < 4; i++) {
++ uint64_t frag = (u64 >> i*16) & 0xffff;
++ zeros += (frag == 0);
++ ones += (frag == 0xffff);
++ }
++ neg = ones > zeros; /* Use MOVN if it pays off. */
++ if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */
++ uint32_t k13 = emit_isk13(u64, is64);
++ if (k13) {
++ emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
++ return;
+ }
+- neg = ones > zeros; /* Use MOVN if it pays off. */
+- if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
+- int shift = 0, lshift = 0;
+- uint64_t n64 = neg ? ~u64 : u64;
+- if (n64 != 0) {
+- /* Find first/last fragment to be filled. */
+- shift = (63-emit_clz64(n64)) & ~15;
+- lshift = emit_ctz64(n64) & ~15;
+- }
+- /* MOVK requires the original value (u64). */
+- while (shift > lshift) {
+- uint32_t u16 = (u64 >> shift) & 0xffff;
+- /* Skip fragments that are correctly filled by MOVN/MOVZ. */
+- if (u16 != (neg ? 0xffff : 0))
+- emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
+- shift -= 16;
+- }
+- /* But MOVN needs an inverted value (n64). */
+- emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
+- A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
++ }
++ if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
++ int shift = 0, lshift = 0;
++ uint64_t n64 = neg ? ~u64 : u64;
++ if (n64 != 0) {
++ /* Find first/last fragment to be filled. */
++ shift = (63-emit_clz64(n64)) & ~15;
++ lshift = emit_ctz64(n64) & ~15;
++ }
++ /* MOVK requires the original value (u64). */
++ while (shift > lshift) {
++ uint32_t u16 = (u64 >> shift) & 0xffff;
++ /* Skip fragments that are correctly filled by MOVN/MOVZ. */
++ if (u16 != (neg ? 0xffff : 0))
++ emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
++ shift -= 16;
+ }
++ /* But MOVN needs an inverted value (n64). */
++ emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
++ A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
+ }
+ }
+
+@@ -241,7 +245,7 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
+ #define mcpofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
+ #define checkmcpofs(as, k) \
+- ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0)
++ (A64F_S_OK(mcpofs(as, k)>>2, 19))
+
+ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+
+@@ -312,7 +316,7 @@ static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
+ {
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+- lua_assert(((delta + 0x40000) >> 19) == 0);
++ lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
+ *p = A64I_BCC | A64F_S19(delta) | cond;
+ }
+
+@@ -320,24 +324,25 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
+ {
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+- lua_assert(((delta + 0x02000000) >> 26) == 0);
+- *p = ai | ((uint32_t)delta & 0x03ffffffu);
++ lj_assertA(A64F_S_OK(delta, 26), "branch target out of range");
++ *p = ai | A64F_S26(delta);
+ }
+
+ static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
+ {
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+- lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0);
++ lj_assertA(bit < 63, "bit number out of range");
++ lj_assertA(A64F_S_OK(delta, 14), "branch target out of range");
+ if (bit > 31) ai |= A64I_X;
+- *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r;
++ *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
+ }
+
+ static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
+ {
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+- lua_assert(((delta + 0x40000) >> 19) == 0);
++ lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
+ *p = ai | A64F_S19(delta) | r;
+ }
+
+@@ -347,8 +352,8 @@ static void emit_call(ASMState *as, void *target)
+ {
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = (char *)target - (char *)p;
+- if ((((delta>>2) + 0x02000000) >> 26) == 0) {
+- *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu);
++ if (A64F_S_OK(delta>>2, 26)) {
++ *p = A64I_BL | A64F_S26(delta>>2);
+ } else { /* Target out of range: need indirect call. But don't use R0-R7. */
+ Reg r = ra_allock(as, i64ptr(target),
+ RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
+diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
+index 8a9ee24d..c13615dd 100644
+--- a/src/lj_emit_mips.h
++++ b/src/lj_emit_mips.h
+@@ -1,28 +1,32 @@
+ /*
+ ** MIPS instruction emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #if LJ_64
+-static intptr_t get_k64val(IRIns *ir)
++static intptr_t get_k64val(ASMState *as, IRRef ref)
+ {
++ IRIns *ir = IR(ref);
+ if (ir->o == IR_KINT64) {
+ return (intptr_t)ir_kint64(ir)->u64;
+ } else if (ir->o == IR_KGC) {
+ return (intptr_t)ir_kgc(ir);
+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+ return (intptr_t)ir_kptr(ir);
++ } else if (LJ_SOFTFP && ir->o == IR_KNUM) {
++ return (intptr_t)ir_knum(ir)->u64;
+ } else {
+- lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
++ "bad 64 bit const IR op %d", ir->o);
+ return ir->i; /* Sign-extended. */
+ }
+ }
+ #endif
+
+ #if LJ_64
+-#define get_kval(ir) get_k64val(ir)
++#define get_kval(as, ref) get_k64val(as, ref)
+ #else
+-#define get_kval(ir) ((ir)->i)
++#define get_kval(as, ref) (IR((ref))->i)
+ #endif
+
+ /* -- Emit basic instructions --------------------------------------------- */
+@@ -66,7 +70,7 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
+ }
+ }
+
+-#if LJ_64
++#if LJ_64 || LJ_HASBUFFER
+ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
+ uint32_t lsb)
+ {
+@@ -80,18 +84,18 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
+ #define emit_canremat(ref) ((ref) <= REF_BASE)
+
+ /* Try to find a one step delta relative to another constant. */
+-static int emit_kdelta1(ASMState *as, Reg t, intptr_t i)
++static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i)
+ {
+ RegSet work = ~as->freeset & RSET_GPR;
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+- lua_assert(r != t);
++ lj_assertA(r != rd, "dest reg %d not free", rd);
+ if (ref < ASMREF_L) {
+ intptr_t delta = (intptr_t)((uintptr_t)i -
+- (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref))));
++ (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref)));
+ if (checki16(delta)) {
+- emit_tsi(as, MIPSI_AADDIU, t, r, delta);
++ emit_tsi(as, MIPSI_AADDIU, rd, r, delta);
+ return 1;
+ }
+ }
+@@ -136,6 +140,7 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
+ } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
+ return;
+ } else {
++ /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */
+ if ((u64 & 0xffff)) {
+ emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
+ }
+@@ -220,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target)
+ {
+ MCode *p = as->mcp;
+ ptrdiff_t delta = target - p;
+- lua_assert(((delta + 0x8000) >> 16) == 0);
++ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
+ *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu);
+ as->mcp = p;
+ }
+@@ -234,10 +239,22 @@ static void emit_jmp(ASMState *as, MCode *target)
+ static void emit_call(ASMState *as, void *target, int needcfa)
+ {
+ MCode *p = as->mcp;
+- *--p = MIPSI_NOP;
++#if LJ_TARGET_MIPSR6
++ ptrdiff_t delta = (char *)target - (char *)p;
++ if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */
++ *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu);
++ as->mcp = p;
++ return;
++ }
++#endif
++ *--p = MIPSI_NOP; /* Delay slot. */
+ if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
++#if !LJ_TARGET_MIPSR6
+ *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
+ (((uintptr_t)target >>2) & 0x03ffffffu);
++#else
++ *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
++#endif
+ } else { /* Target out of range: need indirect call. */
+ *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
+ needcfa = 1;
+@@ -284,7 +301,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+ {
+ if (ofs) {
+- lua_assert(checki16(ofs));
++ lj_assertA(checki16(ofs), "offset %d out of range", ofs);
+ emit_tsi(as, MIPSI_AADDIU, r, r, ofs);
+ }
+ }
+diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
+index 21c3c2ac..649a6d17 100644
+--- a/src/lj_emit_ppc.h
++++ b/src/lj_emit_ppc.h
+@@ -1,6 +1,6 @@
+ /*
+ ** PPC instruction emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Emit basic instructions --------------------------------------------- */
+@@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs,
+
+ static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n)
+ {
+- lua_assert(n >= 0 && n < 32);
++ lj_assertA(n >= 0 && n < 32, "shift out or range");
+ emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n);
+ }
+
+ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
+ {
+- lua_assert(n >= 0 && n < 32);
++ lj_assertA(n >= 0 && n < 32, "shift out or range");
+ emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31);
+ }
+
+@@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
+ #define emit_canremat(ref) ((ref) <= REF_BASE)
+
+ /* Try to find a one step delta relative to another constant. */
+-static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
++static int emit_kdelta1(ASMState *as, Reg rd, int32_t i)
+ {
+ RegSet work = ~as->freeset & RSET_GPR;
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+- lua_assert(r != t);
++ lj_assertA(r != rd, "dest reg %d not free", rd);
+ if (ref < ASMREF_L) {
+ int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
+ if (checki16(delta)) {
+- emit_tai(as, PPCI_ADDI, t, r, delta);
++ emit_tai(as, PPCI_ADDI, rd, r, delta);
+ return 1;
+ }
+ }
+@@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target)
+ {
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = (char *)target - (char *)p;
+- lua_assert(((delta + 0x8000) >> 16) == 0);
++ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
+ pi ^= (delta & 0x8000) * (PPCF_Y/0x8000);
+ *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu);
+ }
+diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
+index 5207f9da..85202768 100644
+--- a/src/lj_emit_x86.h
++++ b/src/lj_emit_x86.h
+@@ -1,6 +1,6 @@
+ /*
+ ** x86/x64 instruction emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Emit basic instructions --------------------------------------------- */
+@@ -45,7 +45,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
+ *(uint32_t *)(p+delta-5) = (uint32_t)xo;
+ return p+delta-5;
+ }
+-#if defined(__GNUC__)
++#if defined(__GNUC__) || defined(__clang__)
+ if (__builtin_constant_p(xo) && n == -2)
+ p[delta-2] = (MCode)(xo >> 24);
+ else if (__builtin_constant_p(xo) && n == -3)
+@@ -92,7 +92,7 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
+ /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
+ static int32_t ptr2addr(const void *p)
+ {
+- lua_assert((uintptr_t)p < (uintptr_t)0x80000000);
++ lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range");
+ return i32ptr(p);
+ }
+ #else
+@@ -208,7 +208,7 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
+ rb = RID_ESP;
+ #endif
+ } else if (LJ_GC64 && rb == RID_RIP) {
+- lua_assert(as->mrm.idx == RID_NONE);
++ lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index");
+ mode = XM_OFS0;
+ p -= 4;
+ *(int32_t *)p = as->mrm.ofs;
+@@ -274,10 +274,12 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
+ /* mov r, i / xor r, r */
+ static void emit_loadi(ASMState *as, Reg r, int32_t i)
+ {
+- /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
++ /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. */
+ if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
+ (as->curins+1 < as->T->nins &&
+- IR(as->curins+1)->o == IR_HIOP)))) {
++ IR(as->curins+1)->o == IR_HIOP))) &&
++ !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) ||
++ (*as->mcp & 0xf0) == XI_JCCs)) {
+ emit_rr(as, XO_ARITH(XOg_XOR), r, r);
+ } else {
+ MCode *p = as->mcp;
+@@ -343,9 +345,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
+ emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
+ } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
+ emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
+- } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
+- emit_rmro(as, xo, rr, rr, 0);
+- emit_loadu64(as, rr, (uintptr_t)addr);
++ } else if (!checki32((intptr_t)addr)) {
++ Reg ra = (rr & 15);
++ if (xo != XO_MOV) {
++ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
++ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
++ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
++ ra = RID_DISPATCH;
++ if (checku32(dispaddr)) {
++ emit_loadi(as, ra, (int32_t)dispaddr);
++ } else { /* Full-size 64 bit load. */
++ MCode *p = as->mcp;
++ *(uint64_t *)(p-8) = dispaddr;
++ p[-9] = (MCode)(XI_MOVri+(ra&7));
++ p[-10] = 0x48 + ((ra>>3)&1);
++ p -= 10;
++ as->mcp = p;
++ }
++ if (xo == XO_GROUP3b) emit_i8(as, i8);
++ }
++ emit_rmro(as, xo, rr, ra, 0);
++ emit_loadu64(as, ra, (uintptr_t)addr);
+ } else
+ #endif
+ {
+@@ -381,7 +401,8 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+ emit_rma(as, xo, r64, k);
+ } else {
+ if (ir->i) {
+- lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
++ lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
++ "bad interned 64 bit constant");
+ } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
+ emit_loadu64(as, r, *k);
+ return;
+@@ -413,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target)
+ {
+ MCode *p = as->mcp;
+ ptrdiff_t delta = target - p;
+- lua_assert(delta == (int8_t)delta);
++ lj_assertA(delta == (int8_t)delta, "short jump target out of range");
+ p[-1] = (MCode)(int8_t)delta;
+ p[-2] = XI_JMPs;
+ as->mcp = p - 2;
+@@ -425,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target)
+ {
+ MCode *p = as->mcp;
+ ptrdiff_t delta = target - p;
+- lua_assert(delta == (int8_t)delta);
++ lj_assertA(delta == (int8_t)delta, "short jump target out of range");
+ p[-1] = (MCode)(int8_t)delta;
+ p[-2] = (MCode)(XI_JCCs+(cc&15));
+ as->mcp = p - 2;
+@@ -451,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source)
+ #define emit_label(as) ((as)->mcp)
+
+ /* Compute relative 32 bit offset for jump and call instructions. */
+-static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
++static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
+ {
+ ptrdiff_t delta = target - p;
+- lua_assert(delta == (int32_t)delta);
++ UNUSED(J);
++ lj_assertJ(delta == (int32_t)delta, "jump target out of range");
+ return (int32_t)delta;
+ }
+
+@@ -462,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
+ static void emit_jcc(ASMState *as, int cc, MCode *target)
+ {
+ MCode *p = as->mcp;
+- *(int32_t *)(p-4) = jmprel(p, target);
++ *(int32_t *)(p-4) = jmprel(as->J, p, target);
+ p[-5] = (MCode)(XI_JCCn+(cc&15));
+ p[-6] = 0x0f;
+ as->mcp = p - 6;
+@@ -472,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target)
+ static void emit_jmp(ASMState *as, MCode *target)
+ {
+ MCode *p = as->mcp;
+- *(int32_t *)(p-4) = jmprel(p, target);
++ *(int32_t *)(p-4) = jmprel(as->J, p, target);
+ p[-5] = XI_JMP;
+ as->mcp = p - 5;
+ }
+@@ -489,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target)
+ return;
+ }
+ #endif
+- *(int32_t *)(p-4) = jmprel(p, target);
++ *(int32_t *)(p-4) = jmprel(as->J, p, target);
+ p[-5] = XI_CALL;
+ as->mcp = p - 5;
+ }
+@@ -539,10 +561,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+ {
+ if (ofs) {
+- if ((as->flags & JIT_F_LEA_AGU))
+- emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
+- else
+- emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
++ emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
+ }
+ }
+
+diff --git a/src/lj_err.c b/src/lj_err.c
+index b6be357e..fda4a59c 100644
+--- a/src/lj_err.c
++++ b/src/lj_err.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Error handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_err_c
+@@ -29,12 +29,18 @@
+ ** Pros and Cons:
+ **
+ ** - EXT requires unwind tables for *all* functions on the C stack between
+-** the pcall/catch and the error/throw. This is the default on x64,
+-** but needs to be manually enabled on x86/PPC for non-C++ code.
++** the pcall/catch and the error/throw. C modules used by Lua code can
++** throw errors, so these need to have unwind tables, too. Transitively
++** this applies to all system libraries used by C modules -- at least
++** when they have callbacks which may throw an error.
+ **
+-** - INT is faster when actually throwing errors (but this happens rarely).
++** - INT is faster when actually throwing errors, but this happens rarely.
+ ** Setting up error handlers is zero-cost in any case.
+ **
++** - INT needs to save *all* callee-saved registers when entering the
++** interpreter. EXT only needs to save those actually used inside the
++** interpreter. JIT-compiled code may need to save some more.
++**
+ ** - EXT provides full interoperability with C++ exceptions. You can throw
+ ** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
+ ** C++ destructors are called as needed. C++ exceptions caught by pcall
+@@ -46,27 +52,38 @@
+ ** the wrapper function feature. Lua errors thrown through C++ frames
+ ** cannot be caught by C++ code and C++ destructors are not run.
+ **
+-** EXT is the default on x64 systems and on Windows, INT is the default on all
+-** other systems.
++** - EXT can handle errors from internal helper functions that are called
++** from JIT-compiled code (except for Windows/x86 and 32 bit ARM).
++** INT has no choice but to call the panic handler, if this happens.
++** Note: this is mainly relevant for out-of-memory errors.
++**
++** EXT is the default on all systems where the toolchain produces unwind
++** tables by default (*). This is hard-coded and/or detected in src/Makefile.
++** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
++**
++** INT is the default on all other systems.
++**
++** EXT can be manually enabled for toolchains that are able to produce
++** conforming unwind tables:
++** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
++** As explained above, *all* C code used directly or indirectly by LuaJIT
++** must be compiled with -funwind-tables (or -fexceptions). C++ code must
++** *not* be compiled with -fno-exceptions.
++**
++** If you're unsure whether error handling inside the VM works correctly,
++** try running this and check whether it prints "OK":
+ **
+-** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
+-** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
+-** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
+-** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
+-** and all C libraries that have callbacks which may be used to call back
+-** into Lua. C++ code must *not* be compiled with -fno-exceptions.
++** luajit -e "print(select(2, load('OK')):match('OK'))"
+ **
+-** EXT is mandatory on WIN64 since the calling convention has an abundance
+-** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
+-** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
++** (*) Originally, toolchains only generated unwind tables for C++ code. For
++** interoperability reasons, this can be manually enabled for plain C code,
++** too (with -funwind-tables). With the introduction of the x64 architecture,
++** the corresponding POSIX and Windows ABIs mandated unwind tables for all
++** code. Over the following years most desktop and server platforms have
++** enabled unwind tables by default on all architectures. OTOH mobile and
++** embedded platforms do not consistently mandate unwind tables.
+ */
+
+-#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
+-#define LJ_UNWIND_EXT 1
+-#elif LJ_TARGET_WINDOWS
+-#define LJ_UNWIND_EXT 1
+-#endif
+-
+ /* -- Error messages ------------------------------------------------------ */
+
+ /* Error message strings. */
+@@ -150,6 +167,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
+ case FRAME_CONT: /* Continuation frame. */
+ if (frame_iscont_fficb(frame))
+ goto unwind_c;
++ /* fallthrough */
+ case FRAME_VARG: /* Vararg frame. */
+ frame = frame_prevd(frame);
+ break;
+@@ -183,7 +201,172 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
+
+ /* -- External frame unwinding -------------------------------------------- */
+
+-#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN
++#if LJ_ABI_WIN
++
++/*
++** Someone in Redmond owes me several days of my life. A lot of this is
++** undocumented or just plain wrong on MSDN. Some of it can be gathered
++** from 3rd party docs or must be found by trial-and-error. They really
++** don't want you to write your own language-specific exception handler
++** or to interact gracefully with MSVC. :-(
++**
++** Apparently MSVC doesn't call C++ destructors for foreign exceptions
++** unless you compile your C++ code with /EHa. Unfortunately this means
++** catch (...) also catches things like access violations. The use of
++** _set_se_translator doesn't really help, because it requires /EHa, too.
++*/
++
++#define WIN32_LEAN_AND_MEAN
++#include <windows.h>
++
++#if LJ_TARGET_X86
++typedef void *UndocumentedDispatcherContext; /* Unused on x86. */
++#else
++/* Taken from: http://www.nynaeve.net/?p=99 */
++typedef struct UndocumentedDispatcherContext {
++ ULONG64 ControlPc;
++ ULONG64 ImageBase;
++ PRUNTIME_FUNCTION FunctionEntry;
++ ULONG64 EstablisherFrame;
++ ULONG64 TargetIp;
++ PCONTEXT ContextRecord;
++ void (*LanguageHandler)(void);
++ PVOID HandlerData;
++ PUNWIND_HISTORY_TABLE HistoryTable;
++ ULONG ScopeIndex;
++ ULONG Fill0;
++} UndocumentedDispatcherContext;
++#endif
++
++/* Another wild guess. */
++extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
++
++#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
++/* Workaround for broken MinGW64 declaration. */
++VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
++#define RtlUnwindEx RtlUnwindEx_FIXED
++#endif
++
++#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
++#define LJ_GCC_EXCODE ((DWORD)0x20474343)
++
++#define LJ_EXCODE ((DWORD)0xe24c4a00)
++#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c))
++#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
++#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
++
++/* Windows exception handler for interpreter frame. */
++LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
++ void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
++{
++#if LJ_TARGET_X86
++ void *cf = (char *)f - CFRAME_OFS_SEH;
++#else
++ void *cf = f;
++#endif
++ lua_State *L = cframe_L(cf);
++ int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
++ LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
++ if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
++ /* Unwind internal frames. */
++ err_unwind(L, cf, errcode);
++ } else {
++ void *cf2 = err_unwind(L, cf, 0);
++ if (cf2) { /* We catch it, so start unwinding the upper frames. */
++ if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
++ rec->ExceptionCode == LJ_GCC_EXCODE) {
++#if !LJ_TARGET_CYGWIN
++ __DestructExceptionObject(rec, 1);
++#endif
++ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
++ } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
++ /* Don't catch access violations etc. */
++ return 1; /* ExceptionContinueSearch */
++ }
++#if LJ_TARGET_X86
++ UNUSED(ctx);
++ UNUSED(dispatch);
++ /* Call all handlers for all lower C frames (including ourselves) again
++ ** with EH_UNWINDING set. Then call the specified function, passing cf
++ ** and errcode.
++ */
++ lj_vm_rtlunwind(cf, (void *)rec,
++ (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
++ (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
++ /* lj_vm_rtlunwind does not return. */
++#else
++ /* Unwind the stack and call all handlers for all lower C frames
++ ** (including ourselves) again with EH_UNWINDING set. Then set
++ ** stack pointer = cf, result = errcode and jump to the specified target.
++ */
++ RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
++ lj_vm_unwind_ff_eh :
++ lj_vm_unwind_c_eh),
++ rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
++ /* RtlUnwindEx should never return. */
++#endif
++ }
++ }
++ return 1; /* ExceptionContinueSearch */
++}
++
++#if LJ_UNWIND_JIT
++
++#if LJ_TARGET_X64
++#define CONTEXT_REG_PC Rip
++#elif LJ_TARGET_ARM64
++#define CONTEXT_REG_PC Pc
++#else
++#error "NYI: Windows arch-specific unwinder for JIT-compiled code"
++#endif
++
++/* Windows unwinder for JIT-compiled code. */
++static void err_unwind_win_jit(global_State *g, int errcode)
++{
++ CONTEXT ctx;
++ UNWIND_HISTORY_TABLE hist;
++
++ memset(&hist, 0, sizeof(hist));
++ RtlCaptureContext(&ctx);
++ while (1) {
++ uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC;
++ void *hdata;
++ PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
++ if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */
++ ExitNo exitno;
++ uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
++ if (stub) { /* Jump to side exit to unwind the trace. */
++ ctx.CONTEXT_REG_PC = stub;
++ G2J(g)->exitcode = errcode;
++ RtlRestoreContext(&ctx, NULL); /* Does not return. */
++ }
++ break;
++ }
++ RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func,
++ &ctx, &hdata, &frame, NULL);
++ if (!addr) break;
++ }
++ /* Unwinding failed, if we end up here. */
++}
++#endif
++
++/* Raise Windows exception. */
++static void err_raise_ext(global_State *g, int errcode)
++{
++#if LJ_UNWIND_JIT
++ if (tvref(g->jit_base)) {
++ err_unwind_win_jit(g, errcode);
++ return; /* Unwinding failed. */
++ }
++#elif LJ_HASJIT
++ /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */
++ setmref(g->jit_base, NULL);
++#endif
++ UNUSED(g);
++ RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
++}
++
++#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
+
+ /*
+ ** We have to use our own definitions instead of the mandatory (!) unwind.h,
+@@ -193,6 +376,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
+ typedef struct _Unwind_Context _Unwind_Context;
+
+ #define _URC_OK 0
++#define _URC_FATAL_PHASE2_ERROR 2
+ #define _URC_FATAL_PHASE1_ERROR 3
+ #define _URC_HANDLER_FOUND 6
+ #define _URC_INSTALL_CONTEXT 7
+@@ -212,9 +396,11 @@ typedef struct _Unwind_Exception
+ void (*excleanup)(int, struct _Unwind_Exception *);
+ uintptr_t p1, p2;
+ } __attribute__((__aligned__)) _Unwind_Exception;
++#define UNWIND_EXCEPTION_TYPE _Unwind_Exception
+
+ extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
+ extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
++extern uintptr_t _Unwind_GetIP(_Unwind_Context *);
+ extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
+ extern void _Unwind_DeleteException(_Unwind_Exception *);
+ extern int _Unwind_RaiseException(_Unwind_Exception *);
+@@ -232,7 +418,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
+ lua_State *L;
+ if (version != 1)
+ return _URC_FATAL_PHASE1_ERROR;
+- UNUSED(uexclass);
+ cf = (void *)_Unwind_GetCFA(ctx);
+ L = cframe_L(cf);
+ if ((actions & _UA_SEARCH_PHASE)) {
+@@ -280,25 +465,139 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
+ ** it on non-x64 because the interpreter restores all callee-saved regs.
+ */
+ lj_err_throw(L, errcode);
++#if LJ_TARGET_X64
++#error "Broken build system -- only use the provided Makefiles!"
++#endif
+ #endif
+ }
+ return _URC_CONTINUE_UNWIND;
+ }
+
+-#if LJ_UNWIND_EXT
+-#if LJ_TARGET_OSX || defined(__OpenBSD__)
+-/* Sorry, no thread safety for OSX. Complain to Apple, not me. */
+-static _Unwind_Exception static_uex;
++#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
++struct dwarf_eh_bases { void *tbase, *dbase, *func; };
++extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases);
++
++/* Verify that external error handling actually has a chance to work. */
++void lj_err_verify(void)
++{
++ struct dwarf_eh_bases ehb;
++ lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables");
++ /* Check disabled, because of broken Fedora/ARM64. See #722.
++ lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables");
++ */
++}
++#endif
++
++#if LJ_UNWIND_JIT
++/* DWARF2 personality handler for JIT-compiled code. */
++static int err_unwind_jit(int version, int actions,
++ uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
++{
++ /* NYI: FFI C++ exception interoperability. */
++ if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass))
++ return _URC_FATAL_PHASE1_ERROR;
++ if ((actions & _UA_SEARCH_PHASE)) {
++ return _URC_HANDLER_FOUND;
++ }
++ if ((actions & _UA_CLEANUP_PHASE)) {
++ global_State *g = *(global_State **)(uex+1);
++ ExitNo exitno;
++ uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */
++ uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
++ lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame");
++ if (stub) { /* Jump to side exit to unwind the trace. */
++ G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass);
++#ifdef LJ_TARGET_MIPS
++ _Unwind_SetGR(ctx, 4, stub);
++ _Unwind_SetGR(ctx, 5, exitno);
++ _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub);
+ #else
+-static __thread _Unwind_Exception static_uex;
++ _Unwind_SetIP(ctx, stub);
+ #endif
++ return _URC_INSTALL_CONTEXT;
++ }
++ return _URC_FATAL_PHASE2_ERROR;
++ }
++ return _URC_FATAL_PHASE1_ERROR;
++}
+
+-/* Raise DWARF2 exception. */
+-static void err_raise_ext(int errcode)
++/* DWARF2 template frame info for JIT-compiled code.
++**
++** After copying the template to the start of the mcode segment,
++** the frame handler function and the code size is patched.
++** The frame handler always installs a new context to jump to the exit,
++** so don't bother to add any unwind opcodes.
++*/
++static const uint8_t err_frame_jit_template[] = {
++#if LJ_BE
++ 0,0,0,
++#endif
++ LJ_64 ? 0x1c : 0x14, /* CIE length. */
++#if LJ_LE
++ 0,0,0,
++#endif
++ 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */
++ 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */
++#if LJ_64
++ 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
++ 0,0,0,0,0, /* Alignment. */
++#else
++ 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
++ 0, /* Alignment. */
++#endif
++#if LJ_BE
++ 0,0,0,
++#endif
++ LJ_64 ? 0x14 : 0x10, /* FDE length. */
++ 0,0,0,
++ LJ_64 ? 0x24 : 0x1c, /* CIE offset. */
++ 0,0,0,
++ LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */
++#if LJ_LE
++ 0,0,0,
++#endif
++ 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */
++#if LJ_64
++ 0,0,0,0, /* Alignment. */
++#endif
++ 0,0,0,0 /* Final FDE. */
++};
++
++#define ERR_FRAME_JIT_OFS_HANDLER 0x12
++#define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18)
++#define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 0x2c : 0x24)
++#if LJ_TARGET_OSX
++#define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE
++#else
++#define ERR_FRAME_JIT_OFS_REGISTER 0
++#endif
++
++extern void __register_frame(const void *);
++extern void __deregister_frame(const void *);
++
++uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
++{
++ void **handler;
++ memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
++ handler = (void *)err_unwind_jit;
++ memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
++ *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
++ (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
++ __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
++#ifdef LUA_USE_ASSERT
++ {
++ struct dwarf_eh_bases ehb;
++ lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb),
++ "bad JIT unwind table registration");
++ }
++#endif
++ return info + sizeof(err_frame_jit_template);
++}
++
++void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info)
+ {
+- static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
+- static_uex.excleanup = NULL;
+- _Unwind_RaiseException(&static_uex);
++ UNUSED(base); UNUSED(sz);
++ __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
+ }
+ #endif
+
+@@ -310,6 +609,7 @@ static void err_raise_ext(int errcode)
+ #define _US_FORCE_UNWIND 8
+
+ typedef struct _Unwind_Control_Block _Unwind_Control_Block;
++#define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block
+
+ struct _Unwind_Control_Block {
+ uint64_t exclass;
+@@ -368,136 +668,63 @@ LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
+ }
+ if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
+ return _URC_FAILURE;
++#ifdef LUA_USE_ASSERT
++ /* We should never get here unless this is a forced unwind aka backtrace. */
++ if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) {
++ _Unwind_SetGR(ctx, 0, 0xff33aa88);
++ }
++#endif
+ return _URC_CONTINUE_UNWIND;
+ }
+
+-#if LJ_UNWIND_EXT
+-static __thread _Unwind_Control_Block static_uex;
++#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
++typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *);
++extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
+
+-static void err_raise_ext(int errcode)
++static int err_verify_bt(_Unwind_Context *ctx, int *got)
+ {
+- memset(&static_uex, 0, sizeof(static_uex));
+- static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
+- _Unwind_RaiseException(&static_uex);
++ if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; }
++ else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); }
++ return _URC_OK;
+ }
+-#endif
+
+-#endif /* LJ_TARGET_ARM */
+-
+-#elif LJ_ABI_WIN
++/* Verify that external error handling actually has a chance to work. */
++void lj_err_verify(void)
++{
++ int got = 0;
++ _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got);
++ lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables");
++}
++#endif
+
+ /*
+-** Someone in Redmond owes me several days of my life. A lot of this is
+-** undocumented or just plain wrong on MSDN. Some of it can be gathered
+-** from 3rd party docs or must be found by trial-and-error. They really
+-** don't want you to write your own language-specific exception handler
+-** or to interact gracefully with MSVC. :-(
++** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM.
+ **
+-** Apparently MSVC doesn't call C++ destructors for foreign exceptions
+-** unless you compile your C++ code with /EHa. Unfortunately this means
+-** catch (...) also catches things like access violations. The use of
+-** _set_se_translator doesn't really help, because it requires /EHa, too.
++** The quirky ARM unwind API doesn't have __register_frame().
++** A potential workaround might involve _Unwind_Backtrace.
++** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway,
++** since they are built without unwind tables by default.
+ */
+
+-#define WIN32_LEAN_AND_MEAN
+-#include <windows.h>
+-
+-#if LJ_TARGET_X64
+-/* Taken from: http://www.nynaeve.net/?p=99 */
+-typedef struct UndocumentedDispatcherContext {
+- ULONG64 ControlPc;
+- ULONG64 ImageBase;
+- PRUNTIME_FUNCTION FunctionEntry;
+- ULONG64 EstablisherFrame;
+- ULONG64 TargetIp;
+- PCONTEXT ContextRecord;
+- void (*LanguageHandler)(void);
+- PVOID HandlerData;
+- PUNWIND_HISTORY_TABLE HistoryTable;
+- ULONG ScopeIndex;
+- ULONG Fill0;
+-} UndocumentedDispatcherContext;
+-#else
+-typedef void *UndocumentedDispatcherContext;
+-#endif
+-
+-/* Another wild guess. */
+-extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
+-
+-#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
+-/* Workaround for broken MinGW64 declaration. */
+-VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
+-#define RtlUnwindEx RtlUnwindEx_FIXED
+-#endif
++#endif /* LJ_TARGET_ARM */
+
+-#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
+-#define LJ_GCC_EXCODE ((DWORD)0x20474343)
+
+-#define LJ_EXCODE ((DWORD)0xe24c4a00)
+-#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c))
+-#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
+-#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
++#if LJ_UNWIND_EXT
++static __thread struct {
++ UNWIND_EXCEPTION_TYPE ex;
++ global_State *g;
++} static_uex;
+
+-/* Windows exception handler for interpreter frame. */
+-LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
+- void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
++/* Raise external exception. */
++static void err_raise_ext(global_State *g, int errcode)
+ {
+-#if LJ_TARGET_X64
+- void *cf = f;
+-#else
+- void *cf = (char *)f - CFRAME_OFS_SEH;
+-#endif
+- lua_State *L = cframe_L(cf);
+- int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
+- LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
+- if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
+- /* Unwind internal frames. */
+- err_unwind(L, cf, errcode);
+- } else {
+- void *cf2 = err_unwind(L, cf, 0);
+- if (cf2) { /* We catch it, so start unwinding the upper frames. */
+- if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
+- rec->ExceptionCode == LJ_GCC_EXCODE) {
+-#if LJ_TARGET_WINDOWS
+- __DestructExceptionObject(rec, 1);
+-#endif
+- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+- } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+- /* Don't catch access violations etc. */
+- return 1; /* ExceptionContinueSearch */
+- }
+-#if LJ_TARGET_X64
+- /* Unwind the stack and call all handlers for all lower C frames
+- ** (including ourselves) again with EH_UNWINDING set. Then set
+- ** rsp = cf, rax = errcode and jump to the specified target.
+- */
+- RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+- lj_vm_unwind_ff_eh :
+- lj_vm_unwind_c_eh),
+- rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
+- /* RtlUnwindEx should never return. */
+-#else
+- UNUSED(ctx);
+- UNUSED(dispatch);
+- /* Call all handlers for all lower C frames (including ourselves) again
+- ** with EH_UNWINDING set. Then call the specified function, passing cf
+- ** and errcode.
+- */
+- lj_vm_rtlunwind(cf, (void *)rec,
+- (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+- (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
+- /* lj_vm_rtlunwind does not return. */
+-#endif
+- }
+- }
+- return 1; /* ExceptionContinueSearch */
++ memset(&static_uex, 0, sizeof(static_uex));
++ static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode);
++ static_uex.g = g;
++ _Unwind_RaiseException(&static_uex.ex);
+ }
+
+-/* Raise Windows exception. */
+-static void err_raise_ext(int errcode)
+-{
+- RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
+-}
++#endif
+
+ #endif
+
+@@ -508,22 +735,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
+ {
+ global_State *g = G(L);
+ lj_trace_abort(g);
+- setmref(g->jit_base, NULL);
+ L->status = LUA_OK;
+ #if LJ_UNWIND_EXT
+- err_raise_ext(errcode);
++ err_raise_ext(g, errcode);
+ /*
+ ** A return from this function signals a corrupt C stack that cannot be
+ ** unwound. We have no choice but to call the panic function and exit.
+ **
+ ** Usually this is caused by a C function without unwind information.
+- ** This should never happen on x64, but may happen if you've manually
+- ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every*
+- ** non-C++ file with -funwind-tables.
++ ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
++ ** and forgot to recompile *every* non-C++ file with -funwind-tables.
+ */
+ if (G(L)->panic)
+ G(L)->panic(L);
+ #else
++#if LJ_HASJIT
++ setmref(g->jit_base, NULL);
++#endif
+ {
+ void *cf = err_unwind(L, NULL, errcode);
+ if (cframe_unwind_ff(cf))
+@@ -585,6 +813,7 @@ static ptrdiff_t finderrfunc(lua_State *L)
+ if (cframe_canyield(cf)) return 0;
+ if (cframe_errfunc(cf) >= 0)
+ return cframe_errfunc(cf);
++ cf = cframe_prev(cf);
+ frame = frame_prevd(frame);
+ break;
+ case FRAME_PCALL:
+@@ -593,7 +822,7 @@ static ptrdiff_t finderrfunc(lua_State *L)
+ return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */
+ return 0;
+ default:
+- lua_assert(0);
++ lj_assertL(0, "bad frame type");
+ return 0;
+ }
+ }
+@@ -601,9 +830,9 @@ static ptrdiff_t finderrfunc(lua_State *L)
+ }
+
+ /* Runtime error. */
+-LJ_NOINLINE void lj_err_run(lua_State *L)
++LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
+ {
+- ptrdiff_t ef = finderrfunc(L);
++ ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
+ if (ef) {
+ TValue *errfunc = restorestack(L, ef);
+ TValue *top = L->top;
+@@ -622,6 +851,16 @@ LJ_NOINLINE void lj_err_run(lua_State *L)
+ lj_err_throw(L, LUA_ERRRUN);
+ }
+
++#if LJ_HASJIT
++LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
++{
++ if (errcode == LUA_ERRRUN)
++ lj_err_run(L);
++ else
++ lj_err_throw(L, errcode);
++}
++#endif
++
+ /* Formatted runtime error message. */
+ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
+ {
+@@ -690,9 +929,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
+ const BCIns *pc = cframe_Lpc(L);
+ if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
+ const char *tname = lj_typename(o);
++ setframe_gc(o, obj2gco(L), LJ_TTHREAD);
+ if (LJ_FR2) o++;
+ setframe_pc(o, pc);
+- setframe_gc(o, obj2gco(L), LJ_TTHREAD);
+ L->top = L->base = o+1;
+ err_msgv(L, LJ_ERR_BADCALL, tname);
+ }
+@@ -702,25 +941,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
+ /* Error in context of caller. */
+ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
+ {
+- TValue *frame = L->base-1;
+- TValue *pframe = NULL;
+- if (frame_islua(frame)) {
+- pframe = frame_prevl(frame);
+- } else if (frame_iscont(frame)) {
+- if (frame_iscont_fficb(frame)) {
+- pframe = frame;
+- frame = NULL;
+- } else {
+- pframe = frame_prevd(frame);
++ TValue *frame = NULL, *pframe = NULL;
++ if (!(LJ_HASJIT && tvref(G(L)->jit_base))) {
++ frame = L->base-1;
++ if (frame_islua(frame)) {
++ pframe = frame_prevl(frame);
++ } else if (frame_iscont(frame)) {
++ if (frame_iscont_fficb(frame)) {
++ pframe = frame;
++ frame = NULL;
++ } else {
++ pframe = frame_prevd(frame);
+ #if LJ_HASFFI
+- /* Remove frame for FFI metamethods. */
+- if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
+- frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
+- L->base = pframe+1;
+- L->top = frame;
+- setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
+- }
++ /* Remove frame for FFI metamethods. */
++ if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
++ frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
++ L->base = pframe+1;
++ L->top = frame;
++ setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
++ }
+ #endif
++ }
+ }
+ }
+ lj_debug_addloc(L, msg, pframe, frame);
+diff --git a/src/lj_err.h b/src/lj_err.h
+index cba5fb71..2e8a251f 100644
+--- a/src/lj_err.h
++++ b/src/lj_err.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Error handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_ERR_H
+@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg;
+ LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
+ LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
+ LJ_FUNC_NORET void lj_err_mem(lua_State *L);
+-LJ_FUNC_NORET void lj_err_run(lua_State *L);
++LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
++#if LJ_HASJIT
++LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
++#endif
+ LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
+ LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
+ BCLine line, ErrMsg em, va_list argp);
+@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
+ LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
+ LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
+
++#if LJ_UNWIND_JIT && !LJ_ABI_WIN
++LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info);
++LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info);
++#else
++#define lj_err_register_mcode(base, sz, info) (info)
++#define lj_err_deregister_mcode(base, sz, info) UNUSED(base)
++#endif
++
++#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT)
++LJ_FUNC void lj_err_verify(void);
++#else
++#define lj_err_verify() ((void)0)
++#endif
++
+ #endif
+diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
+index 060a9f89..89e67496 100644
+--- a/src/lj_errmsg.h
++++ b/src/lj_errmsg.h
+@@ -1,6 +1,6 @@
+ /*
+ ** VM error messages.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* This file may be included multiple times with different ERRDEF macros. */
+@@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable")
+ ERRDEF(UNPACK, "too many results to unpack")
+ ERRDEF(RDRSTR, "reader function must return a string")
+ ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print"))
++ERRDEF(NUMRNG, "number out of range")
+ ERRDEF(IDXRNG, "index out of range")
+ ERRDEF(BASERNG, "base out of range")
+ ERRDEF(LVLRNG, "level out of range")
+@@ -101,11 +102,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)")
+ ERRDEF(BADMODN, "name conflict for module " LUA_QS)
+ #if LJ_HASJIT
+ ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?")
+-#if LJ_TARGET_X86ORX64
+-ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2")
+-#else
+ ERRDEF(NOJIT, "JIT compiler disabled")
+-#endif
+ #elif defined(LJ_ARCH_NOJIT)
+ ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
+ #else
+@@ -183,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields")
+ ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
+ #endif
+
++#if LJ_HASBUFFER
++/* String buffer errors. */
++ERRDEF(BUFFER_SELF, "cannot put buffer into itself")
++ERRDEF(BUFFER_BADOPT, "bad options table")
++ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS)
++ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x")
++ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d")
++ERRDEF(BUFFER_DEPTH, "too deep to serialize")
++ERRDEF(BUFFER_DUPKEY, "duplicate table key")
++ERRDEF(BUFFER_EOB, "unexpected end of buffer")
++ERRDEF(BUFFER_LEFTOV, "left-over data in buffer")
++#endif
++
+ #undef ERRDEF
+
+ /* Detecting unused error messages:
+diff --git a/src/lj_ff.h b/src/lj_ff.h
+index 31d65a00..e355f44e 100644
+--- a/src/lj_ff.h
++++ b/src/lj_ff.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Fast function IDs.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_FF_H
+diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
+index dfdee2db..97b40f98 100644
+--- a/src/lj_ffrecord.c
++++ b/src/lj_ffrecord.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Fast function call recorder.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_ffrecord_c
+@@ -11,6 +11,7 @@
+ #if LJ_HASJIT
+
+ #include "lj_err.h"
++#include "lj_buf.h"
+ #include "lj_str.h"
+ #include "lj_tab.h"
+ #include "lj_frame.h"
+@@ -28,6 +29,7 @@
+ #include "lj_vm.h"
+ #include "lj_strscan.h"
+ #include "lj_strfmt.h"
++#include "lj_serialize.h"
+
+ /* Some local macros to save typing. Undef'd at the end. */
+ #define IR(ref) (&J->cur.ir[(ref)])
+@@ -107,6 +109,10 @@ static void recff_stitch(jit_State *J)
+ const BCIns *pc = frame_pc(base-1);
+ TValue *pframe = frame_prevl(base-1);
+
++ /* Check for this now. Throwing in lj_record_stop messes up the stack. */
++ if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap])
++ lj_trace_err(J, LJ_TRERR_SNAPOV);
++
+ /* Move func + args up in Lua stack and insert continuation. */
+ memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
+ setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
+@@ -182,6 +188,14 @@ static TRef recff_bufhdr(jit_State *J)
+ lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
+ }
+
++/* Emit TMPREF. */
++static TRef recff_tmpref(jit_State *J, TRef tr, int mode)
++{
++ if (!LJ_DUALNUM && tref_isinteger(tr))
++ tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
++ return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode);
++}
++
+ /* -- Base library fast functions ----------------------------------------- */
+
+ static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd)
+@@ -281,7 +295,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd)
+ if (tref_isstr(tr))
+ J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
+ else if (tref_istab(tr))
+- J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr);
++ J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL);
+ /* else: Interpreter will throw. */
+ UNUSED(rd);
+ }
+@@ -296,7 +310,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
+ } else {
+ TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
+ TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
+- emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
++ emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#'));
+ }
+ return 0;
+ } else { /* select(n, ...) */
+@@ -317,9 +331,9 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
+ ptrdiff_t n = (ptrdiff_t)J->maxslot;
+ if (start < 0) start += n;
+ else if (start > n) start = n;
+- rd->nres = n - start;
+ if (start >= 1) {
+ ptrdiff_t i;
++ rd->nres = n - start;
+ for (i = 0; i < n - start; i++)
+ J->base[i] = J->base[start+i];
+ } /* else: Interpreter will throw. */
+@@ -455,6 +469,7 @@ static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
+ #endif
+ lj_record_call(J, 0, J->maxslot - 1);
+ rd->nres = -1; /* Pending call. */
++ J->needsnap = 1; /* Start catching on-trace errors. */
+ } /* else: Interpreter will throw. */
+ }
+
+@@ -490,6 +505,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
+ if (errcode)
+ lj_err_throw(J->L, errcode); /* Propagate errors. */
+ rd->nres = -1; /* Pending call. */
++ J->needsnap = 1; /* Start catching on-trace errors. */
+ } /* else: Interpreter will throw. */
+ }
+
+@@ -505,6 +521,40 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
+ recff_nyiu(J, rd);
+ }
+
++static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd)
++{
++#if LJ_BE
++ /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
++ ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
++ */
++ recff_nyi(J, rd);
++#else
++ TRef tab = J->base[0];
++ if (tref_istab(tab)) {
++ RecordIndex ix;
++ cTValue *keyv;
++ ix.tab = tab;
++ if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */
++ ix.key = lj_ir_kint(J, 0);
++ keyv = niltvg(J2G(J));
++ } else {
++ TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
++ ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp);
++ keyv = &rd->argv[1];
++ }
++ copyTV(J->L, &ix.tabv, &rd->argv[0]);
++ ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv);
++ /* Omit the value, if not used by the caller. */
++ ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) &&
++ bc_b(frame_pc(J->L->base-1)[-1])-1 < 2);
++ ix.mobj = 0; /* We don't need the next index. */
++ rd->nres = lj_record_next(J, &ix);
++ J->base[0] = ix.key;
++ J->base[1] = ix.val;
++ } /* else: Interpreter will throw. */
++#endif
++}
++
+ /* -- Math library fast functions ----------------------------------------- */
+
+ static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
+@@ -563,7 +613,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd)
+ {
+ TRef tr = lj_ir_tonum(J, J->base[0]);
+ TRef tr2 = lj_ir_tonum(J, J->base[1]);
+- J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2);
++ J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2);
+ UNUSED(rd);
+ }
+
+@@ -580,43 +630,12 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd)
+ UNUSED(rd);
+ }
+
+-/* Record math.asin, math.acos, math.atan. */
+-static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd)
+-{
+- TRef y = lj_ir_tonum(J, J->base[0]);
+- TRef x = lj_ir_knum_one(J);
+- uint32_t ffid = rd->data;
+- if (ffid != FF_math_atan) {
+- TRef tmp = emitir(IRTN(IR_MUL), y, y);
+- tmp = emitir(IRTN(IR_SUB), x, tmp);
+- tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT);
+- if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; }
+- }
+- J->base[0] = emitir(IRTN(IR_ATAN2), y, x);
+-}
+-
+-static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd)
++static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd)
+ {
+ TRef tr = lj_ir_tonum(J, J->base[0]);
+ J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data);
+ }
+
+-static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
+-{
+- TRef tr = J->base[0];
+- if (tref_isinteger(tr)) {
+- J->base[0] = tr;
+- J->base[1] = lj_ir_kint(J, 0);
+- } else {
+- TRef trt;
+- tr = lj_ir_tonum(J, tr);
+- trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC);
+- J->base[0] = trt;
+- J->base[1] = emitir(IRTN(IR_SUB), tr, trt);
+- }
+- rd->nres = 2;
+-}
+-
+ static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
+ {
+ J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
+@@ -647,7 +666,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
+ GCudata *ud = udataV(&J->fn->c.upvalue[0]);
+ TRef tr, one;
+ lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */
+- tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud)));
++ tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud)));
+ one = lj_ir_knum_one(J);
+ tr = emitir(IRTN(IR_SUB), tr, one);
+ if (J->base[0]) {
+@@ -738,7 +757,7 @@ static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
+ #if LJ_HASFFI
+ TRef hdr = recff_bufhdr(J);
+ TRef tr = recff_bit64_tohex(J, rd, hdr);
+- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ #else
+ recff_nyiu(J, rd); /* Don't bother working around this NYI. */
+ #endif
+@@ -864,8 +883,10 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
+ if (i > 1) { /* Concatenate the strings, if there's more than one. */
+ TRef hdr = recff_bufhdr(J), tr = hdr;
+ for (i = 0; J->base[i] != 0; i++)
+- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
+- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
++ } else if (i == 0) {
++ J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
+ }
+ UNUSED(rd);
+ }
+@@ -881,19 +902,19 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
+ emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
+ if (vrep > 1) {
+ TRef hdr2 = recff_bufhdr(J);
+- TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep);
+- tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str);
+- str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
++ TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep);
++ tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str);
++ str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2);
+ }
+ }
+ tr = hdr = recff_bufhdr(J);
+ if (str2) {
+- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str);
++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str);
+ str = str2;
+ rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
+ }
+ tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
+- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ }
+
+ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
+@@ -901,7 +922,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
+ TRef str = lj_ir_tostr(J, J->base[0]);
+ TRef hdr = recff_bufhdr(J);
+ TRef tr = lj_ir_call(J, rd->data, hdr, str);
+- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ }
+
+ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
+@@ -949,7 +970,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
+ str->len-(MSize)start, pat->len)) {
+ TRef pos;
+ emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
+- pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0));
++ /* Recompute offset. trsptr may not point into trstr after folding. */
++ pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart);
+ J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
+ J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
+ rd->nres = 2;
+@@ -963,34 +985,40 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
+ }
+ }
+
+-static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
++static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx)
+ {
+- TRef trfmt = lj_ir_tostr(J, J->base[0]);
+- GCstr *fmt = argv2str(J, &rd->argv[0]);
+- int arg = 1;
+- TRef hdr, tr;
++ ptrdiff_t arg = sbufx;
++ TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]);
++ GCstr *fmt = argv2str(J, &rd->argv[arg]);
+ FormatState fs;
+ SFormat sf;
+ /* Specialize to the format string. */
+ emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
+- tr = hdr = recff_bufhdr(J);
+ lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+ while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
+- TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
++ TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg];
+ TRef trsf = lj_ir_kint(J, (int32_t)sf);
+ IRCallID id;
+ switch (STRFMT_TYPE(sf)) {
+ case STRFMT_LIT:
+- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
+ lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
+ break;
+ case STRFMT_INT:
+ id = IRCALL_lj_strfmt_putfnum_int;
+ handle_int:
+- if (!tref_isinteger(tra))
++ if (!tref_isinteger(tra)) {
++#if LJ_HASFFI
++ if (tref_iscdata(tra)) {
++ tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]);
++ tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
++ break;
++ }
++#endif
+ goto handle_num;
++ }
+ if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
+- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
+ emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
+ } else {
+ #if LJ_HASFFI
+@@ -1012,15 +1040,16 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+ handle_num:
+ tra = lj_ir_tonum(J, tra);
+ tr = lj_ir_call(J, id, tr, trsf, tra);
+- if (LJ_SOFTFP) lj_needsplit(J);
++ if (LJ_SOFTFP32) lj_needsplit(J);
+ break;
+ case STRFMT_STR:
+ if (!tref_isstr(tra)) {
+ recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
++ /* NYI: also buffers. */
+ return;
+ }
+ if (sf == STRFMT_STR) /* Shortcut for plain %s. */
+- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra);
++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra);
+ else if ((sf & STRFMT_T_QUOTED))
+ tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
+ else
+@@ -1029,7 +1058,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+ case STRFMT_CHAR:
+ tra = lj_opt_narrow_toint(J, tra);
+ if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
+- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
+ emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
+ else
+ tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
+@@ -1041,9 +1070,326 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+ return;
+ }
+ }
+- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
++ if (sbufx) {
++ emitir(IRT(IR_USE, IRT_NIL), tr, 0);
++ } else {
++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
++ }
++}
++
++static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
++{
++ recff_format(J, rd, recff_bufhdr(J), 0);
++}
++
++/* -- Buffer library fast functions --------------------------------------- */
++
++#if LJ_HASBUFFER
++
++static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud)
++{
++ return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L);
++}
++
++static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val)
++{
++ TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L);
++ emitir(IRT(IR_FSTORE, IRT_PGC), fref, val);
++}
++
++static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl)
++{
++ return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl);
++}
++
++static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val)
++{
++ TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl);
++ emitir(IRT(IR_FSTORE, IRT_PTR), fref, val);
++}
++
++static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw)
++{
++ TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr);
++ if (LJ_64)
++ len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
++ return len;
++}
++
++/* Emit typecheck for string buffer. */
++static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, int arg)
++{
++ TRef trtype, ud = J->base[arg];
++ if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE);
++ trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
++ emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER));
++ J->needsnap = 1;
++ return ud;
+ }
+
++/* Emit BUFHDR for write to extended string buffer. */
++static TRef recff_sbufx_write(jit_State *J, TRef ud)
++{
++ TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));
++ return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
++}
++
++/* Check for integer in range for the buffer API. */
++static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, int arg)
++{
++ TRef tr = J->base[arg];
++ TRef trlim = lj_ir_kint(J, LJ_MAX_BUF);
++ if (tref_isinteger(tr)) {
++ emitir(IRTGI(IR_ULE), tr, trlim);
++ } else if (tref_isnum(tr)) {
++ tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
++ emitir(IRTGI(IR_ULE), tr, trlim);
++#if LJ_HASFFI
++ } else if (tref_iscdata(tr)) {
++ tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]);
++ emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF));
++ tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE);
++#else
++ UNUSED(rd);
++#endif
++ } else {
++ lj_trace_err(J, LJ_TRERR_BADTYPE);
++ }
++ return tr;
++}
++
++static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ SBufExt *sbx = bufV(&rd->argv[0]);
++ int iscow = (int)sbufiscow(sbx);
++ TRef trl = recff_sbufx_get_L(J, ud);
++ TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));
++ TRef zero = lj_ir_kint(J, 0);
++ emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero);
++ if (iscow) {
++ trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
++ LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
++ lj_ir_kint(J, SBUF_FLAG_COW));
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
++ recff_sbufx_set_L(J, ud, trl);
++ emitir(IRT(IR_FSTORE, IRT_PGC),
++ emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
++ } else {
++ TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb);
++ }
++}
++
++static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
++ TRef len = recff_sbufx_len(J, trr, trw);
++ TRef trn = recff_sbufx_checkint(J, rd, 1);
++ len = emitir(IRTI(IR_MIN), len, trn);
++ trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
++}
++
++static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trbuf = recff_sbufx_write(J, ud);
++ TRef tr = J->base[1];
++ if (tref_isstr(tr)) {
++ TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
++ TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
++ lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
++#if LJ_HASFFI
++ } else if (tref_iscdata(tr)) {
++ TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]);
++ TRef len = recff_sbufx_checkint(J, rd, 2);
++ lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
++#endif
++ } /* else: Interpreter will throw. */
++}
++
++static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trbuf = recff_sbufx_write(J, ud);
++ TRef tr;
++ ptrdiff_t arg;
++ if (!J->base[1]) return;
++ for (arg = 1; (tr = J->base[arg]); arg++) {
++ if (tref_isstr(tr)) {
++ trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr);
++ } else if (tref_isnumber(tr)) {
++ trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf,
++ emitir(IRT(IR_TOSTR, IRT_STR), tr,
++ tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT));
++ } else if (tref_isudata(tr)) {
++ TRef ud2 = recff_sbufx_check(J, rd, arg);
++ TRef trr = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_R);
++ TRef trw = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_W);
++ TRef len = recff_sbufx_len(J, trr, trw);
++ emitir(IRTG(IR_NE, IRT_PGC), ud, ud2);
++ trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len);
++ } else {
++ recff_nyiu(J, rd);
++ }
++ }
++ emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
++}
++
++static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trbuf = recff_sbufx_write(J, ud);
++ recff_format(J, rd, trbuf, 1);
++}
++
++static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
++ TRef tr;
++ ptrdiff_t arg;
++ if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; }
++ for (arg = 0; (tr = J->base[arg+1]); arg++) {
++ TRef len = recff_sbufx_len(J, trr, trw);
++ if (tref_isnil(tr)) {
++ J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
++ trr = trw;
++ } else {
++ TRef trn = recff_sbufx_checkint(J, rd, arg+1);
++ TRef tru;
++ len = emitir(IRTI(IR_MIN), len, trn);
++ tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
++ J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
++ trr = tru; /* Doing the ADD before the SNEW generates better code. */
++ }
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
++ }
++ rd->nres = arg;
++}
++
++static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
++ J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw));
++}
++
++static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
++ J->base[0] = recff_sbufx_len(J, trr, trw);
++}
++
++#if LJ_HASFFI
++static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trbuf = recff_sbufx_write(J, ud);
++ TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]);
++ TRef len = recff_sbufx_checkint(J, rd, 2);
++ trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len);
++ emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
++}
++
++static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trbuf = recff_sbufx_write(J, ud);
++ TRef trsz = recff_sbufx_checkint(J, rd, 1);
++ J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz);
++ J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W));
++ rd->nres = 2;
++}
++
++static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef len = recff_sbufx_checkint(J, rd, 1);
++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
++ TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E);
++ TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw);
++ if (LJ_64)
++ left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
++ emitir(IRTGI(IR_ULE), len, left);
++ trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len);
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw);
++}
++
++static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
++ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
++ J->base[0] = lj_crecord_topuint8(J, trr);
++ J->base[1] = recff_sbufx_len(J, trr, trw);
++ rd->nres = 2;
++}
++#endif
++
++static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trbuf = recff_sbufx_write(J, ud);
++ TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
++ lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp);
++ /* No IR_USE needed, since the call is a store. */
++}
++
++static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd)
++{
++ TRef ud = recff_sbufx_check(J, rd, 0);
++ TRef trbuf = recff_sbufx_write(J, ud);
++ TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);
++ TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp);
++ IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0]));
++ /* No IR_USE needed, since the call is a store. */
++ J->base[0] = lj_record_vload(J, tmp, 0, t);
++ /* The sbx->r store must be after the VLOAD type check, in case it fails. */
++ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
++}
++
++static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd)
++{
++ TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1);
++ J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp);
++ /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */
++ emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0);
++ UNUSED(rd);
++}
++
++static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd)
++{
++ if (tvisstr(&rd->argv[0])) {
++ GCstr *str = strV(&rd->argv[0]);
++ SBufExt sbx;
++ IRType t;
++ TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);
++ TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]);
++ /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM.
++ ** That's why IRCALL_lj_serialize_decode needs a fake INT result.
++ */
++ emitir(IRT(IR_USE, IRT_NIL), tr, 0);
++ memset(&sbx, 0, sizeof(SBufExt));
++ lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len);
++ t = (IRType)lj_serialize_peektype(&sbx);
++ J->base[0] = lj_record_vload(J, tmp, 0, t);
++ } /* else: Interpreter will throw. */
++}
++
++#endif
++
+ /* -- Table library fast functions ---------------------------------------- */
+
+ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
+@@ -1054,7 +1400,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
+ rd->nres = 0;
+ if (tref_istab(ix.tab) && ix.val) {
+ if (!J->base[2]) { /* Simple push: t[#t+1] = v */
+- TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab);
++ TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL);
+ GCtab *t = tabV(&rd->argv[0]);
+ ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1));
+ settabV(J->L, &ix.tabv, t);
+@@ -1078,11 +1424,11 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
+ lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
+ TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
+ lj_opt_narrow_toint(J, J->base[3]) :
+- lj_ir_call(J, IRCALL_lj_tab_len, tab);
++ emitir(IRTI(IR_ALEN), tab, TREF_NIL);
+ TRef hdr = recff_bufhdr(J);
+ TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
+ emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
+- J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
++ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ } /* else: Interpreter will throw. */
+ UNUSED(rd);
+ }
+@@ -1114,13 +1460,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
+ {
+ TRef tr, ud, fp;
+ if (id) { /* io.func() */
+-#if LJ_GC64
+- /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
+ ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
+-#else
+- tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
+- ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
+-#endif
+ } else { /* fp:method() */
+ ud = J->base[0];
+ if (!tref_isudata(ud))
+diff --git a/src/lj_ffrecord.h b/src/lj_ffrecord.h
+index 3b407450..aca6ca1d 100644
+--- a/src/lj_ffrecord.h
++++ b/src/lj_ffrecord.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Fast function call recorder.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_FFRECORD_H
+diff --git a/src/lj_frame.h b/src/lj_frame.h
+index 19c49a4a..b6146454 100644
+--- a/src/lj_frame.h
++++ b/src/lj_frame.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Stack frames.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_FRAME_H
+@@ -46,7 +46,7 @@ enum {
+ #define frame_gc(f) (gcval((f)-1))
+ #define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz)
+ #define frame_pc(f) ((const BCIns *)frame_ftsz(f))
+-#define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp)))
++#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp)))
+ #define setframe_ftsz(f, sz) ((f)->ftsz = (sz))
+ #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc))
+ #else
+@@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+ #endif
+ #define CFRAME_SHIFT_MULTRES 3
+ #elif LJ_TARGET_ARM64
+-#define CFRAME_OFS_ERRF 196
+-#define CFRAME_OFS_NRES 200
+-#define CFRAME_OFS_PREV 160
+-#define CFRAME_OFS_L 176
+-#define CFRAME_OFS_PC 168
+-#define CFRAME_OFS_MULTRES 192
++#define CFRAME_OFS_ERRF 36
++#define CFRAME_OFS_NRES 40
++#define CFRAME_OFS_PREV 0
++#define CFRAME_OFS_L 16
++#define CFRAME_OFS_PC 8
++#define CFRAME_OFS_MULTRES 32
+ #define CFRAME_SIZE 208
+ #define CFRAME_SHIFT_MULTRES 3
+ #elif LJ_TARGET_PPC
+@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+ #define CFRAME_OFS_L 36
+ #define CFRAME_OFS_PC 32
+ #define CFRAME_OFS_MULTRES 28
+-#define CFRAME_SIZE 272
++#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
+ #define CFRAME_SHIFT_MULTRES 3
+ #endif
+ #elif LJ_TARGET_MIPS32
+diff --git a/src/lj_func.c b/src/lj_func.c
+index 639dad87..cf8ca08f 100644
+--- a/src/lj_func.c
++++ b/src/lj_func.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Function handling (prototypes, functions and upvalues).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt)
+
+ /* -- Upvalues ------------------------------------------------------------ */
+
+-static void unlinkuv(GCupval *uv)
++static void unlinkuv(global_State *g, GCupval *uv)
+ {
+- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
++ UNUSED(g);
++ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
++ "broken upvalue chain");
+ setgcrefr(uvnext(uv)->prev, uv->prev);
+ setgcrefr(uvprev(uv)->next, uv->next);
+ }
+@@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
+ GCupval *uv;
+ /* Search the sorted list of open upvalues. */
+ while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) {
+- lua_assert(!p->closed && uvval(p) != &p->tv);
++ lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain");
+ if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */
+ if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */
+ flipwhite(obj2gco(p));
+@@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
+ setgcrefr(uv->next, g->uvhead.next);
+ setgcref(uvnext(uv)->prev, obj2gco(uv));
+ setgcref(g->uvhead.next, obj2gco(uv));
+- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
++ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
++ "broken upvalue chain");
+ return uv;
+ }
+
+@@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
+ while (gcref(L->openupval) != NULL &&
+ uvval((uv = gco2uv(gcref(L->openupval)))) >= level) {
+ GCobj *o = obj2gco(uv);
+- lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv);
++ lj_assertG(!isblack(o), "bad black upvalue");
++ lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain");
+ setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */
+ if (isdead(g, o)) {
+ lj_func_freeuv(g, uv);
+ } else {
+- unlinkuv(uv);
++ unlinkuv(g, uv);
+ lj_gc_closeuv(g, uv);
+ }
+ }
+@@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
+ void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv)
+ {
+ if (!uv->closed)
+- unlinkuv(uv);
++ unlinkuv(g, uv);
+ lj_mem_freet(g, uv);
+ }
+
+diff --git a/src/lj_func.h b/src/lj_func.h
+index 901751b9..b864a5bf 100644
+--- a/src/lj_func.h
++++ b/src/lj_func.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Function handling (prototypes, functions and upvalues).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_FUNC_H
+diff --git a/src/lj_gc.c b/src/lj_gc.c
+index 2aaf5b2c..5a238542 100644
+--- a/src/lj_gc.c
++++ b/src/lj_gc.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Garbage collector.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -25,6 +25,7 @@
+ #include "lj_cdata.h"
+ #endif
+ #include "lj_trace.h"
++#include "lj_dispatch.h"
+ #include "lj_vm.h"
+
+ #define GCSTEPSIZE 1024u
+@@ -41,7 +42,8 @@
+
+ /* Mark a TValue (if needed). */
+ #define gc_marktv(g, tv) \
+- { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \
++ { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \
++ "TValue and GC type mismatch"); \
+ if (tviswhite(tv)) gc_mark(g, gcV(tv)); }
+
+ /* Mark a GCobj (if needed). */
+@@ -55,21 +57,32 @@
+ static void gc_mark(global_State *g, GCobj *o)
+ {
+ int gct = o->gch.gct;
+- lua_assert(iswhite(o) && !isdead(g, o));
++ lj_assertG(iswhite(o), "mark of non-white object");
++ lj_assertG(!isdead(g, o), "mark of dead object");
+ white2gray(o);
+ if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) {
+ GCtab *mt = tabref(gco2ud(o)->metatable);
+ gray2black(o); /* Userdata are never gray. */
+ if (mt) gc_markobj(g, mt);
+ gc_markobj(g, tabref(gco2ud(o)->env));
++ if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) {
++ SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
++ if (sbufiscow(sbx) && gcref(sbx->cowref))
++ gc_markobj(g, gcref(sbx->cowref));
++ if (gcref(sbx->dict_str))
++ gc_markobj(g, gcref(sbx->dict_str));
++ if (gcref(sbx->dict_mt))
++ gc_markobj(g, gcref(sbx->dict_mt));
++ }
+ } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
+ GCupval *uv = gco2uv(o);
+ gc_marktv(g, uvval(uv));
+ if (uv->closed)
+ gray2black(o); /* Closed upvalues are never gray. */
+ } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
+- lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
+- gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE);
++ lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
++ gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE,
++ "bad GC type %d", gct);
+ setgcrefr(o->gch.gclist, g->gc.gray);
+ setgcref(g->gc.gray, o);
+ }
+@@ -102,7 +115,8 @@ static void gc_mark_uv(global_State *g)
+ {
+ GCupval *uv;
+ for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) {
+- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
++ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
++ "broken upvalue chain");
+ if (isgray(obj2gco(uv)))
+ gc_marktv(g, uvval(uv));
+ }
+@@ -197,7 +211,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t)
+ for (i = 0; i <= hmask; i++) {
+ Node *n = &node[i];
+ if (!tvisnil(&n->val)) { /* Mark non-empty slot. */
+- lua_assert(!tvisnil(&n->key));
++ lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot");
+ if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key);
+ if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val);
+ }
+@@ -212,7 +226,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
+ gc_markobj(g, tabref(fn->c.env));
+ if (isluafunc(fn)) {
+ uint32_t i;
+- lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv);
++ lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv,
++ "function upvalues out of range");
+ gc_markobj(g, funcproto(fn));
+ for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */
+ gc_markobj(g, &gcref(fn->l.uvptr[i])->uv);
+@@ -228,7 +243,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
+ static void gc_marktrace(global_State *g, TraceNo traceno)
+ {
+ GCobj *o = obj2gco(traceref(G2J(g), traceno));
+- lua_assert(traceno != G2J(g)->cur.traceno);
++ lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped");
+ if (iswhite(o)) {
+ white2gray(o);
+ setgcrefr(o->gch.gclist, g->gc.gray);
+@@ -309,7 +324,7 @@ static size_t propagatemark(global_State *g)
+ {
+ GCobj *o = gcref(g->gc.gray);
+ int gct = o->gch.gct;
+- lua_assert(isgray(o));
++ lj_assertG(isgray(o), "propagation of non-gray object");
+ gray2black(o);
+ setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */
+ if (LJ_LIKELY(gct == ~LJ_TTAB)) {
+@@ -341,7 +356,7 @@ static size_t propagatemark(global_State *g)
+ return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
+ T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry);
+ #else
+- lua_assert(0);
++ lj_assertG(0, "bad GC type %d", gct);
+ return 0;
+ #endif
+ }
+@@ -395,11 +410,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
+ if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */
+ gc_fullsweep(g, &gco2th(o)->openupval);
+ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
+- lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED));
++ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
++ "sweep of undead object");
+ makewhite(g, o); /* Value is alive, change to the current white. */
+ p = &o->gch.nextgc;
+ } else { /* Otherwise value is dead, free it. */
+- lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED);
++ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
++ "sweep of unlive object");
+ setgcrefr(*p, o->gch.nextgc);
+ if (o == gcref(g->gc.root))
+ setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */
+@@ -409,6 +426,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
+ return p;
+ }
+
++/* Sweep one string interning table chain. Preserves hashalg bit. */
++static void gc_sweepstr(global_State *g, GCRef *chain)
++{
++ /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
++ int ow = otherwhite(g);
++ uintptr_t u = gcrefu(*chain);
++ GCRef q;
++ GCRef *p = &q;
++ GCobj *o;
++ setgcrefp(q, (u & ~(uintptr_t)1));
++ while ((o = gcref(*p)) != NULL) {
++ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
++ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
++ "sweep of undead string");
++ makewhite(g, o); /* String is alive, change to the current white. */
++ p = &o->gch.nextgc;
++ } else { /* Otherwise string is dead, free it. */
++ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
++ "sweep of unlive string");
++ setgcrefr(*p, o->gch.nextgc);
++ lj_str_free(g, gco2str(o));
++ }
++ }
++ setgcrefp(*chain, (gcrefu(q) | (u & 1)));
++}
++
+ /* Check whether we can clear a key or a value slot from a table. */
+ static int gc_mayclear(cTValue *o, int val)
+ {
+@@ -426,11 +469,12 @@ static int gc_mayclear(cTValue *o, int val)
+ }
+
+ /* Clear collected entries from weak tables. */
+-static void gc_clearweak(GCobj *o)
++static void gc_clearweak(global_State *g, GCobj *o)
+ {
++ UNUSED(g);
+ while (o) {
+ GCtab *t = gco2tab(o);
+- lua_assert((t->marked & LJ_GC_WEAK));
++ lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table");
+ if ((t->marked & LJ_GC_WEAKVAL)) {
+ MSize i, asize = t->asize;
+ for (i = 0; i < asize; i++) {
+@@ -466,6 +510,7 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
+ TValue *top;
+ lj_trace_abort(g);
+ hook_entergc(g); /* Disable hooks and new traces during __gc. */
++ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
+ g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
+ top = L->top;
+ copyTV(L, top++, mo);
+@@ -474,6 +519,7 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
+ L->top = top+1;
+ errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
+ hook_restore(g, oldh);
++ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
+ g->gc.threshold = oldt; /* Restore GC threshold. */
+ if (errcode)
+ lj_err_throw(L, errcode); /* Propagate errors. */
+@@ -485,7 +531,7 @@ static void gc_finalize(lua_State *L)
+ global_State *g = G(L);
+ GCobj *o = gcnext(gcref(g->gc.mmudata));
+ cTValue *mo;
+- lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */
++ lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace");
+ /* Unchain from list of userdata to be finalized. */
+ if (o == gcref(g->gc.mmudata))
+ setgcrefnull(g->gc.mmudata);
+@@ -560,9 +606,9 @@ void lj_gc_freeall(global_State *g)
+ /* Free everything, except super-fixed objects (the main thread). */
+ g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED;
+ gc_fullsweep(g, &g->gc.root);
+- strmask = g->strmask;
++ strmask = g->str.mask;
+ for (i = 0; i <= strmask; i++) /* Free all string hash chains. */
+- gc_fullsweep(g, &g->strhash[i]);
++ gc_sweepstr(g, &g->str.tab[i]);
+ }
+
+ /* -- Collector ----------------------------------------------------------- */
+@@ -577,7 +623,7 @@ static void atomic(global_State *g, lua_State *L)
+
+ setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */
+ setgcrefnull(g->gc.weak);
+- lua_assert(!iswhite(obj2gco(mainthread(g))));
++ lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white");
+ gc_markobj(g, L); /* Mark running thread. */
+ gc_traverse_curtrace(g); /* Traverse current trace. */
+ gc_mark_gcroot(g); /* Mark GC roots (again). */
+@@ -592,7 +638,7 @@ static void atomic(global_State *g, lua_State *L)
+ udsize += gc_propagate_gray(g); /* And propagate the marks. */
+
+ /* All marking done, clear weak tables. */
+- gc_clearweak(gcref(g->gc.weak));
++ gc_clearweak(g, gcref(g->gc.weak));
+
+ lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
+
+@@ -625,21 +671,21 @@ static size_t gc_onestep(lua_State *L)
+ return 0;
+ case GCSsweepstring: {
+ GCSize old = g->gc.total;
+- gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
+- if (g->gc.sweepstr > g->strmask)
++ gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */
++ if (g->gc.sweepstr > g->str.mask)
+ g->gc.state = GCSsweep; /* All string hash chains sweeped. */
+- lua_assert(old >= g->gc.total);
++ lj_assertG(old >= g->gc.total, "sweep increased memory");
+ g->gc.estimate -= old - g->gc.total;
+ return GCSWEEPCOST;
+ }
+ case GCSsweep: {
+ GCSize old = g->gc.total;
+ setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
+- lua_assert(old >= g->gc.total);
++ lj_assertG(old >= g->gc.total, "sweep increased memory");
+ g->gc.estimate -= old - g->gc.total;
+ if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
+- if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
+- lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
++ if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1)
++ lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */
+ if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
+ g->gc.state = GCSfinalize;
+ #if LJ_HASFFI
+@@ -668,7 +714,7 @@ static size_t gc_onestep(lua_State *L)
+ g->gc.debt = 0;
+ return 0;
+ default:
+- lua_assert(0);
++ lj_assertG(0, "bad GC state");
+ return 0;
+ }
+ }
+@@ -742,7 +788,8 @@ void lj_gc_fullgc(lua_State *L)
+ }
+ while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep)
+ gc_onestep(L); /* Finish sweep. */
+- lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause);
++ lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause,
++ "bad GC state");
+ /* Now perform a full GC. */
+ g->gc.state = GCSpause;
+ do { gc_onestep(L); } while (g->gc.state != GCSpause);
+@@ -755,9 +802,11 @@ void lj_gc_fullgc(lua_State *L)
+ /* Move the GC propagation frontier forward. */
+ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
+ {
+- lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
+- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
+- lua_assert(o->gch.gct != ~LJ_TTAB);
++ lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o),
++ "bad object states for forward barrier");
++ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
++ "bad GC state");
++ lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table");
+ /* Preserve invariant during propagation. Otherwise it doesn't matter. */
+ if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic)
+ gc_mark(g, v); /* Move frontier forward. */
+@@ -794,7 +843,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv)
+ lj_gc_barrierf(g, o, gcV(&uv->tv));
+ } else {
+ makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */
+- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
++ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
++ "bad GC state");
+ }
+ }
+ }
+@@ -814,12 +864,13 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
+ void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
+ {
+ global_State *g = G(L);
+- lua_assert((osz == 0) == (p == NULL));
++ lj_assertG((osz == 0) == (p == NULL), "realloc API violation");
+ p = g->allocf(g->allocd, p, osz, nsz);
+ if (p == NULL && nsz > 0)
+ lj_err_mem(L);
+- lua_assert((nsz == 0) == (p == NULL));
+- lua_assert(checkptrGC(p));
++ lj_assertG((nsz == 0) == (p == NULL), "allocf API violation");
++ lj_assertG(checkptrGC(p),
++ "allocated memory address %p outside required range", p);
+ g->gc.total = (g->gc.total - osz) + nsz;
+ return p;
+ }
+@@ -831,7 +882,8 @@ void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
+ GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
+ if (o == NULL)
+ lj_err_mem(L);
+- lua_assert(checkptrGC(o));
++ lj_assertG(checkptrGC(o),
++ "allocated memory address %p outside required range", o);
+ g->gc.total += size;
+ setgcrefr(o->gch.nextgc, g->gc.root);
+ setgcref(g->gc.root, o);
+diff --git a/src/lj_gc.h b/src/lj_gc.h
+index 669bbe92..af8c476b 100644
+--- a/src/lj_gc.h
++++ b/src/lj_gc.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Garbage collector.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_GC_H
+@@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno);
+ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
+ {
+ GCobj *o = obj2gco(t);
+- lua_assert(isblack(o) && !isdead(g, o));
+- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
++ lj_assertG(isblack(o) && !isdead(g, o),
++ "bad object states for backward barrier");
++ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
++ "bad GC state");
+ black2gray(o);
+ setgcrefr(t->gclist, g->gc.grayagain);
+ setgcref(g->gc.grayagain, o);
+diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
+index c219ffac..0e2777b8 100644
+--- a/src/lj_gdbjit.c
++++ b/src/lj_gdbjit.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Client for the GDB JIT API.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_gdbjit_c
+@@ -363,7 +363,7 @@ static const ELFheader elfhdr_template = {
+ .eosabi = 12,
+ #elif defined(__DragonFly__)
+ .eosabi = 0,
+-#elif (defined(__sun__) && defined(__svr4__))
++#elif LJ_TARGET_SOLARIS
+ .eosabi = 6,
+ #else
+ .eosabi = 0,
+@@ -724,7 +724,7 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
+ SECTALIGN(ctx->p, sizeof(uintptr_t));
+ gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe);
+ ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
+- lua_assert(ctx->objsize < sizeof(GDBJITobj));
++ lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow");
+ }
+
+ #undef SECTALIGN
+@@ -782,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T)
+ ctx.spadjp = CFRAME_SIZE_JIT +
+ (MSize)(parent ? traceref(J, parent)->spadjust : 0);
+ ctx.spadj = CFRAME_SIZE_JIT + T->spadjust;
+- lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc);
++ lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
++ "start PC out of range");
+ ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
+ ctx.filename = proto_chunknamestr(pt);
+ if (*ctx.filename == '@' || *ctx.filename == '=')
+diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h
+index bbaa1568..53596c87 100644
+--- a/src/lj_gdbjit.h
++++ b/src/lj_gdbjit.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Client for the GDB JIT API.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_GDBJIT_H
+diff --git a/src/lj_ir.c b/src/lj_ir.c
+index 5baece67..71bf8855 100644
+--- a/src/lj_ir.c
++++ b/src/lj_ir.c
+@@ -1,6 +1,6 @@
+ /*
+ ** SSA IR (Intermediate Representation) emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_ir_c
+@@ -30,15 +30,16 @@
+ #endif
+ #include "lj_vm.h"
+ #include "lj_strscan.h"
++#include "lj_serialize.h"
+ #include "lj_strfmt.h"
+-#include "lj_lib.h"
++#include "lj_prng.h"
+
+ /* Some local macros to save typing. Undef'd at the end. */
+ #define IR(ref) (&J->cur.ir[(ref)])
+ #define fins (&J->fold.ins)
+
+ /* Pass IR on to next optimization in chain (FOLD). */
+-#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
++#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+
+ /* -- IR tables ----------------------------------------------------------- */
+
+@@ -90,8 +91,9 @@ static void lj_ir_growbot(jit_State *J)
+ {
+ IRIns *baseir = J->irbuf + J->irbotlim;
+ MSize szins = J->irtoplim - J->irbotlim;
+- lua_assert(szins != 0);
+- lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
++ lj_assertJ(szins != 0, "zero IR size");
++ lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim,
++ "unexpected IR growth");
+ if (J->cur.nins + (szins >> 1) < J->irtoplim) {
+ /* More than half of the buffer is free on top: shift up by a quarter. */
+ MSize ofs = szins >> 2;
+@@ -146,11 +148,12 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
+ }
+
+ /* Load field of type t from GG_State + offset. Must be 32 bit aligned. */
+-LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
++TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
+ {
+- lua_assert((ofs & 3) == 0);
++ lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset");
+ ofs >>= 2;
+- lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. */
++ lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff,
++ "GG_State field offset breaks 10 bit FOLD key limit");
+ lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
+ return lj_opt_fold(J);
+ }
+@@ -181,7 +184,7 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
+ static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
+ {
+ IRRef ref = J->cur.nk - 2;
+- lua_assert(J->state != LJ_TRACE_ASM);
++ lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state");
+ if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
+ J->cur.nk = ref;
+ return ref;
+@@ -277,7 +280,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
+ {
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef ref;
+- lua_assert(!isdead(J2G(J), o));
++ lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object");
+ for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
+ if (ir_kgc(&cir[ref]) == o)
+ goto found;
+@@ -299,7 +302,7 @@ TRef lj_ir_ktrace(jit_State *J)
+ {
+ IRRef ref = ir_nextkgc(J);
+ IRIns *ir = IR(ref);
+- lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
++ lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping");
+ ir->t.irt = IRT_P64;
+ ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */
+ ir->op12 = 0;
+@@ -313,7 +316,7 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef ref;
+ #if LJ_64 && !LJ_GC64
+- lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr);
++ lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer");
+ #endif
+ for (ref = J->chain[op]; ref; ref = cir[ref].prev)
+ if (ir_kptr(&cir[ref]) == ptr)
+@@ -360,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot)
+ IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot);
+ IRRef ref;
+ /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */
+- lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot);
++ lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot,
++ "out-of-range key/slot");
+ for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev)
+ if (cir[ref].op12 == op12)
+ goto found;
+@@ -381,13 +385,15 @@ found:
+ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
+ {
+ UNUSED(L);
+- lua_assert(ir->o != IR_KSLOT); /* Common mistake. */
++ lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. */
+ switch (ir->o) {
+ case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
+ case IR_KINT: setintV(tv, ir->i); break;
+ case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
+- case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break;
+- case IR_KNULL: setlightudV(tv, NULL); break;
++ case IR_KPTR: case IR_KKPTR:
++ setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir));
++ break;
++ case IR_KNULL: setintV(tv, 0); break;
+ case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
+ #if LJ_HASFFI
+ case IR_KINT64: {
+@@ -397,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
+ break;
+ }
+ #endif
+- default: lua_assert(0); break;
++ default: lj_assertL(0, "bad IR constant op %d", ir->o); break;
+ }
+ }
+
+@@ -457,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op)
+ case IR_UGE: return !(a < b);
+ case IR_ULE: return !(a > b);
+ case IR_UGT: return !(a <= b);
+- default: lua_assert(0); return 0;
++ default: lj_assertX(0, "bad IR op %d", op); return 0;
+ }
+ }
+
+@@ -470,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op)
+ case IR_GE: return (res >= 0);
+ case IR_LE: return (res <= 0);
+ case IR_GT: return (res > 0);
+- default: lua_assert(0); return 0;
++ default: lj_assertX(0, "bad IR op %d", op); return 0;
+ }
+ }
+
+diff --git a/src/lj_ir.h b/src/lj_ir.h
+index 34c27853..2b127f6c 100644
+--- a/src/lj_ir.h
++++ b/src/lj_ir.h
+@@ -1,6 +1,6 @@
+ /*
+ ** SSA IR (Intermediate Representation) format.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_IR_H
+@@ -75,7 +75,6 @@
+ _(NEG, N , ref, ref) \
+ \
+ _(ABS, N , ref, ref) \
+- _(ATAN2, N , ref, ref) \
+ _(LDEXP, N , ref, ref) \
+ _(MIN, C , ref, ref) \
+ _(MAX, C , ref, ref) \
+@@ -96,6 +95,7 @@
+ _(UREFO, LW, ref, lit) \
+ _(UREFC, LW, ref, lit) \
+ _(FREF, R , ref, lit) \
++ _(TMPREF, S , ref, lit) \
+ _(STRREF, N , ref, ref) \
+ _(LREF, L , ___, ___) \
+ \
+@@ -106,7 +106,8 @@
+ _(FLOAD, L , ref, lit) \
+ _(XLOAD, L , ref, lit) \
+ _(SLOAD, L , lit, lit) \
+- _(VLOAD, L , ref, ___) \
++ _(VLOAD, L , ref, lit) \
++ _(ALEN, L , ref, ref) \
+ \
+ _(ASTORE, S , ref, ref) \
+ _(HSTORE, S , ref, ref) \
+@@ -124,8 +125,8 @@
+ \
+ /* Buffer operations. */ \
+ _(BUFHDR, L , ref, lit) \
+- _(BUFPUT, L , ref, ref) \
+- _(BUFSTR, A , ref, ref) \
++ _(BUFPUT, LW, ref, ref) \
++ _(BUFSTR, AW, ref, ref) \
+ \
+ /* Barriers. */ \
+ _(TBAR, S , ref, ___) \
+@@ -133,15 +134,15 @@
+ _(XBAR, S , ___, ___) \
+ \
+ /* Type conversions. */ \
+- _(CONV, NW, ref, lit) \
++ _(CONV, N , ref, lit) \
+ _(TOBIT, N , ref, ref) \
+ _(TOSTR, N , ref, lit) \
+ _(STRTO, N , ref, ___) \
+ \
+ /* Calls. */ \
+- _(CALLN, N , ref, lit) \
+- _(CALLA, A , ref, lit) \
+- _(CALLL, L , ref, lit) \
++ _(CALLN, NW, ref, lit) \
++ _(CALLA, AW, ref, lit) \
++ _(CALLL, LW, ref, lit) \
+ _(CALLS, S , ref, lit) \
+ _(CALLXS, S , ref, ref) \
+ _(CARG, N , ref, ref) \
+@@ -178,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
+ /* FPMATH sub-functions. ORDER FPM. */
+ #define IRFPMDEF(_) \
+ _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
+- _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \
+- _(SIN) _(COS) _(TAN) \
++ _(SQRT) _(LOG) _(LOG2) \
+ _(OTHER)
+
+ typedef enum {
+@@ -205,9 +205,15 @@ IRFPMDEF(FPMENUM)
+ _(UDATA_META, offsetof(GCudata, metatable)) \
+ _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
+ _(UDATA_FILE, sizeof(GCudata)) \
++ _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \
++ _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \
++ _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \
++ _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \
++ _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \
++ _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \
+ _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \
+ _(CDATA_PTR, sizeof(GCcdata)) \
+- _(CDATA_INT, sizeof(GCcdata)) \
++ _(CDATA_INT, sizeof(GCcdata)) \
+ _(CDATA_INT64, sizeof(GCcdata)) \
+ _(CDATA_INT64_4, sizeof(GCcdata) + 4)
+
+@@ -218,6 +224,11 @@ IRFLDEF(FLENUM)
+ IRFL__MAX
+ } IRFieldID;
+
++/* TMPREF mode bits, stored in op2. */
++#define IRTMPREF_IN1 0x01 /* First input value. */
++#define IRTMPREF_OUT1 0x02 /* First output value. */
++#define IRTMPREF_OUT2 0x04 /* Second output value. */
++
+ /* SLOAD mode bits, stored in op2. */
+ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
+ #define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
+@@ -225,15 +236,17 @@ IRFLDEF(FLENUM)
+ #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
+ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
+ #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */
++#define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */
+
+-/* XLOAD mode, stored in op2. */
+-#define IRXLOAD_READONLY 1 /* Load from read-only data. */
+-#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */
+-#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */
++/* XLOAD mode bits, stored in op2. */
++#define IRXLOAD_READONLY 0x01 /* Load from read-only data. */
++#define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */
++#define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. */
+
+ /* BUFHDR mode, stored in op2. */
+ #define IRBUFHDR_RESET 0 /* Reset buffer. */
+ #define IRBUFHDR_APPEND 1 /* Append to buffer. */
++#define IRBUFHDR_WRITE 2 /* Write to string buffer. */
+
+ /* CONV mode, stored in op2. */
+ #define IRCONV_SRCMASK 0x001f /* Source IRType. */
+@@ -250,6 +263,7 @@ IRFLDEF(FLENUM)
+ #define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */
+ #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
+ #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
++#define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */
+
+ /* TOSTR mode, stored in op2. */
+ #define IRTOSTR_INT 0 /* Convert integer to string. */
+@@ -377,10 +391,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
+ #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
+
+ #if LJ_GC64
++/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */
+ #define IRT_IS64 \
+ ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
+ (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
+- (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA))
++ (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\
++ (1u<<IRT_NIL))
+ #elif LJ_64
+ #define IRT_IS64 \
+ ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
+@@ -412,11 +428,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
+
+ static LJ_AINLINE uint32_t irt_toitype_(IRType t)
+ {
+- lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
++ lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD,
++ "no plain type tag for lightuserdata");
+ if (LJ_DUALNUM && t > IRT_NUM) {
+ return LJ_TISNUM;
+ } else {
+- lua_assert(t <= IRT_NUM);
++ lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t);
+ return ~(uint32_t)t;
+ }
+ }
+@@ -479,6 +496,7 @@ typedef uint32_t TRef;
+ #define TREF_REFMASK 0x0000ffff
+ #define TREF_FRAME 0x00010000
+ #define TREF_CONT 0x00020000
++#define TREF_KEYINDEX 0x00100000
+
+ #define TREF(ref, t) ((TRef)((ref) + ((t)<<24)))
+
+@@ -560,6 +578,11 @@ typedef union IRIns {
+ TValue tv; /* TValue constant (overlaps entire slot). */
+ } IRIns;
+
++#define ir_isk64(ir) \
++ ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
++ (LJ_GC64 && \
++ ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)))
++
+ #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
+ #define ir_kstr(ir) (gco2str(ir_kgc((ir))))
+ #define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
+@@ -567,12 +590,7 @@ typedef union IRIns {
+ #define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
+ #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
+ #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
+-#define ir_k64(ir) \
+- check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
+- (LJ_GC64 && \
+- ((ir)->o == IR_KGC || \
+- (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \
+- &(ir)[1].tv)
++#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv)
+ #define ir_kptr(ir) \
+ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
+ mref((ir)[LJ_GC64].ptr, void))
+@@ -585,4 +603,12 @@ static LJ_AINLINE int ir_sideeff(IRIns *ir)
+
+ LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W);
+
++/* Replace IR instruction with NOP. */
++static LJ_AINLINE void lj_ir_nop(IRIns *ir)
++{
++ ir->ot = IRT(IR_NOP, IRT_NIL);
++ ir->op1 = ir->op2 = 0;
++ ir->prev = 0;
++}
++
+ #endif
+diff --git a/src/lj_ircall.h b/src/lj_ircall.h
+index 973c36e6..9e7013ba 100644
+--- a/src/lj_ircall.h
++++ b/src/lj_ircall.h
+@@ -1,6 +1,6 @@
+ /*
+ ** IR CALL* instruction definitions.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_IRCALL_H
+@@ -21,6 +21,7 @@ typedef struct CCallInfo {
+
+ #define CCI_OTSHIFT 16
+ #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
++#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE)
+ #define CCI_OPSHIFT 24
+ #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
+
+@@ -29,10 +30,12 @@ typedef struct CCallInfo {
+ #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
+ #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
+ #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
++#define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL)
+ #define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL)
+ #define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL)
+
+ /* C call info flags. */
++#define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */
+ #define CCI_L 0x0100 /* Implicit L arg. */
+ #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
+ #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
+@@ -51,7 +54,7 @@ typedef struct CCallInfo {
+ #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
+ #define CCI_XA (1u << CCI_XARGS_SHIFT)
+
+-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
+ #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
+ #else
+ #define CCI_XNARGS(ci) CCI_NARGS((ci))
+@@ -78,13 +81,19 @@ typedef struct CCallInfo {
+ #define IRCALLCOND_SOFTFP_FFI(x) NULL
+ #endif
+
+-#if LJ_SOFTFP && LJ_TARGET_MIPS32
++#if LJ_SOFTFP && LJ_TARGET_MIPS
+ #define IRCALLCOND_SOFTFP_MIPS(x) x
+ #else
+ #define IRCALLCOND_SOFTFP_MIPS(x) NULL
+ #endif
+
+-#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32)
++#if LJ_SOFTFP && LJ_TARGET_MIPS64
++#define IRCALLCOND_SOFTFP_MIPS64(x) x
++#else
++#define IRCALLCOND_SOFTFP_MIPS64(x) NULL
++#endif
++
++#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
+
+ #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
+ #define IRCALLCOND_FP64_FFI(x) x
+@@ -104,6 +113,18 @@ typedef struct CCallInfo {
+ #define IRCALLCOND_FFI32(x) NULL
+ #endif
+
++#if LJ_HASBUFFER
++#define IRCALLCOND_BUFFER(x) x
++#else
++#define IRCALLCOND_BUFFER(x) NULL
++#endif
++
++#if LJ_HASBUFFER && LJ_HASFFI
++#define IRCALLCOND_BUFFFI(x) x
++#else
++#define IRCALLCOND_BUFFFI(x) NULL
++#endif
++
+ #if LJ_SOFTFP
+ #define XA_FP CCI_XA
+ #define XA2_FP (CCI_XA+CCI_XA)
+@@ -112,6 +133,14 @@ typedef struct CCallInfo {
+ #define XA2_FP 0
+ #endif
+
++#if LJ_SOFTFP32
++#define XA_FP32 CCI_XA
++#define XA2_FP32 (CCI_XA+CCI_XA)
++#else
++#define XA_FP32 0
++#define XA2_FP32 0
++#endif
++
+ #if LJ_32
+ #define XA_64 CCI_XA
+ #define XA2_64 (CCI_XA+CCI_XA)
+@@ -124,40 +153,57 @@ typedef struct CCallInfo {
+ #define IRCALLDEF(_) \
+ _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
+ _(ANY, lj_str_find, 4, N, PGC, 0) \
+- _(ANY, lj_str_new, 3, S, STR, CCI_L) \
++ _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \
+ _(ANY, lj_strscan_num, 2, FN, INT, 0) \
+- _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
+- _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
+- _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
+- _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \
+- _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \
+- _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \
+- _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \
+- _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \
+- _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \
+- _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \
+- _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \
+- _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \
+- _(ANY, lj_buf_putmem, 3, S, PGC, 0) \
+- _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \
+- _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \
+- _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \
+- _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \
+- _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \
+- _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \
+- _(ANY, lj_buf_puttab, 5, L, PGC, 0) \
+- _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
+- _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \
+- _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
+- _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
++ _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \
++ _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \
++ _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \
++ _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_strfmt_putnum, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \
++ _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \
++ _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \
++ _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \
++ _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \
++ _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \
++ _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \
++ _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \
++ _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \
++ _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \
++ _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \
++ _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \
++ _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \
++ _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \
++ _(BUFFER, lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \
++ _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \
++ _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \
++ _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \
++ _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \
++ _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \
+ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
+- _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \
++ _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \
++ _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \
++ _(ANY, lj_vm_next, 2, FL, PTR, 0) \
+ _(ANY, lj_tab_len, 1, FL, INT, 0) \
++ _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
+ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
+ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
+- _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \
+- _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \
++ _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \
++ _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \
+ _(ANY, lj_vm_modi, 2, FN, INT, 0) \
++ _(ANY, log10, 1, N, NUM, XA_FP) \
++ _(ANY, exp, 1, N, NUM, XA_FP) \
++ _(ANY, sin, 1, N, NUM, XA_FP) \
++ _(ANY, cos, 1, N, NUM, XA_FP) \
++ _(ANY, tan, 1, N, NUM, XA_FP) \
++ _(ANY, asin, 1, N, NUM, XA_FP) \
++ _(ANY, acos, 1, N, NUM, XA_FP) \
++ _(ANY, atan, 1, N, NUM, XA_FP) \
+ _(ANY, sinh, 1, N, NUM, XA_FP) \
+ _(ANY, cosh, 1, N, NUM, XA_FP) \
+ _(ANY, tanh, 1, N, NUM, XA_FP) \
+@@ -169,32 +215,27 @@ typedef struct CCallInfo {
+ _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
+ _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
+ _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
+- _(ANY, exp, 1, N, NUM, XA_FP) \
+- _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \
+ _(ANY, log, 1, N, NUM, XA_FP) \
+ _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
+- _(ANY, log10, 1, N, NUM, XA_FP) \
+- _(ANY, sin, 1, N, NUM, XA_FP) \
+- _(ANY, cos, 1, N, NUM, XA_FP) \
+- _(ANY, tan, 1, N, NUM, XA_FP) \
+ _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
+ _(ANY, pow, 2, N, NUM, XA2_FP) \
+ _(ANY, atan2, 2, N, NUM, XA2_FP) \
+ _(ANY, ldexp, 2, N, NUM, XA_FP) \
+- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
+- _(SOFTFP, softfp_add, 4, N, NUM, 0) \
+- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
+- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \
+- _(SOFTFP, softfp_div, 4, N, NUM, 0) \
+- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \
++ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
++ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \
++ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \
++ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \
++ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \
++ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \
+ _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \
+- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \
+- _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \
+- _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \
++ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \
++ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \
++ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \
++ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
+ _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
+ _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
+- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \
+- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \
++ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
++ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
+ _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
+ _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
+ _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
+@@ -272,7 +313,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
+ #define fp64_f2l __aeabi_f2lz
+ #define fp64_f2ul __aeabi_f2ulz
+ #endif
+-#elif LJ_TARGET_MIPS
++#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
+ #define softfp_add __adddf3
+ #define softfp_sub __subdf3
+ #define softfp_mul __muldf3
+@@ -315,7 +356,7 @@ extern double lj_vm_sfmax(double a, double b);
+ #endif
+
+ #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)
+-#ifdef __GNUC__
++#if defined(__GNUC__) || defined(__clang__)
+ #define fp64_l2d __floatdidf
+ #define fp64_ul2d __floatundidf
+ #define fp64_l2f __floatdisf
+diff --git a/src/lj_iropt.h b/src/lj_iropt.h
+index 73aef0ef..0541090d 100644
+--- a/src/lj_iropt.h
++++ b/src/lj_iropt.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Common header for IR emitter and optimizations.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_IROPT_H
+@@ -120,10 +120,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
+ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
+ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
+ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
+-LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
++LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
+ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
+ LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
+ LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
++LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim);
+ LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
+
+ /* Dead-store elimination. */
+@@ -150,7 +151,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
+ /* Optimization passes. */
+ LJ_FUNC void lj_opt_dce(jit_State *J);
+ LJ_FUNC int lj_opt_loop(jit_State *J);
+-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
+ LJ_FUNC void lj_opt_split(jit_State *J);
+ #else
+ #define lj_opt_split(J) UNUSED(J)
+diff --git a/src/lj_jit.h b/src/lj_jit.h
+index 92054e3d..c9fe8319 100644
+--- a/src/lj_jit.h
++++ b/src/lj_jit.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Common definitions for the JIT compiler.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_JIT_H
+@@ -9,73 +9,85 @@
+ #include "lj_obj.h"
+ #include "lj_ir.h"
+
+-/* JIT engine flags. */
++/* -- JIT engine flags ---------------------------------------------------- */
++
++/* General JIT engine flags. 4 bits. */
+ #define JIT_F_ON 0x00000001
+
+-/* CPU-specific JIT engine flags. */
++/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
++#define JIT_F_CPU 0x00000010
++
+ #if LJ_TARGET_X86ORX64
+-#define JIT_F_SSE2 0x00000010
+-#define JIT_F_SSE3 0x00000020
+-#define JIT_F_SSE4_1 0x00000040
+-#define JIT_F_PREFER_IMUL 0x00000080
+-#define JIT_F_LEA_AGU 0x00000100
+-#define JIT_F_BMI2 0x00000200
+-
+-/* Names for the CPU-specific flags. Must match the order above. */
+-#define JIT_F_CPU_FIRST JIT_F_SSE2
+-#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2"
++
++#define JIT_F_SSE3 (JIT_F_CPU << 0)
++#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
++#define JIT_F_BMI2 (JIT_F_CPU << 2)
++
++
++#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
++
+ #elif LJ_TARGET_ARM
+-#define JIT_F_ARMV6_ 0x00000010
+-#define JIT_F_ARMV6T2_ 0x00000020
+-#define JIT_F_ARMV7 0x00000040
+-#define JIT_F_VFPV2 0x00000080
+-#define JIT_F_VFPV3 0x00000100
+-
+-#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7)
+-#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7)
++
++#define JIT_F_ARMV6_ (JIT_F_CPU << 0)
++#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1)
++#define JIT_F_ARMV7 (JIT_F_CPU << 2)
++#define JIT_F_ARMV8 (JIT_F_CPU << 3)
++#define JIT_F_VFPV2 (JIT_F_CPU << 4)
++#define JIT_F_VFPV3 (JIT_F_CPU << 5)
++
++#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
++#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
+ #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3)
+
+-/* Names for the CPU-specific flags. Must match the order above. */
+-#define JIT_F_CPU_FIRST JIT_F_ARMV6_
+-#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
++#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"
++
+ #elif LJ_TARGET_PPC
+-#define JIT_F_SQRT 0x00000010
+-#define JIT_F_ROUND 0x00000020
+
+-/* Names for the CPU-specific flags. Must match the order above. */
+-#define JIT_F_CPU_FIRST JIT_F_SQRT
++#define JIT_F_SQRT (JIT_F_CPU << 0)
++#define JIT_F_ROUND (JIT_F_CPU << 1)
++
+ #define JIT_F_CPUSTRING "\4SQRT\5ROUND"
++
+ #elif LJ_TARGET_MIPS
+-#define JIT_F_MIPSXXR2 0x00000010
+
+-/* Names for the CPU-specific flags. Must match the order above. */
+-#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2
++#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0)
++
+ #if LJ_TARGET_MIPS32
++#if LJ_TARGET_MIPSR6
++#define JIT_F_CPUSTRING "\010MIPS32R6"
++#else
+ #define JIT_F_CPUSTRING "\010MIPS32R2"
++#endif
++#else
++#if LJ_TARGET_MIPSR6
++#define JIT_F_CPUSTRING "\010MIPS64R6"
+ #else
+ #define JIT_F_CPUSTRING "\010MIPS64R2"
+ #endif
++#endif
++
+ #else
+-#define JIT_F_CPU_FIRST 0
++
+ #define JIT_F_CPUSTRING ""
++
+ #endif
+
+-/* Optimization flags. */
++/* Optimization flags. 12 bits. */
++#define JIT_F_OPT 0x00010000
+ #define JIT_F_OPT_MASK 0x0fff0000
+
+-#define JIT_F_OPT_FOLD 0x00010000
+-#define JIT_F_OPT_CSE 0x00020000
+-#define JIT_F_OPT_DCE 0x00040000
+-#define JIT_F_OPT_FWD 0x00080000
+-#define JIT_F_OPT_DSE 0x00100000
+-#define JIT_F_OPT_NARROW 0x00200000
+-#define JIT_F_OPT_LOOP 0x00400000
+-#define JIT_F_OPT_ABC 0x00800000
+-#define JIT_F_OPT_SINK 0x01000000
+-#define JIT_F_OPT_FUSE 0x02000000
++#define JIT_F_OPT_FOLD (JIT_F_OPT << 0)
++#define JIT_F_OPT_CSE (JIT_F_OPT << 1)
++#define JIT_F_OPT_DCE (JIT_F_OPT << 2)
++#define JIT_F_OPT_FWD (JIT_F_OPT << 3)
++#define JIT_F_OPT_DSE (JIT_F_OPT << 4)
++#define JIT_F_OPT_NARROW (JIT_F_OPT << 5)
++#define JIT_F_OPT_LOOP (JIT_F_OPT << 6)
++#define JIT_F_OPT_ABC (JIT_F_OPT << 7)
++#define JIT_F_OPT_SINK (JIT_F_OPT << 8)
++#define JIT_F_OPT_FUSE (JIT_F_OPT << 9)
+
+ /* Optimizations names for -O. Must match the order above. */
+-#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
+ #define JIT_F_OPTSTRING \
+ "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
+
+@@ -87,6 +99,8 @@
+ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
+ #define JIT_F_OPT_DEFAULT JIT_F_OPT_3
+
++/* -- JIT engine parameters ----------------------------------------------- */
++
+ #if LJ_TARGET_WINDOWS || LJ_64
+ /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
+ #define JIT_P_sizemcode_DEFAULT 64
+@@ -129,11 +143,14 @@ JIT_PARAMDEF(JIT_PARAMENUM)
+ #define JIT_PARAMSTR(len, name, value) #len #name
+ #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
+
++/* -- JIT engine data structures ------------------------------------------ */
++
+ /* Trace compiler state. */
+ typedef enum {
+ LJ_TRACE_IDLE, /* Trace compiler idle. */
+ LJ_TRACE_ACTIVE = 0x10,
+ LJ_TRACE_RECORD, /* Bytecode recording active. */
++ LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */
+ LJ_TRACE_START, /* New trace started. */
+ LJ_TRACE_END, /* End of trace. */
+ LJ_TRACE_ASM, /* Assemble trace. */
+@@ -158,10 +175,17 @@ typedef uint8_t MCode;
+ typedef uint32_t MCode;
+ #endif
+
++/* Linked list of MCode areas. */
++typedef struct MCLink {
++ MCode *next; /* Next area. */
++ size_t size; /* Size of current area. */
++} MCLink;
++
+ /* Stack snapshot header. */
+ typedef struct SnapShot {
+- uint16_t mapofs; /* Offset into snapshot map. */
++ uint32_t mapofs; /* Offset into snapshot map. */
+ IRRef1 ref; /* First IR ref for this snapshot. */
++ uint16_t mcofs; /* Offset into machine code in MCode units. */
+ uint8_t nslots; /* Number of valid slots. */
+ uint8_t topslot; /* Maximum frame extent. */
+ uint8_t nent; /* Number of compressed entries. */
+@@ -177,12 +201,15 @@ typedef uint32_t SnapEntry;
+ #define SNAP_CONT 0x020000 /* Continuation slot. */
+ #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */
+ #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */
++#define SNAP_KEYINDEX 0x100000 /* Traversal key index. */
+ LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);
+ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
++LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX);
+
+ #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
+ #define SNAP_TR(slot, tr) \
+- (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
++ (((SnapEntry)(slot) << 24) + \
++ ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK)))
+ #if !LJ_FR2
+ #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
+ #endif
+@@ -227,8 +254,7 @@ typedef enum {
+ /* Trace object. */
+ typedef struct GCtrace {
+ GCHeader;
+- uint8_t topslot; /* Top stack slot already checked to be allocated. */
+- uint8_t linktype; /* Type of link. */
++ uint16_t nsnap; /* Number of snapshots. */
+ IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
+ #if LJ_GC64
+ uint32_t unused_gc64;
+@@ -236,8 +262,7 @@ typedef struct GCtrace {
+ GCRef gclist;
+ IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
+ IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
+- uint16_t nsnap; /* Number of snapshots. */
+- uint16_t nsnapmap; /* Number of snapshot map elements. */
++ uint32_t nsnapmap; /* Number of snapshot map elements. */
+ SnapShot *snap; /* Snapshot array. */
+ SnapEntry *snapmap; /* Snapshot map. */
+ GCRef startpt; /* Starting prototype. */
+@@ -254,6 +279,8 @@ typedef struct GCtrace {
+ TraceNo1 nextroot; /* Next root trace for same prototype. */
+ TraceNo1 nextside; /* Next side trace of same root trace. */
+ uint8_t sinktags; /* Trace has SINK tags. */
++ uint8_t topslot; /* Top stack slot already checked to be allocated. */
++ uint8_t linktype; /* Type of link. */
+ uint8_t unused1;
+ #ifdef LUAJIT_USE_GDBJIT
+ void *gdbjit_entry; /* GDB JIT entry. */
+@@ -368,7 +395,7 @@ enum {
+ ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
+
+ /* Set/reset flag to activate the SPLIT pass for the current trace. */
+-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
+ #define lj_needsplit(J) (J->needsplit = 1)
+ #define lj_resetsplit(J) (J->needsplit = 0)
+ #else
+@@ -416,9 +443,9 @@ typedef struct jit_State {
+ int32_t framedepth; /* Current frame depth. */
+ int32_t retdepth; /* Return frame depth (count of RETF). */
+
++ uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
+ TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
+- TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */
+- uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */
++ TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */
+
+ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
+ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */
+@@ -431,7 +458,7 @@ typedef struct jit_State {
+ MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
+
+ PostProc postproc; /* Required post-processing after execution. */
+-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
++#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
+ uint8_t needsplit; /* Need SPLIT pass. */
+ #endif
+ uint8_t retryrec; /* Retry recording. */
+@@ -450,7 +477,6 @@ typedef struct jit_State {
+
+ HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
+ uint32_t penaltyslot; /* Round-robin index into penalty slots. */
+- uint32_t prngstate; /* PRNG state. */
+
+ #ifdef LUAJIT_ENABLE_TABLE_BUMP
+ RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
+@@ -464,6 +490,7 @@ typedef struct jit_State {
+ const BCIns *startpc; /* Bytecode PC of starting instruction. */
+ TraceNo parent; /* Parent of current side trace (0 for root traces). */
+ ExitNo exitno; /* Exit number in parent of current side trace. */
++ int exitcode; /* Exit code from unwound trace. */
+
+ BCIns *patchpc; /* PC for pending re-patch. */
+ BCIns patchins; /* Instruction for pending re-patch. */
+@@ -482,18 +509,12 @@ typedef struct jit_State {
+ BCLine prev_line; /* Previous line. */
+ int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
+ #endif
+-}
+-#if LJ_TARGET_ARM
+-LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */
+-#endif
+-jit_State;
++} jit_State;
+
+-/* Trivial PRNG e.g. used for penalty randomization. */
+-static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits)
+-{
+- /* Yes, this LCG is very weak, but that doesn't matter for our use case. */
+- J->prngstate = J->prngstate * 1103515245 + 12345;
+- return J->prngstate >> (32-bits);
+-}
++#ifdef LUA_USE_ASSERT
++#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__)
++#else
++#define lj_assertJ(c, ...) ((void)J)
++#endif
+
+ #endif
+diff --git a/src/lj_lex.c b/src/lj_lex.c
+index 2d2f8194..cc6fa533 100644
+--- a/src/lj_lex.c
++++ b/src/lj_lex.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Lexical analyzer.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -48,6 +48,12 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls)
+ size_t sz;
+ const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
+ if (p == NULL || sz == 0) return LEX_EOF;
++ if (sz >= LJ_MAX_BUF) {
++ if (sz != ~(size_t)0) lj_err_mem(ls->L);
++ sz = ~(uintptr_t)0 - (uintptr_t)p;
++ if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1;
++ ls->endmark = 1;
++ }
+ ls->pe = p + sz;
+ ls->p = p + 1;
+ return (LexChar)(uint8_t)p[0];
+@@ -76,7 +82,7 @@ static LJ_AINLINE LexChar lex_savenext(LexState *ls)
+ static void lex_newline(LexState *ls)
+ {
+ LexChar old = ls->c;
+- lua_assert(lex_iseol(ls));
++ lj_assertLS(lex_iseol(ls), "bad usage");
+ lex_next(ls); /* Skip "\n" or "\r". */
+ if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
+ if (++ls->linenumber >= LJ_MAX_LINE)
+@@ -90,7 +96,7 @@ static void lex_number(LexState *ls, TValue *tv)
+ {
+ StrScanFmt fmt;
+ LexChar c, xp = 'e';
+- lua_assert(lj_char_isdigit(ls->c));
++ lj_assertLS(lj_char_isdigit(ls->c), "bad usage");
+ if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
+ xp = 'p';
+ while (lj_char_isident(ls->c) || ls->c == '.' ||
+@@ -99,7 +105,7 @@ static void lex_number(LexState *ls, TValue *tv)
+ lex_savenext(ls);
+ }
+ lex_save(ls, '\0');
+- fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
++ fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv,
+ (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
+ (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
+ if (LJ_DUALNUM && fmt == STRSCAN_INT) {
+@@ -110,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv)
+ } else if (fmt != STRSCAN_ERROR) {
+ lua_State *L = ls->L;
+ GCcdata *cd;
+- lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
+- if (!ctype_ctsG(G(L))) {
+- ptrdiff_t oldtop = savestack(L, L->top);
+- luaopen_ffi(L); /* Load FFI library on-demand. */
+- L->top = restorestack(L, oldtop);
+- }
++ lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG,
++ "unexpected number format %d", fmt);
++ ctype_loadffi(L);
+ if (fmt == STRSCAN_IMAG) {
+ cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
+ ((double *)cdataptr(cd))[0] = 0;
+@@ -127,7 +130,8 @@ static void lex_number(LexState *ls, TValue *tv)
+ lj_parse_keepcdata(ls, tv, cd);
+ #endif
+ } else {
+- lua_assert(fmt == STRSCAN_ERROR);
++ lj_assertLS(fmt == STRSCAN_ERROR,
++ "unexpected number format %d", fmt);
+ lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
+ }
+ }
+@@ -137,8 +141,8 @@ static int lex_skipeq(LexState *ls)
+ {
+ int count = 0;
+ LexChar s = ls->c;
+- lua_assert(s == '[' || s == ']');
+- while (lex_savenext(ls) == '=')
++ lj_assertLS(s == '[' || s == ']', "bad usage");
++ while (lex_savenext(ls) == '=' && count < 0x20000000)
+ count++;
+ return (ls->c == s) ? count : (-count) - 1;
+ }
+@@ -172,7 +176,7 @@ static void lex_longstring(LexState *ls, TValue *tv, int sep)
+ }
+ } endloop:
+ if (tv) {
+- GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
++ GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep),
+ sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
+ setstrV(ls->L, tv, str);
+ }
+@@ -278,7 +282,7 @@ static void lex_string(LexState *ls, TValue *tv)
+ }
+ lex_savenext(ls); /* Skip trailing delimiter. */
+ setstrV(ls->L, tv,
+- lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
++ lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2));
+ }
+
+ /* -- Main lexical scanner ------------------------------------------------ */
+@@ -298,7 +302,7 @@ static LexToken lex_scan(LexState *ls, TValue *tv)
+ do {
+ lex_savenext(ls);
+ } while (lj_char_isident(ls->c));
+- s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
++ s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb));
+ setstrV(ls->L, tv, s);
+ if (s->reserved > 0) /* Reserved word? */
+ return TK_OFS + s->reserved;
+@@ -406,6 +410,7 @@ int lj_lex_setup(lua_State *L, LexState *ls)
+ ls->lookahead = TK_eof; /* No look-ahead token. */
+ ls->linenumber = 1;
+ ls->lastline = 1;
++ ls->endmark = 0;
+ lex_next(ls); /* Read-ahead first char. */
+ if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
+ (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
+@@ -462,7 +467,7 @@ void lj_lex_next(LexState *ls)
+ /* Look ahead for the next token. */
+ LexToken lj_lex_lookahead(LexState *ls)
+ {
+- lua_assert(ls->lookahead == TK_eof);
++ lj_assertLS(ls->lookahead == TK_eof, "double lookahead");
+ ls->lookahead = lex_scan(ls, &ls->lookaheadval);
+ return ls->lookahead;
+ }
+@@ -487,7 +492,7 @@ void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
+ tokstr = NULL;
+ } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
+ lex_save(ls, '\0');
+- tokstr = sbufB(&ls->sb);
++ tokstr = ls->sb.b;
+ } else {
+ tokstr = lj_lex_token2str(ls, tok);
+ }
+diff --git a/src/lj_lex.h b/src/lj_lex.h
+index 33fa8657..d2230b6a 100644
+--- a/src/lj_lex.h
++++ b/src/lj_lex.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Lexical analyzer.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_LEX_H
+@@ -73,6 +73,7 @@ typedef struct LexState {
+ BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */
+ MSize sizebcstack; /* Size of bytecode stack. */
+ uint32_t level; /* Syntactical nesting level. */
++ int endmark; /* Trust bytecode end marker, even if not at EOF. */
+ } LexState;
+
+ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
+@@ -83,4 +84,10 @@ LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
+ LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
+ LJ_FUNC void lj_lex_init(lua_State *L);
+
++#ifdef LUA_USE_ASSERT
++#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__))
++#else
++#define lj_assertLS(c, ...) ((void)ls)
++#endif
++
+ #endif
+diff --git a/src/lj_lib.c b/src/lj_lib.c
+index b8638de6..438056d8 100644
+--- a/src/lj_lib.c
++++ b/src/lj_lib.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Library function support.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_lib_c
+@@ -16,6 +16,9 @@
+ #include "lj_func.h"
+ #include "lj_bc.h"
+ #include "lj_dispatch.h"
++#if LJ_HASFFI
++#include "lj_ctype.h"
++#endif
+ #include "lj_vm.h"
+ #include "lj_strscan.h"
+ #include "lj_strfmt.h"
+@@ -301,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
+ return def;
+ }
+
++/* -- Strict type checks -------------------------------------------------- */
++
++/* The following type checks do not coerce between strings and numbers.
++** And they handle plain int64_t/uint64_t FFI numbers, too.
++*/
++
++#if LJ_HASBUFFER
++GCstr *lj_lib_checkstrx(lua_State *L, int narg)
++{
++ TValue *o = L->base + narg-1;
++ if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING);
++ return strV(o);
++}
++
++int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
++{
++ TValue *o = L->base + narg-1;
++ lj_assertL(b >= 0, "expected range must be non-negative");
++ if (o < L->top) {
++ if (LJ_LIKELY(tvisint(o))) {
++ int32_t i = intV(o);
++ if (i >= a && i <= b) return i;
++ } else if (LJ_LIKELY(tvisnum(o))) {
++ /* For performance reasons, this doesn't check for integerness or
++ ** integer overflow. Overflow detection still works, since all FPUs
++ ** return either MININT or MAXINT, which is then out of range.
++ */
++ int32_t i = (int32_t)numV(o);
++ if (i >= a && i <= b) return i;
++#if LJ_HASFFI
++ } else if (tviscdata(o)) {
++ GCcdata *cd = cdataV(o);
++ if (cd->ctypeid == CTID_INT64) {
++ int64_t i = *(int64_t *)cdataptr(cd);
++ if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i;
++ } else if (cd->ctypeid == CTID_UINT64) {
++ uint64_t i = *(uint64_t *)cdataptr(cd);
++ if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i;
++ } else {
++ goto badtype;
++ }
++#endif
++ } else {
++ goto badtype;
++ }
++ lj_err_arg(L, narg, LJ_ERR_NUMRNG);
++ }
++badtype:
++ lj_err_argt(L, narg, LUA_TNUMBER);
++ return 0; /* unreachable */
++}
++#endif
++
+diff --git a/src/lj_lib.h b/src/lj_lib.h
+index 37ec9d78..f59e9ea2 100644
+--- a/src/lj_lib.h
++++ b/src/lj_lib.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Library function support.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_LIB_H
+@@ -46,6 +46,12 @@ LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
+ LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
+ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
+
++#if LJ_HASBUFFER
++LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);
++LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg,
++ int32_t a, int32_t b);
++#endif
++
+ /* Avoid including lj_frame.h. */
+ #if LJ_GC64
+ #define lj_lib_upvalue(L, n) \
+@@ -107,9 +113,4 @@ LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
+ #define LIBINIT_FFID 0xfe
+ #define LIBINIT_END 0xff
+
+-/* Exported library functions. */
+-
+-typedef struct RandomState RandomState;
+-LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
+-
+ #endif
+diff --git a/src/lj_load.c b/src/lj_load.c
+index 9a31d9a1..af0c2b1f 100644
+--- a/src/lj_load.c
++++ b/src/lj_load.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Load and dump code.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include <errno.h>
+@@ -159,7 +159,7 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s)
+ LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
+ {
+ cTValue *o = L->top-1;
+- api_check(L, L->top > L->base);
++ lj_checkapi(L->top > L->base, "top slot empty");
+ if (tvisfunc(o) && isluafunc(funcV(o)))
+ return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0);
+ else
+diff --git a/src/lj_mcode.c b/src/lj_mcode.c
+index 77035bf7..b3efbc55 100644
+--- a/src/lj_mcode.c
++++ b/src/lj_mcode.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Machine code management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_mcode_c
+@@ -14,6 +14,7 @@
+ #include "lj_mcode.h"
+ #include "lj_trace.h"
+ #include "lj_dispatch.h"
++#include "lj_prng.h"
+ #endif
+ #if LJ_HASJIT || LJ_HASFFI
+ #include "lj_vm.h"
+@@ -44,7 +45,7 @@ void lj_mcode_sync(void *start, void *end)
+ sys_icache_invalidate(start, (char *)end-(char *)start);
+ #elif LJ_TARGET_PPC
+ lj_vm_cachesync(start, end);
+-#elif defined(__GNUC__)
++#elif defined(__GNUC__) || defined(__clang__)
+ __clear_cache(start, end);
+ #else
+ #error "Missing builtin to flush instruction cache"
+@@ -66,8 +67,8 @@ void lj_mcode_sync(void *start, void *end)
+
+ static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
+ {
+- void *p = VirtualAlloc((void *)hint, sz,
+- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
++ void *p = LJ_WIN_VALLOC((void *)hint, sz,
++ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
+ if (!p && !hint)
+ lj_trace_err(J, LJ_TRERR_MCODEAL);
+ return p;
+@@ -82,7 +83,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
+ static int mcode_setprot(void *p, size_t sz, DWORD prot)
+ {
+ DWORD oprot;
+- return !VirtualProtect(p, sz, prot, &oprot);
++ return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
+ }
+
+ #elif LJ_TARGET_POSIX
+@@ -96,10 +97,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
+ #define MCPROT_RW (PROT_READ|PROT_WRITE)
+ #define MCPROT_RX (PROT_READ|PROT_EXEC)
+ #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC)
++#ifdef PROT_MPROTECT
++#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX))
++#else
++#define MCPROT_CREATE 0
++#endif
+
+ static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
+ {
+- void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
++ void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (p == MAP_FAILED) {
+ if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
+ p = NULL;
+@@ -118,52 +124,34 @@ static int mcode_setprot(void *p, size_t sz, int prot)
+ return mprotect(p, sz, prot);
+ }
+
+-#elif LJ_64
+-
+-#error "Missing OS support for explicit placement of executable memory"
+-
+ #else
+
+-/* Fallback allocator. This will fail if memory is not executable by default. */
+-#define LUAJIT_UNPROTECT_MCODE
+-#define MCPROT_RW 0
+-#define MCPROT_RX 0
+-#define MCPROT_RWX 0
+-
+-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
+-{
+- UNUSED(hint); UNUSED(prot);
+- return lj_mem_new(J->L, sz);
+-}
+-
+-static void mcode_free(jit_State *J, void *p, size_t sz)
+-{
+- lj_mem_free(J2G(J), p, sz);
+-}
++#error "Missing OS support for explicit placement of executable memory"
+
+ #endif
+
+ /* -- MCode area protection ----------------------------------------------- */
+
+-/* Define this ONLY if page protection twiddling becomes a bottleneck. */
+-#ifdef LUAJIT_UNPROTECT_MCODE
++#if LUAJIT_SECURITY_MCODE == 0
+
+-/* It's generally considered to be a potential security risk to have
++/* Define this ONLY if page protection twiddling becomes a bottleneck.
++**
++** It's generally considered to be a potential security risk to have
+ ** pages with simultaneous write *and* execute access in a process.
+ **
+ ** Do not even think about using this mode for server processes or
+-** apps handling untrusted external data (such as a browser).
++** apps handling untrusted external data.
+ **
+ ** The security risk is not in LuaJIT itself -- but if an adversary finds
+-** any *other* flaw in your C application logic, then any RWX memory page
+-** simplifies writing an exploit considerably.
++** any *other* flaw in your C application logic, then any RWX memory pages
++** simplify writing an exploit considerably.
+ */
+ #define MCPROT_GEN MCPROT_RWX
+ #define MCPROT_RUN MCPROT_RWX
+
+ static void mcode_protect(jit_State *J, int prot)
+ {
+- UNUSED(J); UNUSED(prot);
++ UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot);
+ }
+
+ #else
+@@ -242,7 +230,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
+ }
+ /* Next try probing 64K-aligned pseudo-random addresses. */
+ do {
+- hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16;
++ hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
+ } while (!(hint + sz < range+range));
+ hint = target + hint - range;
+ }
+@@ -255,7 +243,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
+ /* All memory addresses are reachable by relative jumps. */
+ static void *mcode_alloc(jit_State *J, size_t sz)
+ {
+-#ifdef __OpenBSD__
++#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
+ /* Allow better executable memory allocation for OpenBSD W^X mode. */
+ void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
+ if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
+@@ -272,12 +260,6 @@ static void *mcode_alloc(jit_State *J, size_t sz)
+
+ /* -- MCode area management ----------------------------------------------- */
+
+-/* Linked list of MCode areas. */
+-typedef struct MCLink {
+- MCode *next; /* Next area. */
+- size_t size; /* Size of current area. */
+-} MCLink;
+-
+ /* Allocate a new MCode area. */
+ static void mcode_allocarea(jit_State *J)
+ {
+@@ -292,6 +274,7 @@ static void mcode_allocarea(jit_State *J)
+ ((MCLink *)J->mcarea)->next = oldarea;
+ ((MCLink *)J->mcarea)->size = sz;
+ J->szallmcarea += sz;
++ J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot);
+ }
+
+ /* Free all MCode areas. */
+@@ -302,7 +285,9 @@ void lj_mcode_free(jit_State *J)
+ J->szallmcarea = 0;
+ while (mc) {
+ MCode *next = ((MCLink *)mc)->next;
+- mcode_free(J, mc, ((MCLink *)mc)->size);
++ size_t sz = ((MCLink *)mc)->size;
++ lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
++ mcode_free(J, mc, sz);
+ mc = next;
+ }
+ }
+@@ -337,35 +322,36 @@ void lj_mcode_abort(jit_State *J)
+ /* Set/reset protection to allow patching of MCode areas. */
+ MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
+ {
+-#ifdef LUAJIT_UNPROTECT_MCODE
+- UNUSED(J); UNUSED(ptr); UNUSED(finish);
+- return NULL;
+-#else
+ if (finish) {
++#if LUAJIT_SECURITY_MCODE
+ if (J->mcarea == ptr)
+ mcode_protect(J, MCPROT_RUN);
+ else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
+ mcode_protfail(J);
++#endif
+ return NULL;
+ } else {
+ MCode *mc = J->mcarea;
+ /* Try current area first to use the protection cache. */
+ if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
++#if LUAJIT_SECURITY_MCODE
+ mcode_protect(J, MCPROT_GEN);
++#endif
+ return mc;
+ }
+ /* Otherwise search through the list of MCode areas. */
+ for (;;) {
+ mc = ((MCLink *)mc)->next;
+- lua_assert(mc != NULL);
++ lj_assertJ(mc != NULL, "broken MCode area chain");
+ if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
++#if LUAJIT_SECURITY_MCODE
+ if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
+ mcode_protfail(J);
++#endif
+ return mc;
+ }
+ }
+ }
+-#endif
+ }
+
+ /* Limit of MCode reservation reached. */
+diff --git a/src/lj_mcode.h b/src/lj_mcode.h
+index f0847e93..caaaec61 100644
+--- a/src/lj_mcode.h
++++ b/src/lj_mcode.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Machine code management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_MCODE_H
+diff --git a/src/lj_meta.c b/src/lj_meta.c
+index 0bd4d842..660dfec0 100644
+--- a/src/lj_meta.c
++++ b/src/lj_meta.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Metamethod handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -47,7 +47,7 @@ void lj_meta_init(lua_State *L)
+ cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name)
+ {
+ cTValue *mo = lj_tab_getstr(mt, name);
+- lua_assert(mm <= MM_FAST);
++ lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm);
+ if (!mo || tvisnil(mo)) { /* No metamethod? */
+ mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */
+ return NULL;
+@@ -86,8 +86,8 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
+ else
+ top->u32.lo = LJ_CONT_TAILCALL;
+ setframe_pc(top++, pc);
+- if (LJ_FR2) top++;
+ setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */
++ if (LJ_FR2) top++;
+ setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
+ L->base = L->top = top+1;
+ /*
+@@ -240,8 +240,8 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
+ int fromc = 0;
+ if (left < 0) { left = -left; fromc = 1; }
+ do {
+- if (!(tvisstr(top) || tvisnumber(top)) ||
+- !(tvisstr(top-1) || tvisnumber(top-1))) {
++ if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) ||
++ !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) {
+ cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
+ if (tvisnil(mo)) {
+ mo = lj_meta_lookup(L, top, MM_concat);
+@@ -277,10 +277,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
+ ** next step: [...][CAT stack ............]
+ */
+ TValue *e, *o = top;
+- uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
++ uint64_t tlen = tvisstr(o) ? strV(o)->len :
++ tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
+ SBuf *sb;
+ do {
+- o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
++ o--; tlen += tvisstr(o) ? strV(o)->len :
++ tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
+ } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
+ if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
+ sb = lj_buf_tmp_(L);
+@@ -290,6 +292,9 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
+ GCstr *s = strV(o);
+ MSize len = s->len;
+ lj_buf_putmem(sb, strdata(s), len);
++ } else if (tvisbuf(o)) {
++ SBufExt *sbx = bufV(o);
++ lj_buf_putmem(sb, sbx->r, sbufxlen(sbx));
+ } else if (tvisint(o)) {
+ lj_strfmt_putint(sb, intV(o));
+ } else {
+@@ -363,7 +368,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
+ } else if (op == BC_ISEQN) {
+ o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
+ } else {
+- lua_assert(op == BC_ISEQP);
++ lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op);
+ setpriV(&tv, ~bc_d(ins));
+ o2 = &tv;
+ }
+@@ -426,7 +431,7 @@ void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
+ {
+ L->top = curr_topL(L);
+ ra++; tp--;
+- lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */
++ lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE");
+ if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
+ else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
+ else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
+diff --git a/src/lj_meta.h b/src/lj_meta.h
+index 73b45724..400a1d74 100644
+--- a/src/lj_meta.h
++++ b/src/lj_meta.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Metamethod handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_META_H
+diff --git a/src/lj_obj.c b/src/lj_obj.c
+index ee33aeb3..a2c3dc5b 100644
+--- a/src/lj_obj.c
++++ b/src/lj_obj.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Miscellaneous object handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_obj_c
+@@ -34,12 +34,13 @@ int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
+ }
+
+ /* Return pointer to object or its object data. */
+-const void * LJ_FASTCALL lj_obj_ptr(cTValue *o)
++const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o)
+ {
++ UNUSED(g);
+ if (tvisudata(o))
+ return uddata(udataV(o));
+ else if (tvislightud(o))
+- return lightudV(o);
++ return lightudV(g, o);
+ else if (LJ_HASFFI && tviscdata(o))
+ return cdataptr(cdataV(o));
+ else if (tvisgcv(o))
+diff --git a/src/lj_obj.h b/src/lj_obj.h
+index 52372c3e..1a6445fc 100644
+--- a/src/lj_obj.h
++++ b/src/lj_obj.h
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM tags, values and objects.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -13,7 +13,7 @@
+ #include "lj_def.h"
+ #include "lj_arch.h"
+
+-/* -- Memory references (32 bit address space) ---------------------------- */
++/* -- Memory references --------------------------------------------------- */
+
+ /* Memory and GC object sizes. */
+ typedef uint32_t MSize;
+@@ -34,17 +34,21 @@ typedef struct MRef {
+
+ #if LJ_GC64
+ #define mref(r, t) ((t *)(void *)(r).ptr64)
++#define mrefu(r) ((r).ptr64)
+
+ #define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p))
++#define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u))
+ #define setmrefr(r, v) ((r).ptr64 = (v).ptr64)
+ #else
+ #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
++#define mrefu(r) ((r).ptr32)
+
+ #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
++#define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u))
+ #define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
+ #endif
+
+-/* -- GC object references (32 bit address space) ------------------------- */
++/* -- GC object references ------------------------------------------------ */
+
+ /* GCobj reference */
+ typedef struct GCRef {
+@@ -153,11 +157,9 @@ typedef int32_t BCLine; /* Bytecode line number. */
+ typedef void (*ASMFunction)(void);
+
+ /* Resizable string buffer. Need this here, details in lj_buf.h. */
++#define SBufHeader char *w, *e, *b; MRef L
+ typedef struct SBuf {
+- MRef p; /* String buffer pointer. */
+- MRef e; /* String buffer end pointer. */
+- MRef b; /* String buffer base. */
+- MRef L; /* lua_State, used for buffer resizing. */
++ SBufHeader;
+ } SBuf;
+
+ /* -- Tags and values ----------------------------------------------------- */
+@@ -232,7 +234,7 @@ typedef const TValue cTValue;
+ ** ---MSW---.---LSW---
+ ** primitive types | itype | |
+ ** lightuserdata | itype | void * | (32 bit platforms)
+-** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers)
++** lightuserdata |ffff|seg| ofs | (64 bit platforms)
+ ** GC objects | itype | GCRef |
+ ** int (LJ_DUALNUM)| itype | int |
+ ** number -------double------
+@@ -245,7 +247,8 @@ typedef const TValue cTValue;
+ **
+ ** ------MSW------.------LSW------
+ ** primitive types |1..1|itype|1..................1|
+-** GC objects/lightud |1..1|itype|-------GCRef--------|
++** GC objects |1..1|itype|-------GCRef--------|
++** lightuserdata |1..1|itype|seg|------ofs-------|
+ ** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
+ ** number ------------double-------------
+ **
+@@ -281,18 +284,31 @@ typedef const TValue cTValue;
+ #define LJ_TISGCV (LJ_TSTR+1)
+ #define LJ_TISTABUD LJ_TTAB
+
++/* Type marker for slot holding a traversal index. Must be lightuserdata. */
++#define LJ_KEYINDEX 0xfffe7fffu
++
+ #if LJ_GC64
+ #define LJ_GCVMASK (((uint64_t)1 << 47) - 1)
+ #endif
+
++#if LJ_64
++/* To stay within 47 bits, lightuserdata is segmented. */
++#define LJ_LIGHTUD_BITS_SEG 8
++#define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG)
++#endif
++
+ /* -- String object ------------------------------------------------------- */
+
++typedef uint32_t StrHash; /* String hash value. */
++typedef uint32_t StrID; /* String ID. */
++
+ /* String object header. String payload follows. */
+ typedef struct GCstr {
+ GCHeader;
+ uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
+- uint8_t unused;
+- MSize hash; /* Hash of string. */
++ uint8_t hashalg; /* Hash algorithm. */
++ StrID sid; /* Interned string ID. */
++ StrHash hash; /* Hash of string. */
+ MSize len; /* Size of string. */
+ } GCstr;
+
+@@ -300,7 +316,6 @@ typedef struct GCstr {
+ #define strdata(s) ((const char *)((s)+1))
+ #define strdatawr(s) ((char *)((s)+1))
+ #define strVdata(o) strdata(strV(o))
+-#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1)
+
+ /* -- Userdata object ----------------------------------------------------- */
+
+@@ -320,6 +335,7 @@ enum {
+ UDTYPE_USERDATA, /* Regular userdata. */
+ UDTYPE_IO_FILE, /* I/O library FILE. */
+ UDTYPE_FFI_CLIB, /* FFI C library namespace. */
++ UDTYPE_BUFFER, /* String buffer. */
+ UDTYPE__MAX
+ };
+
+@@ -570,13 +586,18 @@ typedef enum {
+ #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
+ #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
+
++/* Garbage collector state. */
+ typedef struct GCState {
+ GCSize total; /* Memory currently allocated. */
+ GCSize threshold; /* Memory threshold. */
+ uint8_t currentwhite; /* Current white color. */
+ uint8_t state; /* GC state. */
+ uint8_t nocdatafin; /* No cdata finalizer called. */
+- uint8_t unused2;
++#if LJ_64
++ uint8_t lightudnum; /* Number of lightuserdata segments - 1. */
++#else
++ uint8_t unused1;
++#endif
+ MSize sweepstr; /* Sweep position in string table. */
+ GCRef root; /* List of all collectable objects. */
+ MRef sweep; /* Sweep position in root list. */
+@@ -588,27 +609,41 @@ typedef struct GCState {
+ GCSize estimate; /* Estimate of memory actually in use. */
+ MSize stepmul; /* Incremental GC step granularity. */
+ MSize pause; /* Pause between successive GC cycles. */
++#if LJ_64
++ MRef lightudseg; /* Upper bits of lightuserdata segments. */
++#endif
+ } GCState;
+
++/* String interning state. */
++typedef struct StrInternState {
++ GCRef *tab; /* String hash table anchors. */
++ MSize mask; /* String hash mask (size of hash table - 1). */
++ MSize num; /* Number of strings in hash table. */
++ StrID id; /* Next string ID. */
++ uint8_t idreseed; /* String ID reseed counter. */
++ uint8_t second; /* String interning table uses secondary hashing. */
++ uint8_t unused1;
++ uint8_t unused2;
++ LJ_ALIGN(8) uint64_t seed; /* Random string seed. */
++} StrInternState;
++
+ /* Global state, shared by all threads of a Lua universe. */
+ typedef struct global_State {
+- GCRef *strhash; /* String hash table (hash chain anchors). */
+- MSize strmask; /* String hash mask (size of hash table - 1). */
+- MSize strnum; /* Number of strings in hash table. */
+ lua_Alloc allocf; /* Memory allocator. */
+ void *allocd; /* Memory allocator data. */
+ GCState gc; /* Garbage collector. */
+- volatile int32_t vmstate; /* VM state or current JIT code trace number. */
+- SBuf tmpbuf; /* Temporary string buffer. */
+ GCstr strempty; /* Empty string. */
+ uint8_t stremptyz; /* Zero terminator of empty string. */
+ uint8_t hookmask; /* Hook mask. */
+ uint8_t dispatchmode; /* Dispatch mode. */
+ uint8_t vmevmask; /* VM event mask. */
++ StrInternState str; /* String interning. */
++ volatile int32_t vmstate; /* VM state or current JIT code trace number. */
+ GCRef mainthref; /* Link to main thread. */
+- TValue registrytv; /* Anchor for registry. */
++ SBuf tmpbuf; /* Temporary string buffer. */
+ TValue tmptv, tmptv2; /* Temporary TValues. */
+ Node nilnode; /* Fallback 1-element hash part (nil key and value). */
++ TValue registrytv; /* Anchor for registry. */
+ GCupval uvhead; /* Head of double-linked list of all open upvalues. */
+ int32_t hookcount; /* Instruction hook countdown. */
+ int32_t hookcstart; /* Start count for instruction hook counter. */
+@@ -620,6 +655,7 @@ typedef struct global_State {
+ GCRef cur_L; /* Currently executing lua_State. */
+ MRef jit_base; /* Current JIT code L->base or NULL. */
+ MRef ctype_state; /* Pointer to C type state. */
++ PRNGState prng; /* Global PRNG state. */
+ GCRef gcroot[GCROOT_MAX]; /* GC roots. */
+ } global_State;
+
+@@ -638,7 +674,8 @@ typedef struct global_State {
+ #define HOOK_PROFILE 0x80
+ #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
+ #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
+-#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
++#define hook_entergc(g) \
++ ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE)
+ #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
+ #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
+ #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
+@@ -678,6 +715,11 @@ struct lua_State {
+ #define curr_topL(L) (L->base + curr_proto(L)->framesize)
+ #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top)
+
++#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
++LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line,
++ const char *func, const char *fmt, ...);
++#endif
++
+ /* -- GC object definition and conversions -------------------------------- */
+
+ /* GC header for generic access to common fields of GC objects. */
+@@ -731,10 +773,6 @@ typedef union GCobj {
+
+ /* -- TValue getters/setters ---------------------------------------------- */
+
+-#ifdef LUA_USE_ASSERT
+-#include "lj_gc.h"
+-#endif
+-
+ /* Macros to test types. */
+ #if LJ_GC64
+ #define itype(o) ((uint32_t)((o)->it64 >> 47))
+@@ -795,10 +833,23 @@ typedef union GCobj {
+ #endif
+ #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
+ #if LJ_64
+-#define lightudV(o) \
+- check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff)))
++#define lightudseg(u) \
++ (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1))
++#define lightudlo(u) \
++ ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1))
++#define lightudup(p) \
++ ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32)))
++static LJ_AINLINE void *lightudV(global_State *g, cTValue *o)
++{
++ uint64_t u = o->u64;
++ uint64_t seg = lightudseg(u);
++ uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
++ lj_assertG(tvislightud(o), "lightuserdata expected");
++ lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg);
++ return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u));
++}
+ #else
+-#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
++#define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
+ #endif
+ #define gcV(o) check_exp(tvisgcv(o), gcval(o))
+ #define strV(o) check_exp(tvisstr(o), &gcval(o)->str)
+@@ -824,7 +875,7 @@ typedef union GCobj {
+ #define setpriV(o, i) (setitype((o), (i)))
+ #endif
+
+-static LJ_AINLINE void setlightudV(TValue *o, void *p)
++static LJ_AINLINE void setrawlightudV(TValue *o, void *p)
+ {
+ #if LJ_GC64
+ o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
+@@ -835,29 +886,29 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
+ #endif
+ }
+
+-#if LJ_64
+-#define checklightudptr(L, p) \
+- (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
+-#else
+-#define checklightudptr(L, p) (p)
+-#endif
+-
+-#if LJ_FR2
++#if LJ_FR2 || LJ_32
+ #define contptr(f) ((void *)(f))
+ #define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
+-#elif LJ_64
++#else
+ #define contptr(f) \
+ ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
+ #define setcont(o, f) \
+ ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
+-#else
+-#define contptr(f) ((void *)(f))
+-#define setcont(o, f) setlightudV((o), contptr(f))
+ #endif
+
+-#define tvchecklive(L, o) \
+- UNUSED(L), lua_assert(!tvisgcv(o) || \
+- ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o))))
++static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg)
++{
++ UNUSED(L); UNUSED(o); UNUSED(msg);
++#if LUA_USE_ASSERT
++ if (tvisgcv(o)) {
++ lj_assertL(~itype(o) == gcval(o)->gch.gct,
++ "mismatch of TValue type %d vs GC type %d",
++ ~itype(o), gcval(o)->gch.gct);
++ /* Copy of isdead check from lj_gc.h to avoid circular include. */
++ lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg);
++ }
++#endif
++}
+
+ static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
+ {
+@@ -870,11 +921,12 @@ static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
+
+ static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
+ {
+- setgcVraw(o, v, it); tvchecklive(L, o);
++ setgcVraw(o, v, it);
++ checklivetv(L, o, "store to dead GC object");
+ }
+
+ #define define_setV(name, type, tag) \
+-static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \
++static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \
+ { \
+ setgcV(L, o, obj2gco(v), tag); \
+ }
+@@ -917,13 +969,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i)
+ /* Copy tagged values. */
+ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
+ {
+- *o1 = *o2; tvchecklive(L, o1);
++ *o1 = *o2;
++ checklivetv(L, o1, "copy of dead GC object");
+ }
+
+ /* -- Number to integer conversion ---------------------------------------- */
+
+ #if LJ_SOFTFP
+ LJ_ASMF int32_t lj_vm_tobit(double x);
++#if LJ_TARGET_MIPS64
++LJ_ASMF int32_t lj_vm_tointg(double x);
++#endif
+ #endif
+
+ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
+@@ -939,14 +995,22 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
+
+ #define lj_num2int(n) ((int32_t)(n))
+
++/*
++** This must match the JIT backend behavior. In particular for archs
++** that don't have a common hardware instruction for this conversion.
++** Note that signed FP to unsigned int conversions have an undefined
++** result and should never be relied upon in portable FFI code.
++** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
++*/
+ static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
+ {
+-#ifdef _MSC_VER
+- if (n >= 9223372036854775808.0) /* They think it's a feature. */
+- return (uint64_t)(int64_t)(n - 18446744073709551616.0);
+- else
++#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
++ int64_t i = (int64_t)n;
++ if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
++ return (uint64_t)i;
++#else
++ return (uint64_t)n;
+ #endif
+- return (uint64_t)n;
+ }
+
+ static LJ_AINLINE int32_t numberVint(cTValue *o)
+@@ -975,6 +1039,6 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
+
+ /* Compare two objects without calling metamethods. */
+ LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
+-LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o);
++LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o);
+
+ #endif
+diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c
+index 2417f324..d6b9e549 100644
+--- a/src/lj_opt_dce.c
++++ b/src/lj_opt_dce.c
+@@ -1,6 +1,6 @@
+ /*
+ ** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_opt_dce_c
+@@ -47,10 +47,7 @@ static void dce_propagate(jit_State *J)
+ pchain[ir->o] = &ir->prev;
+ } else if (!ir_sideeff(ir)) {
+ *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */
+- ir->t.irt = IRT_NIL;
+- ir->o = IR_NOP; /* Replace instruction with NOP. */
+- ir->op1 = ir->op2 = 0;
+- ir->prev = 0;
++ lj_ir_nop(ir);
+ continue;
+ }
+ if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
+diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
+index acbf36a5..2f903e27 100644
+--- a/src/lj_opt_fold.c
++++ b/src/lj_opt_fold.c
+@@ -2,7 +2,7 @@
+ ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
+ ** ABCelim: Array Bounds Check Elimination.
+ ** CSE: Common-Subexpression Elimination.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_opt_fold_c
+@@ -173,7 +173,6 @@ LJFOLD(ADD KNUM KNUM)
+ LJFOLD(SUB KNUM KNUM)
+ LJFOLD(MUL KNUM KNUM)
+ LJFOLD(DIV KNUM KNUM)
+-LJFOLD(ATAN2 KNUM KNUM)
+ LJFOLD(LDEXP KNUM KNUM)
+ LJFOLD(MIN KNUM KNUM)
+ LJFOLD(MAX KNUM KNUM)
+@@ -213,11 +212,36 @@ LJFOLDF(kfold_fpmath)
+ return lj_ir_knum(J, y);
+ }
+
++LJFOLD(CALLN KNUM any)
++LJFOLDF(kfold_fpcall1)
++{
++ const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
++ if (CCI_TYPE(ci) == IRT_NUM) {
++ double y = ((double (*)(double))ci->func)(knumleft);
++ return lj_ir_knum(J, y);
++ }
++ return NEXTFOLD;
++}
++
++LJFOLD(CALLN CARG IRCALL_atan2)
++LJFOLDF(kfold_fpcall2)
++{
++ if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
++ const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
++ double a = ir_knum(IR(fleft->op1))->n;
++ double b = ir_knum(IR(fleft->op2))->n;
++ double y = ((double (*)(double, double))ci->func)(a, b);
++ return lj_ir_knum(J, y);
++ }
++ return NEXTFOLD;
++}
++
+ LJFOLD(POW KNUM KINT)
++LJFOLD(POW KNUM KNUM)
+ LJFOLDF(kfold_numpow)
+ {
+ lua_Number a = knumleft;
+- lua_Number b = (lua_Number)fright->i;
++ lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
+ lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
+ return lj_ir_knum(J, y);
+ }
+@@ -258,7 +282,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
+ case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
+ case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
+ case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
+- default: lua_assert(0); break;
++ default: lj_assertX(0, "bad IR op %d", op); break;
+ }
+ return k1;
+ }
+@@ -330,7 +354,7 @@ LJFOLDF(kfold_intcomp)
+ case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
+ case IR_ABC:
+ case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
+- default: lua_assert(0); return FAILFOLD;
++ default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
+ }
+ }
+
+@@ -344,10 +368,12 @@ LJFOLDF(kfold_intcomp0)
+
+ /* -- Constant folding for 64 bit integers -------------------------------- */
+
+-static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
++static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
++ IROp op)
+ {
+- switch (op) {
++ UNUSED(J);
+ #if LJ_HASFFI
++ switch (op) {
+ case IR_ADD: k1 += k2; break;
+ case IR_SUB: k1 -= k2; break;
+ case IR_MUL: k1 *= k2; break;
+@@ -359,9 +385,12 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
+ case IR_BSAR: k1 >>= (k2 & 63); break;
+ case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
+ case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
+-#endif
+- default: UNUSED(k2); lua_assert(0); break;
++ default: lj_assertJ(0, "bad IR op %d", op); break;
+ }
++#else
++ UNUSED(k2); UNUSED(op);
++ lj_assertJ(0, "FFI IR op without FFI");
++#endif
+ return k1;
+ }
+
+@@ -373,7 +402,7 @@ LJFOLD(BOR KINT64 KINT64)
+ LJFOLD(BXOR KINT64 KINT64)
+ LJFOLDF(kfold_int64arith)
+ {
+- return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
++ return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
+ ir_k64(fright)->u64, (IROp)fins->o));
+ }
+
+@@ -395,7 +424,7 @@ LJFOLDF(kfold_int64arith2)
+ }
+ return INT64FOLD(k1);
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -411,7 +440,7 @@ LJFOLDF(kfold_int64shift)
+ int32_t sh = (fright->i & 63);
+ return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -421,7 +450,7 @@ LJFOLDF(kfold_bnot64)
+ #if LJ_HASFFI
+ return INT64FOLD(~ir_k64(fleft)->u64);
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -431,7 +460,7 @@ LJFOLDF(kfold_bswap64)
+ #if LJ_HASFFI
+ return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -456,10 +485,10 @@ LJFOLDF(kfold_int64comp)
+ case IR_UGE: return CONDFOLD(a >= b);
+ case IR_ULE: return CONDFOLD(a <= b);
+ case IR_UGT: return CONDFOLD(a > b);
+- default: lua_assert(0); return FAILFOLD;
++ default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
+ }
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -471,7 +500,7 @@ LJFOLDF(kfold_int64comp0)
+ return DROPFOLD;
+ return NEXTFOLD;
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -485,6 +514,7 @@ LJFOLDF(kfold_snew_kptr)
+ }
+
+ LJFOLD(SNEW any KINT)
++LJFOLD(XSNEW any KINT)
+ LJFOLDF(kfold_snew_empty)
+ {
+ if (fright->i == 0)
+@@ -496,7 +526,7 @@ LJFOLD(STRREF KGC KINT)
+ LJFOLDF(kfold_strref)
+ {
+ GCstr *str = ir_kstr(fleft);
+- lua_assert((MSize)fright->i <= str->len);
++ lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
+ return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
+ }
+
+@@ -548,22 +578,49 @@ LJFOLDF(kfold_strcmp)
+ ** The compromise is to declare them as loads, emit them like stores and
+ ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
+ ** fragments left over from CSE are eliminated by DCE.
++**
++** The string buffer methods emit a USE instead of a BUFSTR to keep the
++** chain alive.
+ */
+
+-/* BUFHDR is emitted like a store, see below. */
++LJFOLD(BUFHDR any any)
++LJFOLDF(bufhdr_merge)
++{
++ return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
++}
+
+-LJFOLD(BUFPUT BUFHDR BUFSTR)
+-LJFOLDF(bufput_append)
++LJFOLD(BUFPUT any BUFSTR)
++LJFOLDF(bufput_bufstr)
+ {
+- /* New buffer, no other buffer op inbetween and same buffer? */
+- if ((J->flags & JIT_F_OPT_FWD) &&
+- !(fleft->op2 & IRBUFHDR_APPEND) &&
+- fleft->prev == fright->op2 &&
+- fleft->op1 == IR(fright->op2)->op1) {
+- IRRef ref = fins->op1;
+- IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
+- IR(ref)->op1 = fright->op1;
+- return ref;
++ if ((J->flags & JIT_F_OPT_FWD)) {
++ IRRef hdr = fright->op2;
++ /* New buffer, no other buffer op inbetween and same buffer? */
++ if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
++ fleft->prev == hdr &&
++ fleft->op1 == IR(hdr)->op1) {
++ IRRef ref = fins->op1;
++ IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */
++ IR(ref)->op1 = fright->op1;
++ return ref;
++ }
++ /* Replay puts to global temporary buffer. */
++ if (IR(hdr)->op2 == IRBUFHDR_RESET) {
++ IRIns *ir = IR(fright->op1);
++ /* For now only handle single string.reverse .lower .upper .rep. */
++ if (ir->o == IR_CALLL &&
++ ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
++ ir->op2 <= IRCALL_lj_buf_putstr_rep) {
++ IRIns *carg1 = IR(ir->op1);
++ if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
++ IRIns *carg2 = IR(carg1->op1);
++ if (carg2->op1 == hdr) {
++ return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
++ }
++ } else if (carg1->op1 == hdr) {
++ return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
++ }
++ }
++ }
+ }
+ return EMITFOLD; /* Always emit, CSE later. */
+ }
+@@ -592,18 +649,19 @@ LJFOLDF(bufput_kgc)
+ LJFOLD(BUFSTR any any)
+ LJFOLDF(bufstr_kfold_cse)
+ {
+- lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
+- fleft->o == IR_CALLL);
++ lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
++ fleft->o == IR_CALLL,
++ "bad buffer constructor IR op %d", fleft->o);
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
+ if (fleft->o == IR_BUFHDR) { /* No put operations? */
+- if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
++ if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */
+ return lj_ir_kstr(J, &J2G(J)->strempty);
+ fins->op1 = fleft->op1;
+ fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
+ return CSEFOLD;
+ } else if (fleft->o == IR_BUFPUT) {
+ IRIns *irb = IR(fleft->op1);
+- if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
++ if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
+ return fleft->op2; /* Shortcut for a single put operation. */
+ }
+ }
+@@ -613,9 +671,10 @@ LJFOLDF(bufstr_kfold_cse)
+ while (ref) {
+ IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
+ while (ira->o == irb->o && ira->op2 == irb->op2) {
+- lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
+- ira->o == IR_CALLL || ira->o == IR_CARG);
+- if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
++ lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
++ ira->o == IR_CALLL || ira->o == IR_CARG,
++ "bad buffer constructor IR op %d", ira->o);
++ if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
+ return ref; /* CSE succeeded. */
+ if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
+ break;
+@@ -673,7 +732,7 @@ LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
+ LJFOLDF(bufput_kfold_fmt)
+ {
+ IRIns *irc = IR(fleft->op1);
+- lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */
++ lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
+ if (irref_isk(fleft->op2)) {
+ SFormat sf = (SFormat)IR(irc->op2)->i;
+ IRIns *ira = IR(fleft->op2);
+@@ -1054,7 +1113,7 @@ LJFOLDF(simplify_nummuldiv_negneg)
+ }
+
+ LJFOLD(POW any KINT)
+-LJFOLDF(simplify_numpow_xk)
++LJFOLDF(simplify_numpow_xkint)
+ {
+ int32_t k = fright->i;
+ TRef ref = fins->op1;
+@@ -1083,13 +1142,22 @@ LJFOLDF(simplify_numpow_xk)
+ return ref;
+ }
+
++LJFOLD(POW any KNUM)
++LJFOLDF(simplify_numpow_xknum)
++{
++ if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */
++ return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
++ return NEXTFOLD;
++}
++
+ LJFOLD(POW KNUM any)
+ LJFOLDF(simplify_numpow_kx)
+ {
+ lua_Number n = knumleft;
+- if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
+- fins->o = IR_CONV;
++ if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */
+ #if LJ_TARGET_X86ORX64
++ /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
++ fins->o = IR_CONV;
+ fins->op1 = fins->op2;
+ fins->op2 = IRCONV_NUM_INT;
+ fins->op2 = (IRRef1)lj_opt_fold(J);
+@@ -1183,10 +1251,10 @@ LJFOLDF(simplify_tobit_conv)
+ {
+ /* Fold even across PHI to avoid expensive num->int conversions in loop. */
+ if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
+- lua_assert(irt_isnum(fleft->t));
++ lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
+ return fleft->op1;
+ } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
+- lua_assert(irt_isnum(fleft->t));
++ lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
+ fins->o = IR_CONV;
+ fins->op1 = fleft->op1;
+ fins->op2 = (IRT_INT<<5)|IRT_U32;
+@@ -1226,8 +1294,8 @@ LJFOLDF(simplify_conv_sext)
+ /* Use scalar evolution analysis results to strength-reduce sign-extension. */
+ if (ref == J->scev.idx) {
+ IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
+- lua_assert(irt_isint(J->scev.t));
+- if (lo && IR(lo)->i + ofs >= 0) {
++ lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
++ if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
+ ok_reduce:
+ #if LJ_TARGET_X64
+ /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
+@@ -1257,16 +1325,21 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
+ LJFOLD(CONV MUL IRCONV_U32_U64)
+ LJFOLDF(simplify_conv_narrow)
+ {
++#if LJ_64
++ UNUSED(J);
++ return NEXTFOLD;
++#else
+ IROp op = (IROp)fleft->o;
+ IRType t = irt_type(fins->t);
+ IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
+ PHIBARRIER(fleft);
+- op1 = emitir(IRTI(IR_CONV), op1, mode);
+- op2 = emitir(IRTI(IR_CONV), op2, mode);
++ op1 = emitir(IRT(IR_CONV, t), op1, mode);
++ op2 = emitir(IRT(IR_CONV, t), op2, mode);
+ fins->ot = IRT(op, t);
+ fins->op1 = op1;
+ fins->op2 = op2;
+ return RETRYFOLD;
++#endif
+ }
+
+ /* Special CSE rule for CONV. */
+@@ -1302,7 +1375,8 @@ LJFOLDF(narrow_convert)
+ /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
+ if (J->chain[IR_LOOP])
+ return NEXTFOLD;
+- lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
++ lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
++ "unexpected CONV TOBIT");
+ return lj_opt_narrow_convert(J);
+ }
+
+@@ -1408,7 +1482,7 @@ LJFOLDF(simplify_intmul_k64)
+ return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
+ return NEXTFOLD;
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -1416,7 +1490,7 @@ LJFOLD(MOD any KINT)
+ LJFOLDF(simplify_intmod_k)
+ {
+ int32_t k = fright->i;
+- lua_assert(k != 0);
++ lj_assertJ(k != 0, "integer mod 0");
+ if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
+ fins->o = IR_BAND;
+ fins->op2 = lj_ir_kint(J, k-1);
+@@ -1666,7 +1740,8 @@ LJFOLDF(simplify_shiftk_andk)
+ fins->ot = IRTI(IR_BAND);
+ return RETRYFOLD;
+ } else if (irk->o == IR_KINT64) {
+- uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o);
++ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
++ (IROp)fins->o);
+ IROpT ot = fleft->ot;
+ fins->op1 = fleft->op1;
+ fins->op1 = (IRRef1)lj_opt_fold(J);
+@@ -1714,8 +1789,8 @@ LJFOLDF(simplify_andor_k64)
+ IRIns *irk = IR(fleft->op2);
+ PHIBARRIER(fleft);
+ if (irk->o == IR_KINT64) {
+- uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
+- ir_k64(fright)->u64, (IROp)fins->o);
++ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
++ (IROp)fins->o);
+ /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
+ /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
+ if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
+@@ -1725,7 +1800,7 @@ LJFOLDF(simplify_andor_k64)
+ }
+ return NEXTFOLD;
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+@@ -1761,8 +1836,8 @@ LJFOLDF(reassoc_intarith_k64)
+ #if LJ_HASFFI
+ IRIns *irk = IR(fleft->op2);
+ if (irk->o == IR_KINT64) {
+- uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
+- ir_k64(fright)->u64, (IROp)fins->o);
++ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
++ (IROp)fins->o);
+ PHIBARRIER(fleft);
+ fins->op1 = fleft->op1;
+ fins->op2 = (IRRef1)lj_ir_kint64(J, k);
+@@ -1770,12 +1845,10 @@ LJFOLDF(reassoc_intarith_k64)
+ }
+ return NEXTFOLD;
+ #else
+- UNUSED(J); lua_assert(0); return FAILFOLD;
++ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+ #endif
+ }
+
+-LJFOLD(MIN MIN any)
+-LJFOLD(MAX MAX any)
+ LJFOLD(BAND BAND any)
+ LJFOLD(BOR BOR any)
+ LJFOLDF(reassoc_dup)
+@@ -1785,6 +1858,15 @@ LJFOLDF(reassoc_dup)
+ return NEXTFOLD;
+ }
+
++LJFOLD(MIN MIN any)
++LJFOLD(MAX MAX any)
++LJFOLDF(reassoc_dup_minmax)
++{
++ if (fins->op2 == fleft->op2)
++ return LEFTFOLD; /* (a o b) o b ==> a o b */
++ return NEXTFOLD;
++}
++
+ LJFOLD(BXOR BXOR any)
+ LJFOLDF(reassoc_bxor)
+ {
+@@ -1823,23 +1905,12 @@ LJFOLDF(reassoc_shift)
+ return NEXTFOLD;
+ }
+
+-LJFOLD(MIN MIN KNUM)
+-LJFOLD(MAX MAX KNUM)
+ LJFOLD(MIN MIN KINT)
+ LJFOLD(MAX MAX KINT)
+ LJFOLDF(reassoc_minmax_k)
+ {
+ IRIns *irk = IR(fleft->op2);
+- if (irk->o == IR_KNUM) {
+- lua_Number a = ir_knum(irk)->n;
+- lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
+- if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
+- return LEFTFOLD;
+- PHIBARRIER(fleft);
+- fins->op1 = fleft->op1;
+- fins->op2 = (IRRef1)lj_ir_knum(J, y);
+- return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
+- } else if (irk->o == IR_KINT) {
++ if (irk->o == IR_KINT) {
+ int32_t a = irk->i;
+ int32_t y = kfold_intop(a, fright->i, fins->o);
+ if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
+@@ -1852,24 +1923,6 @@ LJFOLDF(reassoc_minmax_k)
+ return NEXTFOLD;
+ }
+
+-LJFOLD(MIN MAX any)
+-LJFOLD(MAX MIN any)
+-LJFOLDF(reassoc_minmax_left)
+-{
+- if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
+- return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
+- return NEXTFOLD;
+-}
+-
+-LJFOLD(MIN any MAX)
+-LJFOLD(MAX any MIN)
+-LJFOLDF(reassoc_minmax_right)
+-{
+- if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
+- return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
+- return NEXTFOLD;
+-}
+-
+ /* -- Array bounds check elimination -------------------------------------- */
+
+ /* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
+@@ -1995,8 +2048,6 @@ LJFOLDF(comm_comp)
+
+ LJFOLD(BAND any any)
+ LJFOLD(BOR any any)
+-LJFOLD(MIN any any)
+-LJFOLD(MAX any any)
+ LJFOLDF(comm_dup)
+ {
+ if (fins->op1 == fins->op2) /* x o x ==> x */
+@@ -2004,6 +2055,15 @@ LJFOLDF(comm_dup)
+ return fold_comm_swap(J);
+ }
+
++LJFOLD(MIN any any)
++LJFOLD(MAX any any)
++LJFOLDF(comm_dup_minmax)
++{
++ if (fins->op1 == fins->op2) /* x o x ==> x */
++ return LEFTFOLD;
++ return NEXTFOLD;
++}
++
+ LJFOLD(BXOR any any)
+ LJFOLDF(comm_bxor)
+ {
+@@ -2040,7 +2100,7 @@ LJFOLDF(merge_eqne_snew_kgc)
+ {
+ GCstr *kstr = ir_kstr(fright);
+ int32_t len = (int32_t)kstr->len;
+- lua_assert(irt_isstr(fins->t));
++ lj_assertJ(irt_isstr(fins->t), "bad equality IR type");
+
+ #if LJ_TARGET_UNALIGNED
+ #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
+@@ -2104,7 +2164,7 @@ LJFOLD(HLOAD KKPTR)
+ LJFOLDF(kfold_hload_kkptr)
+ {
+ UNUSED(J);
+- lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
++ lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
+ return TREF_NIL;
+ }
+
+@@ -2114,8 +2174,8 @@ LJFOLDX(lj_opt_fwd_hload)
+ LJFOLD(ULOAD any)
+ LJFOLDX(lj_opt_fwd_uload)
+
+-LJFOLD(CALLL any IRCALL_lj_tab_len)
+-LJFOLDX(lj_opt_fwd_tab_len)
++LJFOLD(ALEN any any)
++LJFOLDX(lj_opt_fwd_alen)
+
+ /* Upvalue refs are really loads, but there are no corresponding stores.
+ ** So CSE is ok for them, except for UREFO across a GC step (see below).
+@@ -2248,6 +2308,27 @@ LJFOLDF(fload_str_len_tostr)
+ return NEXTFOLD;
+ }
+
++LJFOLD(FLOAD any IRFL_SBUF_W)
++LJFOLD(FLOAD any IRFL_SBUF_E)
++LJFOLD(FLOAD any IRFL_SBUF_B)
++LJFOLD(FLOAD any IRFL_SBUF_L)
++LJFOLD(FLOAD any IRFL_SBUF_REF)
++LJFOLD(FLOAD any IRFL_SBUF_R)
++LJFOLDF(fload_sbuf)
++{
++ TRef tr = lj_opt_fwd_fload(J);
++ return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
++}
++
++/* The fast function ID of function objects is immutable. */
++LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
++LJFOLDF(fload_func_ffid_kgc)
++{
++ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
++ return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
++ return NEXTFOLD;
++}
++
+ /* The C type ID of cdata objects is immutable. */
+ LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
+ LJFOLDF(fload_cdata_typeid_kgc)
+@@ -2315,7 +2396,7 @@ LJFOLDF(fwd_sload)
+ TRef tr = lj_opt_cse(J);
+ return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
+ } else {
+- lua_assert(J->slot[fins->op1] != 0);
++ lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
+ return J->slot[fins->op1];
+ }
+ }
+@@ -2394,6 +2475,7 @@ LJFOLD(XSTORE any any)
+ LJFOLDX(lj_opt_dse_xstore)
+
+ LJFOLD(NEWREF any any) /* Treated like a store. */
++LJFOLD(TMPREF any any)
+ LJFOLD(CALLA any any)
+ LJFOLD(CALLL any any) /* Safeguard fallback. */
+ LJFOLD(CALLS any any)
+@@ -2404,7 +2486,6 @@ LJFOLD(TNEW any any)
+ LJFOLD(TDUP any)
+ LJFOLD(CNEW any any)
+ LJFOLD(XSNEW any any)
+-LJFOLD(BUFHDR any any)
+ LJFOLDX(lj_ir_emit)
+
+ /* ------------------------------------------------------------------------ */
+@@ -2430,8 +2511,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
+ IRRef ref;
+
+ if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
+- lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
+- JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
++ lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
++ JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
++ "bad JIT_F_OPT_DEFAULT");
+ /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
+ if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
+ return lj_opt_cse(J);
+@@ -2493,7 +2575,7 @@ retry:
+ return lj_ir_kint(J, fins->i);
+ if (ref == FAILFOLD)
+ lj_trace_err(J, LJ_TRERR_GFAIL);
+- lua_assert(ref == DROPFOLD);
++ lj_assertJ(ref == DROPFOLD, "bad fold result");
+ return REF_DROP;
+ }
+
+diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
+index 04c6d06d..df5811a9 100644
+--- a/src/lj_opt_loop.c
++++ b/src/lj_opt_loop.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LOOP: Loop Optimizations.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_opt_loop_c
+@@ -223,8 +223,9 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
+ }
+ J->guardemit.irt = 0;
+ /* Setup new snapshot. */
+- snap->mapofs = (uint16_t)nmapofs;
++ snap->mapofs = (uint32_t)nmapofs;
+ snap->ref = (IRRef1)J->cur.nins;
++ snap->mcofs = 0;
+ snap->nslots = nslots;
+ snap->topslot = osnap->topslot;
+ snap->count = 0;
+@@ -251,7 +252,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
+ nmap += nn;
+ while (omap < nextmap) /* Copy PC + frame links. */
+ *nmap++ = *omap++;
+- J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap);
++ J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap);
+ }
+
+ typedef struct LoopState {
+@@ -299,7 +300,8 @@ static void loop_unroll(LoopState *lps)
+ loopmap = &J->cur.snapmap[loopsnap->mapofs];
+ /* The PC of snapshot #0 and the loop snapshot must match. */
+ psentinel = &loopmap[loopsnap->nent];
+- lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
++ lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent],
++ "mismatched PC for loop snapshot");
+ *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
+
+ /* Start substitution with snapshot #1 (#0 is empty for root traces). */
+@@ -352,10 +354,12 @@ static void loop_unroll(LoopState *lps)
+ irr = IR(ref);
+ goto phiconv;
+ }
+- } else if (ref != REF_DROP && irr->o == IR_CONV &&
+- ref > invar && irr->op1 < invar) {
+- /* May need an extra PHI for a CONV. */
+- ref = irr->op1;
++ } else if (ref != REF_DROP && ref > invar &&
++ ((irr->o == IR_CONV && irr->op1 < invar) ||
++ (irr->o == IR_ALEN && irr->op2 < invar &&
++ irr->op2 != REF_NIL))) {
++ /* May need an extra PHI for a CONV or ALEN hint. */
++ ref = irr->o == IR_CONV ? irr->op1 : irr->op2;
+ irr = IR(ref);
+ phiconv:
+ if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {
+@@ -369,8 +373,8 @@ static void loop_unroll(LoopState *lps)
+ }
+ }
+ if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
+- J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
+- lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
++ J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs;
++ lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index");
+ *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
+
+ loop_emit_phi(J, subst, phi, nphi, onsnap);
+@@ -383,7 +387,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
+ SnapShot *snap = &J->cur.snap[nsnap-1];
+ SnapEntry *map = J->cur.snapmap;
+ map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */
+- J->cur.nsnapmap = (uint16_t)nsnapmap;
++ J->cur.nsnapmap = (uint32_t)nsnapmap;
+ J->cur.nsnap = nsnap;
+ J->guardemit.irt = 0;
+ lj_ir_rollback(J, ins);
+diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
+index cc177d39..d6a419e4 100644
+--- a/src/lj_opt_mem.c
++++ b/src/lj_opt_mem.c
+@@ -3,7 +3,7 @@
+ ** AA: Alias Analysis using high-level semantic disambiguation.
+ ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L).
+ ** DSE: Dead-Store Elimination.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_opt_mem_c
+@@ -18,6 +18,7 @@
+ #include "lj_jit.h"
+ #include "lj_iropt.h"
+ #include "lj_ircall.h"
++#include "lj_dispatch.h"
+
+ /* Some local macros to save typing. Undef'd at the end. */
+ #define IR(ref) (&J->cur.ir[(ref)])
+@@ -56,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
+ {
+ IRIns *taba = IR(ta), *tabb = IR(tb);
+ int newa, newb;
+- lua_assert(ta != tb);
+- lua_assert(irt_istab(taba->t) && irt_istab(tabb->t));
++ lj_assertJ(ta != tb, "bad usage");
++ lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage");
+ /* Disambiguate new allocations. */
+ newa = (taba->o == IR_TNEW || taba->o == IR_TDUP);
+ newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP);
+@@ -99,7 +100,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
+ /* Disambiguate array references based on index arithmetic. */
+ int32_t ofsa = 0, ofsb = 0;
+ IRRef basea = ka, baseb = kb;
+- lua_assert(refb->o == IR_AREF);
++ lj_assertJ(refb->o == IR_AREF, "expected AREF");
+ /* Gather base and offset from t[base] or t[base+-ofs]. */
+ if (keya->o == IR_ADD && irref_isk(keya->op2)) {
+ basea = keya->op1;
+@@ -117,8 +118,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
+ return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
+ } else {
+ /* Disambiguate hash references based on the type of their keys. */
+- lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
+- (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF));
++ lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
++ (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF),
++ "bad xREF IR op %d or %d", refa->o, refb->o);
+ if (!irt_sametype(keya->t, keyb->t))
+ return ALIAS_NO; /* Different key types. */
+ }
+@@ -180,7 +182,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
+ }
+ ref = store->prev;
+ }
+- lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t));
++ if (ir->o == IR_TNEW && !irt_isnil(fins->t))
++ return 0; /* Type instability in loop-carried dependency. */
+ if (irt_ispri(fins->t)) {
+ return TREF_PRI(irt_type(fins->t));
+ } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) ||
+@@ -191,7 +194,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
+ if (key->o == IR_KSLOT) key = IR(key->op1);
+ lj_ir_kvalue(J->L, &keyv, key);
+ tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
+- lua_assert(itype2irt(tv) == irt_type(fins->t));
++ lj_assertJ(itype2irt(tv) == irt_type(fins->t),
++ "mismatched type in constant table");
+ if (irt_isnum(fins->t))
+ return lj_ir_knum_u64(J, tv->u64);
+ else if (LJ_DUALNUM && irt_isint(fins->t))
+@@ -360,16 +364,16 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
+ /* Different value: try to eliminate the redundant store. */
+ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
+ IRIns *ir;
+- /* Check for any intervening guards (includes conflicting loads). */
++ /* Check for any intervening guards (includes conflicting loads).
++ ** Note that lj_tab_keyindex and lj_vm_next don't need guards,
++ ** since they are followed by at least one guarded VLOAD.
++ */
+ for (ir = IR(J->cur.nins-1); ir > store; ir--)
+- if (irt_isguard(ir->t) || ir->o == IR_CALLL)
++ if (irt_isguard(ir->t) || ir->o == IR_ALEN)
+ goto doemit; /* No elimination possible. */
+ /* Remove redundant store from chain and replace with NOP. */
+ *refp = store->prev;
+- store->o = IR_NOP;
+- store->t.irt = IRT_NIL;
+- store->op1 = store->op2 = 0;
+- store->prev = 0;
++ lj_ir_nop(store);
+ /* Now emit the new store instead. */
+ }
+ goto doemit;
+@@ -380,6 +384,67 @@ doemit:
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+ }
+
++/* ALEN forwarding. */
++TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J)
++{
++ IRRef tab = fins->op1; /* Table reference. */
++ IRRef lim = tab; /* Search limit. */
++ IRRef ref;
++
++ /* Search for conflicting HSTORE with numeric key. */
++ ref = J->chain[IR_HSTORE];
++ while (ref > lim) {
++ IRIns *store = IR(ref);
++ IRIns *href = IR(store->op1);
++ IRIns *key = IR(href->op2);
++ if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
++ lim = ref; /* Conflicting store found, limits search for ALEN. */
++ break;
++ }
++ ref = store->prev;
++ }
++
++ /* Try to find a matching ALEN. */
++ ref = J->chain[IR_ALEN];
++ while (ref > lim) {
++ /* CSE for ALEN only depends on the table, not the hint. */
++ if (IR(ref)->op1 == tab) {
++ IRRef sref;
++
++ /* Search for aliasing table.clear. */
++ if (!fwd_aa_tab_clear(J, ref, tab))
++ break;
++
++ /* Search for hint-forwarding or conflicting store. */
++ sref = J->chain[IR_ASTORE];
++ while (sref > ref) {
++ IRIns *store = IR(sref);
++ IRIns *aref = IR(store->op1);
++ IRIns *fref = IR(aref->op1);
++ if (tab == fref->op1) { /* ASTORE to the same table. */
++ /* Detect t[#t+1] = x idiom for push. */
++ IRIns *idx = IR(aref->op2);
++ if (!irt_isnil(store->t) &&
++ idx->o == IR_ADD && idx->op1 == ref &&
++ IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) {
++ /* Note: this requires an extra PHI check in loop unroll. */
++ fins->op2 = aref->op2; /* Set ALEN hint. */
++ }
++ goto doemit; /* Conflicting store, possibly giving a hint. */
++ } else if (aa_table(J, tab, fref->op1) == ALIAS_NO) {
++ goto doemit; /* Conflicting store. */
++ }
++ sref = store->prev;
++ }
++
++ return ref; /* Plain ALEN forwarding. */
++ }
++ ref = IR(ref)->prev;
++ }
++doemit:
++ return EMITFOLD;
++}
++
+ /* -- ULOAD forwarding ---------------------------------------------------- */
+
+ /* The current alias analysis for upvalues is very simplistic. It only
+@@ -429,7 +494,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
+
+ cselim:
+ /* Try to find a matching load. Below the conflicting store, if any. */
+-
+ ref = J->chain[IR_ULOAD];
+ while (ref > lim) {
+ IRIns *ir = IR(ref);
+@@ -470,10 +534,7 @@ TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J)
+ goto doemit; /* No elimination possible. */
+ /* Remove redundant store from chain and replace with NOP. */
+ *refp = store->prev;
+- store->o = IR_NOP;
+- store->t.irt = IRT_NIL;
+- store->op1 = store->op2 = 0;
+- store->prev = 0;
++ lj_ir_nop(store);
+ if (ref+1 < J->cur.nins &&
+ store[1].o == IR_OBAR && store[1].op1 == xref) {
+ IRRef1 *bp = &J->chain[IR_OBAR];
+@@ -482,10 +543,7 @@ TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J)
+ bp = &obar->prev;
+ /* Remove OBAR, too. */
+ *bp = obar->prev;
+- obar->o = IR_NOP;
+- obar->t.irt = IRT_NIL;
+- obar->op1 = obar->op2 = 0;
+- obar->prev = 0;
++ lj_ir_nop(obar);
+ }
+ /* Now emit the new store instead. */
+ }
+@@ -565,8 +623,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
+ goto doemit;
+ break; /* Otherwise continue searching. */
+ case ALIAS_MUST:
+- if (store->op2 == val) /* Same value: drop the new store. */
+- return DROPFOLD;
++ if (store->op2 == val &&
++ !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
++ return DROPFOLD; /* Same value: drop the new store. */
+ /* Different value: try to eliminate the redundant store. */
+ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
+ IRIns *ir;
+@@ -576,10 +635,7 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
+ goto doemit; /* No elimination possible. */
+ /* Remove redundant store from chain and replace with NOP. */
+ *refp = store->prev;
+- store->o = IR_NOP;
+- store->t.irt = IRT_NIL;
+- store->op1 = store->op2 = 0;
+- store->prev = 0;
++ lj_ir_nop(store);
+ /* Now emit the new store instead. */
+ }
+ goto doemit;
+@@ -590,6 +646,29 @@ doemit:
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+ }
+
++/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */
++int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
++{
++ IRRef ref;
++ if (J->chain[IR_BUFPUT] > lim)
++ return 0; /* Conflict. */
++ ref = J->chain[IR_CALLS];
++ while (ref > lim) {
++ IRIns *ir = IR(ref);
++ if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
++ return 0; /* Conflict. */
++ ref = ir->prev;
++ }
++ ref = J->chain[IR_CALLL];
++ while (ref > lim) {
++ IRIns *ir = IR(ref);
++ if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
++ return 0; /* Conflict. */
++ ref = ir->prev;
++ }
++ return 1; /* No conflict. Can safely FOLD/CSE. */
++}
++
+ /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
+
+ /* Find cdata allocation for a reference (if any). */
+@@ -830,10 +909,7 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
+ goto doemit; /* No elimination possible. */
+ /* Remove redundant store from chain and replace with NOP. */
+ *refp = store->prev;
+- store->o = IR_NOP;
+- store->t.irt = IRT_NIL;
+- store->op1 = store->op2 = 0;
+- store->prev = 0;
++ lj_ir_nop(store);
+ /* Now emit the new store instead. */
+ }
+ goto doemit;
+@@ -844,39 +920,6 @@ doemit:
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+ }
+
+-/* -- Forwarding of lj_tab_len -------------------------------------------- */
+-
+-/* This is rather simplistic right now, but better than nothing. */
+-TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
+-{
+- IRRef tab = fins->op1; /* Table reference. */
+- IRRef lim = tab; /* Search limit. */
+- IRRef ref;
+-
+- /* Any ASTORE is a conflict and limits the search. */
+- if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
+-
+- /* Search for conflicting HSTORE with numeric key. */
+- ref = J->chain[IR_HSTORE];
+- while (ref > lim) {
+- IRIns *store = IR(ref);
+- IRIns *href = IR(store->op1);
+- IRIns *key = IR(href->op2);
+- if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
+- lim = ref; /* Conflicting store found, limits search for TLEN. */
+- break;
+- }
+- ref = store->prev;
+- }
+-
+- /* Search for aliasing table.clear. */
+- if (!fwd_aa_tab_clear(J, lim, tab))
+- return lj_ir_emit(J);
+-
+- /* Try to find a matching load. Below the conflicting store, if any. */
+- return lj_opt_cselim(J, lim);
+-}
+-
+ /* -- ASTORE/HSTORE previous type analysis -------------------------------- */
+
+ /* Check whether the previous value for a table store is non-nil.
+diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
+index cd96ca4b..1a332bca 100644
+--- a/src/lj_opt_narrow.c
++++ b/src/lj_opt_narrow.c
+@@ -1,7 +1,7 @@
+ /*
+ ** NARROW: Narrowing of numbers to integers (double to int32_t).
+ ** STRIPOV: Stripping of overflow checks.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_opt_narrow_c
+@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
+ } else if (op == NARROW_CONV) {
+ *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
+ } else if (op == NARROW_SEXT) {
+- lua_assert(sp >= nc->stack+1);
++ lj_assertJ(sp >= nc->stack+1, "stack underflow");
+ sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
+ (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
+ } else if (op == NARROW_INT) {
+- lua_assert(next < last);
++ lj_assertJ(next < last, "missing arg to NARROW_INT");
+ *sp++ = nc->t == IRT_I64 ?
+ lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
+ lj_ir_kint(J, *next++);
+ } else { /* Regular IROpT. Pops two operands and pushes one result. */
+ IRRef mode = nc->mode;
+- lua_assert(sp >= nc->stack+2);
++ lj_assertJ(sp >= nc->stack+2, "stack underflow");
+ sp--;
+ /* Omit some overflow checks for array indexing. See comments above. */
+ if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
+@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
+ narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode);
+ }
+ }
+- lua_assert(sp == nc->stack+1);
++ lj_assertJ(sp == nc->stack+1, "stack misalignment");
+ return nc->stack[0];
+ }
+
+@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
+ TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
+ {
+ IRIns *ir;
+- lua_assert(tref_isnumber(tr));
++ lj_assertJ(tref_isnumber(tr), "expected number type");
+ if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
+ return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
+ /* Omit some overflow checks for array indexing. See comments above. */
+@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
+ /* Narrow C array index (overflow undefined). */
+ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
+ {
+- lua_assert(tref_isnumber(tr));
++ lj_assertJ(tref_isnumber(tr), "expected number type");
+ if (tref_isnum(tr))
+ return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
+ /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
+@@ -551,8 +551,13 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
+ {
+ rc = conv_str_tonum(J, rc, vc);
+ if (tref_isinteger(rc)) {
+- if ((uint32_t)numberVint(vc) != 0x80000000u)
+- return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
++ uint32_t k = (uint32_t)numberVint(vc);
++ if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) {
++ TRef zero = lj_ir_kint(J, 0);
++ if (!LJ_DUALNUM)
++ emitir(IRTGI(IR_NE), rc, zero);
++ return emitir(IRTGI(IR_SUBOV), zero, rc);
++ }
+ rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
+ }
+ return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
+@@ -588,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
+ /* Narrowing must be unconditional to preserve (-x)^i semantics. */
+ if (tvisint(vc) || numisint(numV(vc))) {
+ int checkrange = 0;
+- /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
++ /* pow() is faster for bigger exponents. But do this only for (+k)^i. */
+ if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
+ int32_t k = numberVint(vc);
+- if (!(k >= -65536 && k <= 65536)) goto split_pow;
++ if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
+ checkrange = 1;
+ }
+ if (!tref_isinteger(rc)) {
+@@ -602,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
+ TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
+ emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
+ }
+- return emitir(IRTN(IR_POW), rb, rc);
++ } else {
++force_pow_num:
++ rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */
+ }
+-split_pow:
+- /* FOLD covers most cases, but some are easier to do here. */
+- if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
+- return rb; /* 1 ^ x ==> 1 */
+- rc = lj_ir_tonum(J, rc);
+- if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
+- return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */
+- /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
+- rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
+- rc = emitir(IRTN(IR_MUL), rb, rc);
+- return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
++ return emitir(IRTN(IR_POW), rb, rc);
+ }
+
+ /* -- Predictive narrowing of induction variables ------------------------- */
+@@ -630,9 +627,10 @@ static int narrow_forl(jit_State *J, cTValue *o)
+ /* Narrow the FORL index type by looking at the runtime values. */
+ IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
+ {
+- lua_assert(tvisnumber(&tv[FORL_IDX]) &&
++ lj_assertJ(tvisnumber(&tv[FORL_IDX]) &&
+ tvisnumber(&tv[FORL_STOP]) &&
+- tvisnumber(&tv[FORL_STEP]));
++ tvisnumber(&tv[FORL_STEP]),
++ "expected number types");
+ /* Narrow only if the runtime values of start/stop/step are all integers. */
+ if (narrow_forl(J, &tv[FORL_IDX]) &&
+ narrow_forl(J, &tv[FORL_STOP]) &&
+diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
+index 929ccb61..5306a7db 100644
+--- a/src/lj_opt_sink.c
++++ b/src/lj_opt_sink.c
+@@ -1,6 +1,6 @@
+ /*
+ ** SINK: Allocation Sinking and Store Sinking.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_opt_sink_c
+@@ -78,8 +78,7 @@ static void sink_mark_ins(jit_State *J)
+ switch (ir->o) {
+ case IR_BASE:
+ return; /* Finished. */
+- case IR_CALLL: /* IRCALL_lj_tab_len */
+- case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:
++ case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN:
+ irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
+ break;
+ case IR_FLOAD:
+@@ -100,8 +99,8 @@ static void sink_mark_ins(jit_State *J)
+ (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP &&
+ !sink_checkphi(J, ir, (ir+1)->op2))))
+ irt_setmark(ir->t); /* Mark ineligible allocation. */
+- /* fallthrough */
+ #endif
++ /* fallthrough */
+ case IR_USTORE:
+ irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
+ break;
+@@ -219,6 +218,7 @@ static void sink_sweep_ins(jit_State *J)
+ for (ir = IR(J->cur.nk); ir < irbase; ir++) {
+ irt_clearmark(ir->t);
+ ir->prev = REGSP_INIT;
++ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+ if (irt_is64(ir->t) && ir->o != IR_KNULL)
+ ir++;
+ }
+diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
+index fc935204..25c1c234 100644
+--- a/src/lj_opt_split.c
++++ b/src/lj_opt_split.c
+@@ -1,6 +1,6 @@
+ /*
+ ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_opt_split_c
+@@ -8,7 +8,7 @@
+
+ #include "lj_obj.h"
+
+-#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
++#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
+
+ #include "lj_err.h"
+ #include "lj_buf.h"
+@@ -235,7 +235,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
+ return split_emit(J, IRTI(IR_BOR), t1, t2);
+ } else {
+ IRRef t1 = ir->prev, t2;
+- lua_assert(op == IR_BSHR || op == IR_BSAR);
++ lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
+ nir->o = IR_BSHR;
+ t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
+ ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
+@@ -250,7 +250,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
+ ir->prev = lj_ir_kint(J, 0);
+ return lo;
+ } else {
+- lua_assert(op == IR_BSHR || op == IR_BSAR);
++ lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
+ if (k == 32) {
+ J->cur.nins--;
+ ir->prev = hi;
+@@ -403,32 +403,8 @@ static void split_ir(jit_State *J)
+ hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
+ break;
+ case IR_FPMATH:
+- /* Try to rejoin pow from EXP2, MUL and LOG2. */
+- if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
+- IRIns *irp = IR(nir->op1);
+- if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
+- IRIns *irm4 = IR(irp->op1);
+- IRIns *irm3 = IR(irm4->op1);
+- IRIns *irm12 = IR(irm3->op1);
+- IRIns *irl1 = IR(irm12->op1);
+- if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
+- irl1->op2 == IRCALL_lj_vm_log2) {
+- IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */
+- IRRef arg3 = irm3->op2, arg4 = irm4->op2;
+- J->cur.nins--;
+- tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
+- tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
+- ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
+- hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
+- break;
+- }
+- }
+- }
+ hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
+ break;
+- case IR_ATAN2:
+- hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
+- break;
+ case IR_LDEXP:
+ hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
+ break;
+@@ -453,7 +429,7 @@ static void split_ir(jit_State *J)
+ hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
+ break;
+ case IR_FLOAD:
+- lua_assert(ir->op1 == REF_NIL);
++ lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
+ hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
+ nir->op2 += LJ_BE*4;
+ break;
+@@ -489,8 +465,9 @@ static void split_ir(jit_State *J)
+ break;
+ }
+ #endif
+- lua_assert(st == IRT_INT ||
+- (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
++ lj_assertJ(st == IRT_INT ||
++ (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
++ "bad source type for CONV");
+ nir->o = IR_CALLN;
+ #if LJ_32 && LJ_HASFFI
+ nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
+@@ -520,7 +497,8 @@ static void split_ir(jit_State *J)
+ hi = nir->op2;
+ break;
+ default:
+- lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
++ lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
++ "bad IR op %d", ir->o);
+ hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
+ hisubst[ir->op1], hisubst[ir->op2]);
+ break;
+@@ -577,7 +555,7 @@ static void split_ir(jit_State *J)
+ hi = split_bitshift(J, hisubst, oir, nir, ir);
+ break;
+ case IR_FLOAD:
+- lua_assert(ir->op2 == IRFL_CDATA_INT64);
++ lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
+ hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
+ #if LJ_BE
+ ir->prev = hi; hi = nref;
+@@ -643,7 +621,7 @@ static void split_ir(jit_State *J)
+ hi = nir->op2;
+ break;
+ default:
+- lua_assert(ir->o <= IR_NE); /* Comparisons. */
++ lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */
+ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
+ break;
+ }
+@@ -667,7 +645,7 @@ static void split_ir(jit_State *J)
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
+ #endif
+ ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
+- } else if (ir->o == IR_TOSTR) {
++ } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
+ if (hisubst[ir->op1]) {
+ if (irref_isk(ir->op1))
+ nir->op1 = ir->op1;
+@@ -721,7 +699,7 @@ static void split_ir(jit_State *J)
+ #if LJ_SOFTFP
+ if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
+ if (irt_isguard(ir->t)) {
+- lua_assert(st == IRT_NUM && irt_isint(ir->t));
++ lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
+ J->cur.nins--;
+ ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
+ } else {
+@@ -852,7 +830,7 @@ void lj_opt_split(jit_State *J)
+ if (!J->needsplit)
+ J->needsplit = split_needsplit(J);
+ #else
+- lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */
++ lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
+ #endif
+ if (J->needsplit) {
+ int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
+diff --git a/src/lj_parse.c b/src/lj_parse.c
+index 08f7cfa6..ea64677f 100644
+--- a/src/lj_parse.c
++++ b/src/lj_parse.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Lua parser (source code -> bytecode).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -163,6 +163,12 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD);
+ LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD);
+ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
+
++#ifdef LUA_USE_ASSERT
++#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__))
++#else
++#define lj_assertFS(c, ...) ((void)fs)
++#endif
++
+ /* -- Error handling ------------------------------------------------------ */
+
+ LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
+@@ -200,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e)
+ {
+ lua_State *L = fs->L;
+ TValue *o;
+- lua_assert(expr_isnumk(e));
++ lj_assertFS(expr_isnumk(e), "bad usage");
+ o = lj_tab_set(L, fs->kt, &e->u.nval);
+ if (tvhaskslot(o))
+ return tvkslot(o);
+@@ -225,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype)
+ /* Add a string constant. */
+ static BCReg const_str(FuncState *fs, ExpDesc *e)
+ {
+- lua_assert(expr_isstrk(e) || e->k == VGLOBAL);
++ lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage");
+ return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR);
+ }
+
+@@ -313,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest)
+ {
+ BCIns *jmp = &fs->bcbase[pc].ins;
+ BCPos offset = dest-(pc+1)+BCBIAS_J;
+- lua_assert(dest != NO_JMP);
++ lj_assertFS(dest != NO_JMP, "uninitialized jump target");
+ if (offset > BCMAX_D)
+ err_syntax(fs->ls, LJ_ERR_XJUMP);
+ setbc_d(jmp, offset);
+@@ -362,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target)
+ if (target == fs->pc) {
+ jmp_tohere(fs, list);
+ } else {
+- lua_assert(target < fs->pc);
++ lj_assertFS(target < fs->pc, "bad jump target");
+ jmp_patchval(fs, list, target, NO_REG, target);
+ }
+ }
+@@ -392,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg)
+ {
+ if (reg >= fs->nactvar) {
+ fs->freereg--;
+- lua_assert(reg == fs->freereg);
++ lj_assertFS(reg == fs->freereg, "bad regfree");
+ }
+ }
+
+@@ -542,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
+ } else if (e->k <= VKTRUE) {
+ ins = BCINS_AD(BC_KPRI, reg, const_pri(e));
+ } else {
+- lua_assert(e->k == VVOID || e->k == VJMP);
++ lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k);
+ return;
+ }
+ bcemit_INS(fs, ins);
+@@ -637,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
+ ins = BCINS_AD(BC_GSET, ra, const_str(fs, var));
+ } else {
+ BCReg ra, rc;
+- lua_assert(var->k == VINDEXED);
++ lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k);
+ ra = expr_toanyreg(fs, e);
+ rc = var->u.s.aux;
+ if ((int32_t)rc < 0) {
+@@ -645,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
+ } else if (rc > BCMAX_C) {
+ ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1));
+ } else {
++#ifdef LUA_USE_ASSERT
+ /* Free late alloced key reg to avoid assert on free of value reg. */
+ /* This can only happen when called from expr_table(). */
+- lua_assert(e->k != VNONRELOC || ra < fs->nactvar ||
+- rc < ra || (bcreg_free(fs, rc),1));
++ if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra)
++ bcreg_free(fs, rc);
++#endif
+ ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc);
+ }
+ }
+@@ -663,7 +671,7 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
+ expr_free(fs, e);
+ func = fs->freereg;
+ bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */
+- lua_assert(expr_isstrk(key));
++ lj_assertFS(expr_isstrk(key), "bad usage");
+ idx = const_str(fs, key);
+ if (idx <= BCMAX_C) {
+ bcreg_reserve(fs, 2+LJ_FR2);
+@@ -803,7 +811,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
+ else
+ rc = expr_toanyreg(fs, e2);
+ /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */
+- lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC);
++ lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC,
++ "bad expr type %d", e1->k);
+ expr_toval(fs, e1);
+ /* Avoid two consts to satisfy bytecode constraints. */
+ if (expr_isnumk(e1) && !expr_isnumk(e2) &&
+@@ -853,9 +862,12 @@ static void bcemit_comp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
+ e1 = e2; e2 = eret; /* Swap operands. */
+ op = ((op-BC_ISLT)^3)+BC_ISLT;
+ expr_toval(fs, e1);
++ ra = expr_toanyreg(fs, e1);
++ rd = expr_toanyreg(fs, e2);
++ } else {
++ rd = expr_toanyreg(fs, e2);
++ ra = expr_toanyreg(fs, e1);
+ }
+- rd = expr_toanyreg(fs, e2);
+- ra = expr_toanyreg(fs, e1);
+ ins = BCINS_AD(op, ra, rd);
+ }
+ /* Using expr_free might cause asserts if the order is wrong. */
+@@ -888,19 +900,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
+ if (op <= OPR_POW) {
+ bcemit_arith(fs, op, e1, e2);
+ } else if (op == OPR_AND) {
+- lua_assert(e1->t == NO_JMP); /* List must be closed. */
++ lj_assertFS(e1->t == NO_JMP, "jump list not closed");
+ expr_discharge(fs, e2);
+ jmp_append(fs, &e2->f, e1->f);
+ *e1 = *e2;
+ } else if (op == OPR_OR) {
+- lua_assert(e1->f == NO_JMP); /* List must be closed. */
++ lj_assertFS(e1->f == NO_JMP, "jump list not closed");
+ expr_discharge(fs, e2);
+ jmp_append(fs, &e2->t, e1->t);
+ *e1 = *e2;
+ } else if (op == OPR_CONCAT) {
+ expr_toval(fs, e2);
+ if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) {
+- lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1);
++ lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1,
++ "bad CAT stack layout");
+ expr_free(fs, e1);
+ setbc_b(bcptr(fs, e2), e1->u.s.info);
+ e1->u.s.info = e2->u.s.info;
+@@ -912,8 +925,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
+ }
+ e1->k = VRELOCABLE;
+ } else {
+- lua_assert(op == OPR_NE || op == OPR_EQ ||
+- op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT);
++ lj_assertFS(op == OPR_NE || op == OPR_EQ ||
++ op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT,
++ "bad binop %d", op);
+ bcemit_comp(fs, op, e1, e2);
+ }
+ }
+@@ -942,10 +956,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
+ e->u.s.info = fs->freereg-1;
+ e->k = VNONRELOC;
+ } else {
+- lua_assert(e->k == VNONRELOC);
++ lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
+ }
+ } else {
+- lua_assert(op == BC_UNM || op == BC_LEN);
++ lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op);
+ if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */
+ #if LJ_HASFFI
+ if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */
+@@ -1040,8 +1054,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name)
+ lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
+ lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
+ }
+- lua_assert((uintptr_t)name < VARNAME__MAX ||
+- lj_tab_getstr(fs->kt, name) != NULL);
++ lj_assertFS((uintptr_t)name < VARNAME__MAX ||
++ lj_tab_getstr(fs->kt, name) != NULL,
++ "unanchored variable name");
+ /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
+ setgcref(ls->vstack[vtop].name, obj2gco(name));
+ fs->varmap[fs->nactvar+n] = (uint16_t)vtop;
+@@ -1096,7 +1111,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e)
+ return i; /* Already exists. */
+ /* Otherwise create a new one. */
+ checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues");
+- lua_assert(e->k == VLOCAL || e->k == VUPVAL);
++ lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k);
+ fs->uvmap[n] = (uint16_t)vidx;
+ fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info);
+ fs->nuv = n+1;
+@@ -1147,7 +1162,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc)
+ lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
+ lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
+ }
+- lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL);
++ lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL,
++ "unanchored label name");
+ /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
+ setgcref(ls->vstack[vtop].name, obj2gco(name));
+ ls->vstack[vtop].startpc = pc;
+@@ -1177,8 +1193,9 @@ static void gola_close(LexState *ls, VarInfo *vg)
+ FuncState *fs = ls->fs;
+ BCPos pc = vg->startpc;
+ BCIns *ip = &fs->bcbase[pc].ins;
+- lua_assert(gola_isgoto(vg));
+- lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO);
++ lj_assertFS(gola_isgoto(vg), "expected goto");
++ lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO,
++ "bad bytecode op %d", bc_op(*ip));
+ setbc_a(ip, vg->slot);
+ if (bc_op(*ip) == BC_JMP) {
+ BCPos next = jmp_next(fs, pc);
+@@ -1197,9 +1214,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx)
+ if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) {
+ if (vg->slot < vl->slot) {
+ GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name);
+- lua_assert((uintptr_t)name >= VARNAME__MAX);
++ lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name");
+ ls->linenumber = ls->fs->bcbase[vg->startpc].line;
+- lua_assert(strref(vg->name) != NAME_BREAK);
++ lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break");
+ lj_lex_error(ls, 0, LJ_ERR_XGSCOPE,
+ strdata(strref(vg->name)), strdata(name));
+ }
+@@ -1263,7 +1280,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags)
+ bl->vstart = fs->ls->vtop;
+ bl->prev = fs->bl;
+ fs->bl = bl;
+- lua_assert(fs->freereg == fs->nactvar);
++ lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc");
+ }
+
+ /* End a scope. */
+@@ -1274,7 +1291,7 @@ static void fscope_end(FuncState *fs)
+ fs->bl = bl->prev;
+ var_remove(ls, bl->nactvar);
+ fs->freereg = fs->nactvar;
+- lua_assert(bl->nactvar == fs->nactvar);
++ lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc");
+ if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL)
+ bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0);
+ if ((bl->flags & FSCOPE_BREAK)) {
+@@ -1361,13 +1378,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
+ Node *n = &node[i];
+ if (tvhaskslot(&n->val)) {
+ ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val);
+- lua_assert(!tvisint(&n->key));
++ lj_assertFS(!tvisint(&n->key), "unexpected integer key");
+ if (tvisnum(&n->key)) {
+ TValue *tv = &((TValue *)kptr)[kidx];
+ if (LJ_DUALNUM) {
+ lua_Number nn = numV(&n->key);
+ int32_t k = lj_num2int(nn);
+- lua_assert(!tvismzero(&n->key));
++ lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
+ if ((lua_Number)k == nn)
+ setintV(tv, k);
+ else
+@@ -1415,21 +1432,21 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
+ uint8_t *li = (uint8_t *)lineinfo;
+ do {
+ BCLine delta = base[i].line - first;
+- lua_assert(delta >= 0 && delta < 256);
++ lj_assertFS(delta >= 0 && delta < 256, "bad line delta");
+ li[i] = (uint8_t)delta;
+ } while (++i < n);
+ } else if (LJ_LIKELY(numline < 65536)) {
+ uint16_t *li = (uint16_t *)lineinfo;
+ do {
+ BCLine delta = base[i].line - first;
+- lua_assert(delta >= 0 && delta < 65536);
++ lj_assertFS(delta >= 0 && delta < 65536, "bad line delta");
+ li[i] = (uint16_t)delta;
+ } while (++i < n);
+ } else {
+ uint32_t *li = (uint32_t *)lineinfo;
+ do {
+ BCLine delta = base[i].line - first;
+- lua_assert(delta >= 0);
++ lj_assertFS(delta >= 0, "bad line delta");
+ li[i] = (uint32_t)delta;
+ } while (++i < n);
+ }
+@@ -1448,7 +1465,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
+ MSize len = s->len+1;
+ char *p = lj_buf_more(&ls->sb, len);
+ p = lj_buf_wmem(p, strdata(s), len);
+- setsbufP(&ls->sb, p);
++ ls->sb.w = p;
+ }
+ *ofsvar = sbuflen(&ls->sb);
+ lastpc = 0;
+@@ -1469,7 +1486,7 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
+ startpc = vs->startpc;
+ p = lj_strfmt_wuleb128(p, startpc-lastpc);
+ p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
+- setsbufP(&ls->sb, p);
++ ls->sb.w = p;
+ lastpc = startpc;
+ }
+ }
+@@ -1482,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
+ {
+ setmref(pt->uvinfo, p);
+ setmref(pt->varinfo, (char *)p + ofsvar);
+- memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */
++ memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */
+ }
+ #else
+
+@@ -1519,7 +1536,7 @@ static void fs_fixup_ret(FuncState *fs)
+ }
+ fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */
+ fscope_end(fs);
+- lua_assert(fs->bl == NULL);
++ lj_assertFS(fs->bl == NULL, "bad scope nesting");
+ /* May need to fixup returns encoded before first function was created. */
+ if (fs->flags & PROTO_FIXUP_RETURN) {
+ BCPos pc;
+@@ -1591,7 +1608,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
+ L->top--; /* Pop table of constants. */
+ ls->vtop = fs->vbase; /* Reset variable stack. */
+ ls->fs = fs->prev;
+- lua_assert(ls->fs != NULL || ls->tok == TK_eof);
++ lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state");
+ return pt;
+ }
+
+@@ -1685,14 +1702,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
+ }
+
+ /* Get value of constant expression. */
+-static void expr_kvalue(TValue *v, ExpDesc *e)
++static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e)
+ {
++ UNUSED(fs);
+ if (e->k <= VKTRUE) {
+ setpriV(v, ~(uint32_t)e->k);
+ } else if (e->k == VKSTR) {
+ setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
+ } else {
+- lua_assert(tvisnumber(expr_numtv(e)));
++ lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant");
+ *v = *expr_numtv(e);
+ }
+ }
+@@ -1742,11 +1760,11 @@ static void expr_table(LexState *ls, ExpDesc *e)
+ fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx);
+ }
+ vcall = 0;
+- expr_kvalue(&k, &key);
++ expr_kvalue(fs, &k, &key);
+ v = lj_tab_set(fs->L, t, &k);
+ lj_gc_anybarriert(fs->L, t);
+ if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */
+- expr_kvalue(v, &val);
++ expr_kvalue(fs, v, &val);
+ } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */
+ settabV(fs->L, v, t); /* Preserve key with table itself as value. */
+ fixt = 1; /* Fix this later, after all resizes. */
+@@ -1765,8 +1783,9 @@ static void expr_table(LexState *ls, ExpDesc *e)
+ if (vcall) {
+ BCInsLine *ilp = &fs->bcbase[fs->pc-1];
+ ExpDesc en;
+- lua_assert(bc_a(ilp->ins) == freg &&
+- bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB));
++ lj_assertFS(bc_a(ilp->ins) == freg &&
++ bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB),
++ "bad CALL code generation");
+ expr_init(&en, VKNUM, 0);
+ en.u.nval.u32.lo = narr-1;
+ en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */
+@@ -1796,7 +1815,7 @@ static void expr_table(LexState *ls, ExpDesc *e)
+ for (i = 0; i <= hmask; i++) {
+ Node *n = &node[i];
+ if (tvistab(&n->val)) {
+- lua_assert(tabV(&n->val) == t);
++ lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table");
+ setnilV(&n->val); /* Turn value into nil. */
+ }
+ }
+@@ -1827,7 +1846,7 @@ static BCReg parse_params(LexState *ls, int needself)
+ } while (lex_opt(ls, ','));
+ }
+ var_add(ls, nparams);
+- lua_assert(fs->nactvar == nparams);
++ lj_assertFS(fs->nactvar == nparams, "bad regalloc");
+ bcreg_reserve(fs, nparams);
+ lex_check(ls, ')');
+ return nparams;
+@@ -1914,7 +1933,7 @@ static void parse_args(LexState *ls, ExpDesc *e)
+ err_syntax(ls, LJ_ERR_XFUNARG);
+ return; /* Silence compiler. */
+ }
+- lua_assert(e->k == VNONRELOC);
++ lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
+ base = e->u.s.info; /* Base register for call. */
+ if (args.k == VCALL) {
+ ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
+@@ -2667,7 +2686,8 @@ static int parse_stmt(LexState *ls)
+ lj_lex_next(ls);
+ parse_goto(ls);
+ break;
+- } /* else: fallthrough */
++ }
++ /* fallthrough */
+ default:
+ parse_call_assign(ls);
+ break;
+@@ -2683,8 +2703,9 @@ static void parse_chunk(LexState *ls)
+ while (!islast && !parse_isend(ls->tok)) {
+ islast = parse_stmt(ls);
+ lex_opt(ls, ';');
+- lua_assert(ls->fs->framesize >= ls->fs->freereg &&
+- ls->fs->freereg >= ls->fs->nactvar);
++ lj_assertLS(ls->fs->framesize >= ls->fs->freereg &&
++ ls->fs->freereg >= ls->fs->nactvar,
++ "bad regalloc");
+ ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */
+ }
+ synlevel_end(ls);
+@@ -2719,9 +2740,8 @@ GCproto *lj_parse(LexState *ls)
+ err_token(ls, TK_eof);
+ pt = fs_finish(ls, ls->linenumber);
+ L->top--; /* Drop chunkname. */
+- lua_assert(fs.prev == NULL);
+- lua_assert(ls->fs == NULL);
+- lua_assert(pt->sizeuv == 0);
++ lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting");
++ lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues");
+ return pt;
+ }
+
+diff --git a/src/lj_parse.h b/src/lj_parse.h
+index ceeab699..5207023f 100644
+--- a/src/lj_parse.h
++++ b/src/lj_parse.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Lua parser (source code -> bytecode).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_PARSE_H
+diff --git a/src/lj_prng.c b/src/lj_prng.c
+new file mode 100644
+index 00000000..bb32da8b
+--- /dev/null
++++ b/src/lj_prng.c
+@@ -0,0 +1,250 @@
++/*
++** Pseudo-random number generation.
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#define lj_prng_c
++#define LUA_CORE
++
++/* To get the syscall prototype. */
++#if defined(__linux__) && !defined(_GNU_SOURCE)
++#define _GNU_SOURCE
++#endif
++
++#include "lj_def.h"
++#include "lj_arch.h"
++#include "lj_prng.h"
++
++/* -- PRNG step function -------------------------------------------------- */
++
++/* This implements a Tausworthe PRNG with period 2^223. Based on:
++** Tables of maximally-equidistributed combined LFSR generators,
++** Pierre L'Ecuyer, 1991, table 3, 1st entry.
++** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
++**
++** Important note: This PRNG is NOT suitable for cryptographic use!
++**
++** But it works fine for math.random(), which has an API that's not
++** suitable for cryptography, anyway.
++**
++** When used as a securely seeded global PRNG, it substantially raises
++** the difficulty for various attacks on the VM.
++*/
++
++/* Update generator i and compute a running xor of all states. */
++#define TW223_GEN(rs, z, r, i, k, q, s) \
++ z = rs->u[i]; \
++ z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
++ r ^= z; rs->u[i] = z;
++
++#define TW223_STEP(rs, z, r) \
++ TW223_GEN(rs, z, r, 0, 63, 31, 18) \
++ TW223_GEN(rs, z, r, 1, 58, 19, 28) \
++ TW223_GEN(rs, z, r, 2, 55, 24, 7) \
++ TW223_GEN(rs, z, r, 3, 47, 21, 8)
++
++/* PRNG step function with uint64_t result. */
++LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs)
++{
++ uint64_t z, r = 0;
++ TW223_STEP(rs, z, r)
++ return r;
++}
++
++/* PRNG step function with double in uint64_t result. */
++LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs)
++{
++ uint64_t z, r = 0;
++ TW223_STEP(rs, z, r)
++ /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */
++ return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
++}
++
++/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */
++static LJ_AINLINE void lj_prng_condition(PRNGState *rs)
++{
++ if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1);
++ if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6);
++ if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9);
++ if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17);
++}
++
++/* -- PRNG seeding from OS ------------------------------------------------ */
++
++#if LUAJIT_SECURITY_PRNG == 0
++
++/* Nothing to define. */
++
++#elif LJ_TARGET_XBOX360
++
++extern int XNetRandom(void *buf, unsigned int len);
++
++#elif LJ_TARGET_PS3
++
++extern int sys_get_random_number(void *buf, uint64_t len);
++
++#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA
++
++extern int sceRandomGetRandomNumber(void *buf, size_t len);
++
++#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE
++
++#define WIN32_LEAN_AND_MEAN
++#include <windows.h>
++
++#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE
++/* Must use BCryptGenRandom. */
++#include <bcrypt.h>
++#pragma comment(lib, "bcrypt.lib")
++#else
++/* If you wonder about this mess, then search online for RtlGenRandom. */
++typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len);
++static PRGR libfunc_rgr;
++#endif
++
++#elif LJ_TARGET_POSIX
++
++#if LJ_TARGET_LINUX
++/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */
++#include <sys/syscall.h>
++#else
++
++#if LJ_TARGET_OSX && !LJ_TARGET_IOS
++/*
++** In their infinite wisdom Apple decided to disallow getentropy() in the
++** iOS App Store. Even though the call is common to all BSD-ish OS, it's
++** recommended by Apple in their own security-related docs, and, to top
++** off the foolery, /dev/urandom is handled by the same kernel code,
++** yet accessing it is actually permitted (but less efficient).
++*/
++#include <Availability.h>
++#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200
++#define LJ_TARGET_HAS_GETENTROPY 1
++#endif
++#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN
++#define LJ_TARGET_HAS_GETENTROPY 1
++#endif
++
++#if LJ_TARGET_HAS_GETENTROPY
++extern int getentropy(void *buf, size_t len);
++#ifdef __ELF__
++ __attribute__((weak))
++#endif
++;
++#endif
++
++#endif
++
++/* For the /dev/urandom fallback. */
++#include <fcntl.h>
++#include <unistd.h>
++
++#endif
++
++#if LUAJIT_SECURITY_PRNG == 0
++
++/* If you really don't care about security, then define
++** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed
++** and provides NO SECURITY against various attacks on the VM.
++**
++** BTW: This is NOT the way to get predictable table iteration,
++** predictable trace generation, predictable bytecode generation, etc.
++*/
++int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
++{
++ lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */
++ return 1;
++}
++
++#else
++
++/* Securely seed PRNG from system entropy. Returns 0 on failure. */
++int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
++{
++#if LJ_TARGET_XBOX360
++
++ if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0)
++ goto ok;
++
++#elif LJ_TARGET_PS3
++
++ if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0)
++ goto ok;
++
++#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA
++
++ if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
++ goto ok;
++
++#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE
++
++ if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u),
++ BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0)
++ goto ok;
++
++#elif LJ_TARGET_WINDOWS
++
++ /* Keep the library loaded in case multiple VMs are started. */
++ if (!libfunc_rgr) {
++ HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll");
++ if (!lib) return 0;
++ libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036");
++ if (!libfunc_rgr) return 0;
++ }
++ if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u)))
++ goto ok;
++
++#elif LJ_TARGET_POSIX
++
++#if LJ_TARGET_LINUX && defined(SYS_getrandom)
++
++ if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u))
++ goto ok;
++
++#elif LJ_TARGET_HAS_GETENTROPY
++
++#ifdef __ELF__
++ if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0)
++ goto ok;
++#else
++ if (getentropy(rs->u, sizeof(rs->u)) == 0)
++ goto ok;
++#endif
++
++#endif
++
++ /* Fallback to /dev/urandom. This may fail if the device is not
++ ** existent or accessible in a chroot or container, or if the process
++ ** or the OS ran out of file descriptors.
++ */
++ {
++ int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC);
++ if (fd != -1) {
++ ssize_t n = read(fd, rs->u, sizeof(rs->u));
++ (void)close(fd);
++ if (n == (ssize_t)sizeof(rs->u))
++ goto ok;
++ }
++ }
++
++#else
++
++ /* Add an elif above for your OS with a secure PRNG seed.
++ ** Note that fiddling around with rand(), getpid(), time() or coercing
++ ** ASLR to yield a few bits of randomness is not helpful.
++ ** If you don't want any security, then don't pretend you have any
++ ** and simply define LUAJIT_SECURITY_PRNG=0 for the build.
++ */
++#error "Missing secure PRNG seed for this OS"
++
++#endif
++ return 0; /* Fail. */
++
++ok:
++ lj_prng_condition(rs);
++ (void)lj_prng_u64(rs);
++ return 1; /* Success. */
++}
++
++#endif
++
+diff --git a/src/lj_prng.h b/src/lj_prng.h
+new file mode 100644
+index 00000000..216729be
+--- /dev/null
++++ b/src/lj_prng.h
+@@ -0,0 +1,24 @@
++/*
++** Pseudo-random number generation.
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#ifndef _LJ_PRNG_H
++#define _LJ_PRNG_H
++
++#include "lj_def.h"
++
++LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs);
++LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs);
++LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs);
++
++/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */
++static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs)
++{
++ rs->u[0] = U64x(a0d27757,0a345b8c);
++ rs->u[1] = U64x(764a296c,5d4aa64f);
++ rs->u[2] = U64x(51220704,070adeaa);
++ rs->u[3] = U64x(2a2717b5,a7b7b927);
++}
++
++#endif
+diff --git a/src/lj_profile.c b/src/lj_profile.c
+index 116998e1..fbcb9878 100644
+--- a/src/lj_profile.c
++++ b/src/lj_profile.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Low-overhead profiling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_profile_c
+@@ -153,7 +153,7 @@ static void profile_trigger(ProfileState *ps)
+ profile_lock(ps);
+ ps->samples++; /* Always increment number of samples. */
+ mask = g->hookmask;
+- if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */
++ if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */
+ int st = g->vmstate;
+ ps->vmstate = st >= 0 ? 'N' :
+ st == ~LJ_VMST_INTERP ? 'I' :
+@@ -247,7 +247,7 @@ static DWORD WINAPI profile_thread(void *psx)
+ {
+ ProfileState *ps = (ProfileState *)psx;
+ int interval = ps->interval;
+-#if LJ_TARGET_WINDOWS
++#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
+ ps->wmm_tbp(interval);
+ #endif
+ while (1) {
+@@ -255,7 +255,7 @@ static DWORD WINAPI profile_thread(void *psx)
+ if (ps->abort) break;
+ profile_trigger(ps);
+ }
+-#if LJ_TARGET_WINDOWS
++#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
+ ps->wmm_tep(interval);
+ #endif
+ return 0;
+@@ -264,9 +264,9 @@ static DWORD WINAPI profile_thread(void *psx)
+ /* Start profiling timer thread. */
+ static void profile_timer_start(ProfileState *ps)
+ {
+-#if LJ_TARGET_WINDOWS
++#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
+ if (!ps->wmm) { /* Load WinMM library on-demand. */
+- ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0);
++ ps->wmm = LJ_WIN_LOADLIBA("winmm.dll");
+ if (ps->wmm) {
+ ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
+ ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
+@@ -346,8 +346,7 @@ LUA_API void luaJIT_profile_stop(lua_State *L)
+ lj_trace_flushall(L);
+ #endif
+ lj_buf_free(g, &ps->sb);
+- setmref(ps->sb.b, NULL);
+- setmref(ps->sb.e, NULL);
++ ps->sb.w = ps->sb.e = NULL;
+ ps->g = NULL;
+ }
+ }
+@@ -362,7 +361,7 @@ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+ lj_buf_reset(sb);
+ lj_debug_dumpstack(L, sb, fmt, depth);
+ *len = (size_t)sbuflen(sb);
+- return sbufB(sb);
++ return sb->b;
+ }
+
+ #endif
+diff --git a/src/lj_profile.h b/src/lj_profile.h
+index 0cccfd78..96706ee3 100644
+--- a/src/lj_profile.h
++++ b/src/lj_profile.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Low-overhead profiling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_PROFILE_H
+diff --git a/src/lj_record.c b/src/lj_record.c
+index 9d0469c4..30722814 100644
+--- a/src/lj_record.c
++++ b/src/lj_record.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Trace recorder (bytecode -> SSA IR).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_record_c
+@@ -33,6 +33,7 @@
+ #include "lj_snap.h"
+ #include "lj_dispatch.h"
+ #include "lj_vm.h"
++#include "lj_prng.h"
+
+ /* Some local macros to save typing. Undef'd at the end. */
+ #define IR(ref) (&J->cur.ir[(ref)])
+@@ -50,34 +51,52 @@
+ static void rec_check_ir(jit_State *J)
+ {
+ IRRef i, nins = J->cur.nins, nk = J->cur.nk;
+- lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
++ lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536,
++ "inconsistent IR layout");
+ for (i = nk; i < nins; i++) {
+ IRIns *ir = IR(i);
+ uint32_t mode = lj_ir_mode[ir->o];
+ IRRef op1 = ir->op1;
+ IRRef op2 = ir->op2;
++ const char *err = NULL;
+ switch (irm_op1(mode)) {
+- case IRMnone: lua_assert(op1 == 0); break;
+- case IRMref: lua_assert(op1 >= nk);
+- lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
++ case IRMnone:
++ if (op1 != 0) err = "IRMnone op1 used";
++ break;
++ case IRMref:
++ if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i))
++ err = "IRMref op1 out of range";
++ break;
+ case IRMlit: break;
+- case IRMcst: lua_assert(i < REF_BIAS);
++ case IRMcst:
++ if (i >= REF_BIAS) { err = "constant in IR range"; break; }
+ if (irt_is64(ir->t) && ir->o != IR_KNULL)
+ i++;
+ continue;
+ }
+ switch (irm_op2(mode)) {
+- case IRMnone: lua_assert(op2 == 0); break;
+- case IRMref: lua_assert(op2 >= nk);
+- lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break;
++ case IRMnone:
++ if (op2) err = "IRMnone op2 used";
++ break;
++ case IRMref:
++ if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i))
++ err = "IRMref op2 out of range";
++ break;
+ case IRMlit: break;
+- case IRMcst: lua_assert(0); break;
++ case IRMcst: err = "IRMcst op2"; break;
+ }
+- if (ir->prev) {
+- lua_assert(ir->prev >= nk);
+- lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i);
+- lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o);
++ if (!err && ir->prev) {
++ if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i))
++ err = "chain out of range";
++ else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o)
++ err = "chain to different op";
+ }
++ lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s",
++ i-REF_BIAS,
++ ir->o,
++ irm_op1(mode) == IRMref ? op1-REF_BIAS : op1,
++ irm_op2(mode) == IRMref ? op2-REF_BIAS : op2,
++ err);
+ }
+ }
+
+@@ -87,9 +106,10 @@ static void rec_check_slots(jit_State *J)
+ BCReg s, nslots = J->baseslot + J->maxslot;
+ int32_t depth = 0;
+ cTValue *base = J->L->base - J->baseslot;
+- lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS);
+- lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME));
+- lua_assert(nslots < LJ_MAX_JSLOTS);
++ lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot");
++ lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME),
++ "baseslot does not point to frame");
++ lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow");
+ for (s = 0; s < nslots; s++) {
+ TRef tr = J->slot[s];
+ if (tr) {
+@@ -97,56 +117,68 @@ static void rec_check_slots(jit_State *J)
+ IRRef ref = tref_ref(tr);
+ IRIns *ir = NULL; /* Silence compiler. */
+ if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
+- lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
++ lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins,
++ "slot %d ref %04d out of range", s, ref - REF_BIAS);
+ ir = IR(ref);
+- lua_assert(irt_t(ir->t) == tref_t(tr));
++ lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s);
+ }
+ if (s == 0) {
+- lua_assert(tref_isfunc(tr));
++ lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function");
+ #if LJ_FR2
+ } else if (s == 1) {
+- lua_assert((tr & ~TREF_FRAME) == 0);
++ lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1");
+ #endif
+ } else if ((tr & TREF_FRAME)) {
+ GCfunc *fn = gco2func(frame_gc(tv));
+ BCReg delta = (BCReg)(tv - frame_prev(tv));
+ #if LJ_FR2
+- if (ref)
+- lua_assert(ir_knum(ir)->u64 == tv->u64);
++ lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
++ "frame slot %d PC mismatch", s);
+ tr = J->slot[s-1];
+ ir = IR(tref_ref(tr));
+ #endif
+- lua_assert(tref_isfunc(tr));
+- if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
+- lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
+- : (s == delta + LJ_FR2));
++ lj_assertJ(tref_isfunc(tr),
++ "frame slot %d is not a function", s-LJ_FR2);
++ lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir),
++ "frame slot %d function mismatch", s-LJ_FR2);
++ lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
++ : (s == delta + LJ_FR2),
++ "frame slot %d broken chain", s-LJ_FR2);
+ depth++;
+ } else if ((tr & TREF_CONT)) {
+ #if LJ_FR2
+- if (ref)
+- lua_assert(ir_knum(ir)->u64 == tv->u64);
++ lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
++ "cont slot %d continuation mismatch", s);
+ #else
+- lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
++ lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void),
++ "cont slot %d continuation mismatch", s);
+ #endif
+- lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME));
++ lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME),
++ "cont slot %d not followed by frame", s);
+ depth++;
++ } else if ((tr & TREF_KEYINDEX)) {
++ lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d",
++ s, tref_type(tr));
+ } else {
+- if (tvisnumber(tv))
+- lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */
+- else
+- lua_assert(itype2irt(tv) == tref_type(tr));
++ /* Number repr. may differ, but other types must be the same. */
++ lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) :
++ itype2irt(tv) == tref_type(tr),
++ "slot %d type mismatch: stack type %d vs IR type %d",
++ s, itypemap(tv), tref_type(tr));
+ if (tref_isk(tr)) { /* Compare constants. */
+ TValue tvk;
+ lj_ir_kvalue(J->L, &tvk, ir);
+- if (!(tvisnum(&tvk) && tvisnan(&tvk)))
+- lua_assert(lj_obj_equal(tv, &tvk));
+- else
+- lua_assert(tvisnum(tv) && tvisnan(tv));
++ lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ?
++ (tvisnum(tv) && tvisnan(tv)) :
++ lj_obj_equal(tv, &tvk),
++ "slot %d const mismatch: stack %016llx vs IR %016llx",
++ s, tv->u64, tvk.u64);
+ }
+ }
+ }
+ }
+- lua_assert(J->framedepth == depth);
++ lj_assertJ(J->framedepth == depth,
++ "frame depth mismatch %d vs %d", J->framedepth, depth);
+ }
+ #endif
+
+@@ -182,7 +214,8 @@ static TRef getcurrf(jit_State *J)
+ {
+ if (J->base[-1-LJ_FR2])
+ return J->base[-1-LJ_FR2];
+- lua_assert(J->baseslot == 1+LJ_FR2);
++ /* Non-base frame functions ought to be loaded already. */
++ lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot");
+ return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
+ }
+
+@@ -229,6 +262,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o)
+ return 0; /* Can't represent lightuserdata (pointless). */
+ }
+
++/* Emit a VLOAD with the correct type. */
++TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t)
++{
++ TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx);
++ if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
++ return tr;
++}
++
+ /* -- Record loop ops ----------------------------------------------------- */
+
+ /* Loop event. */
+@@ -245,9 +286,9 @@ static void canonicalize_slots(jit_State *J)
+ if (LJ_DUALNUM) return;
+ for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
+ TRef tr = J->slot[s];
+- if (tref_isinteger(tr)) {
++ if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) {
+ IRIns *ir = IR(tref_ref(tr));
+- if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
++ if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY))))
+ J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
+ }
+ }
+@@ -427,7 +468,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
+ TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
+ TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
+ int tc, dir = rec_for_direction(&tv[FORL_STEP]);
+- lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
++ lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI,
++ "bad bytecode %d instead of FORI/JFORI", bc_op(*fori));
+ scev->t.irt = t;
+ scev->dir = dir;
+ scev->stop = tref_ref(stop);
+@@ -483,7 +525,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
+ IRT_NUM;
+ for (i = FORL_IDX; i <= FORL_STEP; i++) {
+ if (!tr[i]) sload(J, ra+i);
+- lua_assert(tref_isnumber_str(tr[i]));
++ lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type");
+ if (tref_isstr(tr[i]))
+ tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
+ if (t == IRT_INT) {
+@@ -540,10 +582,10 @@ static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
+ }
+
+ /* Record LOOP/JLOOP. Now, that was easy. */
+-static LoopEvent rec_loop(jit_State *J, BCReg ra)
++static LoopEvent rec_loop(jit_State *J, BCReg ra, int skip)
+ {
+ if (ra < J->maxslot) J->maxslot = ra;
+- J->pc++;
++ J->pc += skip;
+ return LOOPEV_ENTER;
+ }
+
+@@ -567,6 +609,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
+ {
+ if (J->parent == 0 && J->exitno == 0) {
+ if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
++ if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */
+ /* Same loop? */
+ if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
+ lj_trace_err(J, LJ_TRERR_LLEAVE);
+@@ -607,6 +650,70 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
+ } /* Side trace continues across a loop that's left or not entered. */
+ }
+
++/* Record ITERN. */
++static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb)
++{
++#if LJ_BE
++ /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
++ ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
++ */
++ UNUSED(ra); UNUSED(rb);
++ setintV(&J->errinfo, (int32_t)BC_ITERN);
++ lj_trace_err_info(J, LJ_TRERR_NYIBC);
++#else
++ RecordIndex ix;
++ /* Since ITERN is recorded at the start, we need our own loop detection. */
++ if (J->pc == J->startpc &&
++ (J->cur.nins > REF_FIRST+1 ||
++ (J->cur.nins == REF_FIRST+1 && J->cur.ir[REF_FIRST].o != IR_PROF)) &&
++ J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) {
++ lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
++ return LOOPEV_ENTER;
++ }
++ J->maxslot = ra;
++ lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */
++ ix.tab = getslot(J, ra-2);
++ ix.key = J->base[ra-1] ? J->base[ra-1] :
++ sloadt(J, (int32_t)(ra-1), IRT_INT, IRSLOAD_KEYINDEX);
++ copyTV(J->L, &ix.tabv, &J->L->base[ra-2]);
++ copyTV(J->L, &ix.keyv, &J->L->base[ra-1]);
++ ix.idxchain = (rb < 3); /* Omit value type check, if unused. */
++ ix.mobj = 1; /* We need the next index, too. */
++ J->maxslot = ra + lj_record_next(J, &ix);
++ J->needsnap = 1;
++ if (!tref_isnil(ix.key)) { /* Looping back? */
++ J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */
++ J->base[ra] = ix.key;
++ J->base[ra+1] = ix.val;
++ J->pc += bc_j(J->pc[1])+2;
++ return LOOPEV_ENTER;
++ } else {
++ J->maxslot = ra-3;
++ J->pc += 2;
++ return LOOPEV_LEAVE;
++ }
++#endif
++}
++
++/* Record ISNEXT. */
++static void rec_isnext(jit_State *J, BCReg ra)
++{
++ cTValue *b = &J->L->base[ra-3];
++ if (tvisfunc(b) && funcV(b)->c.ffid == FF_next &&
++ tvistab(b+1) && tvisnil(b+2)) {
++ /* These checks are folded away for a compiled pairs(). */
++ TRef func = getslot(J, ra-3);
++ TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID);
++ emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next));
++ (void)getslot(J, ra-2); /* Type check for table. */
++ (void)getslot(J, ra-1); /* Type check for nil key. */
++ J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX;
++ J->maxslot = ra;
++ } else { /* Abort trace. Interpreter will despecialize bytecode. */
++ lj_trace_err(J, LJ_TRERR_RECERR);
++ }
++}
++
+ /* -- Record profiler hook checks ----------------------------------------- */
+
+ #if LJ_HASPROFILE
+@@ -615,7 +722,8 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
+ static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
+ {
+ GCproto *ppt;
+- lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l');
++ lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l',
++ "bad profiler mode %c", J->prof_mode);
+ if (!pt)
+ return 0;
+ ppt = J->prev_pt;
+@@ -676,7 +784,7 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
+ /* NYI: io_file_iter doesn't have an ffid, yet. */
+ { /* Specialize to the ffid. */
+ TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
+- emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
++ emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid));
+ }
+ return tr;
+ default:
+@@ -731,6 +839,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
+ J->framedepth++;
+ J->base += func+1+LJ_FR2;
+ J->baseslot += func+1+LJ_FR2;
++ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
++ lj_trace_err(J, LJ_TRERR_STACKOV);
+ }
+
+ /* Record tail call. */
+@@ -791,13 +901,14 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
+ BCReg cbase = (BCReg)frame_delta(frame);
+ if (--J->framedepth <= 0)
+ lj_trace_err(J, LJ_TRERR_NYIRETL);
+- lua_assert(J->baseslot > 1+LJ_FR2);
++ lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
+ gotresults++;
+ rbase += cbase;
+ J->baseslot -= (BCReg)cbase;
+ J->base -= cbase;
+ J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */
+ frame = frame_prevd(frame);
++ J->needsnap = 1; /* Stop catching on-trace errors. */
+ }
+ /* Return to lower frame via interpreter for unhandled cases. */
+ if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
+@@ -815,7 +926,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
+ BCReg cbase = (BCReg)frame_delta(frame);
+ if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
+ lj_trace_err(J, LJ_TRERR_NYIRETL);
+- lua_assert(J->baseslot > 1+LJ_FR2);
++ lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
+ rbase += cbase;
+ J->baseslot -= (BCReg)cbase;
+ J->base -= cbase;
+@@ -842,7 +953,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
+ J->maxslot = cbase+(BCReg)nresults;
+ if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
+ J->framedepth--;
+- lua_assert(J->baseslot > cbase+1+LJ_FR2);
++ lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return");
+ J->baseslot -= cbase+1+LJ_FR2;
+ J->base -= cbase+1+LJ_FR2;
+ } else if (J->parent == 0 && J->exitno == 0 &&
+@@ -857,7 +968,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
+ emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
+ J->retdepth++;
+ J->needsnap = 1;
+- lua_assert(J->baseslot == 1+LJ_FR2);
++ lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
+ /* Shift result slots up and clear the slots of the new frame below. */
+ memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
+ memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
+@@ -884,6 +995,9 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
+ TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
+ if (bslot != J->maxslot) { /* Concatenate the remainder. */
+ TValue *b = J->L->base, save; /* Simulate lower frame and result. */
++ /* Can't handle MM_concat + CALLT + fast func side-effects. */
++ if (J->postproc != LJ_POST_NONE)
++ lj_trace_err(J, LJ_TRERR_NYIRETL);
+ J->base[J->maxslot] = tr;
+ copyTV(J->L, &save, b-(2<<LJ_FR2));
+ if (gotresults)
+@@ -905,12 +1019,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
+ } /* Otherwise continue with another __concat call. */
+ } else {
+ /* Result type already specialized. */
+- lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
++ lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt,
++ "bad continuation type");
+ }
+ } else {
+ lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
+ }
+- lua_assert(J->baseslot >= 1+LJ_FR2);
++ lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return");
+ }
+
+ /* -- Metamethod handling ------------------------------------------------- */
+@@ -976,13 +1091,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
+ }
+ /* The cdata metatable is treated as immutable. */
+ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
+-#if LJ_GC64
+- /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
+ ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
+ GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
+-#else
+- ix->mt = mix.tab = lj_ir_ktab(J, mt);
+-#endif
+ goto nocheck;
+ }
+ ix->mt = mt ? mix.tab : TREF_NIL;
+@@ -1056,7 +1166,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
+ lj_record_call(J, func, 2);
+ } else {
+ if (LJ_52 && tref_istab(tr))
+- return lj_ir_call(J, IRCALL_lj_tab_len, tr);
++ return emitir(IRTI(IR_ALEN), tr, TREF_NIL);
+ lj_trace_err(J, LJ_TRERR_NOMM);
+ }
+ return 0; /* No result yet. */
+@@ -1165,7 +1275,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
+ ix->tab = ix->val;
+ copyTV(J->L, &ix->tabv, &ix->valv);
+ } else {
+- lua_assert(tref_iscdata(ix->key));
++ lj_assertJ(tref_iscdata(ix->key), "cdata expected");
+ ix->tab = ix->key;
+ copyTV(J->L, &ix->tabv, &ix->keyv);
+ }
+@@ -1262,7 +1372,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
+ /* Got scalar evolution analysis results for this reference? */
+ if (ref == J->scev.idx) {
+ int32_t stop;
+- lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD);
++ lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD,
++ "only int SCEV supported");
+ stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
+ /* Runtime value for stop of loop is within bounds? */
+ if ((uint64_t)stop + ofs < (uint64_t)asize) {
+@@ -1380,7 +1491,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
+
+ while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
+ /* Never call raw lj_record_idx() on non-table. */
+- lua_assert(ix->idxchain != 0);
++ lj_assertJ(ix->idxchain != 0, "bad usage");
+ if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
+ lj_trace_err(J, LJ_TRERR_NOMM);
+ handlemm:
+@@ -1402,6 +1513,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
+ return 0; /* No result yet. */
+ }
+ }
++#if LJ_HASBUFFER
++ /* The index table of buffer objects is treated as immutable. */
++ if (ix->mt == TREF_NIL && !ix->val &&
++ tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER &&
++ tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) {
++ cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv));
++ TRef tr = lj_record_constify(J, val);
++ if (tr) return tr; /* Specialize to the value, i.e. a method. */
++ }
++#endif
+ /* Otherwise retry lookup with metaobject. */
+ ix->tab = ix->mobj;
+ copyTV(J->L, &ix->tabv, &ix->mobjv);
+@@ -1464,10 +1585,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
+ emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
+ xref, lj_ir_kkptr(J, niltvg(J2G(J))));
+ if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
+- lua_assert(hasmm);
++ lj_assertJ(hasmm, "inconsistent metamethod handling");
+ goto handlemm;
+ }
+- lua_assert(!hasmm);
++ lj_assertJ(!hasmm, "inconsistent metamethod handling");
+ if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
+ TRef key = ix->key;
+ if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
+@@ -1512,6 +1633,47 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
+ }
+ }
+
++/* Determine result type of table traversal. */
++static IRType rec_next_types(GCtab *t, uint32_t idx)
++{
++ for (; idx < t->asize; idx++) {
++ cTValue *a = arrayslot(t, idx);
++ if (LJ_LIKELY(!tvisnil(a)))
++ return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8);
++ }
++ idx -= t->asize;
++ for (; idx <= t->hmask; idx++) {
++ Node *n = &noderef(t->node)[idx];
++ if (!tvisnil(&n->val))
++ return itype2irt(&n->key) + (itype2irt(&n->val) << 8);
++ }
++ return IRT_NIL + (IRT_NIL << 8);
++}
++
++/* Record a table traversal step aka next(). */
++int lj_record_next(jit_State *J, RecordIndex *ix)
++{
++ IRType t, tkey, tval;
++ TRef trvk;
++ t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo);
++ tkey = (t & 0xff); tval = (t >> 8);
++ trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key);
++ if (ix->mobj || tkey == IRT_NIL) {
++ TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk);
++ /* Always check for invalid key from next() for nil result. */
++ if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1));
++ ix->mobj = idx;
++ }
++ ix->key = lj_record_vload(J, trvk, 1, tkey);
++ if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */
++ ix->val = TREF_NIL;
++ return 1;
++ } else { /* Need value. */
++ ix->val = lj_record_vload(J, trvk, 0, tval);
++ return 2;
++ }
++}
++
+ static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
+ {
+ RecordIndex ix;
+@@ -1573,7 +1735,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
+ int needbarrier = 0;
+ if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */
+ TRef tr, kfunc;
+- lua_assert(val == 0);
++ lj_assertJ(val == 0, "bad usage");
+ if (!tref_isk(fn)) { /* Late specialization of current function. */
+ if (J->pt->flags >= PROTO_CLC_POLY)
+ goto noconstify;
+@@ -1667,7 +1829,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
+ if (lnk) { /* Possible tail- or up-recursion. */
+ lj_trace_flush(J, lnk); /* Flush trace that only returns. */
+ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
+- hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4));
++ hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u);
+ }
+ lj_trace_err(J, LJ_TRERR_CUNROLL);
+ }
+@@ -1695,7 +1857,7 @@ static void rec_func_vararg(jit_State *J)
+ {
+ GCproto *pt = J->pt;
+ BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
+- lua_assert((pt->flags & PROTO_VARARG));
++ lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function");
+ if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
+ lj_trace_err(J, LJ_TRERR_STACKOV);
+ J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
+@@ -1764,7 +1926,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
+ {
+ int32_t numparams = J->pt->numparams;
+ ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
+- lua_assert(frame_isvarg(J->L->base-1));
++ lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame");
+ if (LJ_FR2 && dst > J->maxslot)
+ J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */
+ if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
+@@ -1795,11 +1957,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
+ vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
+ for (i = 0; i < nload; i++) {
+ IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
+- TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
+- vbase, lj_ir_kint(J, (int32_t)i));
+- TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
+- if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
+- J->base[dst+i] = tr;
++ J->base[dst+i] = lj_record_vload(J, vbase, i, t);
+ }
+ } else {
+ emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
+@@ -1846,8 +2004,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
+ lj_ir_kint(J, frofs-(8<<LJ_FR2)));
+ t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
+ aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
+- tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
+- if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
++ tr = lj_record_vload(J, aref, 0, t);
+ }
+ J->base[dst-2-LJ_FR2] = tr;
+ J->maxslot = dst-1-LJ_FR2;
+@@ -1858,6 +2015,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
+ lj_trace_err_info(J, LJ_TRERR_NYIBC);
+ }
+ }
++ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
++ lj_trace_err(J, LJ_TRERR_STACKOV);
+ }
+
+ /* -- Record allocations -------------------------------------------------- */
+@@ -1885,7 +2044,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
+ TValue savetv[5];
+ BCReg s;
+ RecordIndex ix;
+- lua_assert(baseslot < topslot);
++ lj_assertJ(baseslot < topslot, "bad CAT arg");
+ for (s = baseslot; s <= topslot; s++)
+ (void)getslot(J, s); /* Ensure all arguments have a reference. */
+ if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
+@@ -1902,9 +2061,9 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
+ tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
+ lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
+ do {
+- tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++);
++ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++);
+ } while (trp <= top);
+- tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
++ tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ J->maxslot = (BCReg)(xbase - J->base);
+ if (xbase == base) return tr; /* Return simple concatenation result. */
+ /* Pass partial result. */
+@@ -2009,7 +2168,7 @@ void lj_record_ins(jit_State *J)
+ if (bc_op(*J->pc) >= BC__MAX)
+ return;
+ break;
+- default: lua_assert(0); break;
++ default: lj_assertJ(0, "bad post-processing mode"); break;
+ }
+ J->postproc = LJ_POST_NONE;
+ }
+@@ -2017,7 +2176,7 @@ void lj_record_ins(jit_State *J)
+ /* Need snapshot before recording next bytecode (e.g. after a store). */
+ if (J->needsnap) {
+ J->needsnap = 0;
+- lj_snap_purge(J);
++ if (J->pt) lj_snap_purge(J);
+ lj_snap_add(J);
+ J->mergesnap = 1;
+ }
+@@ -2187,7 +2346,7 @@ void lj_record_ins(jit_State *J)
+ if (tref_isstr(rc))
+ rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
+ else if (!LJ_52 && tref_istab(rc))
+- rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
++ rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL);
+ else
+ rc = rec_mm_len(J, rc, rcv);
+ break;
+@@ -2377,7 +2536,8 @@ void lj_record_ins(jit_State *J)
+ J->loopref = J->cur.nins;
+ break;
+ case BC_JFORI:
+- lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
++ lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL,
++ "JFORI does not point to JFORL");
+ if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
+ lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
+ /* Continue tracing if the loop is not entered. */
+@@ -2389,8 +2549,11 @@ void lj_record_ins(jit_State *J)
+ case BC_ITERL:
+ rec_loop_interp(J, pc, rec_iterl(J, *pc));
+ break;
++ case BC_ITERN:
++ rec_loop_interp(J, pc, rec_itern(J, ra, rb));
++ break;
+ case BC_LOOP:
+- rec_loop_interp(J, pc, rec_loop(J, ra));
++ rec_loop_interp(J, pc, rec_loop(J, ra, 1));
+ break;
+
+ case BC_JFORL:
+@@ -2400,7 +2563,8 @@ void lj_record_ins(jit_State *J)
+ rec_loop_jit(J, rc, rec_iterl(J, traceref(J, rc)->startins));
+ break;
+ case BC_JLOOP:
+- rec_loop_jit(J, rc, rec_loop(J, ra));
++ rec_loop_jit(J, rc, rec_loop(J, ra,
++ !bc_isret(bc_op(traceref(J, rc)->startins))));
+ break;
+
+ case BC_IFORL:
+@@ -2416,6 +2580,10 @@ void lj_record_ins(jit_State *J)
+ J->maxslot = ra; /* Shrink used slots. */
+ break;
+
++ case BC_ISNEXT:
++ rec_isnext(J, ra);
++ break;
++
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+@@ -2430,7 +2598,8 @@ void lj_record_ins(jit_State *J)
+ rec_func_lua(J);
+ break;
+ case BC_JFUNCV:
+- lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */
++ /* Cannot happen. No hotcall counting for varag funcs. */
++ lj_assertJ(0, "unsupported vararg hotcall");
+ break;
+
+ case BC_FUNCC:
+@@ -2444,8 +2613,6 @@ void lj_record_ins(jit_State *J)
+ break;
+ }
+ /* fallthrough */
+- case BC_ITERN:
+- case BC_ISNEXT:
+ case BC_UCLO:
+ case BC_FNEW:
+ setintV(&J->errinfo, (int32_t)op);
+@@ -2468,8 +2635,9 @@ void lj_record_ins(jit_State *J)
+ #undef rbv
+ #undef rcv
+
+- /* Limit the number of recorded IR instructions. */
+- if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
++ /* Limit the number of recorded IR instructions and constants. */
++ if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] ||
++ J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst])
+ lj_trace_err(J, LJ_TRERR_TRACEOV);
+ }
+
+@@ -2489,13 +2657,20 @@ static const BCIns *rec_setup_root(jit_State *J)
+ J->bc_min = pc;
+ break;
+ case BC_ITERL:
+- lua_assert(bc_op(pc[-1]) == BC_ITERC);
++ lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL");
+ J->maxslot = ra + bc_b(pc[-1]) - 1;
+ J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
+ pc += 1+bc_j(ins);
+- lua_assert(bc_op(pc[-1]) == BC_JMP);
++ lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1");
+ J->bc_min = pc;
+ break;
++ case BC_ITERN:
++ lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN");
++ J->maxslot = ra;
++ J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns);
++ J->bc_min = pc+2 + bc_j(pc[1]);
++ J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */
++ break;
+ case BC_LOOP:
+ /* Only check BC range for real loops, but not for "repeat until true". */
+ pcj = pc + bc_j(ins);
+@@ -2525,7 +2700,7 @@ static const BCIns *rec_setup_root(jit_State *J)
+ pc++;
+ break;
+ default:
+- lua_assert(0);
++ lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins));
+ break;
+ }
+ return pc;
+@@ -2592,9 +2767,14 @@ void lj_record_setup(jit_State *J)
+ }
+ lj_snap_replay(J, T);
+ sidecheck:
+- if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
+- T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
+- J->param[JIT_P_tryside]) {
++ if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
++ T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
++ J->param[JIT_P_tryside])) {
++ if (bc_op(*J->pc) == BC_JLOOP) {
++ BCIns startins = traceref(J, bc_d(*J->pc))->startins;
++ if (bc_op(startins) == BC_ITERN)
++ rec_itern(J, bc_a(startins), bc_b(startins));
++ }
+ lj_record_stop(J, LJ_TRLINK_INTERP, 0);
+ }
+ } else { /* Root trace. */
+@@ -2603,6 +2783,7 @@ void lj_record_setup(jit_State *J)
+ J->pc = rec_setup_root(J);
+ /* Note: the loop instruction itself is recorded at the end and not
+ ** at the start! So snapshot #0 needs to point to the *next* instruction.
++ ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST.
+ */
+ lj_snap_add(J);
+ if (bc_op(J->cur.startins) == BC_FORL)
+diff --git a/src/lj_record.h b/src/lj_record.h
+index 93d374d2..01cc6041 100644
+--- a/src/lj_record.h
++++ b/src/lj_record.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Trace recorder (bytecode -> SSA IR).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_RECORD_H
+@@ -30,6 +30,7 @@ LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
+ cTValue *av, cTValue *bv);
+ LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
+ LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
++LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t);
+
+ LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
+ LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs);
+@@ -37,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults);
+
+ LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm);
+ LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix);
++LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix);
+
+ LJ_FUNC void lj_record_ins(jit_State *J);
+ LJ_FUNC void lj_record_setup(jit_State *J);
+diff --git a/src/lj_serialize.c b/src/lj_serialize.c
+new file mode 100644
+index 00000000..d6551b11
+--- /dev/null
++++ b/src/lj_serialize.c
+@@ -0,0 +1,538 @@
++/*
++** Object de/serialization.
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#define lj_serialize_c
++#define LUA_CORE
++
++#include "lj_obj.h"
++
++#if LJ_HASBUFFER
++#include "lj_err.h"
++#include "lj_buf.h"
++#include "lj_str.h"
++#include "lj_tab.h"
++#include "lj_udata.h"
++#if LJ_HASFFI
++#include "lj_ctype.h"
++#include "lj_cdata.h"
++#endif
++#if LJ_HASJIT
++#include "lj_ir.h"
++#endif
++#include "lj_serialize.h"
++
++/* Tags for internal serialization format. */
++enum {
++ SER_TAG_NIL, /* 0x00 */
++ SER_TAG_FALSE,
++ SER_TAG_TRUE,
++ SER_TAG_NULL,
++ SER_TAG_LIGHTUD32,
++ SER_TAG_LIGHTUD64,
++ SER_TAG_INT,
++ SER_TAG_NUM,
++ SER_TAG_TAB, /* 0x08 */
++ SER_TAG_DICT_MT = SER_TAG_TAB+6,
++ SER_TAG_DICT_STR,
++ SER_TAG_INT64, /* 0x10 */
++ SER_TAG_UINT64,
++ SER_TAG_COMPLEX,
++ SER_TAG_0x13,
++ SER_TAG_0x14,
++ SER_TAG_0x15,
++ SER_TAG_0x16,
++ SER_TAG_0x17,
++ SER_TAG_0x18, /* 0x18 */
++ SER_TAG_0x19,
++ SER_TAG_0x1a,
++ SER_TAG_0x1b,
++ SER_TAG_0x1c,
++ SER_TAG_0x1d,
++ SER_TAG_0x1e,
++ SER_TAG_0x1f,
++ SER_TAG_STR, /* 0x20 + str->len */
++};
++LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);
++
++/* -- Helper functions ---------------------------------------------------- */
++
++static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz)
++{
++ if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) {
++ sbx->w = w;
++ w = lj_buf_more2((SBuf *)sbx, sz);
++ }
++ return w;
++}
++
++/* Write U124 to buffer. */
++static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
++{
++ if (v < 0x1fe0) {
++ v -= 0xe0;
++ *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v;
++ } else {
++ *w++ = (char)0xff;
++#if LJ_BE
++ v = lj_bswap(v);
++#endif
++ memcpy(w, &v, 4); w += 4;
++ }
++ return w;
++}
++
++static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
++{
++ if (LJ_LIKELY(v < 0xe0)) {
++ *w++ = (char)v;
++ return w;
++ } else {
++ return serialize_wu124_(w, v);
++ }
++}
++
++static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv)
++{
++ uint32_t v = *pv;
++ if (v != 0xff) {
++ if (r >= w) return NULL;
++ v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++;
++ } else {
++ if (r + 4 > w) return NULL;
++ v = lj_getu32(r); r += 4;
++#if LJ_BE
++ v = lj_bswap(v);
++#endif
++ }
++ *pv = v;
++ return r;
++}
++
++static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
++{
++ if (LJ_LIKELY(r < w)) {
++ uint32_t v = *(uint8_t *)r; r++;
++ *pv = v;
++ if (LJ_UNLIKELY(v >= 0xe0)) {
++ r = serialize_ru124_(r, w, pv);
++ }
++ return r;
++ }
++ return NULL;
++}
++
++/* Prepare string dictionary for use (once). */
++void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
++{
++ if (!dict->hmask) { /* No hash part means not prepared, yet. */
++ MSize i, len = lj_tab_len(dict);
++ if (!len) return;
++ lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
++ for (i = 1; i <= len && i < dict->asize; i++) {
++ cTValue *o = arrayslot(dict, i);
++ if (tvisstr(o)) {
++ if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */
++ lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
++ }
++ } else if (!tvisfalse(o)) {
++ lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
++ }
++ }
++ }
++}
++
++/* Prepare metatable dictionary for use (once). */
++void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
++{
++ if (!dict->hmask) { /* No hash part means not prepared, yet. */
++ MSize i, len = lj_tab_len(dict);
++ if (!len) return;
++ lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
++ for (i = 1; i <= len && i < dict->asize; i++) {
++ cTValue *o = arrayslot(dict, i);
++ if (tvistab(o)) {
++ if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */
++ lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
++ }
++ } else if (!tvisfalse(o)) {
++ lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
++ }
++ }
++ }
++}
++
++/* -- Internal serializer ------------------------------------------------- */
++
++/* Put serialized object into buffer. */
++static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
++{
++ if (LJ_LIKELY(tvisstr(o))) {
++ const GCstr *str = strV(o);
++ MSize len = str->len;
++ w = serialize_more(w, sbx, 5+len);
++ w = serialize_wu124(w, SER_TAG_STR + len);
++ w = lj_buf_wmem(w, strdata(str), len);
++ } else if (tvisint(o)) {
++ uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
++ w = serialize_more(w, sbx, 1+4);
++ *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
++ } else if (tvisnum(o)) {
++ uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
++ w = serialize_more(w, sbx, 1+sizeof(lua_Number));
++ *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
++ } else if (tvispri(o)) {
++ w = serialize_more(w, sbx, 1);
++ *w++ = (char)(SER_TAG_NIL + ~itype(o));
++ } else if (tvistab(o)) {
++ const GCtab *t = tabV(o);
++ uint32_t narray = 0, nhash = 0, one = 2;
++ if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
++ sbx->depth--;
++ if (t->asize > 0) { /* Determine max. length of array part. */
++ ptrdiff_t i;
++ TValue *array = tvref(t->array);
++ for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
++ if (!tvisnil(&array[i]))
++ break;
++ narray = (uint32_t)(i+1);
++ if (narray && tvisnil(&array[0])) one = 4;
++ }
++ if (t->hmask > 0) { /* Count number of used hash slots. */
++ uint32_t i, hmask = t->hmask;
++ Node *node = noderef(t->node);
++ for (i = 0; i <= hmask; i++)
++ nhash += !tvisnil(&node[i].val);
++ }
++ /* Write metatable index. */
++ if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
++ TValue mto;
++ Node *n;
++ settabV(sbufL(sbx), &mto, tabref(t->metatable));
++ n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
++ do {
++ if (n->key.u64 == mto.u64) {
++ uint32_t idx = n->val.u32.lo;
++ w = serialize_more(w, sbx, 1+5);
++ *w++ = SER_TAG_DICT_MT;
++ w = serialize_wu124(w, idx);
++ break;
++ }
++ } while ((n = nextnode(n)));
++ }
++ /* Write number of array slots and hash slots. */
++ w = serialize_more(w, sbx, 1+2*5);
++ *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
++ if (narray) w = serialize_wu124(w, narray);
++ if (nhash) w = serialize_wu124(w, nhash);
++ if (narray) { /* Write array entries. */
++ cTValue *oa = tvref(t->array) + (one >> 2);
++ cTValue *oe = tvref(t->array) + narray;
++ while (oa < oe) w = serialize_put(w, sbx, oa++);
++ }
++ if (nhash) { /* Write hash entries. */
++ const Node *node = noderef(t->node) + t->hmask;
++ GCtab *dict_str = tabref(sbx->dict_str);
++ if (LJ_UNLIKELY(dict_str)) {
++ for (;; node--)
++ if (!tvisnil(&node->val)) {
++ if (LJ_LIKELY(tvisstr(&node->key))) {
++ /* Inlined lj_tab_getstr is 30% faster. */
++ const GCstr *str = strV(&node->key);
++ Node *n = hashstr(dict_str, str);
++ do {
++ if (tvisstr(&n->key) && strV(&n->key) == str) {
++ uint32_t idx = n->val.u32.lo;
++ w = serialize_more(w, sbx, 1+5);
++ *w++ = SER_TAG_DICT_STR;
++ w = serialize_wu124(w, idx);
++ break;
++ }
++ n = nextnode(n);
++ if (!n) {
++ MSize len = str->len;
++ w = serialize_more(w, sbx, 5+len);
++ w = serialize_wu124(w, SER_TAG_STR + len);
++ w = lj_buf_wmem(w, strdata(str), len);
++ break;
++ }
++ } while (1);
++ } else {
++ w = serialize_put(w, sbx, &node->key);
++ }
++ w = serialize_put(w, sbx, &node->val);
++ if (--nhash == 0) break;
++ }
++ } else {
++ for (;; node--)
++ if (!tvisnil(&node->val)) {
++ w = serialize_put(w, sbx, &node->key);
++ w = serialize_put(w, sbx, &node->val);
++ if (--nhash == 0) break;
++ }
++ }
++ }
++ sbx->depth++;
++#if LJ_HASFFI
++ } else if (tviscdata(o)) {
++ CTState *cts = ctype_cts(sbufL(sbx));
++ CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
++ uint8_t *sp = cdataptr(cdataV(o));
++ if (ctype_isinteger(s->info) && s->size == 8) {
++ w = serialize_more(w, sbx, 1+8);
++ *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
++#if LJ_BE
++ { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
++#else
++ memcpy(w, sp, 8);
++#endif
++ w += 8;
++ } else if (ctype_iscomplex(s->info) && s->size == 16) {
++ w = serialize_more(w, sbx, 1+16);
++ *w++ = SER_TAG_COMPLEX;
++#if LJ_BE
++ { /* Only swap the doubles. The re/im order stays the same. */
++ uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
++ u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
++ }
++#else
++ memcpy(w, sp, 16);
++#endif
++ w += 16;
++ } else {
++ goto badenc; /* NYI other cdata */
++ }
++#endif
++ } else if (tvislightud(o)) {
++ uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o);
++ w = serialize_more(w, sbx, 1+sizeof(ud));
++ if (ud == 0) {
++ *w++ = SER_TAG_NULL;
++ } else if (LJ_32 || checku32(ud)) {
++#if LJ_BE && LJ_64
++ ud = lj_bswap64(ud);
++#elif LJ_BE
++ ud = lj_bswap(ud);
++#endif
++ *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
++#if LJ_64
++ } else {
++#if LJ_BE
++ ud = lj_bswap64(ud);
++#endif
++ *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
++#endif
++ }
++ } else {
++ /* NYI userdata */
++#if LJ_HASFFI
++ badenc:
++#endif
++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o));
++ }
++ return w;
++}
++
++/* Get serialized object from buffer: decode one object starting at r into o and return the advanced read pointer. */
++static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
++{
++ char *w = sbx->w; /* Write position = end of readable data; the reads below are checked against it. */
++ uint32_t tp;
++ r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; /* Read the type tag. */
++ if (LJ_LIKELY(tp >= SER_TAG_STR)) {
++ uint32_t len = tp - SER_TAG_STR; /* Tags >= SER_TAG_STR carry the string length in the tag itself. */
++ if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob; /* String body would run past w. */
++ setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len));
++ r += len;
++ } else if (tp == SER_TAG_INT) {
++ if (LJ_UNLIKELY(r + 4 > w)) goto eob;
++ setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r))); /* 4 byte payload, byte-swapped on LJ_BE builds. */
++ r += 4;
++ } else if (tp == SER_TAG_NUM) {
++ if (LJ_UNLIKELY(r + 8 > w)) goto eob;
++ memcpy(o, r, 8); r += 8; /* 8 byte payload copied straight into the TValue. */
++#if LJ_BE
++ o->u64 = lj_bswap64(o->u64);
++#endif
++ if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. */
++ } else if (tp <= SER_TAG_TRUE) {
++ setpriV(o, ~tp); /* No payload: the primitive to store is derived from the tag as ~tp. */
++ } else if (tp == SER_TAG_DICT_STR) {
++ GCtab *dict_str;
++ uint32_t idx;
++ r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
++ idx++; /* The array slot used below is the encoded index plus one. */
++ dict_str = tabref(sbx->dict_str);
++ if (dict_str && idx < dict_str->asize &&
tvisstr(arrayslot(dict_str, idx)))
++ copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
++ else
++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); /* No dictionary, index out of range, or slot is not a string. */
++ } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
++ uint32_t narray = 0, nhash = 0;
++ GCtab *t, *mt = NULL;
++ if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); /* Nesting budget used up. */
++ sbx->depth--; /* Given back by the depth++ once this table is fully decoded. */
++ if (tp == SER_TAG_DICT_MT) {
++ GCtab *dict_mt;
++ uint32_t idx;
++ r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
++ idx++; /* Same plus-one slot convention as for SER_TAG_DICT_STR above. */
++ dict_mt = tabref(sbx->dict_mt);
++ if (dict_mt && idx < dict_mt->asize &&
tvistab(arrayslot(dict_mt, idx)))
++ mt = tabV(arrayslot(dict_mt, idx));
++ else
++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
++ r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; /* The table's own tag follows the metatable index. */
++ if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag; /* Only a plain table tag is accepted here. */
++ }
++ if (tp >= SER_TAG_TAB+2) { /* Tags from SER_TAG_TAB+2 upwards are followed by the array size. */
++ r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
++ }
++ if ((tp & 1)) { /* Low tag bit set: the hash size follows. */
++ r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
++ }
++ t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
++ /* NOBARRIER: The table is new (marked white). */
++ setgcref(t->metatable, obj2gco(mt));
++ settabV(sbufL(sbx), o, t); /* Stored into o before the elements are decoded. */
++ if (narray) {
++ TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4); /* Tags from SER_TAG_TAB+4 upwards start filling at slot 1 instead of slot 0. */
++ TValue *oe = tvref(t->array) + narray;
++ while (oa < oe) r = serialize_get(r, sbx, oa++); /* Recursive decode of each array element. */
++ }
++ if (nhash) {
++ do {
++ TValue k, *v;
++ r = serialize_get(r, sbx, &k); /* Key first, then its value. */
++ v = lj_tab_set(sbufL(sbx), t, &k);
++ if (LJ_UNLIKELY(!tvisnil(v))) /* Slot already holds a value: the same key was seen before. */
++ lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY);
++ r = serialize_get(r, sbx, v);
++ } while (--nhash);
++ }
++ sbx->depth++;
++#if LJ_HASFFI
++ } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) {
++ uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8; /* Payload size in bytes. */
++ GCcdata *cd;
++ if (LJ_UNLIKELY(r + sz > w)) goto eob;
++ cd = lj_cdata_new_(sbufL(sbx),
++ tp == SER_TAG_INT64 ? CTID_INT64 :
++ tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
++ sz);
++ memcpy(cdataptr(cd), r, sz); r += sz;
++#if LJ_BE
++ *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
++ if (sz == 16)
++ ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
++#endif
++ if (sz == 16) { /* Fix non-canonical NaNs. */
++ TValue *cdo = (TValue *)cdataptr(cd); /* View the two 8 byte halves as TValues for the NaN check. */
++ if (!tvisnum(&cdo[0])) setnanV(&cdo[0]);
++ if (!tvisnum(&cdo[1])) setnanV(&cdo[1]);
++ }
++ setcdataV(sbufL(sbx), o, cd);
++#endif
++ } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
++ uintptr_t ud = 0; /* Stays 0 unless one of the two payload tags below matches. */
++ if (tp == SER_TAG_LIGHTUD32) {
++ if (LJ_UNLIKELY(r + 4 > w)) goto eob;
++ ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
++ r += 4;
++ }
++#if LJ_64
++ else if (tp == SER_TAG_LIGHTUD64) {
++ if (LJ_UNLIKELY(r + 8 > w)) goto eob;
++ memcpy(&ud, r, 8); r += 8;
++#if LJ_BE
++ ud = lj_bswap64(ud);
++#endif
++ }
++ setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud)); /* LJ_64: the pointer goes through lj_lightud_intern() first. */
++#else
++ setrawlightudV(o, (void *)ud);
++#endif
++ } else {
++badtag:
++ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp); /* Tag not handled by any branch above. */
++ }
++ return r;
++eob:
++ lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB); /* Input ended before the object was complete. */
++ return NULL; /* NOTE(review): presumably never reached if lj_err_caller() does not return -- confirm. */
++}
++
++/* -- External serialization API ------------------------------------------ */
++
++/* Encode to buffer: serialize o at the buffer's write position and return sbx. */
++SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
++{
++ sbx->depth = LJ_SERIALIZE_DEPTH; /* Fresh nesting budget for every top-level call. */
++ sbx->w = serialize_put(sbx->w, sbx, o); /* Store the write pointer returned by the encoder. */
++ return sbx;
++}
++
++/* Decode from buffer: decode one object at sbx->r into o and return the read pointer after it. */
++char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
++{
++ sbx->depth = LJ_SERIALIZE_DEPTH; /* Fresh nesting budget for every top-level call. */
++ return serialize_get(sbx->r, sbx, o); /* sbx->r itself is not updated here; the new position is only returned. */
++}
++
++/* Stand-alone encoding, borrowing from global temporary buffer. Returns the encoded form of o as a new string. */
++GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
++{
++ SBufExt sbx; /* Temporary buffer object that lives only for this call. */
++ char *w;
++ memset(&sbx, 0, sizeof(SBufExt)); /* All fields start zeroed, including dict_str and dict_mt. */
++ lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
++ sbx.depth = LJ_SERIALIZE_DEPTH;
++ w = serialize_put(sbx.w, &sbx, o); /* w: write pointer after the encoded object. */
++ return lj_str_new(L, sbx.b, (size_t)(w - sbx.b)); /* String made from the bytes between sbx.b and w. */
++}
++
++/* Stand-alone decoding, copy-on-write from string. Decodes exactly one object from str into o. */
++void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
++{
++ SBufExt sbx; /* Temporary buffer object that lives only for this call. */
++ char *r;
++ memset(&sbx, 0, sizeof(SBufExt)); /* All fields start zeroed, including dict_str and dict_mt. */
++ lj_bufx_set_cow(L, &sbx, strdata(str), str->len); /* Set the buffer up from the string's data and length. */
++ /* No need to set sbx.cowref here. */
++ sbx.depth = LJ_SERIALIZE_DEPTH;
++ r = serialize_get(sbx.r, &sbx, o);
++ if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV); /* Decoding did not end exactly at the end of the input. */
++}
++
++#if LJ_HASJIT
++/* Peek into buffer to find the result IRType for specialization purposes. Only the leading tag is inspected. */
++LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
++{
++ uint32_t tp;
++ if (serialize_ru124(sbx->r, sbx->w, &tp)) { /* Reads the tag only; sbx->r is not advanced. */
++ /* This must match the handling of all tags in the decoder above. */
++ switch (tp) {
++ case SER_TAG_NIL: return IRT_NIL;
++ case SER_TAG_FALSE: return IRT_FALSE;
++ case SER_TAG_TRUE: return IRT_TRUE;
++ case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64:
++ return IRT_LIGHTUD;
++ case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM; /* Integers are reported as IRT_NUM unless LJ_DUALNUM is set. */
++ case SER_TAG_NUM: return IRT_NUM;
++ case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
++ case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
++ case SER_TAG_DICT_MT: /* A metatable reference also produces a table. */
++ return IRT_TAB;
++ case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
++ return IRT_CDATA;
++ case SER_TAG_DICT_STR:
++ default: /* SER_TAG_DICT_STR and every tag not listed above are reported as IRT_STR. */
++ return IRT_STR;
++ }
++ }
++ return IRT_NIL; /* Will fail on actual decode. */
++}
++#endif
++
++#endif
+diff --git a/src/lj_serialize.h b/src/lj_serialize.h
+new file mode 100644
+index 00000000..1fda23eb
+--- /dev/null
++++ b/src/lj_serialize.h
+@@ -0,0 +1,28 @@
++/*
++** Object de/serialization.
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#ifndef _LJ_SERIALIZE_H
++#define _LJ_SERIALIZE_H
++
++#include "lj_obj.h"
++#include "lj_buf.h"
++
++#if LJ_HASBUFFER
++
++#define LJ_SERIALIZE_DEPTH 100 /* Default depth: table nesting budget loaded into sbx->depth by the entry points below. */
++
++LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict); /* NOTE(review): presumably prepares a string dictionary table -- confirm against lj_serialize.c. */
++LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict); /* NOTE(review): presumably prepares a metatable dictionary table -- confirm against lj_serialize.c. */
++LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o); /* Encode o into the buffer; returns sbx. */
++LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o); /* Decode one object into o; returns the new read pointer. */
++LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o); /* Stand-alone encode of o to a new string. */
++LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str); /* Stand-alone decode of str into o. */
++#if LJ_HASJIT
++LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx); /* IRType of the next object in the buffer, without consuming it. */
++#endif /* LJ_HASJIT */
++
++#endif /* LJ_HASBUFFER */
++
++#endif /* _LJ_SERIALIZE_H */
+diff --git a/src/lj_snap.c b/src/lj_snap.c
+index bb063c2b..97097a5b 100644
+--- a/src/lj_snap.c
++++ b/src/lj_snap.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Snapshot handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_snap_c
+@@ -85,15 +85,20 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg
nslots)
+ IRIns *ir = &J->cur.ir[ref];
+ if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
+ ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
+- /* No need to snapshot unmodified non-inherited slots. */
+- if (!(ir->op2 & IRSLOAD_INHERIT))
++ /*
++ ** No need to snapshot unmodified non-inherited slots.
++ ** But always snapshot the function below a frame in LJ_FR2 mode.
++ */
++ if (!(ir->op2 & IRSLOAD_INHERIT) &&
++ (!LJ_FR2 || s == 0 || s+1 == nslots ||
++ !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
+ continue;
+ /* No need to restore readonly slots and unmodified non-parent slots. */
+ if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
+ (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
+ sn |= SNAP_NORESTORE;
+ }
+- if (LJ_SOFTFP && irt_isnum(ir->t))
++ if (LJ_SOFTFP32 && irt_isnum(ir->t))
+ sn |= SNAP_SOFTFPNUM;
+ map[n++] = sn;
+ }
+@@ -110,12 +115,15 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map,
uint8_t *topslot)
+ cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) :
J->L->top;
+ #if LJ_FR2
+ uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
+- lua_assert(2 <= J->baseslot && J->baseslot <= 257);
++ lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad
baseslot");
+ memcpy(map, &pcbase, sizeof(uint64_t));
+ #else
+ MSize f = 0;
+ map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
+ #endif
++ lj_assertJ(!J->pt ||
++ (J->pc >= proto_bc(J->pt) &&
++ J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot
PC");
+ while (frame > lim) { /* Backwards traversal of all frames above base. */
+ if (frame_islua(frame)) {
+ #if !LJ_FR2
+@@ -129,7 +137,7 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map,
uint8_t *topslot)
+ #endif
+ frame = frame_prevd(frame);
+ } else {
+- lua_assert(!frame_isc(frame));
++ lj_assertJ(!frame_isc(frame), "broken frame chain");
+ #if !LJ_FR2
+ map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
+ #endif
+@@ -141,10 +149,10 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map,
uint8_t *topslot)
+ }
+ *topslot = (uint8_t)(ftop - lim);
+ #if LJ_FR2
+- lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
++ lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
+ return 2;
+ #else
+- lua_assert(f == (MSize)(1 + J->framedepth));
++ lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot
size");
+ return f;
+ #endif
+ }
+@@ -161,11 +169,12 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize
nsnapmap)
+ nent = snapshot_slots(J, p, nslots);
+ snap->nent = (uint8_t)nent;
+ nent += snapshot_framelinks(J, p + nent, &snap->topslot);
+- snap->mapofs = (uint16_t)nsnapmap;
++ snap->mapofs = (uint32_t)nsnapmap;
+ snap->ref = (IRRef1)J->cur.nins;
++ snap->mcofs = 0;
+ snap->nslots = (uint8_t)nslots;
+ snap->count = 0;
+- J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
++ J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
+ }
+
+ /* Add or merge a snapshot. */
+@@ -222,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+ #define DEF_SLOT(s) udf[(s)] *= 3
+
+ /* Scan through following bytecode and check for uses/defs. */
+- lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) +
J->pt->sizebc);
++ lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) +
J->pt->sizebc,
++ "snapshot PC out of range");
+ for (;;) {
+ BCIns ins = *pc++;
+ BCOp op = bc_op(ins);
+@@ -233,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+ switch (bcmode_c(op)) {
+ case BCMvar: USE_SLOT(bc_c(ins)); break;
+ case BCMrbase:
+- lua_assert(op == BC_CAT);
++ lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
+ for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
+ for (; s < maxslot; s++) DEF_SLOT(s);
+ break;
+@@ -242,7 +252,12 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+ BCReg minslot = bc_a(ins);
+ if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
+ else if (op >= BC_ITERL && op <= BC_JITERL) minslot +=
bc_b(pc[-2])-1;
+- else if (op == BC_UCLO) { pc += bc_j(ins); break; }
++ else if (op == BC_UCLO) {
++ ptrdiff_t delta = bc_j(ins);
++ if (delta < 0) return maxslot; /* Prevent loop. */
++ pc += delta;
++ break;
++ }
+ for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
+ return minslot < maxslot ? minslot : maxslot;
+ }
+@@ -266,7 +281,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+ if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
+ break;
+ case BCMbase:
+- if (op >= BC_CALLM && op <= BC_VARG) {
++ if (op >= BC_CALLM && op <= BC_ITERN) {
+ BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
+ maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
+ if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
+@@ -277,6 +292,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+ for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
+ return 0;
+ }
++ } else if (op == BC_VARG) {
++ return maxslot; /* NYI: punt. */
+ } else if (op == BC_KNIL) {
+ for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
+ } else if (op == BC_TSETM) {
+@@ -285,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+ break;
+ default: break;
+ }
+- lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) +
J->pt->sizebc);
++ lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) +
J->pt->sizebc,
++ "use/def analysis PC out of range");
+ }
+
+ #undef USE_SLOT
+@@ -294,15 +312,45 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
+ return 0; /* unreachable */
+ }
+
++/* Mark slots used by upvalues of child prototypes as used (udf entry set to 0). */
++void snap_useuv(GCproto *pt, uint8_t *udf)
++{
++ /* This is a coarse check, because it's difficult to correlate the lifetime
++ ** of slots and closures. But the number of false positives is quite low.
++ ** A false positive may cause a slot not to be purged, which is just
++ ** a missed optimization.
++ */
++ if ((pt->flags & PROTO_CHILD)) { /* Nothing to do unless the PROTO_CHILD flag is set on pt. */
++ ptrdiff_t i, j, n = pt->sizekgc; /* n = pt->sizekgc entries are visited. */
++ GCRef *kr = mref(pt->k, GCRef) - 1; /* Start just below pt->k and walk downwards (kr--). */
++ for (i = 0; i < n; i++, kr--) {
++ GCobj *o = gcref(*kr);
++ if (o->gch.gct == ~LJ_TPROTO) { /* Only prototype objects are inspected. */
++ for (j = 0; j < gco2pt(o)->sizeuv; j++) {
++ uint32_t v = proto_uv(gco2pt(o))[j]; /* Upvalue descriptor j of that prototype. */
++ if ((v & PROTO_UV_LOCAL)) { /* NOTE(review): presumably the upvalue refers to a local slot of pt -- confirm. */
++ udf[(v & 0xff)] = 0; /* Low 8 bits index udf; 0 keeps the slot from being treated as dead. */
++ }
++ }
++ }
++ }
++ }
++}
++
+ /* Purge dead slots before the next snapshot. */
+ void lj_snap_purge(jit_State *J)
+ {
+ uint8_t udf[SNAP_USEDEF_SLOTS];
+- BCReg maxslot = J->maxslot;
+- BCReg s = snap_usedef(J, udf, J->pc, maxslot);
+- for (; s < maxslot; s++)
+- if (udf[s] != 0)
+- J->base[s] = 0; /* Purge dead slots. */
++ BCReg s, maxslot = J->maxslot;
++ if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams)
++ maxslot = J->pt->numparams;
++ s = snap_usedef(J, udf, J->pc, maxslot);
++ if (s < maxslot) {
++ snap_useuv(J->pt, udf);
++ for (; s < maxslot; s++)
++ if (udf[s] != 0)
++ J->base[s] = 0; /* Purge dead slots. */
++ }
+ }
+
+ /* Shrink last snapshot. */
+@@ -315,6 +363,7 @@ void lj_snap_shrink(jit_State *J)
+ BCReg maxslot = J->maxslot;
+ BCReg baseslot = J->baseslot;
+ BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
++ if (minslot < maxslot) snap_useuv(J->pt, udf);
+ maxslot += baseslot;
+ minslot += baseslot;
+ snap->nslots = (uint8_t)maxslot;
+@@ -326,7 +375,7 @@ void lj_snap_shrink(jit_State *J)
+ snap->nent = (uint8_t)m;
+ nlim = J->cur.nsnapmap - snap->mapofs - 1;
+ while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
+- J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */
++ J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */
+ }
+
+ /* -- Snapshot access ----------------------------------------------------- */
+@@ -356,25 +405,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref,
RegSP rs)
+ }
+
+ /* Copy RegSP from parent snapshot to the parent links of the IR. */
+-IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
++IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
+ {
+ SnapShot *snap = &T->snap[snapno];
+ SnapEntry *map = &T->snapmap[snap->mapofs];
+ BloomFilter rfilt = snap_renamefilter(T, snapno);
+ MSize n = 0;
+ IRRef ref = 0;
++ UNUSED(J);
+ for ( ; ; ir++) {
+ uint32_t rs;
+ if (ir->o == IR_SLOAD) {
+ if (!(ir->op2 & IRSLOAD_PARENT)) break;
+ for ( ; ; n++) {
+- lua_assert(n < snap->nent);
++ lj_assertJ(n < snap->nent, "slot %d not found in snapshot",
ir->op1);
+ if (snap_slot(map[n]) == ir->op1) {
+ ref = snap_ref(map[n++]);
+ break;
+ }
+ }
+- } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
++ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
+ ref++;
+ } else if (ir->o == IR_PVAL) {
+ ref = ir->op1 + REF_BIAS;
+@@ -385,7 +435,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
+ if (bloomtest(rfilt, ref))
+ rs = snap_renameref(T, snapno, ref, rs);
+ ir->prev = (uint16_t)rs;
+- lua_assert(regsp_used(rs));
++ lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
+ }
+ return ir;
+ }
+@@ -403,7 +453,7 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
+ case IR_KNUM: case IR_KINT64:
+ return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
+ case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
+- default: lua_assert(0); return TREF_NIL; break;
++ default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
+ }
+ }
+
+@@ -413,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax,
IRRef ref)
+ MSize j;
+ for (j = 0; j < nmax; j++)
+ if (snap_ref(map[j]) == ref)
+- return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
++ return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME);
+ return 0;
+ }
+
+@@ -481,17 +531,19 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
+ tr = snap_replay_const(J, ir);
+ } else if (!regsp_used(ir->prev)) {
+ pass23 = 1;
+- lua_assert(s != 0);
++ lj_assertJ(s != 0, "unused slot 0 in snapshot");
+ tr = s;
+ } else {
+ IRType t = irt_type(ir->t);
+ uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
+- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
++ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
+ if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
++ if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX;
+ tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
+ }
+ setslot:
+- J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
++ /* Same as TREF_* flags. */
++ J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME));
+ J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
+ if ((sn & SNAP_FRAME))
+ J->baseslot = s+1;
+@@ -507,8 +559,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
+ if (regsp_reg(ir->r) == RID_SUNK) {
+ if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
+ pass23 = 1;
+- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
+- ir->o == IR_CNEW || ir->o == IR_CNEWI);
++ lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
++ ir->o == IR_CNEW || ir->o == IR_CNEWI,
++ "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
+ if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
+ if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
+ if (LJ_HASFFI && ir->o == IR_CNEWI) {
+@@ -520,13 +573,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
+ if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
+ if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
+ snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
+- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
++ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
+ irs+1 < irlast && (irs+1)->o == IR_HIOP)
+ snap_pref(J, T, map, nent, seen, (irs+1)->op2);
+ }
+ }
+ } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
+- lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
++ lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
++ "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
+ J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
+ }
+ }
+@@ -576,13 +630,15 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
+ val = snap_pref(J, T, map, nent, seen, irs->op2);
+ if (val == 0) {
+ IRIns *irc = &T->ir[irs->op2];
+- lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
++ lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
++ "sunk store for parent IR %04d with bad op %d",
++ refp - REF_BIAS, irc->o);
+ val = snap_pref(J, T, map, nent, seen, irc->op1);
+ val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
+- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
++ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
+ irs+1 < irlast && (irs+1)->o == IR_HIOP) {
+ IRType t = IRT_I64;
+- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
++ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
+ t = IRT_NUM;
+ lj_needsplit(J);
+ if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
+@@ -626,7 +682,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState
*ex,
+ IRType1 t = ir->t;
+ RegSP rs = ir->prev;
+ if (irref_isk(ref)) { /* Restore constant slot. */
+- lj_ir_kvalue(J->L, o, ir);
++ if (ir->o == IR_KPTR) {
++ o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
++ } else {
++ lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
++ "restore of const from IR %04d with bad op %d",
++ ref - REF_BIAS, ir->o);
++ lj_ir_kvalue(J->L, o, ir);
++ }
+ return;
+ }
+ if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
+@@ -635,7 +698,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
+ int32_t *sps = &ex->spill[regsp_spill(rs)];
+ if (irt_isinteger(t)) {
+ setintV(o, *sps);
+-#if !LJ_SOFTFP
++#if !LJ_SOFTFP32
+ } else if (irt_isnum(t)) {
+ o->u64 = *(uint64_t *)sps;
+ #endif
+@@ -645,13 +708,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState
*ex,
+ o->u64 = *(uint64_t *)sps;
+ #endif
+ } else {
+- lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
++ lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
+ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
+ }
+ } else { /* Restore from register. */
+ Reg r = regsp_reg(rs);
+ if (ra_noreg(r)) {
+- lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
++ lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
++ "restore from IR %04d has no reg", ref - REF_BIAS);
+ snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
+ if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
+ return;
+@@ -660,6 +724,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
+ #if !LJ_SOFTFP
+ } else if (irt_isnum(t)) {
+ setnumV(o, ex->fpr[r-RID_MIN_FPR]);
++#elif LJ_64 /* && LJ_SOFTFP */
++ } else if (irt_isnum(t)) {
++ o->u64 = ex->gpr[r-RID_MIN_GPR];
+ #endif
+ #if LJ_64 && !LJ_GC64
+ } else if (irt_is64(t)) {
+@@ -676,7 +743,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
+
+ #if LJ_HASFFI
+ /* Restore raw data from the trace exit state. */
+-static void snap_restoredata(GCtrace *T, ExitState *ex,
++static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
+ SnapNo snapno, BloomFilter rfilt,
+ IRRef ref, void *dst, CTSize sz)
+ {
+@@ -684,8 +751,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
+ RegSP rs = ir->prev;
+ int32_t *src;
+ uint64_t tmp;
++ UNUSED(J);
+ if (irref_isk(ref)) {
+- if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
++ if (ir_isk64(ir)) {
+ src = (int32_t *)&ir[1];
+ } else if (sz == 8) {
+ tmp = (uint64_t)(uint32_t)ir->i;
+@@ -706,8 +774,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
+ Reg r = regsp_reg(rs);
+ if (ra_noreg(r)) {
+ /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
+- lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 ==
IRCONV_NUM_INT);
+- snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
++ lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 ==
IRCONV_NUM_INT,
++ "restore from IR %04d has no reg", ref - REF_BIAS);
++ snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
+ *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
+ return;
+ }
+@@ -728,7 +797,8 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
+ if (LJ_64 && LJ_BE && sz == 4) src++;
+ }
+ }
+- lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
++ lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
++ "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
+ if (sz == 4) *(int32_t *)dst = *src;
+ else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
+ else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
+@@ -741,8 +811,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
+ SnapNo snapno, BloomFilter rfilt,
+ IRIns *ir, TValue *o)
+ {
+- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
+- ir->o == IR_CNEW || ir->o == IR_CNEWI);
++ lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
++ ir->o == IR_CNEW || ir->o == IR_CNEWI,
++ "sunk allocation with bad op %d", ir->o);
+ #if LJ_HASFFI
+ if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
+ CTState *cts = ctype_cts(J->L);
+@@ -753,13 +824,14 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
+ setcdataV(J->L, o, cd);
+ if (ir->o == IR_CNEWI) {
+ uint8_t *p = (uint8_t *)cdataptr(cd);
+- lua_assert(sz == 4 || sz == 8);
++ lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
+ if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins &&
(ir+1)->o == IR_HIOP) {
+- snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
++ snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
++ LJ_LE ? p+4 : p, 4);
+ if (LJ_BE) p += 4;
+ sz = 4;
+ }
+- snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
++ snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
+ } else {
+ IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
+ for (irs = ir+1; irs < irlast; irs++)
+@@ -767,8 +839,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
+ IRIns *iro = &T->ir[T->ir[irs->op1].op2];
+ uint8_t *p = (uint8_t *)cd;
+ CTSize szs;
+- lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
+- lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
++ lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d",
irs->o);
++ lj_assertJ(T->ir[irs->op1].o == IR_ADD,
++ "sunk store with bad add op %d", T->ir[irs->op1].o);
++ lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
++ "sunk store with bad const offset op %d", iro->o);
+ if (irt_is64(irs->t)) szs = 8;
+ else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
+ else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
+@@ -777,14 +852,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
+ p += (int64_t)ir_k64(iro)->u64;
+ else
+ p += iro->i;
+- lua_assert(p >= (uint8_t *)cdataptr(cd) &&
+- p + szs <= (uint8_t *)cdataptr(cd) + sz);
++ lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
++ p + szs <= (uint8_t *)cdataptr(cd) + sz,
++ "sunk store with offset out of range");
+ if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o ==
IR_HIOP) {
+- lua_assert(szs == 4);
+- snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
++ lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
++ snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
++ LJ_LE ? p+4 : p, 4);
+ if (LJ_BE) p += 4;
+ }
+- snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
++ snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
+ }
+ }
+ } else
+@@ -799,10 +876,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
+ if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
+ IRIns *irk = &T->ir[irs->op1];
+ TValue tmp, *val;
+- lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+- irs->o == IR_FSTORE);
++ lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
++ irs->o == IR_FSTORE,
++ "sunk store with bad op %d", irs->o);
+ if (irk->o == IR_FREF) {
+- lua_assert(irk->op2 == IRFL_TAB_META);
++ lj_assertJ(irk->op2 == IRFL_TAB_META,
++ "sunk store with bad field %d", irk->op2);
+ snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
+ /* NOBARRIER: The table is new (marked white). */
+ setgcref(t->metatable, obj2gco(tabV(&tmp)));
+@@ -813,7 +892,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
+ val = lj_tab_set(J->L, t, &tmp);
+ /* NOBARRIER: The table is new (marked white). */
+ snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
+- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o ==
IR_HIOP) {
++ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o
== IR_HIOP) {
+ snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
+ val->u32.hi = tmp.u32.lo;
+ }
+@@ -874,7 +953,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
+ continue;
+ }
+ snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
+- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
++ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
+ TValue tmp;
+ snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
+ o->u32.hi = tmp.u32.lo;
+@@ -884,13 +963,17 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
+ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
+ L->base = o+1;
+ #endif
++ } else if ((sn & SNAP_KEYINDEX)) {
++ /* A IRT_INT key index slot is restored as a number. Undo this. */
++ o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o)));
++ o->u32.hi = LJ_KEYINDEX;
+ }
+ }
+ }
+ #if LJ_FR2
+ L->base += (map[nent+LJ_BE] & 0xff);
+ #endif
+- lua_assert(map + nent == flinks);
++ lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");
+
+ /* Compute current stack top. */
+ switch (bc_op(*pc)) {
+diff --git a/src/lj_snap.h b/src/lj_snap.h
+index 2c9ae3d6..c73f75b3 100644
+--- a/src/lj_snap.h
++++ b/src/lj_snap.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Snapshot handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_SNAP_H
+@@ -13,7 +13,8 @@
+ LJ_FUNC void lj_snap_add(jit_State *J);
+ LJ_FUNC void lj_snap_purge(jit_State *J);
+ LJ_FUNC void lj_snap_shrink(jit_State *J);
+-LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir);
++LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno,
++ IRIns *ir);
+ LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T);
+ LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
+ LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
+diff --git a/src/lj_state.c b/src/lj_state.c
+index 632dd07e..e87b945a 100644
+--- a/src/lj_state.c
++++ b/src/lj_state.c
+@@ -1,6 +1,6 @@
+ /*
+ ** State and stack handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -25,6 +25,7 @@
+ #include "lj_trace.h"
+ #include "lj_dispatch.h"
+ #include "lj_vm.h"
++#include "lj_prng.h"
+ #include "lj_lex.h"
+ #include "lj_alloc.h"
+ #include "luajit.h"
+@@ -60,7 +61,8 @@ static void resizestack(lua_State *L, MSize n)
+ MSize oldsize = L->stacksize;
+ MSize realsize = n + 1 + LJ_STACK_EXTRA;
+ GCobj *up;
+- lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1);
++ lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1,
++ "inconsistent stack size");
+ st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
+ (MSize)(oldsize*sizeof(TValue)),
+ (MSize)(realsize*sizeof(TValue)));
+@@ -148,12 +150,13 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
+ /* NOBARRIER: State initialization, all objects are white. */
+ setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL)));
+ settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY));
+- lj_str_resize(L, LJ_MIN_STRTAB-1);
++ lj_str_init(L);
+ lj_meta_init(L);
+ lj_lex_init(L);
+ fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */
+ g->gc.threshold = 4*g->gc.total;
+ lj_trace_initstate(g);
++ lj_err_verify();
+ return NULL;
+ }
+
+@@ -162,16 +165,25 @@ static void close_state(lua_State *L)
+ global_State *g = G(L);
+ lj_func_closeuv(L, tvref(L->stack));
+ lj_gc_freeall(g);
+- lua_assert(gcref(g->gc.root) == obj2gco(L));
+- lua_assert(g->strnum == 0);
++ lj_assertG(gcref(g->gc.root) == obj2gco(L),
++ "main thread is not first GC object");
++ lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num);
+ lj_trace_freestate(g);
+ #if LJ_HASFFI
+ lj_ctype_freestate(g);
+ #endif
+- lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
++ lj_str_freetab(g);
+ lj_buf_free(g, &g->tmpbuf);
+ lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
+- lua_assert(g->gc.total == sizeof(GG_State));
++#if LJ_64
++ if (mref(g->gc.lightudseg, uint32_t)) {
++ MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2;
++ lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t);
++ }
++#endif
++ lj_assertG(g->gc.total == sizeof(GG_State),
++ "memory leak of %lld bytes",
++ (long long)(g->gc.total - sizeof(GG_State)));
+ #ifndef LUAJIT_USE_SYSMALLOC
+ if (g->allocf == lj_alloc_f)
+ lj_alloc_destroy(g->allocd);
+@@ -181,16 +193,33 @@ static void close_state(lua_State *L)
+ }
+
+ #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
+-lua_State *lj_state_newstate(lua_Alloc f, void *ud)
++lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd)
+ #else
+-LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
++LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
+ #endif
+ {
+- GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
+- lua_State *L = &GG->L;
+- global_State *g = &GG->g;
++ PRNGState prng;
++ GG_State *GG;
++ lua_State *L;
++ global_State *g;
++ /* We need the PRNG for the memory allocator, so initialize this first. */
++ if (!lj_prng_seed_secure(&prng)) {
++ lj_assertX(0, "secure PRNG seeding failed");
++ /* Can only return NULL here, so this errors with "not enough memory". */
++ return NULL;
++ }
++#ifndef LUAJIT_USE_SYSMALLOC
++ if (allocf == LJ_ALLOCF_INTERNAL) {
++ allocd = lj_alloc_create(&prng);
++ if (!allocd) return NULL;
++ allocf = lj_alloc_f;
++ }
++#endif
++ GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State));
+ if (GG == NULL || !checkptrGC(GG)) return NULL;
+ memset(GG, 0, sizeof(GG_State));
++ L = &GG->L;
++ g = &GG->g;
+ L->gct = ~LJ_TTHREAD;
+ L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
+ L->dummy_ffid = FF_C;
+@@ -198,12 +227,18 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
+ g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED;
+ g->strempty.marked = LJ_GC_WHITE0;
+ g->strempty.gct = ~LJ_TSTR;
+- g->allocf = f;
+- g->allocd = ud;
++ g->allocf = allocf;
++ g->allocd = allocd;
++ g->prng = prng;
++#ifndef LUAJIT_USE_SYSMALLOC
++ if (allocf == lj_alloc_f) {
++ lj_alloc_setprng(allocd, &g->prng);
++ }
++#endif
+ setgcref(g->mainthref, obj2gco(L));
+ setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
+ setgcref(g->uvhead.next, obj2gco(&g->uvhead));
+- g->strmask = ~(MSize)0;
++ g->str.mask = ~(MSize)0;
+ setnilV(registry(L));
+ setnilV(&g->nilnode.val);
+ setnilV(&g->nilnode.key);
+@@ -283,17 +318,17 @@ lua_State *lj_state_new(lua_State *L)
+ setmrefr(L1->glref, L->glref);
+ setgcrefr(L1->env, L->env);
+ stack_init(L1, L); /* init stack */
+- lua_assert(iswhite(obj2gco(L1)));
++ lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white");
+ return L1;
+ }
+
+ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
+ {
+- lua_assert(L != mainthread(g));
++ lj_assertG(L != mainthread(g), "free of main thread");
+ if (obj2gco(L) == gcref(g->cur_L))
+ setgcrefnull(g->cur_L);
+ lj_func_closeuv(L, tvref(L->stack));
+- lua_assert(gcref(L->openupval) == NULL);
++ lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
+ lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
+ lj_mem_freet(g, L);
+ }
+diff --git a/src/lj_state.h b/src/lj_state.h
+index 02a0eafa..273b6b12 100644
+--- a/src/lj_state.h
++++ b/src/lj_state.h
+@@ -1,6 +1,6 @@
+ /*
+ ** State and stack handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_STATE_H
+@@ -32,4 +32,6 @@ LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L);
+ LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud);
+ #endif
+
++#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4))
++
+ #endif
+diff --git a/src/lj_str.c b/src/lj_str.c
+index 264dedc1..c6f2ceec 100644
+--- a/src/lj_str.c
++++ b/src/lj_str.c
+@@ -1,6 +1,6 @@
+ /*
+ ** String handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_str_c
+@@ -11,6 +11,7 @@
+ #include "lj_err.h"
+ #include "lj_str.h"
+ #include "lj_char.h"
++#include "lj_prng.h"
+
+ /* -- String helpers ------------------------------------------------------ */
+
+@@ -37,27 +38,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
+ return (int32_t)(a->len - b->len);
+ }
+
+-/* Fast string data comparison. Caveat: unaligned access to 1st string! */
+-static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
+-{
+- MSize i = 0;
+- lua_assert(len > 0);
+- lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
+- do { /* Note: innocuous access up to end of string + 3. */
+- uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
+- if (v) {
+- i -= len;
+-#if LJ_LE
+- return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
+-#else
+- return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
+-#endif
+- }
+- i += 4;
+- } while (i < len);
+- return 0;
+-}
+-
+ /* Find fixed string p inside string s. */
+ const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
+ {
+@@ -90,108 +70,301 @@ int lj_str_haspattern(GCstr *s)
+ return 0; /* No pattern matching chars found. */
+ }
+
+-/* -- String interning ---------------------------------------------------- */
+-
+-/* Resize the string hash table (grow and shrink). */
+-void lj_str_resize(lua_State *L, MSize newmask)
+-{
+- global_State *g = G(L);
+- GCRef *newhash;
+- MSize i;
+- if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
+- return; /* No resizing during GC traversal or if already too big. */
+- newhash = lj_mem_newvec(L, newmask+1, GCRef);
+- memset(newhash, 0, (newmask+1)*sizeof(GCRef));
+- for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
+- GCobj *p = gcref(g->strhash[i]);
+- while (p) { /* Follow each hash chain and reinsert all strings. */
+- MSize h = gco2str(p)->hash & newmask;
+- GCobj *next = gcnext(p);
+- /* NOBARRIER: The string table is a GC root. */
+- setgcrefr(p->gch.nextgc, newhash[h]);
+- setgcref(newhash[h], p);
+- p = next;
+- }
+- }
+- lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
+- g->strmask = newmask;
+- g->strhash = newhash;
+-}
++/* -- String hashing ------------------------------------------------------ */
+
+-/* Intern a string and return string object. */
+-GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
++/* Keyed sparse ARX string hash. Constant time. */
++static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
+ {
+- global_State *g;
+- GCstr *s;
+- GCobj *o;
+- MSize len = (MSize)lenx;
+- MSize a, b, h = len;
+- if (lenx >= LJ_MAX_STR)
+- lj_err_msg(L, LJ_ERR_STROV);
+- g = G(L);
+- /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
++ /* Constants taken from lookup3 hash by Bob Jenkins. */
++ StrHash a, b, h = len ^ (StrHash)seed;
+ if (len >= 4) { /* Caveat: unaligned access! */
+ a = lj_getu32(str);
+ h ^= lj_getu32(str+len-4);
+ b = lj_getu32(str+(len>>1)-2);
+ h ^= b; h -= lj_rol(b, 14);
+ b += lj_getu32(str+(len>>2)-1);
+- } else if (len > 0) {
++ } else {
+ a = *(const uint8_t *)str;
+ h ^= *(const uint8_t *)(str+len-1);
+ b = *(const uint8_t *)(str+(len>>1));
+ h ^= b; h -= lj_rol(b, 14);
+- } else {
+- return &g->strempty;
+ }
+ a ^= h; a -= lj_rol(h, 11);
+ b ^= a; b -= lj_rol(a, 25);
+ h ^= b; h -= lj_rol(b, 16);
+- /* Check if the string has already been interned. */
+- o = gcref(g->strhash[h & g->strmask]);
+- if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
+- while (o != NULL) {
+- GCstr *sx = gco2str(o);
+- if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
+- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
+- if (isdead(g, o)) flipwhite(o);
+- return sx; /* Return existing string. */
++ return h;
++}
++
++#if LUAJIT_SECURITY_STRHASH
++/* Keyed dense ARX string hash. Linear time. */
++static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
++ const char *str, MSize len)
++{
++ StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
++ if (len > 12) {
++ StrHash a = (StrHash)seed;
++ const char *pe = str+len-12, *p = pe, *q = str;
++ do {
++ a += lj_getu32(p);
++ b += lj_getu32(p+4);
++ h += lj_getu32(p+8);
++ p = q; q += 12;
++ h ^= b; h -= lj_rol(b, 14);
++ a ^= h; a -= lj_rol(h, 11);
++ b ^= a; b -= lj_rol(a, 25);
++ } while (p < pe);
++ h ^= b; h -= lj_rol(b, 16);
++ a ^= h; a -= lj_rol(h, 4);
++ b ^= a; b -= lj_rol(a, 14);
++ }
++ return b;
++}
++#endif
++
++/* -- String interning ---------------------------------------------------- */
++
++#define LJ_STR_MAXCOLL 32
++
++/* Resize the string interning hash table (grow and shrink). */
++void lj_str_resize(lua_State *L, MSize newmask)
++{
++ global_State *g = G(L);
++ GCRef *newtab, *oldtab = g->str.tab;
++ MSize i;
++
++ /* No resizing during GC traversal or if already too big. */
++ if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
++ return;
++
++ newtab = lj_mem_newvec(L, newmask+1, GCRef);
++ memset(newtab, 0, (newmask+1)*sizeof(GCRef));
++
++#if LUAJIT_SECURITY_STRHASH
++ /* Check which chains need secondary hashes. */
++ if (g->str.second) {
++ int newsecond = 0;
++ /* Compute primary chain lengths. */
++ for (i = g->str.mask; i != ~(MSize)0; i--) {
++ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
++ while (o) {
++ GCstr *s = gco2str(o);
++ MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) :
++ s->hash;
++ hash &= newmask;
++ setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1);
++ o = gcnext(o);
+ }
+- o = gcnext(o);
+ }
+- } else { /* Slow path: end of string is too close to a page boundary. */
+- while (o != NULL) {
+- GCstr *sx = gco2str(o);
+- if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
+- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
+- if (isdead(g, o)) flipwhite(o);
+- return sx; /* Return existing string. */
++ /* Mark secondary chains. */
++ for (i = newmask; i != ~(MSize)0; i--) {
++ int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL;
++ newsecond |= secondary;
++ setgcrefp(newtab[i], secondary);
++ }
++ g->str.second = newsecond;
++ }
++#endif
++
++ /* Reinsert all strings from the old table into the new table. */
++ for (i = g->str.mask; i != ~(MSize)0; i--) {
++ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
++ while (o) {
++ GCobj *next = gcnext(o);
++ GCstr *s = gco2str(o);
++ MSize hash = s->hash;
++#if LUAJIT_SECURITY_STRHASH
++ uintptr_t u;
++ if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */
++ hash &= newmask;
++ u = gcrefu(newtab[hash]);
++ if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */
++ s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len);
++ s->hashalg = 1;
++ hash &= newmask;
++ u = gcrefu(newtab[hash]);
++ }
++ } else { /* String hashed with secondary hash. */
++ MSize shash = hash_sparse(g->str.seed, strdata(s), s->len);
++ u = gcrefu(newtab[shash & newmask]);
++ if (u & 1) {
++ hash &= newmask;
++ u = gcrefu(newtab[hash]);
++ } else { /* Revert string back to primary hash. */
++ s->hash = shash;
++ s->hashalg = 0;
++ hash = (shash & newmask);
++ }
++ }
++ /* NOBARRIER: The string table is a GC root. */
++ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
++ setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1)));
++#else
++ hash &= newmask;
++ /* NOBARRIER: The string table is a GC root. */
++ setgcrefr(o->gch.nextgc, newtab[hash]);
++ setgcref(newtab[hash], o);
++#endif
++ o = next;
++ }
++ }
++
++ /* Free old table and replace with new table. */
++ lj_str_freetab(g);
++ g->str.tab = newtab;
++ g->str.mask = newmask;
++}
++
++#if LUAJIT_SECURITY_STRHASH
++/* Rehash and rechain all strings in a chain. */
++static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc,
++ const char *str, MSize len)
++{
++ global_State *g = G(L);
++ int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */
++ GCRef *strtab = g->str.tab;
++ MSize strmask = g->str.mask;
++ GCobj *o = gcref(strtab[hashc & strmask]);
++ setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1));
++ g->str.second = 1;
++ while (o) {
++ uintptr_t u;
++ GCobj *next = gcnext(o);
++ GCstr *s = gco2str(o);
++ StrHash hash;
++ if (ow) { /* Must sweep while rechaining. */
++ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */
++ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
++ "sweep of undead string");
++ makewhite(g, o);
++ } else { /* Free dead string. */
++ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
++ "sweep of unlive string");
++ lj_str_free(g, s);
++ o = next;
++ continue;
+ }
+- o = gcnext(o);
+ }
++ hash = s->hash;
++ if (!s->hashalg) { /* Rehash with secondary hash. */
++ hash = hash_dense(g->str.seed, hash, strdata(s), s->len);
++ s->hash = hash;
++ s->hashalg = 1;
++ }
++ /* Rechain. */
++ hash &= strmask;
++ u = gcrefu(strtab[hash]);
++ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
++ setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1)));
++ o = next;
+ }
+- /* Nope, create a new string. */
+- s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
++ /* Try to insert the pending string again. */
++ return lj_str_new(L, str, len);
++}
++#endif
++
++/* Reseed String ID from PRNG after random interval < 2^bits. */
++#if LUAJIT_SECURITY_STRID == 1
++#define STRID_RESEED_INTERVAL 8
++#elif LUAJIT_SECURITY_STRID == 2
++#define STRID_RESEED_INTERVAL 4
++#elif LUAJIT_SECURITY_STRID >= 3
++#define STRID_RESEED_INTERVAL 0
++#endif
++
++/* Allocate a new string and add to string interning table. */
++static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
++ StrHash hash, int hashalg)
++{
++ GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr);
++ global_State *g = G(L);
++ uintptr_t u;
+ newwhite(g, s);
+ s->gct = ~LJ_TSTR;
+ s->len = len;
+- s->hash = h;
++ s->hash = hash;
++#ifndef STRID_RESEED_INTERVAL
++ s->sid = g->str.id++;
++#elif STRID_RESEED_INTERVAL
++ if (!g->str.idreseed--) {
++ uint64_t r = lj_prng_u64(&g->prng);
++ g->str.id = (StrID)r;
++ g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL));
++ }
++ s->sid = g->str.id++;
++#else
++ s->sid = (StrID)lj_prng_u64(&g->prng);
++#endif
+ s->reserved = 0;
++ s->hashalg = (uint8_t)hashalg;
++ /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */
++ *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0;
+ memcpy(strdatawr(s), str, len);
+- strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
+- /* Add it to string hash table. */
+- h &= g->strmask;
+- s->nextgc = g->strhash[h];
++ /* Add to string hash table. */
++ hash &= g->str.mask;
++ u = gcrefu(g->str.tab[hash]);
++ setgcrefp(s->nextgc, (u & ~(uintptr_t)1));
+ /* NOBARRIER: The string table is a GC root. */
+- setgcref(g->strhash[h], obj2gco(s));
+- if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
+- lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
++ setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1)));
++ if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */
++ lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */
+ return s; /* Return newly interned string. */
+ }
+
++/* Intern a string and return string object. */
++GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
++{
++ global_State *g = G(L);
++ if (lenx-1 < LJ_MAX_STR-1) {
++ MSize len = (MSize)lenx;
++ StrHash hash = hash_sparse(g->str.seed, str, len);
++ MSize coll = 0;
++ int hashalg = 0;
++ /* Check if the string has already been interned. */
++ GCobj *o = gcref(g->str.tab[hash & g->str.mask]);
++#if LUAJIT_SECURITY_STRHASH
++ if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */
++ hashalg = 1;
++ hash = hash_dense(g->str.seed, hash, str, len);
++ o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1);
++ }
++#endif
++ while (o != NULL) {
++ GCstr *sx = gco2str(o);
++ if (sx->hash == hash && sx->len == len) {
++ if (memcmp(str, strdata(sx), len) == 0) {
++ if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
++ return sx; /* Return existing string. */
++ }
++ coll++;
++ }
++ coll++;
++ o = gcnext(o);
++ }
++#if LUAJIT_SECURITY_STRHASH
++ /* Rehash chain if there are too many collisions. */
++ if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) {
++ return lj_str_rehash_chain(L, hash, str, len);
++ }
++#endif
++ /* Otherwise allocate a new string. */
++ return lj_str_alloc(L, str, len, hash, hashalg);
++ } else {
++ if (lenx)
++ lj_err_msg(L, LJ_ERR_STROV);
++ return &g->strempty;
++ }
++}
++
+ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
+ {
+- g->strnum--;
+- lj_mem_free(g, s, sizestring(s));
++ g->str.num--;
++ lj_mem_free(g, s, lj_str_size(s->len));
++}
++
++void LJ_FASTCALL lj_str_init(lua_State *L)
++{
++ global_State *g = G(L);
++ g->str.seed = lj_prng_u64(&g->prng);
++ lj_str_resize(L, LJ_MIN_STRTAB-1);
+ }
+
+diff --git a/src/lj_str.h b/src/lj_str.h
+index 85c1e405..39fa4f06 100644
+--- a/src/lj_str.h
++++ b/src/lj_str.h
+@@ -1,6 +1,6 @@
+ /*
+ ** String handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_STR_H
+@@ -20,8 +20,12 @@ LJ_FUNC int lj_str_haspattern(GCstr *s);
+ LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
+ LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
+ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
++LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
++#define lj_str_freetab(g) \
++ (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef))
+
+ #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
+ #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
++#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
+
+ #endif
+diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
+index d7893ce9..945954aa 100644
+--- a/src/lj_strfmt.c
++++ b/src/lj_strfmt.c
+@@ -1,6 +1,6 @@
+ /*
+ ** String formatting.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include <stdio.h>
+@@ -9,11 +9,17 @@
+ #define LUA_CORE
+
+ #include "lj_obj.h"
++#include "lj_err.h"
+ #include "lj_buf.h"
+ #include "lj_str.h"
++#include "lj_meta.h"
+ #include "lj_state.h"
+ #include "lj_char.h"
+ #include "lj_strfmt.h"
++#if LJ_HASFFI
++#include "lj_ctype.h"
++#endif
++#include "lj_lib.h"
+
+ /* -- Format parser ------------------------------------------------------- */
+
+@@ -161,6 +167,10 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
+ if (tvisstr(o)) {
+ *lenp = strV(o)->len;
+ return strVdata(o);
++ } else if (tvisbuf(o)) {
++ SBufExt *sbx = bufV(o);
++ *lenp = sbufxlen(sbx);
++ return sbx->r;
+ } else if (tvisint(o)) {
+ sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
+ } else if (tvisnum(o)) {
+@@ -169,7 +179,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
+ return NULL;
+ }
+ *lenp = sbuflen(sb);
+- return sbufB(sb);
++ return sb->b;
+ }
+
+ /* -- Unformatted conversions to buffer ----------------------------------- */
+@@ -177,7 +187,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
+ /* Add integer to buffer. */
+ SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
+ {
+- setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
++ sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k);
+ return sb;
+ }
+
+@@ -191,73 +201,86 @@ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
+
+ SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
+ {
+- setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
++ sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v);
+ return sb;
+ }
+
+ /* Add quoted string to buffer. */
+-SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
++static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len)
+ {
+- const char *s = strdata(str);
+- MSize len = str->len;
+ lj_buf_putb(sb, '"');
+ while (len--) {
+ uint32_t c = (uint32_t)(uint8_t)*s++;
+- char *p = lj_buf_more(sb, 4);
++ char *w = lj_buf_more(sb, 4);
+ if (c == '"' || c == '\\' || c == '\n') {
+- *p++ = '\\';
++ *w++ = '\\';
+ } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
+ uint32_t d;
+- *p++ = '\\';
++ *w++ = '\\';
+ if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
+- *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
++ *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
+ goto tens;
+ } else if (c >= 10) {
+ tens:
+- d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
++ d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d);
+ }
+ c += '0';
+ }
+- *p++ = (char)c;
+- setsbufP(sb, p);
++ *w++ = (char)c;
++ sb->w = w;
+ }
+ lj_buf_putb(sb, '"');
+ return sb;
+ }
+
++#if LJ_HASJIT
++SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
++{
++ return strfmt_putquotedlen(sb, strdata(str), str->len);
++}
++#endif
++
+ /* -- Formatted conversions to buffer ------------------------------------- */
+
+ /* Add formatted char to buffer. */
+ SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
+ {
+ MSize width = STRFMT_WIDTH(sf);
+- char *p = lj_buf_more(sb, width > 1 ? width : 1);
+- if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
+- while (width-- > 1) *p++ = ' ';
+- if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
+- setsbufP(sb, p);
++ char *w = lj_buf_more(sb, width > 1 ? width : 1);
++ if ((sf & STRFMT_F_LEFT)) *w++ = (char)c;
++ while (width-- > 1) *w++ = ' ';
++ if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c;
++ sb->w = w;
+ return sb;
+ }
+
+ /* Add formatted string to buffer. */
+-SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
++static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len)
+ {
+- MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
+ MSize width = STRFMT_WIDTH(sf);
+- char *p = lj_buf_more(sb, width > len ? width : len);
+- if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
+- while (width-- > len) *p++ = ' ';
+- if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
+- setsbufP(sb, p);
++ char *w;
++ if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf);
++ w = lj_buf_more(sb, width > len ? width : len);
++ if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
++ while (width-- > len) *w++ = ' ';
++ if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
++ sb->w = w;
+ return sb;
+ }
+
++#if LJ_HASJIT
++SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
++{
++ return strfmt_putfstrlen(sb, sf, strdata(str), str->len);
++}
++#endif
++
+ /* Add formatted signed/unsigned integer to buffer. */
+ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
+ {
+- char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
++ char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w;
+ #ifdef LUA_USE_ASSERT
+- char *ps;
++ char *ws;
+ #endif
+ MSize prefix = 0, len, prec, pprec, width, need;
+
+@@ -301,27 +324,27 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
+ width = STRFMT_WIDTH(sf);
+ pprec = prec + (prefix >> 8);
+ need = width > pprec ? width : pprec;
+- p = lj_buf_more(sb, need);
++ w = lj_buf_more(sb, need);
+ #ifdef LUA_USE_ASSERT
+- ps = p;
++ ws = w;
+ #endif
+
+ /* Format number with leading/trailing whitespace and zeros. */
+ if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
+- while (width-- > pprec) *p++ = ' ';
++ while (width-- > pprec) *w++ = ' ';
+ if (prefix) {
+- if ((char)prefix >= 'X') *p++ = '0';
+- *p++ = (char)prefix;
++ if ((char)prefix >= 'X') *w++ = '0';
++ *w++ = (char)prefix;
+ }
+ if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
+- while (width-- > pprec) *p++ = '0';
+- while (prec-- > len) *p++ = '0';
+- while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */
++ while (width-- > pprec) *w++ = '0';
++ while (prec-- > len) *w++ = '0';
++ while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */
+ if ((sf & STRFMT_F_LEFT))
+- while (width-- > pprec) *p++ = ' ';
++ while (width-- > pprec) *w++ = ' ';
+
+- lua_assert(need == (MSize)(p - ps));
+- setsbufP(sb, p);
++ lj_assertX(need == (MSize)(w - ws), "miscalculated format size");
++ sb->w = w;
+ return sb;
+ }
+
+@@ -346,6 +369,117 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
+ return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
+ }
+
++/* Format stack arguments to buffer. */
++int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
++{
++ int narg = (int)(L->top - L->base);
++ GCstr *fmt = lj_lib_checkstr(L, arg);
++ FormatState fs;
++ SFormat sf;
++ lj_strfmt_init(&fs, strdata(fmt), fmt->len);
++ while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
++ if (sf == STRFMT_LIT) {
++ lj_buf_putmem(sb, fs.str, fs.len);
++ } else if (sf == STRFMT_ERR) {
++ lj_err_callerv(L, LJ_ERR_STRFMT,
++ strdata(lj_str_new(L, fs.str, fs.len)));
++ } else {
++ TValue *o = &L->base[arg++];
++ if (arg > narg)
++ lj_err_arg(L, arg, LJ_ERR_NOVAL);
++ switch (STRFMT_TYPE(sf)) {
++ case STRFMT_INT:
++ if (tvisint(o)) {
++ int32_t k = intV(o);
++ if (sf == STRFMT_INT)
++ lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
++ else
++ lj_strfmt_putfxint(sb, sf, k);
++ break;
++ }
++#if LJ_HASFFI
++ if (tviscdata(o)) {
++ GCcdata *cd = cdataV(o);
++ if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
++ lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
++ break;
++ }
++ }
++#endif
++ lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
++ break;
++ case STRFMT_UINT:
++ if (tvisint(o)) {
++ lj_strfmt_putfxint(sb, sf, intV(o));
++ break;
++ }
++#if LJ_HASFFI
++ if (tviscdata(o)) {
++ GCcdata *cd = cdataV(o);
++ if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
++ lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
++ break;
++ }
++ }
++#endif
++ lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
++ break;
++ case STRFMT_NUM:
++ lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
++ break;
++ case STRFMT_STR: {
++ MSize len;
++ const char *s;
++ cTValue *mo;
++ if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&
++ !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
++ /* Call __tostring metamethod once. */
++ copyTV(L, L->top++, mo);
++ copyTV(L, L->top++, o);
++ lua_call(L, 1, 1);
++ o = &L->base[arg-1]; /* Stack may have been reallocated. */
++ copyTV(L, o, --L->top); /* Replace inline for retry. */
++ if (retry < 2) { /* Global buffer may have been overwritten. */
++ retry = 1;
++ break;
++ }
++ }
++ if (LJ_LIKELY(tvisstr(o))) {
++ len = strV(o)->len;
++ s = strVdata(o);
++#if LJ_HASBUFFER
++ } else if (tvisbuf(o)) {
++ SBufExt *sbx = bufV(o);
++ if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
++ len = sbufxlen(sbx);
++ s = sbx->r;
++#endif
++ } else {
++ GCstr *str = lj_strfmt_obj(L, o);
++ len = str->len;
++ s = strdata(str);
++ }
++ if ((sf & STRFMT_T_QUOTED))
++ strfmt_putquotedlen(sb, s, len); /* No formatting. */
++ else
++ strfmt_putfstrlen(sb, sf, s, len);
++ break;
++ }
++ case STRFMT_CHAR:
++ lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
++ break;
++ case STRFMT_PTR: /* No formatting. */
++ lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
++ break;
++ default:
++ lj_assertL(0, "bad string format type");
++ break;
++ }
++ }
++ }
++ return retry;
++}
++
+ /* -- Conversions to strings ---------------------------------------------- */
+
+ /* Convert integer to string. */
+@@ -393,7 +527,7 @@ GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
+ p = lj_buf_wmem(p, "builtin#", 8);
+ p = lj_strfmt_wint(p, funcV(o)->c.ffid);
+ } else {
+- p = lj_strfmt_wptr(p, lj_obj_ptr(o));
++ p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o));
+ }
+ return lj_str_new(L, buf, (size_t)(p - buf));
+ }
+@@ -449,7 +583,7 @@ const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
+ case STRFMT_ERR:
+ default:
+ lj_buf_putb(sb, '?');
+- lua_assert(0);
++ lj_assertL(0, "bad string format near offset %d", fs.len);
+ break;
+ }
+ }
+diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
+index 6e1d9017..cb2c7360 100644
+--- a/src/lj_strfmt.h
++++ b/src/lj_strfmt.h
+@@ -1,6 +1,6 @@
+ /*
+ ** String formatting.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_STRFMT_H
+@@ -79,7 +79,8 @@ static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
+ {
+ fs->p = (const uint8_t *)p;
+ fs->e = (const uint8_t *)p + len;
+- lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */
++ /* Must be NUL-terminated. May have NULs inside, too. */
++ lj_assertX(*fs->e == 0, "format not NUL-terminated");
+ }
+
+ /* Raw conversions. */
+@@ -94,7 +95,9 @@ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
+ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
+ #endif
+ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
++#if LJ_HASJIT
+ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
++#endif
+
+ /* Formatted conversions to buffer. */
+ LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
+@@ -102,7 +105,10 @@ LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
+ LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
+ LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
+ LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
++#if LJ_HASJIT
+ LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
++#endif
++LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry);
+
+ /* Conversions to strings. */
+ LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
+@@ -117,7 +123,7 @@ LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
+ LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
+ va_list argp);
+ LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
+-#ifdef __GNUC__
++#if defined(__GNUC__) || defined(__clang__)
+ __attribute__ ((format (printf, 2, 3)))
+ #endif
+ ;
+diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c
+index 9271f68a..dfd56bd4 100644
+--- a/src/lj_strfmt_num.c
++++ b/src/lj_strfmt_num.c
+@@ -1,6 +1,6 @@
+ /*
+ ** String formatting for floating-point numbers.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ ** Contributed by Peter Cawley.
+ */
+
+@@ -257,7 +257,7 @@ static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen,
+ } else {
+ prec -= hilen - 9;
+ }
+- lua_assert(prec < 9);
++ lj_assertX(prec < 9, "bad precision %d", prec);
+ lj_strfmt_wuint9(nd9, nd[ndhi]);
+ lj_strfmt_wuint9(ref9, *ref);
+ return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5');
+@@ -414,14 +414,14 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
+ ** Rescaling was performed, but this introduced some error, and might
+ ** have pushed us across a rounding boundary. We check whether this
+ ** error affected the result by introducing even more error (2ulp in
+- ** either direction), and seeing whether a roundary boundary was
++ ** either direction), and seeing whether a rounding boundary was
+ ** crossed. Having already converted the -2ulp case, we save off its
+ ** most significant digits, convert the +2ulp case, and compare them.
+ */
+ int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29)
+ + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12));
+ const int8_t *m_e = four_ulp_m_e + eidx * 2;
+- lua_assert(0 <= eidx && eidx < 128);
++ lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx);
+ nd[33] = nd[ndhi];
+ nd[32] = nd[(ndhi - 1) & 0x3f];
+ nd[31] = nd[(ndhi - 2) & 0x3f];
+@@ -576,7 +576,7 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
+ /* Add formatted floating-point number to buffer. */
+ SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
+ {
+- setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL));
++ sb->w = lj_strfmt_wfnum(sb, sf, n, NULL);
+ return sb;
+ }
+
+diff --git a/src/lj_strscan.c b/src/lj_strscan.c
+index f5f35c96..f681fbb0 100644
+--- a/src/lj_strscan.c
++++ b/src/lj_strscan.c
+@@ -1,6 +1,6 @@
+ /*
+ ** String scanning.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include <math.h>
+@@ -79,7 +79,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
+ /* Avoid double rounding for denormals. */
+ if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) {
+ /* NYI: all of this generates way too much code on 32 bit CPUs. */
+-#if defined(__GNUC__) && LJ_64
++#if (defined(__GNUC__) || defined(__clang__)) && LJ_64
+ int32_t b = (int32_t)(__builtin_clzll(x)^63);
+ #else
+ int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) :
+@@ -93,7 +93,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
+ }
+
+ /* Convert to double using a signed int64_t conversion, then rescale. */
+- lua_assert((int64_t)x >= 0);
++ lj_assertX((int64_t)x >= 0, "bad double conversion");
+ n = (double)(int64_t)x;
+ if (neg) n = -n;
+ if (ex2) n = ldexp(n, ex2);
+@@ -262,7 +262,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
+ uint32_t hi = 0, lo = (uint32_t)(xip-xi);
+ int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1);
+
+- lua_assert(lo > 0 && (ex10 & 1) == 0);
++ lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10);
+
+ /* Handle simple overflow/underflow. */
+ if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; }
+@@ -370,9 +370,11 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
+ }
+
+ /* Scan string containing a number. Returns format. Returns value in o. */
+-StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
++StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
++ uint32_t opt)
+ {
+ int32_t neg = 0;
++ const uint8_t *pe = p + len;
+
+ /* Remove leading space, parse sign and non-numbers. */
+ if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
+@@ -390,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+ p += 3;
+ }
+ while (lj_char_isspace(*p)) p++;
+- if (*p) return STRSCAN_ERROR;
++ if (*p || p < pe) return STRSCAN_ERROR;
+ o->u64 = tmp.u64;
+ return STRSCAN_NUM;
+ }
+@@ -441,6 +443,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+
+ /* Handle decimal point. */
+ if (dp) {
++ if (base == 2) return STRSCAN_ERROR;
+ fmt = STRSCAN_NUM;
+ if (dig) {
+ ex = (int32_t)(dp-(p-1)); dp = p-1;
+@@ -488,16 +491,16 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+ while (lj_char_isspace(*p)) p++;
+ if (*p) return STRSCAN_ERROR;
+ }
++ if (p < pe) return STRSCAN_ERROR;
+
+ /* Fast path for decimal 32 bit integers. */
+ if (fmt == STRSCAN_INT && base == 10 &&
+ (dig < 10 || (dig == 10 && *sp <= '2' && x < 0x80000000u+neg))) {
+- int32_t y = neg ? -(int32_t)x : (int32_t)x;
+ if ((opt & STRSCAN_OPT_TONUM)) {
+- o->n = (double)y;
++ o->n = neg ? -(double)x : (double)x;
+ return STRSCAN_NUM;
+ } else {
+- o->i = y;
++ o->i = neg ? -(int32_t)x : (int32_t)x;
+ return STRSCAN_INT;
+ }
+ }
+@@ -524,18 +527,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+
+ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
+ {
+- StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
++ StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
+ STRSCAN_OPT_TONUM);
+- lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
++ lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format");
+ return (fmt != STRSCAN_ERROR);
+ }
+
+ #if LJ_DUALNUM
+ int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
+ {
+- StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
++ StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
+ STRSCAN_OPT_TOINT);
+- lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
++ lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT,
++ "bad scan format");
+ if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
+ return (fmt != STRSCAN_ERROR);
+ }
+diff --git a/src/lj_strscan.h b/src/lj_strscan.h
+index 6fb0dda0..7b7d6fed 100644
+--- a/src/lj_strscan.h
++++ b/src/lj_strscan.h
+@@ -1,6 +1,6 @@
+ /*
+ ** String scanning.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_STRSCAN_H
+@@ -22,7 +22,8 @@ typedef enum {
+ STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
+ } StrScanFmt;
+
+-LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
++LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
++ uint32_t opt);
+ LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
+ #if LJ_DUALNUM
+ LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
+diff --git a/src/lj_tab.c b/src/lj_tab.c
+index 47c0cfd3..4113839f 100644
+--- a/src/lj_tab.c
++++ b/src/lj_tab.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Table handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -16,29 +16,10 @@
+
+ /* -- Object hashing ------------------------------------------------------ */
+
+-/* Hash values are masked with the table hash mask and used as an index. */
+-static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
+-{
+- Node *n = noderef(t->node);
+- return &n[hash & t->hmask];
+-}
+-
+-/* String hashes are precomputed when they are interned. */
+-#define hashstr(t, s) hashmask(t, (s)->hash)
+-
+-#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
+-#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
+-#if LJ_GC64
+-#define hashgcref(t, r) \
+- hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
+-#else
+-#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
+-#endif
+-
+ /* Hash an arbitrary key and return its anchor position in the hash table. */
+ static Node *hashkey(const GCtab *t, cTValue *key)
+ {
+- lua_assert(!tvisint(key));
++ lj_assertX(!tvisint(key), "attempt to hash integer");
+ if (tvisstr(key))
+ return hashstr(t, strV(key));
+ else if (tvisnum(key))
+@@ -57,7 +38,7 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
+ {
+ uint32_t hsize;
+ Node *node;
+- lua_assert(hbits != 0);
++ lj_assertL(hbits != 0, "zero hash size");
+ if (hbits > LJ_MAX_HBITS)
+ lj_err_msg(L, LJ_ERR_TABOV);
+ hsize = 1u << hbits;
+@@ -78,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t)
+ {
+ uint32_t i, hmask = t->hmask;
+ Node *node = noderef(t->node);
+- lua_assert(t->hmask != 0);
++ lj_assertX(t->hmask != 0, "empty hash part");
+ for (i = 0; i <= hmask; i++) {
+ Node *n = &node[i];
+ setmref(n->next, NULL);
+@@ -103,7 +84,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
+ /* First try to colocate the array part. */
+ if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
+ Node *nilnode;
+- lua_assert((sizeof(GCtab) & 7) == 0);
++ lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size");
+ t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
+ t->gct = ~LJ_TTAB;
+ t->nomm = (uint8_t)~0;
+@@ -185,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
+ GCtab *t;
+ uint32_t asize, hmask;
+ t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0);
+- lua_assert(kt->asize == t->asize && kt->hmask == t->hmask);
++ lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask,
++ "mismatched size of table and template");
+ t->nomm = 0; /* Keys with metamethod names may be present. */
+ asize = kt->asize;
+ if (asize > 0) {
+@@ -310,7 +292,7 @@ void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
+
+ static uint32_t countint(cTValue *key, uint32_t *bins)
+ {
+- lua_assert(!tvisint(key));
++ lj_assertX(!tvisint(key), "bad integer key");
+ if (tvisnum(key)) {
+ lua_Number nk = numV(key);
+ int32_t k = lj_num2int(nk);
+@@ -412,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
+ return NULL;
+ }
+
+-cTValue *lj_tab_getstr(GCtab *t, GCstr *key)
++cTValue *lj_tab_getstr(GCtab *t, const GCstr *key)
+ {
+ Node *n = hashstr(t, key);
+ do {
+@@ -463,7 +445,8 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
+ if (!tvisnil(&n->val) || t->hmask == 0) {
+ Node *nodebase = noderef(t->node);
+ Node *collide, *freenode = getfreetop(t, nodebase);
+- lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1);
++ lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1,
++ "bad freenode");
+ do {
+ if (freenode == nodebase) { /* No free node found? */
+ rehashtab(L, t, key); /* Rehash table. */
+@@ -471,7 +454,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
+ }
+ } while (!tvisnil(&(--freenode)->key));
+ setfreetop(t, nodebase, freenode);
+- lua_assert(freenode != &G(L)->nilnode);
++ lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash");
+ collide = hashkey(t, &n->key);
+ if (collide != n) { /* Colliding node not the main node? */
+ while (noderef(collide->next) != n) /* Find predecessor. */
+@@ -486,11 +469,33 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
+ /* Rechain pseudo-resurrected string keys with colliding hashes. */
+ while (nextnode(freenode)) {
+ Node *nn = nextnode(freenode);
+- if (tvisstr(&nn->key) && !tvisnil(&nn->val) &&
+- hashstr(t, strV(&nn->key)) == n) {
++ if (!tvisnil(&nn->val) && hashkey(t, &nn->key) == n) {
+ freenode->next = nn->next;
+ nn->next = n->next;
+ setmref(n->next, nn);
++ /*
++ ** Rechaining a resurrected string key creates a new dilemma:
++ ** Another string key may have originally been resurrected via
++ ** _any_ of the previous nodes as a chain anchor. Including
++ ** a node that had to be moved, which makes them unreachable.
++ ** It's not feasible to check for all previous nodes, so rechain
++ ** any string key that's currently in a non-main positions.
++ */
++ while ((nn = nextnode(freenode))) {
++ if (!tvisnil(&nn->val)) {
++ Node *mn = hashkey(t, &nn->key);
++ if (mn != freenode && mn != nn) {
++ freenode->next = nn->next;
++ nn->next = mn->next;
++ setmref(mn->next, nn);
++ } else {
++ freenode = nn;
++ }
++ } else {
++ freenode = nn;
++ }
++ }
++ break;
+ } else {
+ freenode = nn;
+ }
+@@ -505,7 +510,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
+ if (LJ_UNLIKELY(tvismzero(&n->key)))
+ n->key.u64 = 0;
+ lj_gc_anybarriert(L, t);
+- lua_assert(tvisnil(&n->val));
++ lj_assertL(tvisnil(&n->val), "new hash slot is not empty");
+ return &n->val;
+ }
+
+@@ -522,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
+ return lj_tab_newkey(L, t, &k);
+ }
+
+-TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key)
++TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key)
+ {
+ TValue k;
+ Node *n = hashstr(t, key);
+@@ -563,103 +568,126 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
+
+ /* -- Table traversal ----------------------------------------------------- */
+
+-/* Get the traversal index of a key. */
+-static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key)
++/* Table traversal indexes:
++**
++** Array key index: [0 .. t->asize-1]
++** Hash key index: [t->asize .. t->asize+t->hmask]
++** Invalid key: ~0
++*/
++
++/* Get the successor traversal index of a key. */
++uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
+ {
+ TValue tmp;
+ if (tvisint(key)) {
+ int32_t k = intV(key);
+ if ((uint32_t)k < t->asize)
+- return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */
++ return (uint32_t)k + 1;
+ setnumV(&tmp, (lua_Number)k);
+ key = &tmp;
+ } else if (tvisnum(key)) {
+ lua_Number nk = numV(key);
+ int32_t k = lj_num2int(nk);
+ if ((uint32_t)k < t->asize && nk == (lua_Number)k)
+- return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */
++ return (uint32_t)k + 1;
+ }
+ if (!tvisnil(key)) {
+ Node *n = hashkey(t, key);
+ do {
+ if (lj_obj_equal(&n->key, key))
+- return t->asize + (uint32_t)(n - noderef(t->node));
+- /* Hash key indexes: [t->asize..t->asize+t->nmask] */
++ return t->asize + (uint32_t)((n+1) - noderef(t->node));
+ } while ((n = nextnode(n)));
+- if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */
+- return key->u32.lo - 1;
+- lj_err_msg(L, LJ_ERR_NEXTIDX);
+- return 0; /* unreachable */
++ if (key->u32.hi == LJ_KEYINDEX) /* Despecialized ITERN while running. */
++ return key->u32.lo;
++ return ~0u; /* Invalid key to next. */
+ }
+- return ~0u; /* A nil key starts the traversal. */
++ return 0; /* A nil key starts the traversal. */
+ }
+
+-/* Advance to the next step in a table traversal. */
+-int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
++/* Get the next key/value pair of a table traversal. */
++int lj_tab_next(GCtab *t, cTValue *key, TValue *o)
+ {
+- uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */
+- for (i++; i < t->asize; i++) /* First traverse the array keys. */
+- if (!tvisnil(arrayslot(t, i))) {
+- setintV(key, i);
+- copyTV(L, key+1, arrayslot(t, i));
++ uint32_t idx = lj_tab_keyindex(t, key); /* Find successor index of key. */
++ /* First traverse the array part. */
++ for (; idx < t->asize; idx++) {
++ cTValue *a = arrayslot(t, idx);
++ if (LJ_LIKELY(!tvisnil(a))) {
++ setintV(o, idx);
++ o[1] = *a;
+ return 1;
+ }
+- for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */
+- Node *n = &noderef(t->node)[i];
++ }
++ idx -= t->asize;
++ /* Then traverse the hash part. */
++ for (; idx <= t->hmask; idx++) {
++ Node *n = &noderef(t->node)[idx];
+ if (!tvisnil(&n->val)) {
+- copyTV(L, key, &n->key);
+- copyTV(L, key+1, &n->val);
++ o[0] = n->key;
++ o[1] = n->val;
+ return 1;
+ }
+ }
+- return 0; /* End of traversal. */
++ return (int32_t)idx < 0 ? -1 : 0; /* Invalid key or end of traversal. */
+ }
+
+ /* -- Table length calculation -------------------------------------------- */
+
+-static MSize unbound_search(GCtab *t, MSize j)
++/* Compute table length. Slow path with mixed array/hash lookups. */
++LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi)
+ {
+ cTValue *tv;
+- MSize i = j; /* i is zero or a present index */
+- j++;
+- /* find `i' and `j' such that i is present and j is not */
+- while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) {
+- i = j;
+- j *= 2;
+- if (j > (MSize)(INT_MAX-2)) { /* overflow? */
+- /* table was built with bad purposes: resort to linear search */
+- i = 1;
+- while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++;
+- return i - 1;
++ size_t lo = hi;
++ hi++;
++ /* Widening search for an upper bound. */
++ while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) {
++ lo = hi;
++ hi += hi;
++ if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */
++ lo = 1;
++ while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++;
++ return (MSize)(lo - 1);
+ }
+ }
+- /* now do a binary search between them */
+- while (j - i > 1) {
+- MSize m = (i+j)/2;
+- cTValue *tvb = lj_tab_getint(t, (int32_t)m);
+- if (tvb && !tvisnil(tvb)) i = m; else j = m;
++ /* Binary search to find a non-nil to nil transition. */
++ while (hi - lo > 1) {
++ size_t mid = (lo+hi) >> 1;
++ cTValue *tvb = lj_tab_getint(t, (int32_t)mid);
++ if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid;
+ }
+- return i;
++ return (MSize)lo;
+ }
+
+-/*
+-** Try to find a boundary in table `t'. A `boundary' is an integer index
+-** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
+-*/
++/* Compute table length. Fast path. */
+ MSize LJ_FASTCALL lj_tab_len(GCtab *t)
+ {
+- MSize j = (MSize)t->asize;
+- if (j > 1 && tvisnil(arrayslot(t, j-1))) {
+- MSize i = 1;
+- while (j - i > 1) {
+- MSize m = (i+j)/2;
+- if (tvisnil(arrayslot(t, m-1))) j = m; else i = m;
++ size_t hi = (size_t)t->asize;
++ if (hi) hi--;
++ /* In a growing array the last array element is very likely nil. */
++ if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) {
++ /* Binary search to find a non-nil to nil transition in the array. */
++ size_t lo = 0;
++ while (hi - lo > 1) {
++ size_t mid = (lo+hi) >> 1;
++ if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid;
+ }
+- return i-1;
++ return (MSize)lo;
+ }
+- if (j) j--;
+- if (t->hmask <= 0)
+- return j;
+- return unbound_search(t, j);
++ /* Without a hash part, there's an implicit nil after the last element. */
++ return t->hmask ? tab_len_slow(t, hi) : (MSize)hi;
+ }
+
++#if LJ_HASJIT
++/* Verify hinted table length or compute it. */
++MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
++{
++ size_t asize = (size_t)t->asize;
++ cTValue *tv = arrayslot(t, hint);
++ if (LJ_LIKELY(hint+1 < asize)) {
++ if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint;
++ } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) {
++ return (MSize)hint;
++ }
++ return lj_tab_len(t);
++}
++#endif
++
+diff --git a/src/lj_tab.h b/src/lj_tab.h
+index 71e34945..e0e81ff7 100644
+--- a/src/lj_tab.h
++++ b/src/lj_tab.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Table handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TAB_H
+@@ -31,6 +31,25 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
+ return hi;
+ }
+
++/* Hash values are masked with the table hash mask and used as an index. */
++static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
++{
++ Node *n = noderef(t->node);
++ return &n[hash & t->hmask];
++}
++
++/* String IDs are generated when a string is interned. */
++#define hashstr(t, s) hashmask(t, (s)->sid)
++
++#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
++#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
++#if LJ_GC64
++#define hashgcref(t, r) \
++ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
++#else
++#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
++#endif
++
+ #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
+
+ LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
+@@ -50,14 +69,14 @@ LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
+ /* Caveat: all getters except lj_tab_get() can return NULL! */
+
+ LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
+-LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
++LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key);
+ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
+
+ /* Caveat: all setters require a write barrier for the stored value. */
+
+ LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
+ LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
+-LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
++LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key);
+ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
+
+ #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize)
+@@ -67,7 +86,11 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
+ #define lj_tab_setint(L, t, key) \
+ (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
+
+-LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
++LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key);
++LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o);
+ LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
++#if LJ_HASJIT
++LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
++#endif
+
+ #endif
+diff --git a/src/lj_target.h b/src/lj_target.h
+index 8dcae957..2d186b14 100644
+--- a/src/lj_target.h
++++ b/src/lj_target.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Definitions for target CPU.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TARGET_H
+@@ -152,7 +152,8 @@ typedef uint32_t RegCost;
+ /* Return the address of an exit stub. */
+ static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
+ {
+- lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL);
++ lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL,
++ "exit stub group for exit %d uninitialized", exitno);
+ return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
+ EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
+ }
+diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
+index 5551b1f1..72516bc2 100644
+--- a/src/lj_target_arm.h
++++ b/src/lj_target_arm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Definitions for ARM CPUs.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TARGET_ARM_H
+@@ -211,6 +211,7 @@ typedef enum ARMIns {
+ /* ARMv6T2 */
+ ARMI_MOVW = 0xe3000000,
+ ARMI_MOVT = 0xe3400000,
++ ARMI_BFI = 0xe7c00010,
+
+ /* VFP */
+ ARMI_VMOV_D = 0xeeb00b40,
+diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
+index 520023ae..6d39ffb8 100644
+--- a/src/lj_target_arm64.h
++++ b/src/lj_target_arm64.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Definitions for ARM64 CPUs.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TARGET_ARM64_H
+@@ -31,6 +31,8 @@ enum {
+
+ /* Calling conventions. */
+ RID_RET = RID_X0,
++ RID_RETLO = RID_X0,
++ RID_RETHI = RID_X1,
+ RID_FPRET = RID_D0,
+
+ /* These definitions must match with the *.dasc file(s): */
+@@ -132,9 +134,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
+ #define A64F_IMMR(x) ((x) << 16)
+ #define A64F_U16(x) ((x) << 5)
+ #define A64F_U12(x) ((x) << 10)
+-#define A64F_S26(x) (x)
++#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
+ #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
+-#define A64F_S14(x) ((x) << 5)
++#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
+ #define A64F_S9(x) ((x) << 12)
+ #define A64F_BIT(x) ((x) << 19)
+ #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
+@@ -145,6 +147,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
+ #define A64F_LSL16(x) (((x) / 16) << 21)
+ #define A64F_BSH(sh) ((sh) << 10)
+
++/* Check for valid field range. */
++#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
++
+ typedef enum A64Ins {
+ A64I_S = 0x20000000,
+ A64I_X = 0x80000000,
+@@ -207,6 +212,8 @@ typedef enum A64Ins {
+
+ A64I_EXTRw = 0x13800000,
+ A64I_EXTRx = 0x93c00000,
++ A64I_BFMw = 0x33000000,
++ A64I_BFMx = 0xb3400000,
+ A64I_SBFMw = 0x13000000,
+ A64I_SBFMx = 0x93400000,
+ A64I_SXTBw = 0x13001c00,
+diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
+index 740687b3..5da94605 100644
+--- a/src/lj_target_mips.h
++++ b/src/lj_target_mips.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Definitions for MIPS CPUs.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TARGET_MIPS_H
+@@ -223,6 +223,8 @@ typedef enum MIPSIns {
+ MIPSI_ADDIU = 0x24000000,
+ MIPSI_SUB = 0x00000022,
+ MIPSI_SUBU = 0x00000023,
++
++#if !LJ_TARGET_MIPSR6
+ MIPSI_MUL = 0x70000002,
+ MIPSI_DIV = 0x0000001a,
+ MIPSI_DIVU = 0x0000001b,
+@@ -232,6 +234,15 @@ typedef enum MIPSIns {
+ MIPSI_MFHI = 0x00000010,
+ MIPSI_MFLO = 0x00000012,
+ MIPSI_MULT = 0x00000018,
++#else
++ MIPSI_MUL = 0x00000098,
++ MIPSI_MUH = 0x000000d8,
++ MIPSI_DIV = 0x0000009a,
++ MIPSI_DIVU = 0x0000009b,
++
++ MIPSI_SELEQZ = 0x00000035,
++ MIPSI_SELNEZ = 0x00000037,
++#endif
+
+ MIPSI_SLL = 0x00000000,
+ MIPSI_SRL = 0x00000002,
+@@ -245,6 +256,8 @@ typedef enum MIPSIns {
+ MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
+ MIPSI_DROTRV = 0x00000056,
+
++ MIPSI_INS = 0x7c000004, /* MIPSXXR2 */
++
+ MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
+ MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
+ MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
+@@ -253,8 +266,13 @@ typedef enum MIPSIns {
+ MIPSI_B = 0x10000000,
+ MIPSI_J = 0x08000000,
+ MIPSI_JAL = 0x0c000000,
++#if !LJ_TARGET_MIPSR6
+ MIPSI_JALX = 0x74000000,
+ MIPSI_JR = 0x00000008,
++#else
++ MIPSI_JR = 0x00000009,
++ MIPSI_BALC = 0xe8000000,
++#endif
+ MIPSI_JALR = 0x0000f809,
+
+ MIPSI_BEQ = 0x10000000,
+@@ -282,15 +300,23 @@ typedef enum MIPSIns {
+
+ /* MIPS64 instructions. */
+ MIPSI_DADD = 0x0000002c,
+- MIPSI_DADDI = 0x60000000,
+ MIPSI_DADDU = 0x0000002d,
+ MIPSI_DADDIU = 0x64000000,
+ MIPSI_DSUB = 0x0000002e,
+ MIPSI_DSUBU = 0x0000002f,
++#if !LJ_TARGET_MIPSR6
+ MIPSI_DDIV = 0x0000001e,
+ MIPSI_DDIVU = 0x0000001f,
+ MIPSI_DMULT = 0x0000001c,
+ MIPSI_DMULTU = 0x0000001d,
++#else
++ MIPSI_DDIV = 0x0000009e,
++ MIPSI_DMOD = 0x000000de,
++ MIPSI_DDIVU = 0x0000009f,
++ MIPSI_DMODU = 0x000000df,
++ MIPSI_DMUL = 0x0000009c,
++ MIPSI_DMUH = 0x000000dc,
++#endif
+
+ MIPSI_DSLL = 0x00000038,
+ MIPSI_DSRL = 0x0000003a,
+@@ -308,6 +334,11 @@ typedef enum MIPSIns {
+ MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
+ MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
+ MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
++#if LJ_TARGET_MIPSR6
++ MIPSI_LSA = 0x00000005,
++ MIPSI_DLSA = 0x00000015,
++ MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA,
++#endif
+
+ /* Extract/insert instructions. */
+ MIPSI_DEXTM = 0x7c000001,
+@@ -317,18 +348,19 @@ typedef enum MIPSIns {
+ MIPSI_DINSU = 0x7c000006,
+ MIPSI_DINS = 0x7c000007,
+
+- MIPSI_RINT_D = 0x4620001a,
+- MIPSI_RINT_S = 0x4600001a,
+- MIPSI_RINT = 0x4400001a,
+ MIPSI_FLOOR_D = 0x4620000b,
+- MIPSI_CEIL_D = 0x4620000a,
+- MIPSI_ROUND_D = 0x46200008,
+
+ /* FP instructions. */
+ MIPSI_MOV_S = 0x46000006,
+ MIPSI_MOV_D = 0x46200006,
++#if !LJ_TARGET_MIPSR6
+ MIPSI_MOVT_D = 0x46210011,
+ MIPSI_MOVF_D = 0x46200011,
++#else
++ MIPSI_MIN_D = 0x4620001C,
++ MIPSI_MAX_D = 0x4620001E,
++ MIPSI_SEL_D = 0x46200010,
++#endif
+
+ MIPSI_ABS_D = 0x46200005,
+ MIPSI_NEG_D = 0x46200007,
+@@ -363,15 +395,23 @@ typedef enum MIPSIns {
+ MIPSI_DMTC1 = 0x44a00000,
+ MIPSI_DMFC1 = 0x44200000,
+
++#if !LJ_TARGET_MIPSR6
+ MIPSI_BC1F = 0x45000000,
+ MIPSI_BC1T = 0x45010000,
+-
+ MIPSI_C_EQ_D = 0x46200032,
+ MIPSI_C_OLT_S = 0x46000034,
+ MIPSI_C_OLT_D = 0x46200034,
+ MIPSI_C_ULT_D = 0x46200035,
+ MIPSI_C_OLE_D = 0x46200036,
+ MIPSI_C_ULE_D = 0x46200037,
++#else
++ MIPSI_BC1EQZ = 0x45200000,
++ MIPSI_BC1NEZ = 0x45a00000,
++ MIPSI_CMP_EQ_D = 0x46a00002,
++ MIPSI_CMP_LT_S = 0x46800004,
++ MIPSI_CMP_LT_D = 0x46a00004,
++#endif
++
+ } MIPSIns;
+
+ #endif
+diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
+index c5c991a3..c83dcc5e 100644
+--- a/src/lj_target_ppc.h
++++ b/src/lj_target_ppc.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Definitions for PPC CPUs.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TARGET_PPC_H
+diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
+index 356f7924..d0ce196c 100644
+--- a/src/lj_target_x86.h
++++ b/src/lj_target_x86.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Definitions for x86 and x64 CPUs.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TARGET_X86_H
+@@ -38,10 +38,9 @@ enum {
+ RID_RET = RID_EAX,
+ #if LJ_64
+ RID_FPRET = RID_XMM0,
+-#else
++#endif
+ RID_RETLO = RID_EAX,
+ RID_RETHI = RID_EDX,
+-#endif
+
+ /* These definitions must match with the *.dasc file(s): */
+ RID_BASE = RID_EDX, /* Interpreter BASE. */
+@@ -165,6 +164,8 @@ typedef struct {
+ #define EXITSTUB_SPACING (2+2)
+ #define EXITSTUBS_PER_GROUP 32
+
++#define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */
++
+ /* -- x86 ModRM operand encoding ------------------------------------------ */
+
+ typedef enum {
+@@ -228,16 +229,10 @@ typedef enum {
+ /* Note: little-endian byte-order! */
+ XI_FLDZ = 0xeed9,
+ XI_FLD1 = 0xe8d9,
+- XI_FLDLG2 = 0xecd9,
+- XI_FLDLN2 = 0xedd9,
+ XI_FDUP = 0xc0d9, /* Really fld st0. */
+ XI_FPOP = 0xd8dd, /* Really fstp st0. */
+ XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
+ XI_FRNDINT = 0xfcd9,
+- XI_FSIN = 0xfed9,
+- XI_FCOS = 0xffd9,
+- XI_FPTAN = 0xf2d9,
+- XI_FPATAN = 0xf3d9,
+ XI_FSCALE = 0xfdd9,
+ XI_FYL2X = 0xf1d9,
+
+diff --git a/src/lj_trace.c b/src/lj_trace.c
+index d85b47f8..43b86e4f 100644
+--- a/src/lj_trace.c
++++ b/src/lj_trace.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Trace management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_trace_c
+@@ -30,6 +30,7 @@
+ #include "lj_vm.h"
+ #include "lj_vmevent.h"
+ #include "lj_target.h"
++#include "lj_prng.h"
+
+ /* -- Error handling ------------------------------------------------------ */
+
+@@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T)
+ name++;
+ else
+ name = "(string)";
+- lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc);
++ lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
++ "trace PC out of range");
+ lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
+ if (!fp) {
+ char fname[40];
+@@ -183,7 +185,7 @@ void lj_trace_reenableproto(GCproto *pt)
+ {
+ if ((pt->flags & PROTO_ILOOP)) {
+ BCIns *bc = proto_bc(pt);
+- BCPos i, sizebc = pt->sizebc;;
++ BCPos i, sizebc = pt->sizebc;
+ pt->flags &= ~PROTO_ILOOP;
+ if (bc_op(bc[0]) == BC_IFUNCF)
+ setbc_op(&bc[0], BC_FUNCF);
+@@ -205,27 +207,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
+ return; /* No need to unpatch branches in parent traces (yet). */
+ switch (bc_op(*pc)) {
+ case BC_JFORL:
+- lua_assert(traceref(J, bc_d(*pc)) == T);
++ lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace");
+ *pc = T->startins;
+ pc += bc_j(T->startins);
+- lua_assert(bc_op(*pc) == BC_JFORI);
++ lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI");
+ setbc_op(pc, BC_FORI);
+ break;
+ case BC_JITERL:
+ case BC_JLOOP:
+- lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op));
++ lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP ||
++ bc_isret(op), "bad original bytecode %d", op);
+ *pc = T->startins;
+ break;
+ case BC_JMP:
+- lua_assert(op == BC_ITERL);
++ lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op);
+ pc += bc_j(*pc)+2;
+ if (bc_op(*pc) == BC_JITERL) {
+- lua_assert(traceref(J, bc_d(*pc)) == T);
++ lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace");
+ *pc = T->startins;
+ }
+ break;
+ case BC_JFUNCF:
+- lua_assert(op == BC_FUNCF);
++ lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op);
+ *pc = T->startins;
+ break;
+ default: /* Already unpatched. */
+@@ -237,7 +240,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
+ static void trace_flushroot(jit_State *J, GCtrace *T)
+ {
+ GCproto *pt = &gcref(T->startpt)->pt;
+- lua_assert(T->root == 0 && pt != NULL);
++ lj_assertJ(T->root == 0, "not a root trace");
++ lj_assertJ(pt != NULL, "trace has no prototype");
+ /* First unpatch any modified bytecode. */
+ trace_unpatch(J, T);
+ /* Unlink root trace from chain anchored in prototype. */
+@@ -353,7 +357,8 @@ void lj_trace_freestate(global_State *g)
+ { /* This assumes all traces have already been freed. */
+ ptrdiff_t i;
+ for (i = 1; i < (ptrdiff_t)J->sizetrace; i++)
+- lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL);
++ lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL,
++ "trace still allocated");
+ }
+ #endif
+ lj_mcode_free(J);
+@@ -368,8 +373,13 @@ void lj_trace_freestate(global_State *g)
+ /* Blacklist a bytecode instruction. */
+ static void blacklist_pc(GCproto *pt, BCIns *pc)
+ {
+- setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
+- pt->flags |= PROTO_ILOOP;
++ if (bc_op(*pc) == BC_ITERN) {
++ setbc_op(pc, BC_ITERC);
++ setbc_op(pc+1+bc_j(pc[1]), BC_JMP);
++ } else {
++ setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
++ pt->flags |= PROTO_ILOOP;
++ }
+ }
+
+ /* Penalize a bytecode instruction. */
+@@ -380,7 +390,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc,
TraceError e)
+ if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */
+ /* First try to bump its hotcount several times. */
+ val = ((uint32_t)J->penalty[i].val << 1) +
+- LJ_PRNG_BITS(J, PENALTY_RNDBITS);
++ (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1));
+ if (val > PENALTY_MAX) {
+ blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */
+ return;
+@@ -406,10 +416,11 @@ static void trace_start(jit_State *J)
+ TraceNo traceno;
+
+ if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
+- if (J->parent == 0 && J->exitno == 0) {
++ if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) !=
BC_ITERN) {
+ /* Lazy bytecode patching to disable hotcount events. */
+- lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
+- bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
++ lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
++ bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF,
++ "bad hot bytecode %d", bc_op(*J->pc));
+ setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP);
+ J->pt->flags |= PROTO_ILOOP;
+ }
+@@ -420,7 +431,8 @@ static void trace_start(jit_State *J)
+ /* Get a new trace number. */
+ traceno = trace_findfree(J);
+ if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */
+- lua_assert((J2G(J)->hookmask & HOOK_GC) == 0);
++ lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0,
++ "recorder called from GC hook");
+ lj_trace_flushall(J->L);
+ J->state = LJ_TRACE_IDLE; /* Silently ignored. */
+ return;
+@@ -489,6 +501,7 @@ static void trace_stop(jit_State *J)
+ J->cur.nextroot = pt->trace;
+ pt->trace = (TraceNo1)traceno;
+ break;
++ case BC_ITERN:
+ case BC_RET:
+ case BC_RET0:
+ case BC_RET1:
+@@ -496,10 +509,14 @@ static void trace_stop(jit_State *J)
+ goto addroot;
+ case BC_JMP:
+ /* Patch exit branch in parent to side trace entry. */
+- lua_assert(J->parent != 0 && J->cur.root != 0);
++ lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side
trace");
+ lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
+ /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
+- traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE;
++ {
++ SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
++ snap->count = SNAPCOUNT_DONE;
++ if (J->cur.topslot > snap->topslot) snap->topslot =
J->cur.topslot;
++ }
+ /* Add to side trace chain in root trace. */
+ {
+ GCtrace *root = traceref(J, J->cur.root);
+@@ -515,7 +532,7 @@ static void trace_stop(jit_State *J)
+ traceref(J, J->exitno)->link = traceno;
+ break;
+ default:
+- lua_assert(0);
++ lj_assertJ(0, "bad stop bytecode %d", op);
+ break;
+ }
+
+@@ -536,8 +553,8 @@ static void trace_stop(jit_State *J)
+ static int trace_downrec(jit_State *J)
+ {
+ /* Restart recording at the return instruction. */
+- lua_assert(J->pt != NULL);
+- lua_assert(bc_isret(bc_op(*J->pc)));
++ lj_assertJ(J->pt != NULL, "no active prototype");
++ lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode");
+ if (bc_op(*J->pc) == BC_RETM)
+ return 0; /* NYI: down-recursion with RETM. */
+ J->parent = 0;
+@@ -644,8 +661,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void
*ud)
+ J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */
+ trace_start(J);
+ lj_dispatch_update(J2G(J));
+- break;
++ if (J->state != LJ_TRACE_RECORD_1ST)
++ break;
++ /* fallthrough */
+
++ case LJ_TRACE_RECORD_1ST:
++ J->state = LJ_TRACE_RECORD;
++ /* fallthrough */
+ case LJ_TRACE_RECORD:
+ trace_pendpatch(J, 0);
+ setvmstate(J2G(J), RECORD);
+@@ -750,7 +772,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
+ isluafunc(curr_func(J->L)) &&
+ snap->count != SNAPCOUNT_DONE &&
+ ++snap->count >= J->param[JIT_P_hotexit]) {
+- lua_assert(J->state == LJ_TRACE_IDLE);
++ lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while
recording");
+ /* J->parent is non-zero for a side trace. */
+ J->state = LJ_TRACE_START;
+ lj_trace_ins(J, pc);
+@@ -782,7 +804,9 @@ typedef struct ExitDataCP {
+ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud)
+ {
+ ExitDataCP *exd = (ExitDataCP *)ud;
+- cframe_errfunc(L->cframe) = -1; /* Inherit error function. */
++ /* Always catch error here and don't call error function. */
++ cframe_errfunc(L->cframe) = 0;
++ cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue);
+ exd->pc = lj_snap_restore(exd->J, exd->exptr);
+ UNUSED(dummy);
+ return NULL;
+@@ -812,7 +836,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex)
+ }
+ #endif
+
+-#ifdef EXITSTATE_PCREG
++#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE)
+ /* Determine trace number from pc of exit instruction. */
+ static TraceNo trace_exit_find(jit_State *J, MCode *pc)
+ {
+@@ -822,7 +846,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc)
+ if (T && pc >= T->mcode && pc < (MCode *)((char
*)T->mcode + T->szmcode))
+ return traceno;
+ }
+- lua_assert(0);
++ lj_assertJ(0, "bad exit pc");
+ return 0;
+ }
+ #endif
+@@ -834,29 +858,39 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
+ lua_State *L = J->L;
+ ExitState *ex = (ExitState *)exptr;
+ ExitDataCP exd;
+- int errcode;
++ int errcode, exitcode = J->exitcode;
++ TValue exiterr;
+ const BCIns *pc;
+ void *cf;
+ GCtrace *T;
++
++ setnilV(&exiterr);
++ if (exitcode) { /* Trace unwound with error code. */
++ J->exitcode = 0;
++ copyTV(L, &exiterr, L->top-1);
++ }
++
+ #ifdef EXITSTATE_PCREG
+ J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
+ #endif
+ T = traceref(J, J->parent); UNUSED(T);
+ #ifdef EXITSTATE_CHECKEXIT
+ if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */
+- lua_assert(T->root != 0);
++ lj_assertJ(T->root != 0, "stack check in root trace");
+ J->exitno = T->ir[REF_BASE].op2;
+ J->parent = T->ir[REF_BASE].op1;
+ T = traceref(J, J->parent);
+ }
+ #endif
+- lua_assert(T != NULL && J->exitno < T->nsnap);
++ lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit
number");
+ exd.J = J;
+ exd.exptr = exptr;
+ errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp);
+ if (errcode)
+ return -errcode; /* Return negated error code. */
+
++ if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */
++
+ if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
+ lj_vmevent_send(L, TEXIT,
+ lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
+@@ -868,7 +902,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
+ pc = exd.pc;
+ cf = cframe_raw(L->cframe);
+ setcframe_pc(cf, pc);
+- if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
++ if (exitcode) {
++ return -exitcode;
++ } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
+ /* Just exit to interpreter. */
+ } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
+ if (!(G(L)->hookmask & HOOK_GC))
+@@ -878,13 +914,14 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
+ }
+ if (bc_op(*pc) == BC_JLOOP) {
+ BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
+- if (bc_isret(bc_op(*retpc))) {
++ int isret = bc_isret(bc_op(*retpc));
++ if (isret || bc_op(*retpc) == BC_ITERN) {
+ if (J->state == LJ_TRACE_RECORD) {
+ J->patchins = *pc;
+ J->patchpc = (BCIns *)pc;
+ *J->patchpc = *retpc;
+ J->bcskip = 1;
+- } else {
++ } else if (isret) {
+ pc = retpc;
+ setcframe_pc(cf, pc);
+ }
+@@ -906,4 +943,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
+ }
+ }
+
++#if LJ_UNWIND_JIT
++/* Given an mcode address determine trace exit address for unwinding. */
++uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep)
++{
++#if EXITTRACE_VMSTATE
++ TraceNo traceno = J2G(J)->vmstate;
++#else
++ TraceNo traceno = trace_exit_find(J, (MCode *)addr);
++#endif
++ GCtrace *T = traceref(J, traceno);
++ if (T
++#if EXITTRACE_VMSTATE
++ && addr >= (uintptr_t)T->mcode && addr <
(uintptr_t)T->mcode + T->szmcode
++#endif
++ ) {
++ SnapShot *snap = T->snap;
++ SnapNo lo = 0, exitno = T->nsnap;
++ uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */
++ /* Rightmost binary search for mcode offset to determine exit number. */
++ do {
++ SnapNo mid = (lo+exitno) >> 1;
++ if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1;
++ } while (lo < exitno);
++ exitno--;
++ *ep = exitno;
++#ifdef EXITSTUBS_PER_GROUP
++ return (uintptr_t)exitstub_addr(J, exitno);
++#else
++ return (uintptr_t)exitstub_trace_addr(T, exitno);
++#endif
++ }
++ /* Cannot correlate addr with trace/exit. This will be fatal. */
++ lj_assertJ(0, "bad exit pc");
++ return 0;
++}
++#endif
++
+ #endif
+diff --git a/src/lj_trace.h b/src/lj_trace.h
+index 22cae741..e4cf2dc4 100644
+--- a/src/lj_trace.h
++++ b/src/lj_trace.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Trace management.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_TRACE_H
+@@ -37,6 +37,9 @@ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
+ LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
+ LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
+ LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
++#if LJ_UNWIND_EXT
++LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo
*ep);
++#endif
+
+ /* Signal asynchronous abort of trace or end of trace. */
+ #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE)
+diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
+index 1363c4f3..24dcb5c1 100644
+--- a/src/lj_traceerr.h
++++ b/src/lj_traceerr.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Trace compiler error messages.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* This file may be included multiple times with different TREDEF macros. */
+diff --git a/src/lj_udata.c b/src/lj_udata.c
+index bd0321b8..a0edd0df 100644
+--- a/src/lj_udata.c
++++ b/src/lj_udata.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Userdata handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_udata_c
+@@ -8,6 +8,7 @@
+
+ #include "lj_obj.h"
+ #include "lj_gc.h"
++#include "lj_err.h"
+ #include "lj_udata.h"
+
+ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
+@@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud)
+ lj_mem_free(g, ud, sizeudata(ud));
+ }
+
++#if LJ_64
++void *lj_lightud_intern(lua_State *L, void *p)
++{
++ global_State *g = G(L);
++ uint64_t u = (uint64_t)p;
++ uint32_t up = lightudup(u);
++ uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
++ MSize segnum = g->gc.lightudnum;
++ if (segmap) {
++ MSize seg;
++ for (seg = 0; seg <= segnum; seg++)
++ if (segmap[seg] == up) /* Fast path. */
++ return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
++ segnum++;
++ /* Leave last segment unused to avoid clash with ITERN key. */
++ if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU);
++ }
++ if (!((segnum-1) & segnum) && segnum != 1) {
++ lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t);
++ setmref(g->gc.lightudseg, segmap);
++ }
++ g->gc.lightudnum = segnum;
++ segmap[segnum] = up;
++ return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
++}
++#endif
++
+diff --git a/src/lj_udata.h b/src/lj_udata.h
+index f271a42d..78522ecc 100644
+--- a/src/lj_udata.h
++++ b/src/lj_udata.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Userdata handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_UDATA_H
+@@ -10,5 +10,8 @@
+
+ LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env);
+ LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud);
++#if LJ_64
++LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p);
++#endif
+
+ #endif
+diff --git a/src/lj_vm.h b/src/lj_vm.h
+index 1cc7eed7..81ee8e28 100644
+--- a/src/lj_vm.h
++++ b/src/lj_vm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Assembler VM interface definitions.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_VM_H
+@@ -26,6 +26,9 @@ LJ_ASMF void lj_vm_unwind_ff_eh(void);
+ #if LJ_TARGET_X86ORX64
+ LJ_ASMF void lj_vm_unwind_rethrow(void);
+ #endif
++#if LJ_TARGET_MIPS
++LJ_ASMF void lj_vm_unwind_stub(void);
++#endif
+
+ /* Miscellaneous functions. */
+ #if LJ_TARGET_X86ORX64
+@@ -48,6 +51,7 @@ LJ_ASMF void lj_vm_inshook(void);
+ LJ_ASMF void lj_vm_rethook(void);
+ LJ_ASMF void lj_vm_callhook(void);
+ LJ_ASMF void lj_vm_profhook(void);
++LJ_ASMF void lj_vm_IITERN(void);
+
+ /* Trace exit handling. */
+ LJ_ASMF void lj_vm_exit_handler(void);
+@@ -92,14 +96,10 @@ LJ_ASMF double lj_vm_trunc(double);
+ LJ_ASMF double lj_vm_trunc_sf(double);
+ #endif
+ #endif
+-#ifdef LUAJIT_NO_EXP2
+-LJ_ASMF double lj_vm_exp2(double);
+-#else
+-#define lj_vm_exp2 exp2
+-#endif
+ #if LJ_HASFFI
+ LJ_ASMF int lj_vm_errno(void);
+ #endif
++LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx);
+ #endif
+
+ /* Continuations for metamethods. */
+diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
+index 86640804..45c82096 100644
+--- a/src/lj_vmevent.c
++++ b/src/lj_vmevent.c
+@@ -1,6 +1,6 @@
+ /*
+ ** VM event handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include <stdio.h>
+diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h
+index 050fb4dd..a9082e7d 100644
+--- a/src/lj_vmevent.h
++++ b/src/lj_vmevent.h
+@@ -1,6 +1,6 @@
+ /*
+ ** VM event handling.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_VMEVENT_H
+diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
+index b231d3e8..23ef0dd2 100644
+--- a/src/lj_vmmath.c
++++ b/src/lj_vmmath.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Math helper functions for assembler VM.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_vmmath_c
+@@ -48,10 +48,9 @@ double lj_vm_foldarith(double x, double y, int op)
+ case IR_NEG - IR_ADD: return -x; break;
+ case IR_ABS - IR_ADD: return fabs(x); break;
+ #if LJ_HASJIT
+- case IR_ATAN2 - IR_ADD: return atan2(x, y); break;
+ case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break;
+- case IR_MIN - IR_ADD: return x > y ? y : x; break;
+- case IR_MAX - IR_ADD: return x < y ? y : x; break;
++ case IR_MIN - IR_ADD: return x < y ? x : y; break;
++ case IR_MAX - IR_ADD: return x > y ? x : y; break;
+ #endif
+ default: return x;
+ }
+@@ -61,7 +60,8 @@ double lj_vm_foldarith(double x, double y, int op)
+ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
+ {
+ uint32_t y, ua, ub;
+- lua_assert(b != 0); /* This must be checked before using this function. */
++ /* This must be checked before using this function. */
++ lj_assertX(b != 0, "modulo with zero divisor");
+ ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
+ ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
+ y = ua % ub;
+@@ -80,19 +80,12 @@ double lj_vm_log2(double a)
+ }
+ #endif
+
+-#ifdef LUAJIT_NO_EXP2
+-double lj_vm_exp2(double a)
+-{
+- return exp(a * 0.6931471805599453);
+-}
+-#endif
+-
+ #if !LJ_TARGET_X86ORX64
+ /* Unsigned x^k. */
+ static double lj_vm_powui(double x, uint32_t k)
+ {
+ double y;
+- lua_assert(k != 0);
++ lj_assertX(k != 0, "pow with zero exponent");
+ for (; (k & 1) == 0; k >>= 1) x *= x;
+ y = x;
+ if ((k >>= 1) != 0) {
+@@ -129,15 +122,9 @@ double lj_vm_foldfpm(double x, int fpm)
+ case IRFPM_CEIL: return lj_vm_ceil(x);
+ case IRFPM_TRUNC: return lj_vm_trunc(x);
+ case IRFPM_SQRT: return sqrt(x);
+- case IRFPM_EXP: return exp(x);
+- case IRFPM_EXP2: return lj_vm_exp2(x);
+ case IRFPM_LOG: return log(x);
+ case IRFPM_LOG2: return lj_vm_log2(x);
+- case IRFPM_LOG10: return log10(x);
+- case IRFPM_SIN: return sin(x);
+- case IRFPM_COS: return cos(x);
+- case IRFPM_TAN: return tan(x);
+- default: lua_assert(0);
++ default: lj_assertX(0, "bad fpm %d", fpm);
+ }
+ return 0;
+ }
+diff --git a/src/ljamalg.c b/src/ljamalg.c
+index f1f28623..384b3cc1 100644
+--- a/src/ljamalg.c
++++ b/src/ljamalg.c
+@@ -1,16 +1,6 @@
+ /*
+ ** LuaJIT core and libraries amalgamation.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+-*/
+-
+-/*
+-+--------------------------------------------------------------------------+
+-| WARNING: Compiling the amalgamation needs a lot of virtual memory |
+-| (around 300 MB with GCC 4.x)! If you don't have enough physical memory |
+-| your machine will start swapping to disk and the compile will not finish |
+-| within a reasonable amount of time. |
+-| So either compile on a bigger machine or use the non-amalgamated build. |
+-+--------------------------------------------------------------------------+
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define ljamalg_c
+@@ -28,6 +18,7 @@
+ #include "lua.h"
+ #include "lauxlib.h"
+
++#include "lj_assert.c"
+ #include "lj_gc.c"
+ #include "lj_err.c"
+ #include "lj_char.c"
+@@ -40,6 +31,7 @@
+ #include "lj_udata.c"
+ #include "lj_meta.c"
+ #include "lj_debug.c"
++#include "lj_prng.c"
+ #include "lj_state.c"
+ #include "lj_dispatch.c"
+ #include "lj_vmevent.c"
+@@ -47,6 +39,7 @@
+ #include "lj_strscan.c"
+ #include "lj_strfmt.c"
+ #include "lj_strfmt_num.c"
++#include "lj_serialize.c"
+ #include "lj_api.c"
+ #include "lj_profile.c"
+ #include "lj_lex.c"
+@@ -93,5 +86,6 @@
+ #include "lib_bit.c"
+ #include "lib_jit.c"
+ #include "lib_ffi.c"
++#include "lib_buffer.c"
+ #include "lib_init.c"
+
+diff --git a/src/lua.h b/src/lua.h
+index 850bd796..6d1634d1 100644
+--- a/src/lua.h
++++ b/src/lua.h
+@@ -1,7 +1,7 @@
+ /*
+ ** $Id: lua.h,v 1.218.1.5 2008/08/06 13:30:12 roberto Exp $
+ ** Lua - An Extensible Extension Language
+-**
Lua.org, PUC-Rio, Brazil (
http://www.lua.org)
++**
Lua.org, PUC-Rio, Brazil (
https://www.lua.org)
+ ** See Copyright Notice at the end of this file
+ */
+
+diff --git a/src/luaconf.h b/src/luaconf.h
+index c2d29d94..5ba6eda9 100644
+--- a/src/luaconf.h
++++ b/src/luaconf.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Configuration header.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef luaconf_h
+@@ -136,7 +136,7 @@
+
+ #define LUALIB_API LUA_API
+
+-/* Support for internal assertions. */
++/* Compatibility support for assertions. */
+ #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
+ #include <assert.h>
+ #endif
+diff --git a/src/luajit.c b/src/luajit.c
+index 1ca24301..6aed5337 100644
+--- a/src/luajit.c
++++ b/src/luajit.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008
Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -150,6 +150,7 @@ static void print_jit_status(lua_State *L)
+ fputs(s, stdout);
+ }
+ putc('\n', stdout);
++ lua_settop(L, 0); /* clear stack */
+ }
+
+ static void createargtable(lua_State *L, char **argv, int argc, int argf)
+@@ -421,6 +422,7 @@ static int collectargs(char **argv, int *flags)
+ break;
+ case 'e':
+ *flags |= FLAGS_EXEC;
++ /* fallthrough */
+ case 'j': /* LuaJIT extension */
+ case 'l':
+ *flags |= FLAGS_OPTION;
+diff --git a/src/luajit.h b/src/luajit.h
+index 708a5a11..2ee1f908 100644
+--- a/src/luajit.h
++++ b/src/luajit.h
+@@ -1,7 +1,7 @@
+ /*
+-** LuaJIT -- a Just-In-Time Compiler for Lua.
http://luajit.org/
++** LuaJIT -- a Just-In-Time Compiler for Lua.
https://luajit.org/
+ **
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ **
+ ** Permission is hereby granted, free of charge, to any person obtaining
+ ** a copy of this software and associated documentation files (the
+@@ -22,7 +22,7 @@
+ ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **
+-** [ MIT license:
http://www.opensource.org/licenses/mit-license.php ]
++** [ MIT license:
https://www.opensource.org/licenses/mit-license.php ]
+ */
+
+ #ifndef _LUAJIT_H
+@@ -33,8 +33,8 @@
+ #define LUAJIT_VERSION "LuaJIT 2.1.0-beta3"
+ #define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
+ #define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3
+-#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2017 Mike Pall"
+-#define LUAJIT_URL "http://luajit.org/"
++#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2021 Mike Pall"
++#define LUAJIT_URL "https://luajit.org/"
+
+ /* Modes for luaJIT_setmode. */
+ #define LUAJIT_MODE_MASK 0x00ff
+diff --git a/src/lualib.h b/src/lualib.h
+index bfc130a1..5c18e9ec 100644
+--- a/src/lualib.h
++++ b/src/lualib.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Standard library header.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LUALIB_H
+@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L);
+ LUALIB_API int luaopen_bit(lua_State *L);
+ LUALIB_API int luaopen_jit(lua_State *L);
+ LUALIB_API int luaopen_ffi(lua_State *L);
++LUALIB_API int luaopen_string_buffer(lua_State *L);
+
+ LUALIB_API void luaL_openlibs(lua_State *L);
+
+diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
+index 71bde759..7e1a6e04 100644
+--- a/src/msvcbuild.bat
++++ b/src/msvcbuild.bat
+@@ -1,29 +1,31 @@
+ @rem Script to build LuaJIT with MSVC.
+-@rem Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++@rem Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ @rem
+-@rem Either open a "Visual Studio .NET Command Prompt"
+-@rem (Note that the Express Edition does not contain an x64 compiler)
+-@rem -or-
+-@rem Open a "Windows SDK Command Shell" and set the compiler environment:
+-@rem setenv /release /x86
+-@rem -or-
+-@rem setenv /release /x64
++@rem Open a "Visual Studio Command Prompt" (either x86 or x64).
++@rem Then cd to this directory and run this script. Use the following
++@rem options (in order), if needed. The default is a dynamic release build.
+ @rem
+-@rem Then cd to this directory and run this script.
++@rem nogc64 disable LJ_GC64 mode for x64
++@rem debug emit debug symbols
++@rem amalg amalgamated build
++@rem static static linkage
+
+ @if not defined INCLUDE goto :FAIL
+
+ @setlocal
++@rem Add more debug flags here, e.g. DEBUGCFLAGS=/DLUA_USE_APICHECK
++@set DEBUGCFLAGS=
+ @set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
/D_CRT_STDIO_INLINE=__declspec(dllexport)__inline
+ @set LJLINK=link /nologo
+ @set LJMT=mt /nologo
+ @set LJLIB=lib /nologo /nodefaultlib
+ @set DASMDIR=..\dynasm
+ @set DASM=%DASMDIR%\dynasm.lua
+-@set DASC=vm_x86.dasc
++@set DASC=vm_x64.dasc
+ @set LJDLLNAME=lua51.dll
+ @set LJLIBNAME=lua51.lib
+-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c
lib_package.c lib_debug.c lib_jit.c lib_ffi.c
++@set BUILDTYPE=release
++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c
lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+ %LJCOMPILE% host\minilua.c
+ @if errorlevel 1 goto :BAD
+@@ -36,15 +38,16 @@ if exist minilua.exe.manifest^
+ @set LJARCH=x64
+ @minilua
+ @if errorlevel 8 goto :X64
++@set DASC=vm_x86.dasc
+ @set DASMFLAGS=-D WIN -D JIT -D FFI
+ @set LJARCH=x86
+ @set LJCOMPILE=%LJCOMPILE% /arch:SSE2
+ :X64
+-@if "%1" neq "gc64" goto :NOGC64
++@if "%1" neq "nogc64" goto :GC64
+ @shift
+-@set DASC=vm_x64.dasc
+-@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_GC64
+-:NOGC64
++@set DASC=vm_x86.dasc
++@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
++:GC64
+ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
+ @if errorlevel 1 goto :BAD
+
+@@ -72,9 +75,11 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+
+ @if "%1" neq "debug" goto :NODEBUG
+ @shift
+-@set LJCOMPILE=%LJCOMPILE% /Zi
+-@set LJLINK=%LJLINK% /debug /opt:ref /opt:icf /incremental:no
++@set BUILDTYPE=debug
++@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS%
++@set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no
+ :NODEBUG
++@set LJLINK=%LJLINK% /%BUILDTYPE%
+ @if "%1"=="amalg" goto :AMALGDLL
+ @if "%1"=="static" goto :STATIC
+ %LJCOMPILE% /MD /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
+@@ -118,5 +123,5 @@ if exist luajit.exe.manifest^
+ @echo *******************************************************
+ @goto :END
+ :FAIL
+-@echo You must open a "Visual Studio .NET Command Prompt" to run this script
++@echo You must open a "Visual Studio Command Prompt" to run this script
+ :END
+diff --git a/src/ps4build.bat b/src/ps4build.bat
+index e4a7defe..fdd09d81 100644
+--- a/src/ps4build.bat
++++ b/src/ps4build.bat
+@@ -26,13 +26,13 @@
+ @set LJMT=mt /nologo
+ @set DASMDIR=..\dynasm
+ @set DASM=%DASMDIR%\dynasm.lua
+-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c
lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+-@set GC64=-DLUAJIT_ENABLE_GC64
++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c
lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
++@set GC64=
+ @set DASC=vm_x64.dasc
+
+ @if "%1" neq "gc32" goto :NOGC32
+ @shift
+-@set GC64=
++@set GC64=-DLUAJIT_DISABLE_GC64
+ @set DASC=vm_x86.dasc
+ :NOGC32
+
+diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat
+index 3991dc65..2980e157 100644
+--- a/src/psvitabuild.bat
++++ b/src/psvitabuild.bat
+@@ -14,7 +14,7 @@
+ @set LJMT=mt /nologo
+ @set DASMDIR=..\dynasm
+ @set DASM=%DASMDIR%\dynasm.lua
+-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c
lib_package.c lib_debug.c lib_jit.c lib_ffi.c
++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c
lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+ %LJCOMPILE% host\minilua.c
+ @if errorlevel 1 goto :BAD
+diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
+index 780cc16e..405be30a 100644
+--- a/src/vm_arm.dasc
++++ b/src/vm_arm.dasc
+@@ -1,6 +1,6 @@
+ |// Low-level VM code for ARM CPUs.
+ |// Bytecode interpreter, fast functions and helper functions.
+-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ |
+ |.arch arm
+ |.section code_op, code_sub
+@@ -539,13 +539,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | cmp CARG1, #1
+ |.endif
+ | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC].
+- | ldr CARG3, LFUNC:CARG3->field_pc
+ | mvn INS, #~LJ_TNIL
+ | add CARG2, RA, RC
+ | str INS, [CARG2, #-4] // Ensure one valid arg.
+ |.if FFI
+ | bls >1
+ |.endif
++ | ldr CARG3, LFUNC:CARG3->field_pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | // BASE = base, RA = resultptr, CARG4 = meta base
+ | bx CARG1
+@@ -699,6 +699,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vmeta_tsetr:
+ | str BASE, L->base
+ | .IOS mov RC, BASE
++ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+@@ -1011,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | cmp TAB:RB, #0
+ | beq ->fff_restv
+ | ldr CARG3, TAB:RB->hmask
+- | ldr CARG4, STR:RC->hash
++ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:RB->node
+- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask
++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+@@ -1110,24 +1111,18 @@ static void build_subroutines(BuildCtx *ctx)
+ | checktab CARG2, ->fff_fallback
+ | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
+ | ldr PC, [BASE, FRAME_PC]
+- | mov CARG2, CARG1
+- | str BASE, L->base // Add frame since C call can throw.
+- | mov CARG1, L
+- | str BASE, L->top // Dummy frame length is ok.
+- | add CARG3, BASE, #8
+- | str PC, SAVE_PC
+- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- | // Returns 0 at end of traversal.
++ | add CARG2, BASE, #8
++ | sub CARG3, BASE, #8
++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ | // Returns 1=found, 0=end, -1=error.
+ | .IOS ldr BASE, L->base
+ | cmp CRET1, #0
+- | mvneq CRET2, #~LJ_TNIL
+- | beq ->fff_restv // End of traversal: return nil.
+- | ldrd CARG12, [BASE, #8] // Copy key and value to results.
+- | ldrd CARG34, [BASE, #16]
+- | mov RC, #(2+1)*8
+- | strd CARG12, [BASE, #-8]
+- | strd CARG34, [BASE]
+- | b ->fff_res
++ | mov RC, #(2+1)*8
++ | bgt ->fff_res // Found key/value.
++ | bmi ->fff_fallback // Invalid key.
++ | // End of traversal: return nil.
++ | mvn CRET2, #~LJ_TNIL
++ | b ->fff_restv
+ |
+ |.ffunc_1 pairs
+ | checktab CARG2, ->fff_fallback
+@@ -1715,8 +1710,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ |.endmacro
+ |
+- | math_minmax math_min, gt, hi
+- | math_minmax math_max, lt, lo
++ | math_minmax math_min, gt, pl
++ | math_minmax math_max, lt, le
+ |
+ |//-- String library -----------------------------------------------------
+ |
+@@ -1809,7 +1804,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | str L, SBUF:CARG1->L
+- | str CARG4, SBUF:CARG1->p
++ | str CARG4, SBUF:CARG1->w
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
+@@ -2246,7 +2241,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |9: // Rethrow error from the right C frame.
+ | rsb CARG2, CARG1, #0
+ | mov CARG1, L
+- | bl extern lj_err_throw // (lua_State *L, int errcode)
++ | bl extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+@@ -2429,6 +2424,64 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
++ |.define NEXT_TAB, TAB:CARG1
++ |.define NEXT_RES, CARG1
++ |.define NEXT_IDX, CARG2
++ |.define NEXT_TMP0, CARG3
++ |.define NEXT_TMP1, CARG4
++ |.define NEXT_LIM, r12
++ |.define NEXT_RES_PTR, sp
++ |.define NEXT_RES_VAL, [sp]
++ |.define NEXT_RES_KEY_I, [sp, #8]
++ |.define NEXT_RES_KEY_IT, [sp, #12]
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in CRET2.
++ |->vm_next:
++ |.if JIT
++ | ldr NEXT_TMP0, NEXT_TAB->array
++ | ldr NEXT_LIM, NEXT_TAB->asize
++ | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3
++ |1: // Traverse array part.
++ | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM
++ | bhs >5
++ | ldr NEXT_TMP1, [NEXT_TMP0, #4]
++ | str NEXT_IDX, NEXT_RES_KEY_I
++ | add NEXT_TMP0, NEXT_TMP0, #8
++ | add NEXT_IDX, NEXT_IDX, #1
++ | checktp NEXT_TMP1, LJ_TNIL
++ | beq <1 // Skip holes in array part.
++ | ldr NEXT_TMP0, [NEXT_TMP0, #-8]
++ | mov NEXT_RES, NEXT_RES_PTR
++ | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too.
++ | mvn NEXT_TMP0, #~LJ_TISNUM
++ | str NEXT_TMP0, NEXT_RES_KEY_IT
++ | bx lr
++ |
++ |5: // Traverse hash part.
++ | ldr NEXT_TMP0, NEXT_TAB->hmask
++ | ldr NODE:NEXT_RES, NEXT_TAB->node
++ | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1
++ | add NEXT_LIM, NEXT_LIM, NEXT_TMP0
++ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3
++ |6:
++ | cmp NEXT_IDX, NEXT_LIM
++ | bhi >9
++ | ldr NEXT_TMP1, NODE:NEXT_RES->val.it
++ | checktp NEXT_TMP1, LJ_TNIL
++ | add NEXT_IDX, NEXT_IDX, #1
++ | bxne lr
++ | // Skip holes in hash part.
++ | add NEXT_RES, NEXT_RES, #sizeof(Node)
++ | b <6
++ |
++ |9: // End of iteration. Set the key to nil (not the value).
++ | mvn NEXT_TMP0, #0
++ | mov NEXT_RES, NEXT_RES_PTR
++ | str NEXT_TMP0, NEXT_RES_KEY_IT
++ | bx lr
++ |.endif
++ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -3499,10 +3552,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TGETS_Z:
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ldr CARG3, TAB:CARG1->hmask
+- | ldr CARG4, STR:RC->hash
++ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:CARG1->node
+ | mov TAB:RB, TAB:CARG1
+- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask
++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+ |1:
+@@ -3646,10 +3699,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TSETS_Z:
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ldr CARG3, TAB:CARG1->hmask
+- | ldr CARG4, STR:RC->hash
++ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:CARG1->node
+ | mov TAB:RB, TAB:CARG1
+- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask
++ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | mov CARG4, #0
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+@@ -3919,10 +3972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERN:
+- | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
+ |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
++ | hotloop
+ |.endif
++ |->vm_IITERN:
++ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
+ | add RA, BASE, RA
+ | ldr TAB:RB, [RA, #-16]
+ | ldr CARG1, [RA, #-8] // Get index from control var.
+@@ -3988,7 +4042,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next1
+ | ins_next2
+ | mov CARG1, #0
+- | mvn CARG2, #0x00018000
++ | mvn CARG2, #~LJ_KEYINDEX
+ | strd CARG1, [RA, #-8] // Initialize control var.
+ |1:
+ | ins_next3
+@@ -3997,9 +4051,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | mov OP, #BC_ITERC
+ | strb CARG1, [PC, #-4]
+ | sub PC, RC, #0x20000
++ |.if JIT
++ | ldrb CARG1, [PC]
++ | cmp CARG1, #BC_ITERN
++ | bne >6
++ |.endif
+ | strb OP, [PC] // Subsumes ins_next1.
+ | ins_next2
+ | b <1
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
++ | ldrh CARG2, [PC, #2]
++ | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
++ | // Subsumes ins_next1 and ins_next2.
++ | ldr INS, TRACE:CARG1->startins
++ | bfi INS, OP, #0, #8
++ | str INS, [PC], #4
++ | b <1
++ |.endif
+ break;
+
+ case BC_VARG:
+diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
+index 3eaf3763..c7090ca3 100644
+--- a/src/vm_arm64.dasc
++++ b/src/vm_arm64.dasc
+@@ -1,6 +1,6 @@
+ |// Low-level VM code for ARM64 CPUs.
+ |// Bytecode interpreter, fast functions and helper functions.
+-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ |
+ |.arch arm64
+ |.section code_op, code_sub
+@@ -81,47 +81,48 @@
+ |
+ |.define CFRAME_SPACE, 208
+ |//----- 16 byte aligned, <-- sp entering interpreter
+-|// Unused [sp, #204] // 32 bit values
+-|.define SAVE_NRES, [sp, #200]
+-|.define SAVE_ERRF, [sp, #196]
+-|.define SAVE_MULTRES, [sp, #192]
+-|.define TMPD, [sp, #184] // 64 bit values
+-|.define SAVE_L, [sp, #176]
+-|.define SAVE_PC, [sp, #168]
+-|.define SAVE_CFRAME, [sp, #160]
+-|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves
+-|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves
+-|.define SAVE_LR, [sp, #8]
+-|.define SAVE_FP, [sp]
++|.define SAVE_FP_LR_, 192
++|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves
++|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves
++|// Unused [sp, #44] // 32 bit values
++|.define SAVE_NRES, [sp, #40]
++|.define SAVE_ERRF, [sp, #36]
++|.define SAVE_MULTRES, [sp, #32]
++|.define TMPD, [sp, #24] // 64 bit values
++|.define SAVE_L, [sp, #16]
++|.define SAVE_PC, [sp, #8]
++|.define SAVE_CFRAME, [sp, #0]
+ |//----- 16 byte aligned, <-- sp while in interpreter.
+ |
+-|.define TMPDofs, #184
++|.define TMPDofs, #24
+ |
+ |.macro save_, gpr1, gpr2, fpr1, fpr2
+-| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
+-| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
++| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
++| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
+ |.endmacro
+ |.macro rest_, gpr1, gpr2, fpr1, fpr2
+-| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
+-| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
++| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
++| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
+ |.endmacro
+ |
+ |.macro saveregs
+-| stp fp, lr, [sp, #-CFRAME_SPACE]!
+-| add fp, sp, #0
+-| stp x19, x20, [sp, # SAVE_GPR_]
++| sub sp, sp, # CFRAME_SPACE
++| stp fp, lr, [sp, # SAVE_FP_LR_]
++| add fp, sp, # SAVE_FP_LR_
++| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
+ | save_ 21, 22, 8, 9
+ | save_ 23, 24, 10, 11
+ | save_ 25, 26, 12, 13
+ | save_ 27, 28, 14, 15
+ |.endmacro
+ |.macro restoreregs
+-| ldp x19, x20, [sp, # SAVE_GPR_]
++| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
+ | rest_ 21, 22, 8, 9
+ | rest_ 23, 24, 10, 11
+ | rest_ 25, 26, 12, 13
+ | rest_ 27, 28, 14, 15
+-| ldp fp, lr, [sp], # CFRAME_SPACE
++| ldp fp, lr, [sp, # SAVE_FP_LR_]
++| add sp, sp, # CFRAME_SPACE
+ |.endmacro
+ |
+ |// Type definitions. Some of these are only used for documentation.
+@@ -500,8 +501,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | ldr GL, L->glref // Setup pointer to global state.
+ | mov BASE, CARG2
+ | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
+- | str RC, SAVE_CFRAME
+- | str fp, L->cframe // Add our C frame to cframe chain.
++ | add TMP0, sp, #0
++ | str RC, SAVE_CFRAME
++ | str TMP0, L->cframe // Add our C frame to cframe chain.
+ |
+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | str L, GL->cur_L
+@@ -536,8 +538,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | sub RA, RA, RB // Compute -savestack(L, L->top).
+ | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | str wzr, SAVE_ERRF // No error function.
+- | str RC, SAVE_CFRAME
+- | str fp, L->cframe // Add our C frame to cframe chain.
++ | add TMP0, sp, #0
++ | str RC, SAVE_CFRAME
++ | str TMP0, L->cframe // Add our C frame to cframe chain.
+ | str L, GL->cur_L
+ | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | mov BASE, CRET1
+@@ -562,12 +565,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | cmp CARG1, #1
+ |.endif
+ | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC].
+- | ldr CARG3, LFUNC:CARG3->pc
+ | add TMP0, RA, RC
+ | str TISNIL, [TMP0, #-8] // Ensure one valid arg.
+ |.if FFI
+ | bls >1
+ |.endif
++ | ldr CARG3, LFUNC:CARG3->pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | // BASE = base, RA = resultptr, CARG4 = meta base
+ | br CARG1
+@@ -711,6 +714,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |->vmeta_tsetr:
+ | sxtw CARG3, TMP1w
+ | str BASE, L->base
++ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+@@ -992,9 +996,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
+ | cbz TAB:RB, ->fff_restv
+ | ldr TMP1w, TAB:RB->hmask
+- | ldr TMP2w, STR:RC->hash
++ | ldr TMP2w, STR:RC->sid
+ | ldr NODE:CARG3, TAB:RB->node
+- | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
++ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+@@ -1085,21 +1089,19 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+- | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback
++ | checktp CARG1, LJ_TTAB, ->fff_fallback
+ | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
+ | ldr PC, [BASE, FRAME_PC]
+- | stp BASE, BASE, L->base // Add frame since C call can throw.
+- | mov CARG1, L
+- | add CARG3, BASE, #8
+- | str PC, SAVE_PC
+- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- | // Returns 0 at end of traversal.
++ | add CARG2, BASE, #8
++ | sub CARG3, BASE, #16
++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ | // Returns 1=found, 0=end, -1=error.
++ | mov RC, #(2+1)*8
++ | tbnz CRET1w, #31, ->fff_fallback // Invalid key.
++ | cbnz CRET1, ->fff_res // Found key/value.
++ | // End of traversal: return nil.
+ | str TISNIL, [BASE, #-16]
+- | cbz CRET1, ->fff_res1 // End of traversal: return nil.
+- | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results.
+- | mov RC, #(2+1)*8
+- | stp CARG1, CARG2, [BASE, #-16]
+- | b ->fff_res
++ | b ->fff_res1
+ |
+ |.ffunc_1 pairs
+ | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+@@ -1182,15 +1184,16 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc xpcall
+ | ldp CARG1, CARG2, [BASE]
+ | ldrb TMP0w, GL->hookmask
+- | subs NARGS8:RC, NARGS8:RC, #16
++ | subs NARGS8:TMP1, NARGS8:RC, #16
+ | blo ->fff_fallback
+ | mov RB, BASE
+- | add BASE, BASE, #24
+ | asr ITYPE, CARG2, #47
+ | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
+ | cmn ITYPE, #-LJ_TFUNC
+ | add PC, TMP0, #24+FRAME_PCALL
+ | bne ->fff_fallback // Traceback must be a function.
++ | mov NARGS8:RC, NARGS8:TMP1
++ | add BASE, BASE, #24
+ | stp CARG2, CARG1, [RB] // Swap function and traceback.
+ | cbz NARGS8:RC, ->vm_call_dispatch
+ | b <1
+@@ -1487,8 +1490,8 @@ static void build_subroutines(BuildCtx *ctx)
+ | b <6
+ |.endmacro
+ |
+- | math_minmax math_min, gt, hi
+- | math_minmax math_max, lt, lo
++ | math_minmax math_min, gt, pl
++ | math_minmax math_max, lt, le
+ |
+ |//-- String library -----------------------------------------------------
+ |
+@@ -1587,7 +1590,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | str L, GL->tmpbuf.L
+- | str TMP0, GL->tmpbuf.p
++ | str TMP0, GL->tmpbuf.w
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
+@@ -2033,9 +2036,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | b <2
+ |
+ |9: // Rethrow error from the right C frame.
+- | neg CARG2, CARG1
++ | neg CARG2w, CARG1w
+ | mov CARG1, L
+- | bl extern lj_err_throw // (lua_State *L, int errcode)
++ | bl extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+@@ -2064,6 +2067,63 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
++ |.define NEXT_TAB, TAB:CARG1
++ |.define NEXT_RES, CARG1
++ |.define NEXT_IDX, CARG2w
++ |.define NEXT_LIM, CARG3w
++ |.define NEXT_TMP0, TMP0
++ |.define NEXT_TMP0w, TMP0w
++ |.define NEXT_TMP1, TMP1
++ |.define NEXT_TMP1w, TMP1w
++ |.define NEXT_RES_PTR, sp
++ |.define NEXT_RES_VAL, [sp]
++ |.define NEXT_RES_KEY, [sp, #8]
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in CRET2w.
++ |->vm_next:
++ |.if JIT
++ | ldr NEXT_LIM, NEXT_TAB->asize
++ | ldr NEXT_TMP1, NEXT_TAB->array
++ |1: // Traverse array part.
++ | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM
++ | bhs >5 // Index points after array part?
++ | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3]
++ | cmn NEXT_TMP0, #-LJ_TNIL
++ | cinc NEXT_IDX, NEXT_IDX, eq
++ | beq <1 // Skip holes in array part.
++ | str NEXT_TMP0, NEXT_RES_VAL
++ | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16
++ | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY
++ | add NEXT_IDX, NEXT_IDX, #1
++ | mov NEXT_RES, NEXT_RES_PTR
++ |4:
++ | ret
++ |
++ |5: // Traverse hash part.
++ | ldr NEXT_TMP1w, NEXT_TAB->hmask
++ | ldr NODE:NEXT_RES, NEXT_TAB->node
++ | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1
++ | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w
++ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3
++ |6:
++ | cmp NEXT_IDX, NEXT_LIM
++ | bhi >9
++ | ldr NEXT_TMP0, NODE:NEXT_RES->val
++ | cmn NEXT_TMP0, #-LJ_TNIL
++ | add NEXT_IDX, NEXT_IDX, #1
++ | bne <4
++ | // Skip holes in hash part.
++ | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node)
++ | b <6
++ |
++ |9: // End of iteration. Set the key to nil (not the value).
++ | movn NEXT_TMP0, #0
++ | str NEXT_TMP0, NEXT_RES_KEY
++ | mov NEXT_RES, NEXT_RES_PTR
++ | ret
++ |.endif
++ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -2121,16 +2181,16 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, x19
+- | stp fp, lr, [sp, #-32]!
+- | add fp, sp, #0
+- | str CCSTATE, [sp, #16]
++ | stp x20, CCSTATE, [sp, #-32]!
++ | stp fp, lr, [sp, #16]
++ | add fp, sp, #16
+ | mov CCSTATE, x0
+ | ldr TMP0w, CCSTATE:x0->spadj
+ | ldrb TMP1w, CCSTATE->nsp
+ | add TMP2, CCSTATE, #offsetof(CCallState, stack)
+ | subs TMP1, TMP1, #1
+ | ldr TMP3, CCSTATE->func
+- | sub sp, fp, TMP0
++ | sub sp, sp, TMP0
+ | bmi >2
+ |1: // Copy stack slots
+ | ldr TMP0, [TMP2, TMP1, lsl #3]
+@@ -2148,12 +2208,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | ldp d6, d7, CCSTATE->fpr[6]
+ | ldr x8, CCSTATE->retp
+ | blr TMP3
+- | mov sp, fp
++ | sub sp, fp, #16
+ | stp x0, x1, CCSTATE->gpr[0]
+ | stp d0, d1, CCSTATE->fpr[0]
+ | stp d2, d3, CCSTATE->fpr[2]
+- | ldr CCSTATE, [sp, #16]
+- | ldp fp, lr, [sp], #32
++ | ldp fp, lr, [sp, #16]
++ | ldp x20, CCSTATE, [sp], #32
+ | ret
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+@@ -2779,7 +2839,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |2: // Check if string is white and ensure upvalue is closed.
+ | ldrb TMP0w, UPVAL:CARG1->closed
+ | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
+- | ccmp TMP0w, #0, #0, ne
++ | ccmp TMP0w, #0, #4, ne
+ | beq <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov CARG1, GL
+@@ -2883,7 +2943,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_GGET:
+ | // RA = dst, RC = str_const (~)
+ case BC_GSET:
+- | // RA = dst, RC = str_const (~)
++ | // RA = src, RC = str_const (~)
+ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+ | mvn RC, RC
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+@@ -2941,9 +3001,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TGETS_Z:
+ | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
+ | ldr TMP1w, TAB:CARG2->hmask
+- | ldr TMP2w, STR:RC->hash
++ | ldr TMP2w, STR:RC->sid
+ | ldr NODE:CARG3, TAB:CARG2->node
+- | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
++ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+@@ -3067,9 +3127,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TSETS_Z:
+ | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
+ | ldr TMP1w, TAB:CARG2->hmask
+- | ldr TMP2w, STR:RC->hash
++ | ldr TMP2w, STR:RC->sid
+ | ldr NODE:CARG3, TAB:CARG2->node
+- | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
++ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+@@ -3320,10 +3380,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERN:
+- | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
++ | hotloop
+ |.endif
++ |->vm_IITERN:
++ | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | add RA, BASE, RA, lsl #3
+ | ldr TAB:RB, [RA, #-16]
+ | ldrh TMP3w, [PC, # OFS_RD]
+@@ -3382,7 +3443,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ccmp CARG4, TISNIL, #0, eq
+ | ccmp TMP1w, #FF_next_N, #0, eq
+ | bne >5
+- | mov TMP0w, #0xfffe7fff
++ | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX
+ | lsl TMP0, TMP0, #32
+ | str TMP0, [RA, #-8] // Initialize control var.
+ |1:
+@@ -3390,11 +3451,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next
+ |
+ |5: // Despecialize bytecode if any of the checks fail.
++ |.if JIT
++ | ldrb TMP2w, [RC, # OFS_OP]
++ |.endif
+ | mov TMP0, #BC_JMP
+ | mov TMP1, #BC_ITERC
+ | strb TMP0w, [PC, #-4+OFS_OP]
++ |.if JIT
++ | cmp TMP2w, #BC_ITERN
++ | bne >6
++ |.endif
+ | strb TMP1w, [RC, # OFS_OP]
+ | b <1
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | ldr RA, [GL, #GL_J(trace)]
++ | ldrh TMP2w, [RC, # OFS_RD]
++ | ldr TRACE:RA, [RA, TMP2, lsl #3]
++ | ldr TMP2w, TRACE:RA->startins
++ | bfxil TMP2w, TMP1w, #0, #8
++ | str TMP2w, [RC]
++ | b <1
++ |.endif
+ break;
+
+ case BC_VARG:
+@@ -3859,7 +3937,7 @@ static int build_backend(BuildCtx *ctx)
+ static void emit_asm_debug(BuildCtx *ctx)
+ {
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
+- int i, cf = CFRAME_SIZE >> 3;
++ int i;
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
+@@ -3873,7 +3951,7 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 30\n" /* Return address is in lr. */
+- "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+@@ -3883,15 +3961,14 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.long .Lframe0\n"
+ "\t.quad .Lbegin\n"
+ "\t.quad %d\n"
+- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+- "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
+- "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
+- fcofs, CFRAME_SIZE, cf, cf-1);
++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
++ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
++ fcofs);
+ for (i = 19; i <= 28; i++) /* offset x19-x28 */
+- fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
++ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
+ for (i = 8; i <= 15; i++) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
+- 64+i, cf-i-4);
++ 64+i, i+(3+(28-19+1)-8));
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ ".LEFDE0:\n\n");
+@@ -3903,10 +3980,10 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.long .Lframe0\n"
+ "\t.quad lj_vm_ffi_call\n"
+ "\t.quad %d\n"
+- "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
+- "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
+- "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
+- "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
++ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
++ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
++ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
+ "\t.align 3\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+ #endif
+@@ -3925,7 +4002,7 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.long lj_err_unwind_dwarf-.\n"
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ ".LECIE1:\n\n");
+ fprintf(ctx->fp,
+@@ -3936,15 +4013,14 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.long .Lbegin-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+- "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
+- "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
+- fcofs, CFRAME_SIZE, cf, cf-1);
++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
++ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
++ fcofs);
+ for (i = 19; i <= 28; i++) /* offset x19-x28 */
+- fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
++ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
+ for (i = 8; i <= 15; i++) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
+- 64+i, cf-i-4);
++ 64+i, i+(3+(28-19+1)-8));
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ ".LEFDE2:\n\n");
+@@ -3961,7 +4037,7 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+- "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ ".LECIE2:\n\n");
+ fprintf(ctx->fp,
+@@ -3972,14 +4048,106 @@ static void emit_asm_debug(BuildCtx *ctx)
+ "\t.long lj_vm_ffi_call-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+- "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
+- "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
+- "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
+- "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
++ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
++ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
++ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
+ "\t.align 3\n"
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+ #endif
+ break;
++#if !LJ_NO_UNWIND
++ case BUILD_machasm: {
++#if LJ_HASFFI
++ int fcsize = 0;
++#endif
++ int j;
++ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
++ fprintf(ctx->fp,
++ "EH_frame1:\n"
++ "\t.set L$set$x,LECIEX-LSCIEX\n"
++ "\t.long L$set$x\n"
++ "LSCIEX:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.ascii \"zPR\\0\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -8\n"
++ "\t.byte 30\n" /* Return address is in lr. */
++ "\t.uleb128 6\n" /* augmentation length */
++ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
++ "\t.long _lj_err_unwind_dwarf@GOT-.\n"
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
++ "\t.align 3\n"
++ "LECIEX:\n\n");
++ for (j = 0; j < ctx->nsym; j++) {
++ const char *name = ctx->sym[j].name;
++ int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
++ if (size == 0) continue;
++#if LJ_HASFFI
++ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
++#endif
++ fprintf(ctx->fp,
++ "LSFDE%d:\n"
++ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
++ "\t.long L$set$%d\n"
++ "LASFDE%d:\n"
++ "\t.long LASFDE%d-EH_frame1\n"
++ "\t.long %s-.\n"
++ "\t.long %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
++ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
++ j, j, j, j, j, j, j, name, size);
++ for (i = 19; i <= 28; i++) /* offset x19-x28 */
++ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
++ for (i = 8; i <= 15; i++) /* offset d8-d15 */
++ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
++ 64+i, i+(3+(28-19+1)-8));
++ fprintf(ctx->fp,
++ "\t.align 3\n"
++ "LEFDE%d:\n\n", j);
++ }
++#if LJ_HASFFI
++ if (fcsize) {
++ fprintf(ctx->fp,
++ "EH_frame2:\n"
++ "\t.set L$set$y,LECIEY-LSCIEY\n"
++ "\t.long L$set$y\n"
++ "LSCIEY:\n"
++ "\t.long 0\n"
++ "\t.byte 0x1\n"
++ "\t.ascii \"zR\\0\"\n"
++ "\t.uleb128 0x1\n"
++ "\t.sleb128 -8\n"
++ "\t.byte 30\n" /* Return address is in lr. */
++ "\t.uleb128 1\n" /* augmentation length */
++ "\t.byte 0x1b\n" /* pcrel|sdata4 */
++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
++ "\t.align 3\n"
++ "LECIEY:\n\n");
++ fprintf(ctx->fp,
++ "LSFDEY:\n"
++ "\t.set L$set$yy,LEFDEY-LASFDEY\n"
++ "\t.long L$set$yy\n"
++ "LASFDEY:\n"
++ "\t.long LASFDEY-EH_frame2\n"
++ "\t.long _lj_vm_ffi_call-.\n"
++ "\t.long %d\n"
++ "\t.uleb128 0\n" /* augmentation length */
++ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
++ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
++ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
++ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
++ "\t.align 3\n"
++ "LEFDEY:\n\n", fcsize);
++ }
++#endif
++ fprintf(ctx->fp, ".subsections_via_symbols\n");
++ }
++ break;
++#endif
+ default:
+ break;
+ }
+diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
+index 1afd6118..e3cc42a5 100644
+--- a/src/vm_mips.dasc
++++ b/src/vm_mips.dasc
+@@ -1,6 +1,6 @@
+ |// Low-level VM code for MIPS CPUs.
+ |// Bytecode interpreter, fast functions and helper functions.
+-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ |//
+ |// MIPS soft-float support contributed by Djordje Kovacevic and
+|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
+@@ -190,7 +190,7 @@
+ |//-----------------------------------------------------------------------
+ |
+ |// Trap for not-yet-implemented parts.
+-|.macro NYI; .long 0xf0f0f0f0; .endmacro
++|.macro NYI; .long 0xec1cf0f0; .endmacro
+ |
+ |// Macros to mark delay slots.
+ |.macro ., a; a; .endmacro
+@@ -399,7 +399,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | xori AT, TMP0, FRAME_C
+ | and TMP2, PC, TMP2
+ | bnez AT, ->vm_returnp
+- | subu TMP2, BASE, TMP2 // TMP2 = previous base.
++ |. subu TMP2, BASE, TMP2 // TMP2 = previous base.
+ |
+ | addiu TMP1, RD, -8
+ | sw TMP2, L->base
+@@ -501,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx)
+ | b ->vm_returnc
+ |. li RD, 16 // 2 results: false + error message.
+ |
++ |->vm_unwind_stub: // Jump to exit stub from unwinder.
++ | jr CARG1
++ |. move ra, CARG2
++ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -669,11 +673,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ | lw PC, -16+HI(RB) // Restore PC from [cont|PC].
+ | addu TMP2, RA, RD
+- | lw TMP1, LFUNC:TMP1->pc
+ |.if FFI
+ | bnez AT, >1
+ |.endif
+ |. sw TISNIL, -8+HI(TMP2) // Ensure one valid arg.
++ | lw TMP1, LFUNC:TMP1->pc
+ | // BASE = base, RA = resultptr, RB = meta base
+ | jr TMP0 // Jump to continuation.
+ |. lw KBASE, PC2PROTO(k)(TMP1)
+@@ -1152,9 +1156,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |. li SFARG1HI, LJ_TNIL
+ | lw TMP0, TAB:SFARG1LO->hmask
+ | li SFARG1HI, LJ_TTAB // Use metatable as default result.
+- | lw TMP1, STR:RC->hash
++ | lw TMP1, STR:RC->sid
+ | lw NODE:TMP2, TAB:SFARG1LO->node
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | sll TMP0, TMP1, 5
+ | sll TMP1, TMP1, 3
+ | subu TMP1, TMP0, TMP1
+@@ -1258,35 +1262,27 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc next
+- | lw CARG1, HI(BASE)
+- | lw TAB:CARG2, LO(BASE)
++ | lw CARG2, HI(BASE)
++ | lw TAB:CARG1, LO(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ |. addu TMP2, BASE, NARGS8:RC
+ | li AT, LJ_TTAB
+ | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil.
+- | bne CARG1, AT, ->fff_fallback
++ | bne CARG2, AT, ->fff_fallback
+ |. lw PC, FRAME_PC(BASE)
+ | load_got lj_tab_next
+- | sw BASE, L->base // Add frame since C call can throw.
+- | sw BASE, L->top // Dummy frame length is ok.
+- | addiu CARG3, BASE, 8
+- | sw PC, SAVE_PC
+- | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- |. move CARG1, L
+- | // Returns 0 at end of traversal.
++ | addiu CARG2, BASE, 8
++ | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ |. addiu CARG3, BASE, -8
++ | // Returns 1=found, 0=end, -1=error.
++ | addiu RA, BASE, -8
++ | bgtz CRET1, ->fff_res // Found key/value.
++ |. li RD, (2+1)*8
+ | beqz CRET1, ->fff_restv // End of traversal: return nil.
+ |. li SFARG1HI, LJ_TNIL
+- | lw TMP0, 8+HI(BASE)
+- | lw TMP1, 8+LO(BASE)
+- | addiu RA, BASE, -8
+- | lw TMP2, 16+HI(BASE)
+- | lw TMP3, 16+LO(BASE)
+- | sw TMP0, HI(RA)
+- | sw TMP1, LO(RA)
+- | sw TMP2, 8+HI(RA)
+- | sw TMP3, 8+LO(RA)
+- | b ->fff_res
+- |. li RD, (2+1)*8
++ | lw CFUNC:RB, FRAME_FUNC(BASE)
++ | b ->fff_fallback // Invalid key.
++ |. li RC, 2*8
+ |
+ |.ffunc_1 pairs
+ | li AT, LJ_TTAB
+@@ -1768,7 +1764,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | b ->fff_res
+ |. li RD, (2+1)*8
+ |
+- |.macro math_minmax, name, intins, fpins
++ |.macro math_minmax, name, intins, ismax
+ | .ffunc_1 name
+ | addu TMP3, BASE, NARGS8:RC
+ | bne SFARG1HI, TISNUM, >5
+@@ -1822,13 +1818,21 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ |7:
+ |.if FPU
++ |.if ismax
++ | c.olt.d FARG1, FRET1
++ |.else
+ | c.olt.d FRET1, FARG1
+- | fpins FRET1, FARG1
++ |.endif
++ | movf.d FRET1, FARG1
++ |.else
++ |.if ismax
++ | bal ->vm_sfcmpogt
+ |.else
+ | bal ->vm_sfcmpolt
++ |.endif
+ |. nop
+- | intins SFARG1LO, SFARG2LO, CRET1
+- | intins SFARG1HI, SFARG2HI, CRET1
++ | movz SFARG1LO, SFARG2LO, CRET1
++ | movz SFARG1HI, SFARG2HI, CRET1
+ |.endif
+ | b <6
+ |. addiu TMP2, TMP2, 8
+@@ -1849,8 +1853,8 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.endmacro
+ |
+- | math_minmax math_min, movz, movf.d
+- | math_minmax math_max, movn, movt.d
++ | math_minmax math_min, movz, 0
++ | math_minmax math_max, movn, 1
+ |
+ |//-- String library -----------------------------------------------------
+ |
+@@ -1959,7 +1963,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lw TMP0, SBUF:CARG1->b
+ | sw L, SBUF:CARG1->L
+ | sw BASE, L->base
+- | sw TMP0, SBUF:CARG1->p
++ | sw TMP0, SBUF:CARG1->w
+ | call_intern extern lj_buf_putstr_ .. name
+ |. sw PC, SAVE_PC
+ | load_got lj_buf_tostr
+@@ -2512,9 +2516,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |. addu RA, RA, BASE
+ |
+ |9: // Rethrow error from the right C frame.
+- | load_got lj_err_throw
+- | negu CARG2, CRET1
+- | call_intern lj_err_throw // (lua_State *L, int errcode)
++ | load_got lj_err_trace
++ | sub CARG2, r0, CRET1
++ | call_intern lj_err_trace // (lua_State *L, int errcode)
+ |. move CARG1, L
+ |.endif
+ |
+@@ -2692,6 +2696,43 @@ static void build_subroutines(BuildCtx *ctx)
+ |. move CRET1, CRET2
+ |.endif
+ |
++ |->vm_sfcmpogt:
++ |.if not FPU
++ | sll AT, SFARG2HI, 1
++ | sll TMP0, SFARG1HI, 1
++ | or CRET1, SFARG2LO, SFARG1LO
++ | or TMP1, AT, TMP0
++ | or TMP1, TMP1, CRET1
++ | beqz TMP1, >8 // Both args +-0: return 0.
++ |. sltu CRET1, r0, SFARG2LO
++ | lui TMP1, 0xffe0
++ | addu AT, AT, CRET1
++ | sltu CRET1, r0, SFARG1LO
++ | sltu AT, TMP1, AT
++ | addu TMP0, TMP0, CRET1
++ | sltu TMP0, TMP1, TMP0
++ | or TMP1, AT, TMP0
++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
++ |. and AT, SFARG2HI, SFARG1HI
++ | bltz AT, >5 // Both args negative?
++ |. nop
++ | beq SFARG2HI, SFARG1HI, >8
++ |. sltu CRET1, SFARG2LO, SFARG1LO
++ | jr ra
++ |. slt CRET1, SFARG2HI, SFARG1HI
++ |5: // Swap conditions if both operands are negative.
++ | beq SFARG2HI, SFARG1HI, >8
++ |. sltu CRET1, SFARG1LO, SFARG2LO
++ | jr ra
++ |. slt CRET1, SFARG1HI, SFARG2HI
++ |8:
++ | jr ra
++ |. nop
++ |9:
++ | jr ra
++ |. li CRET1, 0
++ |.endif
++ |
+ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
+ |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpolex:
+@@ -2734,29 +2775,96 @@ static void build_subroutines(BuildCtx *ctx)
+ |. li CRET1, 0
+ |.endif
+ |
+- |.macro sfmin_max, name, intins
++ |.macro sfmin_max, name, fpcall
+ |->vm_sf .. name:
+ |.if JIT and not FPU
+ | move TMP2, ra
+- | bal ->vm_sfcmpolt
++ | bal ->fpcall
+ |. nop
+ | move TMP0, CRET1
+ | move SFRETHI, SFARG1HI
+ | move SFRETLO, SFARG1LO
+ | move ra, TMP2
+- | intins SFRETHI, SFARG2HI, TMP0
++ | movz SFRETHI, SFARG2HI, TMP0
+ | jr ra
+- |. intins SFRETLO, SFARG2LO, TMP0
++ |. movz SFRETLO, SFARG2LO, TMP0
+ |.endif
+ |.endmacro
+ |
+- | sfmin_max min, movz
+- | sfmin_max max, movn
++ | sfmin_max min, vm_sfcmpolt
++ | sfmin_max max, vm_sfcmpogt
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
++ |.define NEXT_TAB, TAB:CARG1
++ |.define NEXT_IDX, CARG2
++ |.define NEXT_ASIZE, CARG3
++ |.define NEXT_NIL, CARG4
++ |.define NEXT_TMP0, r12
++ |.define NEXT_TMP1, r13
++ |.define NEXT_TMP2, r14
++ |.define NEXT_RES_VK, CRET1
++ |.define NEXT_RES_IDX, CRET2
++ |.define NEXT_RES_PTR, sp
++ |.define NEXT_RES_VAL_I, 0(sp)
++ |.define NEXT_RES_VAL_IT, 4(sp)
++ |.define NEXT_RES_KEY_I, 8(sp)
++ |.define NEXT_RES_KEY_IT, 12(sp)
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in CRET2.
++ |->vm_next:
++ |.if JIT and ENDIAN_LE
++ | lw NEXT_ASIZE, NEXT_TAB->asize
++ | lw NEXT_TMP0, NEXT_TAB->array
++ | li NEXT_NIL, LJ_TNIL
++ |1: // Traverse array part.
++ | sltu AT, NEXT_IDX, NEXT_ASIZE
++ | sll NEXT_TMP1, NEXT_IDX, 3
++ | beqz AT, >5
++ |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
++ | lw NEXT_TMP2, 4(NEXT_TMP1)
++ | sw NEXT_IDX, NEXT_RES_KEY_I
++ | beq NEXT_TMP2, NEXT_NIL, <1
++ |. addiu NEXT_IDX, NEXT_IDX, 1
++ | lw NEXT_TMP0, 0(NEXT_TMP1)
++ | li AT, LJ_TISNUM
++ | sw NEXT_TMP2, NEXT_RES_VAL_IT
++ | sw AT, NEXT_RES_KEY_IT
++ | sw NEXT_TMP0, NEXT_RES_VAL_I
++ | move NEXT_RES_VK, NEXT_RES_PTR
++ | jr ra
++ |. move NEXT_RES_IDX, NEXT_IDX
++ |
++ |5: // Traverse hash part.
++ | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
++ | lw NODE:NEXT_RES_VK, NEXT_TAB->node
++ | sll NEXT_TMP2, NEXT_RES_IDX, 5
++ | lw NEXT_TMP0, NEXT_TAB->hmask
++ | sll AT, NEXT_RES_IDX, 3
++ | subu AT, NEXT_TMP2, AT
++ | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
++ |6:
++ | sltu AT, NEXT_TMP0, NEXT_RES_IDX
++ | bnez AT, >8
++ |. nop
++ | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it
++ | bne NEXT_TMP2, NEXT_NIL, >9
++ |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
++ | // Skip holes in hash part.
++ | b <6
++ |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
++ |
++ |8: // End of iteration. Set the key to nil (not the value).
++ | sw NEXT_NIL, NEXT_RES_KEY_IT
++ | move NEXT_RES_VK, NEXT_RES_PTR
++ |9:
++ | jr ra
++ |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
++ |.endif
++ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -3984,9 +4092,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TGETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | lw TMP0, TAB:RB->hmask
+- | lw TMP1, STR:RC->hash
++ | lw TMP1, STR:RC->sid
+ | lw NODE:TMP2, TAB:RB->node
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | sll TMP0, TMP1, 5
+ | sll TMP1, TMP1, 3
+ | subu TMP1, TMP0, TMP1
+@@ -4158,10 +4266,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TSETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
+ | lw TMP0, TAB:RB->hmask
+- | lw TMP1, STR:RC->hash
++ | lw TMP1, STR:RC->sid
+ | lw NODE:TMP2, TAB:RB->node
+ | sb r0, TAB:RB->nomm // Clear metamethod cache.
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | sll TMP0, TMP1, 5
+ | sll TMP1, TMP1, 3
+ | subu TMP1, TMP0, TMP1
+@@ -4317,7 +4425,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+- | barrierback TAB:CARG2, TMP3, TMP0, <2
++ | barrierback TAB:CARG2, TMP3, CRET1, <2
+ break;
+
+ case BC_TSETM:
+@@ -4480,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERN:
+- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+- |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
++ |.if JIT and ENDIAN_LE
++ | hotloop
+ |.endif
++ |->vm_IITERN:
++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+ | addu RA, BASE, RA
+ | lw TAB:RB, -16+LO(RA)
+ | lw RC, -8+LO(RA) // Get index from control var.
+@@ -4562,9 +4671,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | addiu CARG2, CARG2, -FF_next_N
+ | or CARG2, CARG2, CARG3
+ | bnez CARG2, >5
+- |. lui TMP1, 0xfffe
++ |. lui TMP1, (LJ_KEYINDEX >> 16)
+ | addu PC, TMP0, TMP2
+- | ori TMP1, TMP1, 0x7fff
++ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
+ | sw r0, -8+LO(RA) // Initialize control var.
+ | sw TMP1, -8+HI(RA)
+ |1:
+@@ -4573,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | li TMP3, BC_JMP
+ | li TMP1, BC_ITERC
+ | sb TMP3, -4+OFS_OP(PC)
+- | addu PC, TMP0, TMP2
++ | addu PC, TMP0, TMP2
++ |.if JIT
++ | lb TMP0, OFS_OP(PC)
++ | li AT, BC_ITERN
++ | bne TMP0, AT, >6
++ |. lhu TMP2, OFS_RD(PC)
++ |.endif
+ | b <1
+ |. sb TMP1, OFS_OP(PC)
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | lw TMP0, DISPATCH_J(trace)(DISPATCH)
++ | sll TMP2, TMP2, 2
++ | addu TMP0, TMP0, TMP2
++ | lw TRACE:TMP2, 0(TMP0)
++ | lw TMP0, TRACE:TMP2->startins
++ | li AT, -256
++ | and TMP0, TMP0, AT
++ | or TMP0, TMP0, TMP1
++ | b <1
++ |. sw TMP0, 0(PC)
++ |.endif
+ break;
+
+ case BC_VARG:
+diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
+index c06270a0..4ddb2f9c 100644
+--- a/src/vm_mips64.dasc
++++ b/src/vm_mips64.dasc
+@@ -1,6 +1,6 @@
+ |// Low-level VM code for MIPS64 CPUs.
+ |// Bytecode interpreter, fast functions and helper functions.
+-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ |//
+ |// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+ |// Sponsored by Cisco Systems, Inc.
+@@ -83,6 +83,10 @@
+ |
+ |.define FRET1, f0
+ |.define FRET2, f2
++|
++|.define FTMP0, f20
++|.define FTMP1, f21
++|.define FTMP2, f22
+ |.endif
+ |
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
+@@ -189,7 +193,7 @@
+ |//-----------------------------------------------------------------------
+ |
+ |// Trap for not-yet-implemented parts.
+-|.macro NYI; .long 0xf0f0f0f0; .endmacro
++|.macro NYI; .long 0xec1cf0f0; .endmacro
+ |
+ |// Macros to mark delay slots.
+ |.macro ., a; a; .endmacro
+@@ -310,10 +314,10 @@
+ |.endmacro
+ |
+ |// Assumes DISPATCH is relative to GL.
+-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+-#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+-#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch))
+-#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
++#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch))
++#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
+ |
+ #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+ |
+@@ -492,8 +496,15 @@ static void build_subroutines(BuildCtx *ctx)
+ |7: // Less results wanted.
+ | subu TMP0, RD, TMP2
+ | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it.
++ |.if MIPSR6
++ | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
++ | seleqz BASE, BASE, TMP2
++ | b <3
++ |. or BASE, BASE, TMP0
++ |.else
+ | b <3
+ |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case?
++ |.endif
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+@@ -545,6 +556,10 @@ static void build_subroutines(BuildCtx *ctx)
+ | b ->vm_returnc
+ |. li RD, 16 // 2 results: false + error message.
+ |
++ |->vm_unwind_stub: // Jump to exit stub from unwinder.
++ | jr CARG1
++ |. move ra, CARG2
++ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -713,11 +728,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | ld PC, -24(RB) // Restore PC from [cont|PC].
+ | cleartp LFUNC:TMP1
+ | daddu TMP2, RA, RD
+- | ld TMP1, LFUNC:TMP1->pc
+ |.if FFI
+ | bnez AT, >1
+ |.endif
+ |. sd TISNIL, -8(TMP2) // Ensure one valid arg.
++ | ld TMP1, LFUNC:TMP1->pc
+ | // BASE = base, RA = resultptr, RB = meta base
+ | jr TMP0 // Jump to continuation.
+ |. ld KBASE, PC2PROTO(k)(TMP1)
+@@ -1121,11 +1136,16 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
++ |// MIPSR6: no delay slot, but a forbidden slot.
+ |.macro ffgccheck
+ | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
+ | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
+ | dsubu AT, TMP0, TMP1
++ |.if MIPSR6
++ | bgezalc AT, ->fff_gcstep
++ |.else
+ | bgezal AT, ->fff_gcstep
++ |.endif
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+@@ -1153,7 +1173,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | sltu TMP1, TISNUM, TMP0
+ | not TMP2, TMP0
+ | li TMP3, ~LJ_TISNUM
++ |.if MIPSR6
++ | selnez TMP2, TMP2, TMP1
++ | seleqz TMP3, TMP3, TMP1
++ | or TMP2, TMP2, TMP3
++ |.else
+ | movz TMP2, TMP3, TMP1
++ |.endif
+ | dsll TMP2, TMP2, 3
+ | daddu TMP2, CFUNC:RB, TMP2
+ | b ->fff_restv
+@@ -1165,7 +1191,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | gettp TMP2, CARG1
+ | daddiu TMP0, TMP2, -LJ_TTAB
+ | daddiu TMP1, TMP2, -LJ_TUDATA
++ |.if MIPSR6
++ | selnez TMP0, TMP1, TMP0
++ |.else
+ | movn TMP0, TMP1, TMP0
++ |.endif
+ | bnez TMP0, >6
+ |. cleartp TAB:CARG1
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+@@ -1175,9 +1205,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | beqz TAB:RB, ->fff_restv
+ |. li CARG1, LJ_TNIL
+ | lw TMP0, TAB:RB->hmask
+- | lw TMP1, STR:RC->hash
++ | lw TMP1, STR:RC->sid
+ | ld NODE:TMP2, TAB:RB->node
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | dsll TMP0, TMP1, 5
+ | dsll TMP1, TMP1, 3
+ | dsubu TMP1, TMP0, TMP1
+@@ -1204,7 +1234,13 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |6:
+ | sltiu AT, TMP2, LJ_TISNUM
++ |.if MIPSR6
++ | selnez TMP0, TISNUM, AT
++ | seleqz AT, TMP2, AT
++ | or TMP2, TMP0, AT
++ |.else
+ | movn TMP2, TISNUM, AT
++ |.endif
+ | dsll TMP2, TMP2, 3
+ | dsubu TMP0, DISPATCH, TMP2
+ | b <2
+@@ -1266,8 +1302,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | or TMP0, TMP0, TMP1
+ | bnez TMP0, ->fff_fallback
+ |. sd BASE, L->base // Add frame since C call can throw.
++ |.if MIPSR6
++ | sd PC, SAVE_PC // Redundant (but a defined value).
++ | ffgccheck
++ |.else
+ | ffgccheck
+ |. sd PC, SAVE_PC // Redundant (but a defined value).
++ |.endif
+ | load_got lj_strfmt_number
+ | move CARG1, L
+ | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
+@@ -1281,27 +1322,24 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+- | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback
++ | checktp CARG1, -LJ_TTAB, ->fff_fallback
+ | daddu TMP2, BASE, NARGS8:RC
+ | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil.
+- | ld PC, FRAME_PC(BASE)
+ | load_got lj_tab_next
+- | sd BASE, L->base // Add frame since C call can throw.
+- | sd BASE, L->top // Dummy frame length is ok.
+- | daddiu CARG3, BASE, 8
+- | sd PC, SAVE_PC
+- | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- |. move CARG1, L
+- | // Returns 0 at end of traversal.
++ | ld PC, FRAME_PC(BASE)
++ | daddiu CARG2, BASE, 8
++ | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ |. daddiu CARG3, BASE, -16
++ | // Returns 1=found, 0=end, -1=error.
++ | daddiu RA, BASE, -16
++ | bgtz CRET1, ->fff_res // Found key/value.
++ |. li RD, (2+1)*8
+ | beqz CRET1, ->fff_restv // End of traversal: return nil.
+ |. move CARG1, TISNIL
+- | ld TMP0, 8(BASE)
+- | daddiu RA, BASE, -16
+- | ld TMP2, 16(BASE)
+- | sd TMP0, 0(RA)
+- | sd TMP2, 8(RA)
+- | b ->fff_res
+- |. li RD, (2+1)*8
++ | ld CFUNC:RB, FRAME_FUNC(BASE)
++ | cleartp CFUNC:RB
++ | b ->fff_fallback // Invalid key.
++ |. li RC, 2*8
+ |
+ |.ffunc_1 pairs
+ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+@@ -1399,15 +1437,16 @@ static void build_subroutines(BuildCtx *ctx)
+ |. nop
+ |
+ |.ffunc xpcall
+- | daddiu NARGS8:RC, NARGS8:RC, -16
++ | daddiu NARGS8:TMP0, NARGS8:RC, -16
+ | ld CARG1, 0(BASE)
+ | ld CARG2, 8(BASE)
+- | bltz NARGS8:RC, ->fff_fallback
++ | bltz NARGS8:TMP0, ->fff_fallback
+ |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
+ | gettp AT, CARG2
+ | daddiu AT, AT, -LJ_TFUNC
+ | bnez AT, ->fff_fallback // Traceback must be a function.
+ |. move TMP2, BASE
++ | move NARGS8:RC, NARGS8:TMP0
+ | daddiu BASE, BASE, 24
+ | // Remember active hook before pcall.
+ | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
+@@ -1437,8 +1476,15 @@ static void build_subroutines(BuildCtx *ctx)
+ | addiu AT, TMP0, -LUA_YIELD
+ | daddu CARG3, CARG2, TMP0
+ | daddiu TMP3, CARG2, 8
++ |.if MIPSR6
++ | seleqz CARG2, CARG2, AT
++ | selnez TMP3, TMP3, AT
++ | bgtz AT, ->fff_fallback // st > LUA_YIELD?
++ |. or CARG2, TMP3, CARG2
++ |.else
+ | bgtz AT, ->fff_fallback // st > LUA_YIELD?
+ |. movn CARG2, TMP3, AT
++ |.endif
+ | xor TMP2, TMP2, CARG3
+ | bnez TMP1, ->fff_fallback // cframe != 0?
+ |. or AT, TMP2, TMP0
+@@ -1750,7 +1796,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | b ->fff_res
+ |. li RD, (2+1)*8
+ |
+- |.macro math_minmax, name, intins, fpins
++ |.macro math_minmax, name, intins, intinsc, fpins
+ | .ffunc_1 name
+ | daddu TMP3, BASE, NARGS8:RC
+ | checkint CARG1, >5
+@@ -1762,7 +1808,13 @@ static void build_subroutines(BuildCtx *ctx)
+ |. sextw CARG1, CARG1
+ | lw CARG2, LO(TMP2)
+ |. slt AT, CARG1, CARG2
++ |.if MIPSR6
++ | intins TMP1, CARG2, AT
++ | intinsc CARG1, CARG1, AT
++ | or CARG1, CARG1, TMP1
++ |.else
+ | intins CARG1, CARG2, AT
++ |.endif
+ | daddiu TMP2, TMP2, 8
+ | zextw CARG1, CARG1
+ | b <1
+@@ -1798,12 +1850,30 @@ static void build_subroutines(BuildCtx *ctx)
+ |. nop
+ |7:
+ |.if FPU
++ |.if MIPSR6
++ | fpins FRET1, FRET1, FARG1
++ |.else
++ |.if fpins // ismax
++ | c.olt.d FARG1, FRET1
++ |.else
+ | c.olt.d FRET1, FARG1
+- | fpins FRET1, FARG1
++ |.endif
++ | movf.d FRET1, FARG1
++ |.endif
++ |.else
++ |.if fpins // ismax
++ | bal ->vm_sfcmpogt
+ |.else
+ | bal ->vm_sfcmpolt
++ |.endif
+ |. nop
+- | intins CARG1, CARG2, CRET1
++ |.if MIPSR6
++ | seleqz AT, CARG2, CRET1
++ | selnez CARG1, CARG1, CRET1
++ | or CARG1, CARG1, AT
++ |.else
++ | movz CARG1, CARG2, CRET1
++ |.endif
+ |.endif
+ | b <6
+ |. daddiu TMP2, TMP2, 8
+@@ -1824,8 +1894,13 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.endmacro
+ |
+- | math_minmax math_min, movz, movf.d
+- | math_minmax math_max, movn, movt.d
++ |.if MIPSR6
++ | math_minmax math_min, seleqz, selnez, min.d
++ | math_minmax math_max, selnez, seleqz, max.d
++ |.else
++ | math_minmax math_min, movz, _, 0
++ | math_minmax math_max, movn, _, 1
++ |.endif
+ |
+ |//-- String library -----------------------------------------------------
+ |
+@@ -1850,7 +1925,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
++ |.if not MIPSR6
+ |. nop
++ |.endif
+ | ld CARG1, 0(BASE)
+ | gettp TMP0, CARG1
+ | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
+@@ -1880,7 +1957,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc string_sub
+ | ffgccheck
++ |.if not MIPSR6
+ |. nop
++ |.endif
+ | addiu AT, NARGS8:RC, -16
+ | ld TMP0, 0(BASE)
+ | bltz AT, ->fff_fallback
+@@ -1903,8 +1982,30 @@ static void build_subroutines(BuildCtx *ctx)
+ | addiu TMP0, CARG2, 1
+ | addu TMP1, CARG4, TMP0
+ | slt TMP3, CARG3, r0
++ |.if MIPSR6
++ | seleqz CARG4, CARG4, AT
++ | selnez TMP1, TMP1, AT
++ | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1
++ |.else
+ | movn CARG4, TMP1, AT // if (end < 0) end += len+1
++ |.endif
+ | addu TMP1, CARG3, TMP0
++ |.if MIPSR6
++ | selnez TMP1, TMP1, TMP3
++ | seleqz CARG3, CARG3, TMP3
++ | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1
++ | li TMP2, 1
++ | slt AT, CARG4, r0
++ | slt TMP3, r0, CARG3
++ | seleqz CARG4, CARG4, AT // if (end < 0) end = 0
++ | selnez CARG3, CARG3, TMP3
++ | seleqz TMP2, TMP2, TMP3
++ | or CARG3, TMP2, CARG3 // if (start < 1) start = 1
++ | slt AT, CARG2, CARG4
++ | seleqz CARG4, CARG4, AT
++ | selnez CARG2, CARG2, AT
++ | or CARG4, CARG2, CARG4 // if (end > len) end = len
++ |.else
+ | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1
+ | li TMP2, 1
+ | slt AT, CARG4, r0
+@@ -1913,6 +2014,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1
+ | slt AT, CARG2, CARG4
+ | movn CARG4, CARG2, AT // if (end > len) end = len
++ |.endif
+ | daddu CARG2, STR:CARG1, CARG3
+ | subu CARG3, CARG4, CARG3 // len = end - start
+ | daddiu CARG2, CARG2, sizeof(GCstr)-1
+@@ -1936,7 +2038,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | ld TMP0, SBUF:CARG1->b
+ | sd L, SBUF:CARG1->L
+ | sd BASE, L->base
+- | sd TMP0, SBUF:CARG1->p
++ | sd TMP0, SBUF:CARG1->w
+ | call_intern extern lj_buf_putstr_ .. name
+ |. sd PC, SAVE_PC
+ | load_got lj_buf_tostr
+@@ -1974,12 +2076,63 @@ static void build_subroutines(BuildCtx *ctx)
+ | slt AT, CARG1, r0
+ | dsrlv CRET1, TMP0, CARG3
+ | dsubu TMP0, r0, CRET1
++ |.if MIPSR6
++ | selnez TMP0, TMP0, AT
++ | seleqz CRET1, CRET1, AT
++ | or CRET1, CRET1, TMP0
++ |.else
+ | movn CRET1, TMP0, AT
++ |.endif
+ | jr ra
+ |. zextw CRET1, CRET1
+ |1:
+ | jr ra
+ |. move CRET1, r0
++ |
++ |// FP number to int conversion with a check for soft-float.
++ |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
++ |->vm_tointg:
++ |.if JIT
++ | dsll CRET2, CARG1, 1
++ | beqz CRET2, >2
++ |. li TMP0, 1076
++ | dsrl AT, CRET2, 53
++ | dsubu TMP0, TMP0, AT
++ | sltiu AT, TMP0, 54
++ | beqz AT, >1
++ |. dextm CRET2, CRET2, 0, 20
++ | dinsu CRET2, AT, 21, 21
++ | slt AT, CARG1, r0
++ | dsrlv CRET1, CRET2, TMP0
++ | dsubu CARG1, r0, CRET1
++ |.if MIPSR6
++ | seleqz CRET1, CRET1, AT
++ | selnez CARG1, CARG1, AT
++ | or CRET1, CRET1, CARG1
++ |.else
++ | movn CRET1, CARG1, AT
++ |.endif
++ | li CARG1, 64
++ | subu TMP0, CARG1, TMP0
++ | dsllv CRET2, CRET2, TMP0 // Integer check.
++ | sextw AT, CRET1
++ | xor AT, CRET1, AT // Range check.
++ |.if MIPSR6
++ | seleqz AT, AT, CRET2
++ | selnez CRET2, CRET2, CRET2
++ | jr ra
++ |. or CRET2, AT, CRET2
++ |.else
++ | jr ra
++ |. movz CRET2, AT, CRET2
++ |.endif
++ |1:
++ | jr ra
++ |. li CRET2, 1
++ |2:
++ | jr ra
++ |. move CRET1, r0
++ |.endif
+ |.endif
+ |
+ |.macro .ffunc_bit, name
+@@ -2470,9 +2623,9 @@ static void build_subroutines(BuildCtx *ctx)
+ |. daddu RA, RA, BASE
+ |
+ |9: // Rethrow error from the right C frame.
+- | load_got lj_err_throw
+- | negu CARG2, CRET1
+- | call_intern lj_err_throw // (lua_State *L, int errcode)
++ | load_got lj_err_trace
++ | sub CARG2, r0, CRET1
++ | call_intern lj_err_trace // (lua_State *L, int errcode)
+ |. move CARG1, L
+ |.endif
+ |
+@@ -2482,15 +2635,22 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |// Hard-float round to integer.
+ |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
++ |// MIPSR6: Modifies FTMP1, too.
+ |.macro vm_round_hf, func
+ | lui TMP0, 0x4330 // Hiword of 2^52 (double).
+ | dsll TMP0, TMP0, 32
+ | dmtc1 TMP0, f4
+ | abs.d FRET2, FARG1 // |x|
+ | dmfc1 AT, FARG1
++ |.if MIPSR6
++ | cmp.lt.d FTMP1, FRET2, f4
++ | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
++ | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52.
++ |.else
+ | c.olt.d 0, FRET2, f4
+ | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
+ | bc1f 0, >1 // Truncate only if |x| < 2^52.
++ |.endif
+ |. sub.d FRET1, FRET1, f4
+ | slt AT, AT, r0
+ |.if "func" == "ceil"
+@@ -2501,16 +2661,38 @@ static void build_subroutines(BuildCtx *ctx)
+ |.if "func" == "trunc"
+ | dsll TMP0, TMP0, 32
+ | dmtc1 TMP0, f4
++ |.if MIPSR6
++ | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result?
++ | sub.d FRET2, FRET1, f4
++ | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1.
++ | dmtc1 AT, FRET1
++ | neg.d FRET2, FTMP1
++ | jr ra
++ |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in.
++ |.else
+ | c.olt.d 0, FRET2, FRET1 // |x| < result?
+ | sub.d FRET2, FRET1, f4
+ | movt.d FRET1, FRET2, 0 // If yes, subtract +1.
+ | neg.d FRET2, FRET1
+ | jr ra
+ |. movn.d FRET1, FRET2, AT // Merge sign bit back in.
++ |.endif
+ |.else
+ | neg.d FRET2, FRET1
+ | dsll TMP0, TMP0, 32
+ | dmtc1 TMP0, f4
++ |.if MIPSR6
++ | dmtc1 AT, FTMP1
++ | sel.d FTMP1, FRET1, FRET2
++ |.if "func" == "ceil"
++ | cmp.lt.d FRET1, FTMP1, FARG1 // x > result?
++ |.else
++ | cmp.lt.d FRET1, FARG1, FTMP1 // x < result?
++ |.endif
++ | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1.
++ | jr ra
++ |. sel.d FRET1, FTMP1, FRET2
++ |.else
+ | movn.d FRET1, FRET2, AT // Merge sign bit back in.
+ |.if "func" == "ceil"
+ | c.olt.d 0, FRET1, FARG1 // x > result?
+@@ -2521,6 +2703,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | jr ra
+ |. movt.d FRET1, FRET2, 0
+ |.endif
++ |.endif
+ |1:
+ | jr ra
+ |. mov.d FRET1, FARG1
+@@ -2628,12 +2811,40 @@ static void build_subroutines(BuildCtx *ctx)
+ |. slt CRET1, CARG2, CARG1
+ |8:
+ | jr ra
+- |. nop
++ |. li CRET1, 0
+ |9:
+ | jr ra
+ |. move CRET1, CRET2
+ |.endif
+ |
++ |->vm_sfcmpogt:
++ |.if not FPU
++ | dsll AT, CARG2, 1
++ | dsll TMP0, CARG1, 1
++ | or TMP1, AT, TMP0
++ | beqz TMP1, >8 // Both args +-0: return 0.
++ |. lui TMP1, 0xffe0
++ | dsll TMP1, TMP1, 32
++ | sltu AT, TMP1, AT
++ | sltu TMP0, TMP1, TMP0
++ | or TMP1, AT, TMP0
++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
++ |. and AT, CARG2, CARG1
++ | bltz AT, >5 // Both args negative?
++ |. nop
++ | jr ra
++ |. slt CRET1, CARG2, CARG1
++ |5: // Swap conditions if both operands are negative.
++ | jr ra
++ |. slt CRET1, CARG1, CARG2
++ |8:
++ | jr ra
++ |. li CRET1, 0
++ |9:
++ | jr ra
++ |. li CRET1, 0
++ |.endif
++ |
+ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
+ |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpolex:
+@@ -2665,10 +2876,98 @@ static void build_subroutines(BuildCtx *ctx)
+ |. li CRET1, 0
+ |.endif
+ |
++ |.macro sfmin_max, name, fpcall
++ |->vm_sf .. name:
++ |.if JIT and not FPU
++ | move TMP2, ra
++ | bal ->fpcall
++ |. nop
++ | move ra, TMP2
++ | move TMP0, CRET1
++ | move CRET1, CARG1
++ |.if MIPSR6
++ | selnez CRET1, CRET1, TMP0
++ | seleqz TMP0, CARG2, TMP0
++ | jr ra
++ |. or CRET1, CRET1, TMP0
++ |.else
++ | jr ra
++ |. movz CRET1, CARG2, TMP0
++ |.endif
++ |.endif
++ |.endmacro
++ |
++ | sfmin_max min, vm_sfcmpolt
++ | sfmin_max max, vm_sfcmpogt
++ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
++ |.define NEXT_TAB, TAB:CARG1
++ |.define NEXT_IDX, CARG2
++ |.define NEXT_ASIZE, CARG3
++ |.define NEXT_NIL, CARG4
++ |.define NEXT_TMP0, r12
++ |.define NEXT_TMP1, r13
++ |.define NEXT_TMP2, r14
++ |.define NEXT_RES_VK, CRET1
++ |.define NEXT_RES_IDX, CRET2
++ |.define NEXT_RES_PTR, sp
++ |.define NEXT_RES_VAL, 0(sp)
++ |.define NEXT_RES_KEY, 8(sp)
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in CRET2.
++ |->vm_next:
++ |.if JIT and ENDIAN_LE
++ | lw NEXT_ASIZE, NEXT_TAB->asize
++ | ld NEXT_TMP0, NEXT_TAB->array
++ | li NEXT_NIL, LJ_TNIL
++ |1: // Traverse array part.
++ | sltu AT, NEXT_IDX, NEXT_ASIZE
++ | sll NEXT_TMP1, NEXT_IDX, 3
++ | beqz AT, >5
++ |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
++ | li AT, LJ_TISNUM
++ | ld NEXT_TMP2, 0(NEXT_TMP1)
++ | dsll AT, AT, 47
++ | or NEXT_TMP1, NEXT_IDX, AT
++ | beq NEXT_TMP2, NEXT_NIL, <1
++ |. addiu NEXT_IDX, NEXT_IDX, 1
++ | sd NEXT_TMP2, NEXT_RES_VAL
++ | sd NEXT_TMP1, NEXT_RES_KEY
++ | move NEXT_RES_VK, NEXT_RES_PTR
++ | jr ra
++ |. move NEXT_RES_IDX, NEXT_IDX
++ |
++ |5: // Traverse hash part.
++ | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
++ | ld NODE:NEXT_RES_VK, NEXT_TAB->node
++ | sll NEXT_TMP2, NEXT_RES_IDX, 5
++ | lw NEXT_TMP0, NEXT_TAB->hmask
++ | sll AT, NEXT_RES_IDX, 3
++ | subu AT, NEXT_TMP2, AT
++ | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
++ |6:
++ | sltu AT, NEXT_TMP0, NEXT_RES_IDX
++ | bnez AT, >8
++ |. nop
++ | ld NEXT_TMP2, NODE:NEXT_RES_VK->val
++ | bne NEXT_TMP2, NEXT_NIL, >9
++ |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
++ | // Skip holes in hash part.
++ | b <6
++ |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
++ |
++ |8: // End of iteration. Set the key to nil (not the value).
++ | sd NEXT_NIL, NEXT_RES_KEY
++ | move NEXT_RES_VK, NEXT_RES_PTR
++ |9:
++ | jr ra
++ |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
++ |.endif
++ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -2832,7 +3131,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | slt AT, CARG1, CARG2
+ | addu TMP2, TMP2, TMP3
++ |.if MIPSR6
++ | movop TMP2, TMP2, AT
++ |.else
+ | movop TMP2, r0, AT
++ |.endif
+ |1:
+ | daddu PC, PC, TMP2
+ | ins_next
+@@ -2850,16 +3153,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.endif
+ |3: // RA and RD are both numbers.
+ |.if FPU
+- | fcomp f20, f22
++ |.if MIPSR6
++ | fcomp FTMP0, FTMP0, FTMP2
++ | addu TMP2, TMP2, TMP3
++ | mfc1 TMP3, FTMP0
++ | b <1
++ |. fmovop TMP2, TMP2, TMP3
++ |.else
++ | fcomp FTMP0, FTMP2
+ | addu TMP2, TMP2, TMP3
+ | b <1
+ |. fmovop TMP2, r0
++ |.endif
+ |.else
+ | bal sfcomp
+ |. addu TMP2, TMP2, TMP3
+ | b <1
++ |.if MIPSR6
++ |. movop TMP2, TMP2, CRET1
++ |.else
+ |. movop TMP2, r0, CRET1
+ |.endif
++ |.endif
+ |
+ |4: // RA is a number, RD is not a number.
+ | bne CARG4, TISNUM, ->vmeta_comp
+@@ -2906,15 +3221,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.endif
+ |.endmacro
+ |
++ |.if MIPSR6
+ if (op == BC_ISLT) {
+- | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISGE) {
+- | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISLE) {
+- | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
+ } else {
+- | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
+ }
++ |.else
++ if (op == BC_ISLT) {
++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
++ } else if (op == BC_ISGE) {
++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
++ } else if (op == BC_ISLE) {
++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
++ } else {
++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
++ }
++ |.endif
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+@@ -2960,7 +3287,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |2: // Check if the tags are the same and it's a table or userdata.
+ | xor AT, CARG3, CARG4 // Same type?
+ | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata?
++ |.if MIPSR6
++ | seleqz TMP0, TMP0, AT
++ |.else
+ | movn TMP0, r0, AT
++ |.endif
+ if (vk) {
+ | beqz TMP0, <1
+ } else {
+@@ -3010,11 +3341,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | xor TMP1, CARG1, CARG2
+ | addu TMP2, TMP2, TMP3
++ |.if MIPSR6
++ if (vk) {
++ | seleqz TMP2, TMP2, TMP1
++ } else {
++ | selnez TMP2, TMP2, TMP1
++ }
++ |.else
+ if (vk) {
+ | movn TMP2, r0, TMP1
+ } else {
+ | movz TMP2, r0, TMP1
+ }
++ |.endif
+ | daddu PC, PC, TMP2
+ | ins_next
+ break;
+@@ -3041,6 +3380,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | bne CARG4, TISNUM, >6
+ |. addu TMP2, TMP2, TMP3
+ | xor AT, CARG1, CARG2
++ |.if MIPSR6
++ if (vk) {
++ | seleqz TMP2, TMP2, AT
++ |1:
++ | daddu PC, PC, TMP2
++ |2:
++ } else {
++ | selnez TMP2, TMP2, AT
++ |1:
++ |2:
++ | daddu PC, PC, TMP2
++ }
++ |.else
+ if (vk) {
+ | movn TMP2, r0, AT
+ |1:
+@@ -3052,6 +3404,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |2:
+ | daddu PC, PC, TMP2
+ }
++ |.endif
+ | ins_next
+ |
+ |3: // RA is not an integer.
+@@ -3064,30 +3417,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |. addu TMP2, TMP2, TMP3
+ | sltu AT, CARG4, TISNUM
+ |.if FPU
+- | ldc1 f20, 0(RA)
+- | ldc1 f22, 0(RD)
++ | ldc1 FTMP0, 0(RA)
++ | ldc1 FTMP2, 0(RD)
+ |.endif
+ | beqz AT, >5
+ |. nop
+ |4: // RA and RD are both numbers.
+ |.if FPU
+- | c.eq.d f20, f22
++ |.if MIPSR6
++ | cmp.eq.d FTMP0, FTMP0, FTMP2
++ | dmfc1 TMP1, FTMP0
++ | b <1
++ if (vk) {
++ |. selnez TMP2, TMP2, TMP1
++ } else {
++ |. seleqz TMP2, TMP2, TMP1
++ }
++ |.else
++ | c.eq.d FTMP0, FTMP2
+ | b <1
+ if (vk) {
+ |. movf TMP2, r0
+ } else {
+ |. movt TMP2, r0
+ }
++ |.endif
+ |.else
+ | bal ->vm_sfcmpeq
+ |. nop
+ | b <1
++ |.if MIPSR6
++ if (vk) {
++ |. selnez TMP2, TMP2, CRET1
++ } else {
++ |. seleqz TMP2, TMP2, CRET1
++ }
++ |.else
+ if (vk) {
+ |. movz TMP2, r0, CRET1
+ } else {
+ |. movn TMP2, r0, CRET1
+ }
+ |.endif
++ |.endif
+ |
+ |5: // RA is a number, RD is not a number.
+ |.if FFI
+@@ -3097,9 +3469,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.endif
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ |.if FPU
+- |. lwc1 f22, LO(RD)
++ |. lwc1 FTMP2, LO(RD)
+ | b <4
+- |. cvt.d.w f22, f22
++ |. cvt.d.w FTMP2, FTMP2
+ |.else
+ |. sextw CARG2, CARG2
+ | bal ->vm_sfi2d_2
+@@ -3117,10 +3489,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.endif
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ |.if FPU
+- |. lwc1 f20, LO(RA)
+- | ldc1 f22, 0(RD)
++ |. lwc1 FTMP0, LO(RA)
++ | ldc1 FTMP2, 0(RD)
+ | b <4
+- | cvt.d.w f20, f20
++ | cvt.d.w FTMP0, FTMP0
+ |.else
+ |. sextw CARG1, CARG1
+ | bal ->vm_sfi2d_1
+@@ -3163,11 +3535,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | decode_RD4b TMP2
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | addu TMP2, TMP2, TMP3
++ |.if MIPSR6
++ if (vk) {
++ | seleqz TMP2, TMP2, TMP0
++ } else {
++ | selnez TMP2, TMP2, TMP0
++ }
++ |.else
+ if (vk) {
+ | movn TMP2, r0, TMP0
+ } else {
+ | movz TMP2, r0, TMP0
+ }
++ |.endif
+ | daddu PC, PC, TMP2
+ | ins_next
+ break;
+@@ -3186,11 +3566,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | decode_RD4b TMP2
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | addu TMP2, TMP2, TMP3
++ |.if MIPSR6
++ if (op == BC_IST) {
++ | selnez TMP2, TMP2, TMP0;
++ } else {
++ | seleqz TMP2, TMP2, TMP0;
++ }
++ |.else
+ if (op == BC_IST) {
+ | movz TMP2, r0, TMP0
+ } else {
+ | movn TMP2, r0, TMP0
+ }
++ |.endif
+ | daddu PC, PC, TMP2
+ } else {
+ | ld CRET1, 0(RD)
+@@ -3433,9 +3821,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | bltz TMP1, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ |.elif "intins" == "mult"
++ |.if MIPSR6
++ |. nop
++ | mul CRET1, CARG3, CARG4
++ | muh TMP2, CARG3, CARG4
++ |.else
+ |. intins CARG3, CARG4
+ | mflo CRET1
+ | mfhi TMP2
++ |.endif
+ | sra TMP1, CRET1, 31
+ | bne TMP1, TMP2, ->vmeta_arith
+ |. daddu RA, BASE, RA
+@@ -3458,16 +3852,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.endif
+ |
+ |5: // Check for two numbers.
+- | .FPU ldc1 f20, 0(RB)
++ | .FPU ldc1 FTMP0, 0(RB)
+ | sltu AT, TMP0, TISNUM
+ | sltu TMP0, TMP1, TISNUM
+- | .FPU ldc1 f22, 0(RC)
++ | .FPU ldc1 FTMP2, 0(RC)
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ |
+ |.if FPU
+- | fpins FRET1, f20, f22
++ | fpins FRET1, FTMP0, FTMP2
+ |.elif "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+@@ -3797,7 +4191,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | li TMP0, 0x801
+ | addiu AT, CARG2, -0x7ff
+ | srl CARG3, RD, 14
++ |.if MIPSR6
++ | seleqz TMP0, TMP0, AT
++ | selnez CARG2, CARG2, AT
++ | or CARG2, CARG2, TMP0
++ |.else
+ | movz CARG2, TMP0, AT
++ |.endif
+ | // (lua_State *L, int32_t asize, uint32_t hbits)
+ | call_intern lj_tab_new
+ |. move CARG1, L
+@@ -3904,9 +4304,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TGETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | lw TMP0, TAB:RB->hmask
+- | lw TMP1, STR:RC->hash
++ | lw TMP1, STR:RC->sid
+ | ld NODE:TMP2, TAB:RB->node
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | sll TMP0, TMP1, 5
+ | sll TMP1, TMP1, 3
+ | subu TMP1, TMP0, TMP1
+@@ -4067,10 +4467,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TSETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
+ | lw TMP0, TAB:RB->hmask
+- | lw TMP1, STR:RC->hash
++ | lw TMP1, STR:RC->sid
+ | ld NODE:TMP2, TAB:RB->node
+ | sb r0, TAB:RB->nomm // Clear metamethod cache.
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | sll TMP0, TMP1, 5
+ | sll TMP1, TMP1, 3
+ | subu TMP1, TMP0, TMP1
+@@ -4078,7 +4478,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
+ |.if FPU
+- | ldc1 f20, 0(RA)
++ | ldc1 FTMP0, 0(RA)
+ |.else
+ | ld CRET1, 0(RA)
+ |.endif
+@@ -4094,7 +4494,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | bnez AT, >7
+ |.if FPU
+- |. sdc1 f20, NODE:TMP2->val
++ |. sdc1 FTMP0, NODE:TMP2->val
+ |.else
+ |. sd CRET1, NODE:TMP2->val
+ |.endif
+@@ -4135,7 +4535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ld BASE, L->base
+ |.if FPU
+ | b <3 // No 2nd write barrier needed.
+- |. sdc1 f20, 0(CRET1)
++ |. sdc1 FTMP0, 0(CRET1)
+ |.else
+ | ld CARG1, 0(RA)
+ | b <3 // No 2nd write barrier needed.
+@@ -4213,7 +4613,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+- | barrierback TAB:CARG2, TMP3, TMP0, <2
++ | barrierback TAB:CARG2, TMP3, CRET1, <2
+ break;
+
+ case BC_TSETM:
+@@ -4364,10 +4764,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERN:
+- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+- |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
++ |.if JIT and ENDIAN_LE
++ | hotloop
+ |.endif
++ |->vm_IITERN:
++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+ | daddu RA, BASE, RA
+ | ld TAB:RB, -16(RA)
+ | lw RC, -8+LO(RA) // Get index from control var.
+@@ -4388,11 +4789,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |. addiu RC, RC, 1
+ | sd TMP2, 0(RA)
+ | sd CARG1, 8(RA)
+- | or TMP0, RC, CARG3
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | decode_RD4b RD
+ | daddu RD, RD, TMP3
+- | sw TMP0, -8+LO(RA) // Update control var.
++ | sw RC, -8+LO(RA) // Update control var.
+ | daddu PC, PC, RD
+ |3:
+ | ins_next
+@@ -4442,9 +4842,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | daddiu TMP1, TMP1, -FF_next_N
+ | or AT, AT, TMP1
+ | bnez AT, >5
+- |. lui TMP1, 0xfffe
++ |. lui TMP1, (LJ_KEYINDEX >> 16)
+ | daddu PC, TMP0, TMP2
+- | ori TMP1, TMP1, 0x7fff
++ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
+ | dsll TMP1, TMP1, 32
+ | sd TMP1, -8(RA)
+ |1:
+@@ -4454,8 +4854,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | li TMP1, BC_ITERC
+ | sb TMP3, -4+OFS_OP(PC)
+ | daddu PC, TMP0, TMP2
++ |.if JIT
++ | lb TMP0, OFS_OP(PC)
++ | li AT, BC_ITERN
++ | bne TMP0, AT, >6
++ |. lhu TMP2, OFS_RD(PC)
++ |.endif
+ | b <1
+ |. sb TMP1, OFS_OP(PC)
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | ld TMP0, DISPATCH_J(trace)(DISPATCH)
++ | sll TMP2, TMP2, 3
++ | daddu TMP0, TMP0, TMP2
++ | ld TRACE:TMP2, 0(TMP0)
++ | lw TMP0, TRACE:TMP2->startins
++ | li AT, -256
++ | and TMP0, TMP0, AT
++ | or TMP0, TMP0, TMP1
++ | b <1
++ |. sw TMP0, 0(PC)
++ |.endif
+ break;
+
+ case BC_VARG:
+@@ -4478,7 +4897,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ld CARG1, 0(RC)
+ | sltu AT, RC, TMP3
+ | daddiu RC, RC, 8
++ |.if MIPSR6
++ | selnez CARG1, CARG1, AT
++ | seleqz AT, TISNIL, AT
++ | or CARG1, CARG1, AT
++ |.else
+ | movz CARG1, TISNIL, AT
++ |.endif
+ | sd CARG1, 0(RA)
+ | sltu AT, RA, TMP2
+ | bnez AT, <1
+@@ -4667,7 +5092,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | dext AT, CRET1, 31, 0
+ | slt CRET1, CARG2, CARG3
+ | slt TMP1, CARG3, CARG2
++ |.if MIPSR6
++ | selnez TMP1, TMP1, AT
++ | seleqz CRET1, CRET1, AT
++ | or CRET1, CRET1, TMP1
++ |.else
+ | movn CRET1, TMP1, AT
++ |.endif
+ } else {
+ | bne CARG3, TISNUM, >5
+ |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type
+@@ -4683,20 +5114,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | slt CRET1, CRET1, CARG1
+ | slt AT, CARG2, r0
+ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
++ |.if MIPSR6
++ | selnez TMP1, TMP1, AT
++ | seleqz CRET1, CRET1, AT
++ | or CRET1, CRET1, TMP1
++ |.else
+ | movn CRET1, TMP1, AT
++ |.endif
+ | or CRET1, CRET1, TMP0
+ | zextw CARG1, CARG1
+ | settp CARG1, TISNUM
+ }
+ |1:
+ if (op == BC_FORI) {
++ |.if MIPSR6
++ | selnez TMP2, TMP2, CRET1
++ |.else
+ | movz TMP2, r0, CRET1
++ |.endif
+ | daddu PC, PC, TMP2
+ } else if (op == BC_JFORI) {
+ | daddu PC, PC, TMP2
+ | lhu RD, -4+OFS_RD(PC)
+ } else if (op == BC_IFORL) {
++ |.if MIPSR6
++ | seleqz TMP2, TMP2, CRET1
++ |.else
+ | movn TMP2, r0, CRET1
++ |.endif
+ | daddu PC, PC, TMP2
+ }
+ if (vk) {
+@@ -4726,6 +5171,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_for
+ |. slt TMP3, TMP3, r0
++ |.if MIPSR6
++ | dmtc1 TMP3, FTMP2
++ | cmp.lt.d FTMP0, f0, f2
++ | cmp.lt.d FTMP1, f2, f0
++ | sel.d FTMP2, FTMP1, FTMP0
++ | b <1
++ |. dmfc1 CRET1, FTMP2
++ |.else
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | li CRET1, 1
+@@ -4733,12 +5186,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | movt AT, r0, 1
+ | b <1
+ |. movn CRET1, AT, TMP3
++ |.endif
+ } else {
+ | ldc1 f0, FORL_IDX*8(RA)
+ | ldc1 f4, FORL_STEP*8(RA)
+ | ldc1 f2, FORL_STOP*8(RA)
+ | ld TMP3, FORL_STEP*8(RA)
+ | add.d f0, f0, f4
++ |.if MIPSR6
++ | slt TMP3, TMP3, r0
++ | dmtc1 TMP3, FTMP2
++ | cmp.lt.d FTMP0, f0, f2
++ | cmp.lt.d FTMP1, f2, f0
++ | sel.d FTMP2, FTMP1, FTMP0
++ | dmfc1 CRET1, FTMP2
++ if (op == BC_IFORL) {
++ | seleqz TMP2, TMP2, CRET1
++ | daddu PC, PC, TMP2
++ }
++ |.else
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | slt TMP3, TMP3, r0
+@@ -4751,6 +5217,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | movn TMP2, r0, CRET1
+ | daddu PC, PC, TMP2
+ }
++ |.endif
+ | sdc1 f0, FORL_IDX*8(RA)
+ | ins_next1
+ | b <2
+@@ -4926,8 +5393,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ld TMP0, 0(RA)
+ | sltu AT, RA, RC // Less args than parameters?
+ | move CARG1, TMP0
++ |.if MIPSR6
++ | selnez TMP0, TMP0, AT
++ | seleqz TMP3, TISNIL, AT
++ | or TMP0, TMP0, TMP3
++ | seleqz TMP3, CARG1, AT
++ | selnez CARG1, TISNIL, AT
++ | or CARG1, CARG1, TMP3
++ |.else
+ | movz TMP0, TISNIL, AT // Clear missing parameters.
+ | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC).
++ |.endif
+ | addiu TMP2, TMP2, -1
+ | sd TMP0, 16(TMP1)
+ | daddiu TMP1, TMP1, 8
+diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
+index b4260ebc..a376c849 100644
+--- a/src/vm_ppc.dasc
++++ b/src/vm_ppc.dasc
+@@ -1,6 +1,6 @@
+ |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
+ |// Bytecode interpreter, fast functions and helper functions.
+-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ |
+ |.arch ppc
+ |.section code_op, code_sub
+@@ -18,7 +18,6 @@
+ |// DynASM defines used by the PPC port:
+ |//
+ |// P64 64 bit pointers (only for GPR64 testing).
+-|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
+ |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
+ |// Affects reg saves, stack layout, carry/overflow/dot flags etc.
+ |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
+@@ -103,6 +102,18 @@
+ |// Fixed register assignments for the interpreter.
+ |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
+ |
++|.macro .FPU, a, b
++|.if FPU
++| a, b
++|.endif
++|.endmacro
++|
++|.macro .FPU, a, b, c
++|.if FPU
++| a, b, c
++|.endif
++|.endmacro
++|
+ |// The following must be C callee-save (but BASE is often refetched).
+ |.define BASE, r14 // Base of current Lua stack frame.
+ |.define KBASE, r15 // Constants of current Lua function.
+@@ -116,8 +127,10 @@
+ |.define TISNUM, r22
+ |.define TISNIL, r23
+ |.define ZERO, r24
++|.if FPU
+ |.define TOBIT, f30 // 2^52 + 2^51.
+ |.define TONUM, f31 // 2^52 + 2^51 + 2^31.
++|.endif
+ |
+ |// The following temporaries are not saved across C calls, except for RA.
+ |.define RA, r20 // Callee-save.
+@@ -133,6 +146,7 @@
+ |
+ |// Saved temporaries.
+ |.define SAVE0, r21
++|.define SAVE1, r25
+ |
+ |// Calling conventions.
+ |.define CARG1, r3
+@@ -141,8 +155,10 @@
+ |.define CARG4, r6 // Overlaps TMP3.
+ |.define CARG5, r7 // Overlaps INS.
+ |
++|.if FPU
+ |.define FARG1, f1
+ |.define FARG2, f2
++|.endif
+ |
+ |.define CRET1, r3
+ |.define CRET2, r4
+@@ -213,10 +229,16 @@
+ |.endif
+ |.else
+ |
++|.if FPU
+ |.define SAVE_LR, 276(sp)
+ |.define CFRAME_SPACE, 272 // Delta for sp.
+ |// Back chain for sp: 272(sp) <-- sp entering interpreter
+ |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
++|.else
++|.define SAVE_LR, 132(sp)
++|.define CFRAME_SPACE, 128 // Delta for sp.
++|// Back chain for sp: 128(sp) <-- sp entering interpreter
++|.endif
+ |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
+ |.define SAVE_CR, 52(sp) // 32 bit CR save.
+ |.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
+@@ -226,16 +248,25 @@
+ |.define SAVE_PC, 32(sp)
+ |.define SAVE_MULTRES, 28(sp)
+ |.define UNUSED1, 24(sp)
++|.if FPU
+ |.define TMPD_LO, 20(sp)
+ |.define TMPD_HI, 16(sp)
+ |.define TONUM_LO, 12(sp)
+ |.define TONUM_HI, 8(sp)
++|.else
++|.define SFSAVE_4, 20(sp)
++|.define SFSAVE_3, 16(sp)
++|.define SFSAVE_2, 12(sp)
++|.define SFSAVE_1, 8(sp)
++|.endif
+ |// Next frame lr: 4(sp)
+ |// Back chain for sp: 0(sp) <-- sp while in interpreter
+ |
++|.if FPU
+ |.define TMPD_BLO, 23(sp)
+ |.define TMPD, TMPD_HI
+ |.define TONUM_D, TONUM_HI
++|.endif
+ |
+ |.endif
+ |
+@@ -245,7 +276,7 @@
+ |.else
+ | stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
+ |.endif
+-| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
++| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+ |.endmacro
+ |.macro rest_, reg
+ |.if GPR64
+@@ -253,7 +284,7 @@
+ |.else
+ | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
+ |.endif
+-| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
++| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+ |.endmacro
+ |
+ |.macro saveregs
+@@ -323,6 +354,7 @@
+ |// Trap for not-yet-implemented parts.
+ |.macro NYI; tw 4, sp, sp; .endmacro
+ |
++|.if FPU
+ |// int/FP conversions.
+ |.macro tonum_i, freg, reg
+ | xoris reg, reg, 0x8000
+@@ -346,6 +378,7 @@
+ |.macro toint, reg, freg
+ | toint reg, freg, freg
+ |.endmacro
++|.endif
+ |
+ |//-----------------------------------------------------------------------
+ |
+@@ -533,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx)
+ | beq >2
+ |1:
+ | addic. TMP1, TMP1, -8
++ |.if FPU
+ | lfd f0, 0(RA)
++ |.else
++ | lwz CARG1, 0(RA)
++ | lwz CARG2, 4(RA)
++ |.endif
+ | addi RA, RA, 8
++ |.if FPU
+ | stfd f0, 0(BASE)
++ |.else
++ | stw CARG1, 0(BASE)
++ | stw CARG2, 4(BASE)
++ |.endif
+ | addi BASE, BASE, 8
+ | bney <1
+ |
+@@ -613,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx)
+ | .toc ld TOCREG, SAVE_TOC
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ | lp BASE, L->base
+- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
+ | li ZERO, 0
+- | stw TMP3, TMPD
++ | .FPU stw TMP3, TMPD
+ | li TMP1, LJ_TFALSE
+- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | li TISNIL, LJ_TNIL
+ | li_vmstate INTERP
+- | lfs TOBIT, TMPD
++ | .FPU lfs TOBIT, TMPD
+ | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
+ | la RA, -8(BASE) // Results start at BASE-8.
+- | stw TMP3, TMPD
++ | .FPU stw TMP3, TMPD
+ | addi DISPATCH, DISPATCH, GG_G2DISP
+ | stw TMP1, 0(RA) // Prepend false to error message.
+ | li RD, 16 // 2 results: false + error message.
+ | st_vmstate
+- | lfs TONUM, TMPD
++ | .FPU lfs TONUM, TMPD
+ | b ->vm_returnc
+ |
+ |//-----------------------------------------------------------------------
+@@ -690,22 +733,22 @@ static void build_subroutines(BuildCtx *ctx)
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ | lp TMP1, L->top
+ | lwz PC, FRAME_PC(BASE)
+- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | stb CARG3, L->status
+- | stw TMP3, TMPD
+- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+- | lfs TOBIT, TMPD
++ | .FPU stw TMP3, TMPD
++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
++ | .FPU lfs TOBIT, TMPD
+ | sub RD, TMP1, BASE
+- | stw TMP3, TMPD
+- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
++ | .FPU stw TMP3, TMPD
++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | addi RD, RD, 8
+- | stw TMP0, TONUM_HI
++ | .FPU stw TMP0, TONUM_HI
+ | li_vmstate INTERP
+ | li ZERO, 0
+ | st_vmstate
+ | andix. TMP0, PC, FRAME_TYPE
+ | mr MULTRES, RD
+- | lfs TONUM, TMPD
++ | .FPU lfs TONUM, TMPD
+ | li TISNIL, LJ_TNIL
+ | beq ->BC_RET_Z
+ | b ->vm_return
+@@ -739,19 +782,19 @@ static void build_subroutines(BuildCtx *ctx)
+ | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ | lp TMP1, L->top
+- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | add PC, PC, BASE
+- | stw TMP3, TMPD
++ | .FPU stw TMP3, TMPD
+ | li ZERO, 0
+- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+- | lfs TOBIT, TMPD
++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
++ | .FPU lfs TOBIT, TMPD
+ | sub PC, PC, TMP2 // PC = frame delta + frame type
+- | stw TMP3, TMPD
+- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
++ | .FPU stw TMP3, TMPD
++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | sub NARGS8:RC, TMP1, BASE
+- | stw TMP0, TONUM_HI
++ | .FPU stw TMP0, TONUM_HI
+ | li_vmstate INTERP
+- | lfs TONUM, TMPD
++ | .FPU lfs TONUM, TMPD
+ | li TISNIL, LJ_TNIL
+ | st_vmstate
+ |
+@@ -816,11 +859,11 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ | lwz PC, -16(RB) // Restore PC from [cont|PC].
+ | subi TMP2, RD, 8
+- | lwz TMP1, LFUNC:TMP1->pc
+ | stwx TISNIL, RA, TMP2 // Ensure one valid arg.
+ |.if FFI
+ | ble >1
+ |.endif
++ | lwz TMP1, LFUNC:TMP1->pc
+ | lwz KBASE, PC2PROTO(k)(TMP1)
+ | // BASE = base, RA = resultptr, RB = meta base
+ | mtctr TMP0
+@@ -839,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx)
+ | lwz INS, -4(PC)
+ | subi CARG2, RB, 16
+ | decode_RB8 SAVE0, INS
++ |.if FPU
+ | lfd f0, 0(RA)
++ |.else
++ | lwz TMP2, 0(RA)
++ | lwz TMP3, 4(RA)
++ |.endif
+ | add TMP1, BASE, SAVE0
+ | stp BASE, L->base
+ | cmplw TMP1, CARG2
+ | sub CARG3, CARG2, TMP1
+ | decode_RA8 RA, INS
++ |.if FPU
+ | stfd f0, 0(CARG2)
++ |.else
++ | stw TMP2, 0(CARG2)
++ | stw TMP3, 4(CARG2)
++ |.endif
+ | bney ->BC_CAT_Z
++ |.if FPU
+ | stfdx f0, BASE, RA
++ |.else
++ | stwux TMP2, RA, BASE
++ | stw TMP3, 4(RA)
++ |.endif
+ | b ->cont_nop
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+@@ -900,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | cmplwi CRET1, 0
+ | beq >3
++ |.if FPU
+ | lfd f0, 0(CRET1)
++ |.else
++ | lwz TMP0, 0(CRET1)
++ | lwz TMP1, 4(CRET1)
++ |.endif
+ | ins_next1
++ |.if FPU
+ | stfdx f0, BASE, RA
++ |.else
++ | stwux TMP0, RA, BASE
++ | stw TMP1, 4(RA)
++ |.endif
+ | ins_next2
+ |
+ |3: // Call __index metamethod.
+@@ -920,7 +988,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Returns cTValue * or NULL.
+ | cmplwi CRET1, 0
+ | beq >1
++ |.if FPU
+ | lfd f14, 0(CRET1)
++ |.else
++ | lwz SAVE0, 0(CRET1)
++ | lwz SAVE1, 4(CRET1)
++ |.endif
+ | b ->BC_TGETR_Z
+ |1:
+ | stwx TISNIL, BASE, RA
+@@ -975,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx)
+ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | cmplwi CRET1, 0
++ |.if FPU
+ | lfdx f0, BASE, RA
++ |.else
++ | lwzux TMP2, RA, BASE
++ | lwz TMP3, 4(RA)
++ |.endif
+ | beq >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | ins_next1
++ |.if FPU
+ | stfd f0, 0(CRET1)
++ |.else
++ | stw TMP2, 0(CRET1)
++ | stw TMP3, 4(CRET1)
++ |.endif
+ | ins_next2
+ |
+ |3: // Call __newindex metamethod.
+@@ -990,15 +1073,26 @@ static void build_subroutines(BuildCtx *ctx)
+ | add PC, TMP1, BASE
+ | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
+ | li NARGS8:RC, 24 // 3 args for func(t, k, v)
++ |.if FPU
+ | stfd f0, 16(BASE) // Copy value to third argument.
++ |.else
++ | stw TMP2, 16(BASE)
++ | stw TMP3, 20(BASE)
++ |.endif
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ | stp BASE, L->base
++ | mr CARG1, L
+ | stw PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
++ |.if FPU
+ | stfd f14, 0(CRET1)
++ |.else
++ | stw SAVE0, 0(CRET1)
++ | stw SAVE1, 4(CRET1)
++ |.endif
+ | b ->cont_nop
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |->cont_ra: // RA = resultptr
+ | lwz INS, -4(PC)
++ |.if FPU
+ | lfd f0, 0(RA)
++ |.else
++ | lwz CARG1, 0(RA)
++ | lwz CARG2, 4(RA)
++ |.endif
+ | decode_RA8 TMP1, INS
++ |.if FPU
+ | stfdx f0, BASE, TMP1
++ |.else
++ | stwux CARG1, TMP1, BASE
++ | stw CARG2, 4(TMP1)
++ |.endif
+ | b ->cont_nop
+ |
+ |->cont_condt: // RA = resultptr
+@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
+ |.macro .ffunc_n, name
+ |->ff_ .. name:
+ | cmplwi NARGS8:RC, 8
+- | lwz CARG3, 0(BASE)
++ | lwz CARG1, 0(BASE)
++ |.if FPU
+ | lfd FARG1, 0(BASE)
++ |.else
++ | lwz CARG2, 4(BASE)
++ |.endif
+ | blt ->fff_fallback
+- | checknum CARG3; bge ->fff_fallback
++ | checknum CARG1; bge ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ |->ff_ .. name:
+ | cmplwi NARGS8:RC, 16
+- | lwz CARG3, 0(BASE)
++ | lwz CARG1, 0(BASE)
++ |.if FPU
+ | lfd FARG1, 0(BASE)
+- | lwz CARG4, 8(BASE)
++ | lwz CARG3, 8(BASE)
+ | lfd FARG2, 8(BASE)
++ |.else
++ | lwz CARG2, 4(BASE)
++ | lwz CARG3, 8(BASE)
++ | lwz CARG4, 12(BASE)
++ |.endif
+ | blt ->fff_fallback
++ | checknum CARG1; bge ->fff_fallback
+ | checknum CARG3; bge ->fff_fallback
+- | checknum CARG4; bge ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
+@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
+ | bge cr1, ->fff_fallback
+ | stw CARG3, 0(RA)
+ | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
++ | addi TMP1, BASE, 8
++ | add TMP2, RA, NARGS8:RC
+ | stw CARG1, 4(RA)
+ | beq ->fff_res // Done if exactly 1 argument.
+- | li TMP1, 8
+- | subi RC, RC, 8
+ |1:
+- | cmplw TMP1, RC
+- | lfdx f0, BASE, TMP1
+- | stfdx f0, RA, TMP1
++ | cmplw TMP1, TMP2
++ |.if FPU
++ | lfd f0, 0(TMP1)
++ | stfd f0, -8(TMP1) // Store one slot below the load, same as the soft-float path.
++ |.else
++ | lwz CARG1, 0(TMP1)
++ | lwz CARG2, 4(TMP1)
++ | stw CARG1, -8(TMP1)
++ | stw CARG2, -4(TMP1)
++ |.endif
+ | addi TMP1, TMP1, 8
+ | bney <1
+ | b ->fff_res
+@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
+ | orc TMP1, TMP2, TMP0
+ | addi TMP1, TMP1, ~LJ_TISNUM+1
+ | slwi TMP1, TMP1, 3
++ |.if FPU
+ | la TMP2, CFUNC:RB->upvalue
+ | lfdx FARG1, TMP2, TMP1
++ |.else
++ | add TMP1, CFUNC:RB, TMP1
++ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
++ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
++ |.endif
+ | b ->fff_resn
+ |
+ |//-- Base library: getters and setters ---------------------------------
+@@ -1320,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | beq ->fff_restv
+ | lwz TMP0, TAB:CARG1->hmask
+ | li CARG3, LJ_TTAB // Use metatable as default result.
+- | lwz TMP1, STR:RC->hash
++ | lwz TMP1, STR:RC->sid
+ | lwz NODE:TMP2, TAB:CARG1->node
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | slwi TMP0, TMP1, 5
+ | slwi TMP1, TMP1, 3
+ | sub TMP1, TMP0, TMP1
+@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | mr CARG1, L
+ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // Returns cTValue *.
++ |.if FPU
+ | lfd FARG1, 0(CRET1)
++ |.else
++ | lwz CARG2, 4(CRET1)
++ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
++ |.endif
+ | b ->fff_resn
+ |
+ |//-- Base library: conversions ------------------------------------------
+@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Only handles the number case inline (without a base argument).
+ | cmplwi NARGS8:RC, 8
+ | lwz CARG1, 0(BASE)
++ |.if FPU
+ | lfd FARG1, 0(BASE)
++ |.else
++ | lwz CARG2, 4(BASE)
++ |.endif
+ | bne ->fff_fallback // Exactly one argument.
+ | checknum CARG1; bgt ->fff_fallback
+ | b ->fff_resn
+@@ -1423,32 +1559,24 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+- |.ffunc next
+- | cmplwi NARGS8:RC, 8
+- | lwz CARG1, 0(BASE)
+- | lwz TAB:CARG2, 4(BASE)
+- | blt ->fff_fallback
++ |.ffunc_1 next
+ | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
+- | checktab CARG1
++ | checktab CARG3
+ | lwz PC, FRAME_PC(BASE)
+ | bne ->fff_fallback
+- | stp BASE, L->base // Add frame since C call can throw.
+- | mr CARG1, L
+- | stp BASE, L->top // Dummy frame length is ok.
+- | la CARG3, 8(BASE)
+- | stw PC, SAVE_PC
+- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- | // Returns 0 at end of traversal.
+- | cmplwi CRET1, 0
+- | li CARG3, LJ_TNIL
+- | beq ->fff_restv // End of traversal: return nil.
+- | lfd f0, 8(BASE) // Copy key and value to results.
++ | la CARG2, 8(BASE)
++ | la CARG3, -8(BASE)
++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ | // Returns 1=found, 0=end, -1=error.
++ | cmpwi CRET1, 0
+ | la RA, -8(BASE)
+- | lfd f1, 16(BASE)
+- | stfd f0, 0(RA)
+ | li RD, (2+1)*8
+- | stfd f1, 8(RA)
+- | b ->fff_res
++ | bgt ->fff_res // Found key/value.
++ | li CARG3, LJ_TNIL
++ | beq ->fff_restv // End of traversal: return nil.
++ | lwz CFUNC:RB, FRAME_FUNC(BASE)
++ | li NARGS8:RC, 2*8
++ | b ->fff_fallback // Invalid key.
+ |
+ |.ffunc_1 pairs
+ | checktab CARG3
+@@ -1456,17 +1584,32 @@ static void build_subroutines(BuildCtx *ctx)
+ | bne ->fff_fallback
+ #if LJ_52
+ | lwz TAB:TMP2, TAB:CARG1->metatable
++ |.if FPU
+ | lfd f0, CFUNC:RB->upvalue[0]
++ |.else
++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
++ |.endif
+ | cmplwi TAB:TMP2, 0
+ | la RA, -8(BASE)
+ | bne ->fff_fallback
+ #else
++ |.if FPU
+ | lfd f0, CFUNC:RB->upvalue[0]
++ |.else
++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
++ |.endif
+ | la RA, -8(BASE)
+ #endif
+ | stw TISNIL, 8(BASE)
+ | li RD, (3+1)*8
++ |.if FPU
+ | stfd f0, 0(RA)
++ |.else
++ | stw TMP0, 0(RA)
++ | stw TMP1, 4(RA)
++ |.endif
+ | b ->fff_res
+ |
+ |.ffunc ipairs_aux
+@@ -1512,14 +1655,24 @@ static void build_subroutines(BuildCtx *ctx)
+ | stfd FARG2, 0(RA)
+ |.endif
+ | ble >2 // Not in array part?
++ |.if FPU
+ | lwzx TMP2, TMP1, TMP3
+ | lfdx f0, TMP1, TMP3
++ |.else
++ | lwzux TMP2, TMP1, TMP3
++ | lwz TMP3, 4(TMP1)
++ |.endif
+ |1:
+ | checknil TMP2
+ | li RD, (0+1)*8
+ | beq ->fff_res // End of iteration, return 0 results.
+ | li RD, (2+1)*8
++ |.if FPU
+ | stfd f0, 8(RA)
++ |.else
++ | stw TMP2, 8(RA)
++ | stw TMP3, 12(RA)
++ |.endif
+ | b ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | lwz TMP0, TAB:CARG1->hmask
+@@ -1533,7 +1686,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | li RD, (0+1)*8
+ | beq ->fff_res
+ | lwz TMP2, 0(CRET1)
++ |.if FPU
+ | lfd f0, 0(CRET1)
++ |.else
++ | lwz TMP3, 4(CRET1)
++ |.endif
+ | b <1
+ |
+ |.ffunc_1 ipairs
+@@ -1542,12 +1699,22 @@ static void build_subroutines(BuildCtx *ctx)
+ | bne ->fff_fallback
+ #if LJ_52
+ | lwz TAB:TMP2, TAB:CARG1->metatable
++ |.if FPU
+ | lfd f0, CFUNC:RB->upvalue[0]
++ |.else
++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
++ |.endif
+ | cmplwi TAB:TMP2, 0
+ | la RA, -8(BASE)
+ | bne ->fff_fallback
+ #else
++ |.if FPU
+ | lfd f0, CFUNC:RB->upvalue[0]
++ |.else
++ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
++ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
++ |.endif
+ | la RA, -8(BASE)
+ #endif
+ |.if DUALNUM
+@@ -1557,7 +1724,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ | stw ZERO, 12(BASE)
+ | li RD, (3+1)*8
++ |.if FPU
+ | stfd f0, 0(RA)
++ |.else
++ | stw TMP0, 0(RA)
++ | stw TMP1, 4(RA)
++ |.endif
+ | b ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+@@ -1576,19 +1748,32 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc xpcall
+ | cmplwi NARGS8:RC, 16
+- | lwz CARG4, 8(BASE)
++ | lwz CARG3, 8(BASE)
++ |.if FPU
+ | lfd FARG2, 8(BASE)
+ | lfd FARG1, 0(BASE)
++ |.else
++ | lwz CARG1, 0(BASE)
++ | lwz CARG2, 4(BASE)
++ | lwz CARG4, 12(BASE)
++ |.endif
+ | blt ->fff_fallback
+ | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
+ | mr TMP2, BASE
+- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
++ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
+ | la BASE, 16(BASE)
+ | // Remember active hook before pcall.
+ | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
++ |.if FPU
+ | stfd FARG2, 0(TMP2) // Swap function and traceback.
+- | subi NARGS8:RC, NARGS8:RC, 16
+ | stfd FARG1, 8(TMP2)
++ |.else
++ | stw CARG3, 0(TMP2)
++ | stw CARG4, 4(TMP2)
++ | stw CARG1, 8(TMP2)
++ | stw CARG2, 12(TMP2)
++ |.endif
++ | subi NARGS8:RC, NARGS8:RC, 16
+ | addi PC, TMP1, 16+FRAME_PCALL
+ | b ->vm_call_dispatch
+ |
+@@ -1631,9 +1816,21 @@ static void build_subroutines(BuildCtx *ctx)
+ | stp BASE, L->top
+ |2: // Move args to coroutine.
+ | cmpw TMP1, NARGS8:RC
++ |.if FPU
+ | lfdx f0, BASE, TMP1
++ |.else
++ | add CARG3, BASE, TMP1
++ | lwz TMP2, 0(CARG3)
++ | lwz TMP3, 4(CARG3)
++ |.endif
+ | beq >3
++ |.if FPU
+ | stfdx f0, CARG2, TMP1
++ |.else
++ | add CARG3, CARG2, TMP1
++ | stw TMP2, 0(CARG3)
++ | stw TMP3, 4(CARG3)
++ |.endif
+ | addi TMP1, TMP1, 8
+ | b <2
+ |3:
+@@ -1664,8 +1861,17 @@ static void build_subroutines(BuildCtx *ctx)
+ | stp TMP2, L:SAVE0->top // Clear coroutine stack.
+ |5: // Move results from coroutine.
+ | cmplw TMP1, TMP3
++ |.if FPU
+ | lfdx f0, TMP2, TMP1
+ | stfdx f0, BASE, TMP1
++ |.else
++ | add CARG3, TMP2, TMP1
++ | lwz CARG1, 0(CARG3)
++ | lwz CARG2, 4(CARG3)
++ | add CARG3, BASE, TMP1
++ | stw CARG1, 0(CARG3)
++ | stw CARG2, 4(CARG3)
++ |.endif
+ | addi TMP1, TMP1, 8
+ | bne <5
+ |6:
+@@ -1690,12 +1896,22 @@ static void build_subroutines(BuildCtx *ctx)
+ | andix. TMP0, PC, FRAME_TYPE
+ | la TMP3, -8(TMP3)
+ | li TMP1, LJ_TFALSE
++ |.if FPU
+ | lfd f0, 0(TMP3)
++ |.else
++ | lwz CARG1, 0(TMP3)
++ | lwz CARG2, 4(TMP3)
++ |.endif
+ | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
+ | li RD, (2+1)*8
+ | stw TMP1, -8(BASE) // Prepend false to results.
+ | la RA, -8(BASE)
++ |.if FPU
+ | stfd f0, 0(BASE) // Copy error message.
++ |.else
++ | stw CARG1, 0(BASE) // Copy error message.
++ | stw CARG2, 4(BASE)
++ |.endif
+ | b <7
+ |.else
+ | mr CARG1, L
+@@ -1874,7 +2090,12 @@ static void build_subroutines(BuildCtx *ctx)
+ | lus CARG1, 0x8000 // -(2^31).
+ | beqy ->fff_resi
+ |5:
++ |.if FPU
+ | lfd FARG1, 0(BASE)
++ |.else
++ | lwz CARG1, 0(BASE)
++ | lwz CARG2, 4(BASE)
++ |.endif
+ | blex func
+ | b ->fff_resn
+ |.endmacro
+@@ -1898,10 +2119,14 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.ffunc math_log
+ | cmplwi NARGS8:RC, 8
+- | lwz CARG3, 0(BASE)
+- | lfd FARG1, 0(BASE)
++ | lwz CARG1, 0(BASE)
+ | bne ->fff_fallback // Need exactly 1 argument.
+- | checknum CARG3; bge ->fff_fallback
++ | checknum CARG1; bge ->fff_fallback
++ |.if FPU
++ | lfd FARG1, 0(BASE)
++ |.else
++ | lwz CARG2, 4(BASE)
++ |.endif
+ | blex log
+ | b ->fff_resn
+ |
+@@ -1923,17 +2148,24 @@ static void build_subroutines(BuildCtx *ctx)
+ |.if DUALNUM
+ |.ffunc math_ldexp
+ | cmplwi NARGS8:RC, 16
+- | lwz CARG3, 0(BASE)
++ | lwz TMP0, 0(BASE)
++ |.if FPU
+ | lfd FARG1, 0(BASE)
+- | lwz CARG4, 8(BASE)
++ |.else
++ | lwz CARG1, 0(BASE)
++ | lwz CARG2, 4(BASE)
++ |.endif
++ | lwz TMP1, 8(BASE)
+ |.if GPR64
+ | lwz CARG2, 12(BASE)
+- |.else
++ |.elif FPU
+ | lwz CARG1, 12(BASE)
++ |.else
++ | lwz CARG3, 12(BASE)
+ |.endif
+ | blt ->fff_fallback
+- | checknum CARG3; bge ->fff_fallback
+- | checknum CARG4; bne ->fff_fallback
++ | checknum TMP0; bge ->fff_fallback
++ | checknum TMP1; bne ->fff_fallback
+ |.else
+ |.ffunc_nn math_ldexp
+ |.if GPR64
+@@ -1948,8 +2180,10 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc_n math_frexp
+ |.if GPR64
+ | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
+- |.else
++ |.elif FPU
+ | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
++ |.else
++ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
+ |.endif
+ | lwz PC, FRAME_PC(BASE)
+ | blex frexp
+@@ -1958,7 +2192,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |.if not DUALNUM
+ | tonum_i FARG2, TMP1
+ |.endif
++ |.if FPU
+ | stfd FARG1, 0(RA)
++ |.else
++ | stw CRET1, 0(RA)
++ | stw CRET2, 4(RA)
++ |.endif
+ | li RD, (2+1)*8
+ |.if DUALNUM
+ | stw TISNUM, 8(RA)
+@@ -1971,13 +2210,20 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc_n math_modf
+ |.if GPR64
+ | la CARG2, -8(BASE)
+- |.else
++ |.elif FPU
+ | la CARG1, -8(BASE)
++ |.else
++ | la CARG3, -8(BASE)
+ |.endif
+ | lwz PC, FRAME_PC(BASE)
+ | blex modf
+ | la RA, -8(BASE)
++ |.if FPU
+ | stfd FARG1, 0(BASE)
++ |.else
++ | stw CRET1, 0(BASE)
++ | stw CRET2, 4(BASE)
++ |.endif
+ | li RD, (2+1)*8
+ | b ->fff_res
+ |
+@@ -1985,13 +2231,13 @@ static void build_subroutines(BuildCtx *ctx)
+ |.if DUALNUM
+ | .ffunc_1 name
+ | checknum CARG3
+- | addi TMP1, BASE, 8
+- | add TMP2, BASE, NARGS8:RC
++ | addi SAVE0, BASE, 8
++ | add SAVE1, BASE, NARGS8:RC
+ | bne >4
+ |1: // Handle integers.
+- | lwz CARG4, 0(TMP1)
+- | cmplw cr1, TMP1, TMP2
+- | lwz CARG2, 4(TMP1)
++ | lwz CARG4, 0(SAVE0)
++ | cmplw cr1, SAVE0, SAVE1
++ | lwz CARG2, 4(SAVE0)
+ | bge cr1, ->fff_resi
+ | checknum CARG4
+ | xoris TMP0, CARG1, 0x8000
+@@ -2008,36 +2254,76 @@ static void build_subroutines(BuildCtx *ctx)
+ |.if GPR64
+ | rldicl CARG1, CARG1, 0, 32
+ |.endif
+- | addi TMP1, TMP1, 8
++ | addi SAVE0, SAVE0, 8
+ | b <1
+ |3:
+ | bge ->fff_fallback
+ | // Convert intermediate result to number and continue below.
++ |.if FPU
+ | tonum_i FARG1, CARG1
+- | lfd FARG2, 0(TMP1)
++ | lfd FARG2, 0(SAVE0)
++ |.else
++ | mr CARG2, CARG1
++ | bl ->vm_sfi2d_1
++ | lwz CARG3, 0(SAVE0)
++ | lwz CARG4, 4(SAVE0)
++ |.endif
+ | b >6
+ |4:
++ |.if FPU
+ | lfd FARG1, 0(BASE)
++ |.else
++ | lwz CARG1, 0(BASE)
++ | lwz CARG2, 4(BASE)
++ |.endif
+ | bge ->fff_fallback
+ |5: // Handle numbers.
+- | lwz CARG4, 0(TMP1)
+- | cmplw cr1, TMP1, TMP2
+- | lfd FARG2, 0(TMP1)
++ | lwz CARG3, 0(SAVE0)
++ | cmplw cr1, SAVE0, SAVE1
++ |.if FPU
++ | lfd FARG2, 0(SAVE0)
++ |.else
++ | lwz CARG4, 4(SAVE0)
++ |.endif
+ | bge cr1, ->fff_resn
+- | checknum CARG4; bge >7
++ | checknum CARG3; bge >7
+ |6:
+- | fsub f0, FARG1, FARG2
+- | addi TMP1, TMP1, 8
++ | addi SAVE0, SAVE0, 8
++ |.if FPU
+ |.if ismax
++ | fsub f0, FARG1, FARG2
++ |.else
++ | fsub f0, FARG2, FARG1
++ |.endif
+ | fsel FARG1, f0, FARG1, FARG2
+ |.else
+- | fsel FARG1, f0, FARG2, FARG1
++ | stw CARG1, SFSAVE_1
++ | stw CARG2, SFSAVE_2
++ | stw CARG3, SFSAVE_3
++ | stw CARG4, SFSAVE_4
++ | blex __ledf2
++ | cmpwi CRET1, 0
++ |.if ismax
++ | blt >8
++ |.else
++ | bge >8
++ |.endif
++ | lwz CARG1, SFSAVE_1
++ | lwz CARG2, SFSAVE_2
++ | b <5
++ |8:
++ | lwz CARG1, SFSAVE_3
++ | lwz CARG2, SFSAVE_4
+ |.endif
+ | b <5
+ |7: // Convert integer to number and continue above.
+- | lwz CARG2, 4(TMP1)
++ | lwz CARG3, 4(SAVE0)
+ | bne ->fff_fallback
+- | tonum_i FARG2, CARG2
++ |.if FPU
++ | tonum_i FARG2, CARG3
++ |.else
++ | bl ->vm_sfi2d_2
++ |.endif
+ | b <6
+ |.else
+ | .ffunc_n name
+@@ -2049,13 +2335,13 @@ static void build_subroutines(BuildCtx *ctx)
+ | checknum CARG2
+ | bge cr1, ->fff_resn
+ | bge ->fff_fallback
+- | fsub f0, FARG1, FARG2
+- | addi TMP1, TMP1, 8
+ |.if ismax
+- | fsel FARG1, f0, FARG1, FARG2
++ | fsub f0, FARG1, FARG2
+ |.else
+- | fsel FARG1, f0, FARG2, FARG1
++ | fsub f0, FARG2, FARG1
+ |.endif
++ | addi TMP1, TMP1, 8
++ | fsel FARG1, f0, FARG1, FARG2
+ | b <1
+ |.endif
+ |.endmacro
+@@ -2211,7 +2497,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | stw L, SBUF:CARG1->L
+ | stp BASE, L->base
+ | stw PC, SAVE_PC
+- | stw TMP0, SBUF:CARG1->p
++ | stw TMP0, SBUF:CARG1->w
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
+@@ -2237,28 +2523,37 @@ static void build_subroutines(BuildCtx *ctx)
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name
+- | addi TMP1, BASE, 8
+- | add TMP2, BASE, NARGS8:RC
++ | addi SAVE0, BASE, 8
++ | add SAVE1, BASE, NARGS8:RC
+ |1:
+- | lwz CARG4, 0(TMP1)
+- | cmplw cr1, TMP1, TMP2
++ | lwz CARG4, 0(SAVE0)
++ | cmplw cr1, SAVE0, SAVE1
+ |.if DUALNUM
+- | lwz CARG2, 4(TMP1)
++ | lwz CARG2, 4(SAVE0)
+ |.else
+- | lfd FARG1, 0(TMP1)
++ | lfd FARG1, 0(SAVE0)
+ |.endif
+ | bgey cr1, ->fff_resi
+ | checknum CARG4
+ |.if DUALNUM
++ |.if FPU
+ | bnel ->fff_bitop_fb
+ |.else
++ | beq >3
++ | stw CARG1, SFSAVE_1
++ | bl ->fff_bitop_fb
++ | mr CARG2, CARG1
++ | lwz CARG1, SFSAVE_1
++ |3:
++ |.endif
++ |.else
+ | fadd FARG1, FARG1, TOBIT
+ | bge ->fff_fallback
+ | stfd FARG1, TMPD
+ | lwz CARG2, TMPD_LO
+ |.endif
+ | ins CARG1, CARG1, CARG2
+- | addi TMP1, TMP1, 8
++ | addi SAVE0, SAVE0, 8
+ | b <1
+ |.endmacro
+ |
+@@ -2280,7 +2575,14 @@ static void build_subroutines(BuildCtx *ctx)
+ |.macro .ffunc_bit_sh, name, ins, shmod
+ |.if DUALNUM
+ | .ffunc_2 bit_..name
++ |.if FPU
+ | checknum CARG3; bnel ->fff_tobit_fb
++ |.else
++ | checknum CARG3; beq >1
++ | bl ->fff_tobit_fb
++ | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
++ |1:
++ |.endif
+ | // Note: no inline conversion from number for 2nd argument!
+ | checknum CARG4; bne ->fff_fallback
+ |.else
+@@ -2317,27 +2619,77 @@ static void build_subroutines(BuildCtx *ctx)
+ |->fff_resn:
+ | lwz PC, FRAME_PC(BASE)
+ | la RA, -8(BASE)
++ |.if FPU
+ | stfd FARG1, -8(BASE)
++ |.else
++ | stw CARG1, -8(BASE)
++ | stw CARG2, -4(BASE)
++ |.endif
+ | b ->fff_res1
+ |
+ |// Fallback FP number to bit conversion.
+ |->fff_tobit_fb:
+ |.if DUALNUM
++ |.if FPU
+ | lfd FARG1, 0(BASE)
+ | bgt ->fff_fallback
+ | fadd FARG1, FARG1, TOBIT
+ | stfd FARG1, TMPD
+ | lwz CARG1, TMPD_LO
+ | blr
++ |.else
++ | bgt ->fff_fallback
++ | mr CARG2, CARG1
++ | mr CARG1, CARG3
++ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
++ |->vm_tobit:
++ | slwi TMP2, CARG1, 1
++ | addis TMP2, TMP2, 0x0020
++ | cmpwi TMP2, 0
++ | bge >2
++ | li TMP1, 0x3e0
++ | srawi TMP2, TMP2, 21
++ | not TMP1, TMP1
++ | sub. TMP2, TMP1, TMP2
++ | cmpwi cr7, CARG1, 0
++ | blt >1
++ | slwi TMP1, CARG1, 11
++ | srwi TMP0, CARG2, 21
++ | oris TMP1, TMP1, 0x8000
++ | or TMP1, TMP1, TMP0
++ | srw CARG1, TMP1, TMP2
++ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
++ | neg CARG1, CARG1
++ | blr
++ |1:
++ | addi TMP2, TMP2, 21
++ | srw TMP1, CARG2, TMP2
++ | slwi CARG2, CARG1, 12
++ | subfic TMP2, TMP2, 20
++ | slw TMP0, CARG2, TMP2
++ | or CARG1, TMP1, TMP0
++ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
++ | neg CARG1, CARG1
++ | blr
++ |2:
++ | li CARG1, 0
++ | blr
++ |.endif
+ |.endif
+ |->fff_bitop_fb:
+ |.if DUALNUM
+- | lfd FARG1, 0(TMP1)
++ |.if FPU
++ | lfd FARG1, 0(SAVE0)
+ | bgt ->fff_fallback
+ | fadd FARG1, FARG1, TOBIT
+ | stfd FARG1, TMPD
+ | lwz CARG2, TMPD_LO
+ | blr
++ |.else
++ | bgt ->fff_fallback
++ | mr CARG1, CARG4
++ | b ->vm_tobit
++ |.endif
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+@@ -2530,10 +2882,21 @@ static void build_subroutines(BuildCtx *ctx)
+ | decode_RA8 RC, INS // Call base.
+ | beq >2
+ |1: // Move results down.
++ |.if FPU
+ | lfd f0, 0(RA)
++ |.else
++ | lwz CARG1, 0(RA)
++ | lwz CARG2, 4(RA)
++ |.endif
+ | addic. TMP1, TMP1, -8
+ | addi RA, RA, 8
++ |.if FPU
+ | stfdx f0, BASE, RC
++ |.else
++ | add CARG3, BASE, RC
++ | stw CARG1, 0(CARG3)
++ | stw CARG2, 4(CARG3)
++ |.endif
+ | addi RC, RC, 8
+ | bne <1
+ |2:
+@@ -2586,10 +2949,12 @@ static void build_subroutines(BuildCtx *ctx)
+ |//-----------------------------------------------------------------------
+ |
+ |.macro savex_, a, b, c, d
++ |.if FPU
+ | stfd f..a, 16+a*8(sp)
+ | stfd f..b, 16+b*8(sp)
+ | stfd f..c, 16+c*8(sp)
+ | stfd f..d, 16+d*8(sp)
++ |.endif
+ |.endmacro
+ |
+ |->vm_exit_handler:
+@@ -2661,16 +3026,16 @@ static void build_subroutines(BuildCtx *ctx)
+ | lwz KBASE, PC2PROTO(k)(TMP1)
+ | // Setup type comparison constants.
+ | li TISNUM, LJ_TISNUM
+- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+- | stw TMP3, TMPD
++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | .FPU stw TMP3, TMPD
+ | li ZERO, 0
+- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+- | lfs TOBIT, TMPD
+- | stw TMP3, TMPD
+- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
++ | .FPU lfs TOBIT, TMPD
++ | .FPU stw TMP3, TMPD
++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | li TISNIL, LJ_TNIL
+- | stw TMP0, TONUM_HI
+- | lfs TONUM, TMPD
++ | .FPU stw TMP0, TONUM_HI
++ | .FPU lfs TONUM, TMPD
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | lwz INS, 0(PC)
+ | addi PC, PC, 4
+@@ -2708,14 +3073,42 @@ static void build_subroutines(BuildCtx *ctx)
+ |9: // Rethrow error from the right C frame.
+ | neg CARG2, CARG1
+ | mr CARG1, L
+- | bl extern lj_err_throw // (lua_State *L, int errcode)
++ | bl extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+- |// NYI: Use internal implementations of floor, ceil, trunc.
++ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
++ |
++ |.macro sfi2d, AHI, ALO
++ |.if not FPU
++ | mr. AHI, ALO
++ | bclr 12, 2 // Handle zero first.
++ | srawi TMP0, ALO, 31
++ | xor TMP1, ALO, TMP0
++ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
++ | cntlzw AHI, TMP1
++ | andix. TMP0, TMP0, 0x800 // Mask sign bit.
++ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
++ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
++ | slwi ALO, TMP1, 21
++ | or AHI, AHI, TMP0 // Sign | Exponent.
++ | srwi TMP1, TMP1, 11
++ | slwi AHI, AHI, 20 // Align left.
++ | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
++ | blr
++ |.endif
++ |.endmacro
++ |
++ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
++ |->vm_sfi2d_1:
++ | sfi2d CARG1, CARG2
++ |
++ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
++ |->vm_sfi2d_2:
++ | sfi2d CARG3, CARG4
+ |
+ |->vm_modi:
+ | divwo. TMP0, CARG1, CARG2
+@@ -2770,6 +3163,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | blr
+ |.endif
+ |
++ |->vm_next:
++ |.if JIT
++ | NYI // On big-endian.
++ |.endif
++ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -2783,21 +3181,21 @@ static void build_subroutines(BuildCtx *ctx)
+ | addi DISPATCH, r12, GG_G2DISP
+ | stw r11, CTSTATE->cb.slot
+ | stw r3, CTSTATE->cb.gpr[0]
+- | stfd f1, CTSTATE->cb.fpr[0]
++ | .FPU stfd f1, CTSTATE->cb.fpr[0]
+ | stw r4, CTSTATE->cb.gpr[1]
+- | stfd f2, CTSTATE->cb.fpr[1]
++ | .FPU stfd f2, CTSTATE->cb.fpr[1]
+ | stw r5, CTSTATE->cb.gpr[2]
+- | stfd f3, CTSTATE->cb.fpr[2]
++ | .FPU stfd f3, CTSTATE->cb.fpr[2]
+ | stw r6, CTSTATE->cb.gpr[3]
+- | stfd f4, CTSTATE->cb.fpr[3]
++ | .FPU stfd f4, CTSTATE->cb.fpr[3]
+ | stw r7, CTSTATE->cb.gpr[4]
+- | stfd f5, CTSTATE->cb.fpr[4]
++ | .FPU stfd f5, CTSTATE->cb.fpr[4]
+ | stw r8, CTSTATE->cb.gpr[5]
+- | stfd f6, CTSTATE->cb.fpr[5]
++ | .FPU stfd f6, CTSTATE->cb.fpr[5]
+ | stw r9, CTSTATE->cb.gpr[6]
+- | stfd f7, CTSTATE->cb.fpr[6]
++ | .FPU stfd f7, CTSTATE->cb.fpr[6]
+ | stw r10, CTSTATE->cb.gpr[7]
+- | stfd f8, CTSTATE->cb.fpr[7]
++ | .FPU stfd f8, CTSTATE->cb.fpr[7]
+ | addi TMP0, sp, CFRAME_SPACE+8
+ | stw TMP0, CTSTATE->cb.stack
+ | mr CARG1, CTSTATE
+@@ -2808,21 +3206,21 @@ static void build_subroutines(BuildCtx *ctx)
+ | lp BASE, L:CRET1->base
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ | lp RC, L:CRET1->top
+- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
++ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | li ZERO, 0
+ | mr L, CRET1
+- | stw TMP3, TMPD
+- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
++ | .FPU stw TMP3, TMPD
++ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | lwz LFUNC:RB, FRAME_FUNC(BASE)
+- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+- | stw TMP0, TONUM_HI
++ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
++ | .FPU stw TMP0, TONUM_HI
+ | li TISNIL, LJ_TNIL
+ | li_vmstate INTERP
+- | lfs TOBIT, TMPD
+- | stw TMP3, TMPD
++ | .FPU lfs TOBIT, TMPD
++ | .FPU stw TMP3, TMPD
+ | sub RC, RC, BASE
+ | st_vmstate
+- | lfs TONUM, TMPD
++ | .FPU lfs TONUM, TMPD
+ | ins_callt
+ |.endif
+ |
+@@ -2836,7 +3234,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | mr CARG2, RA
+ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
+ | lwz CRET1, CTSTATE->cb.gpr[0]
+- | lfd FARG1, CTSTATE->cb.fpr[0]
++ | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
+ | lwz CRET2, CTSTATE->cb.gpr[1]
+ | b ->vm_leave_unw
+ |.endif
+@@ -2870,14 +3268,14 @@ static void build_subroutines(BuildCtx *ctx)
+ | bge <1
+ |2:
+ | bney cr1, >3
+- | lfd f1, CCSTATE->fpr[0]
+- | lfd f2, CCSTATE->fpr[1]
+- | lfd f3, CCSTATE->fpr[2]
+- | lfd f4, CCSTATE->fpr[3]
+- | lfd f5, CCSTATE->fpr[4]
+- | lfd f6, CCSTATE->fpr[5]
+- | lfd f7, CCSTATE->fpr[6]
+- | lfd f8, CCSTATE->fpr[7]
++ | .FPU lfd f1, CCSTATE->fpr[0]
++ | .FPU lfd f2, CCSTATE->fpr[1]
++ | .FPU lfd f3, CCSTATE->fpr[2]
++ | .FPU lfd f4, CCSTATE->fpr[3]
++ | .FPU lfd f5, CCSTATE->fpr[4]
++ | .FPU lfd f6, CCSTATE->fpr[5]
++ | .FPU lfd f7, CCSTATE->fpr[6]
++ | .FPU lfd f8, CCSTATE->fpr[7]
+ |3:
+ | lp TMP0, CCSTATE->func
+ | lwz CARG2, CCSTATE->gpr[1]
+@@ -2894,7 +3292,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | lwz TMP2, -4(r14)
+ | lwz TMP0, 4(r14)
+ | stw CARG1, CCSTATE:TMP1->gpr[0]
+- | stfd FARG1, CCSTATE:TMP1->fpr[0]
++ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
+ | stw CARG2, CCSTATE:TMP1->gpr[1]
+ | mtlr TMP0
+ | stw CARG3, CCSTATE:TMP1->gpr[2]
+@@ -2923,19 +3321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ |.if DUALNUM
+- | lwzux TMP0, RA, BASE
++ | lwzux CARG1, RA, BASE
+ | addi PC, PC, 4
+ | lwz CARG2, 4(RA)
+- | lwzux TMP1, RD, BASE
++ | lwzux CARG3, RD, BASE
+ | lwz TMP2, -4(PC)
+- | checknum cr0, TMP0
+- | lwz CARG3, 4(RD)
++ | checknum cr0, CARG1
++ | lwz CARG4, 4(RD)
+ | decode_RD4 TMP2, TMP2
+- | checknum cr1, TMP1
+- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
++ | checknum cr1, CARG3
++ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
+ | bne cr0, >7
+ | bne cr1, >8
+- | cmpw CARG2, CARG3
++ | cmpw CARG2, CARG4
+ if (op == BC_ISLT) {
+ | bge >2
+ } else if (op == BC_ISGE) {
+@@ -2946,28 +3344,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ble >2
+ }
+ |1:
+- | add PC, PC, TMP2
++ | add PC, PC, SAVE0
+ |2:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | bgt cr0, ->vmeta_comp
+ | // RA is a number.
+- | lfd f0, 0(RA)
++ | .FPU lfd f0, 0(RA)
+ | bgt cr1, ->vmeta_comp
+ | blt cr1, >4
+ | // RA is a number, RD is an integer.
+- | tonum_i f1, CARG3
++ |.if FPU
++ | tonum_i f1, CARG4
++ |.else
++ | bl ->vm_sfi2d_2
++ |.endif
+ | b >5
+ |
+ |8: // RA is an integer, RD is not an integer.
+ | bgt cr1, ->vmeta_comp
+ | // RA is an integer, RD is a number.
++ |.if FPU
+ | tonum_i f0, CARG2
++ |.else
++ | bl ->vm_sfi2d_1
++ |.endif
+ |4:
+- | lfd f1, 0(RD)
++ | .FPU lfd f1, 0(RD)
+ |5:
++ |.if FPU
+ | fcmpu cr0, f0, f1
++ |.else
++ | blex __ledf2
++ | cmpwi CRET1, 0
++ |.endif
+ if (op == BC_ISLT) {
+ | bge <2
+ } else if (op == BC_ISGE) {
+@@ -3015,42 +3426,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ vk = op == BC_ISEQV;
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ |.if DUALNUM
+- | lwzux TMP0, RA, BASE
++ | lwzux CARG1, RA, BASE
+ | addi PC, PC, 4
+ | lwz CARG2, 4(RA)
+- | lwzux TMP1, RD, BASE
+- | checknum cr0, TMP0
+- | lwz TMP2, -4(PC)
+- | checknum cr1, TMP1
+- | decode_RD4 TMP2, TMP2
+- | lwz CARG3, 4(RD)
++ | lwzux CARG3, RD, BASE
++ | checknum cr0, CARG1
++ | lwz SAVE0, -4(PC)
++ | checknum cr1, CARG3
++ | decode_RD4 SAVE0, SAVE0
++ | lwz CARG4, 4(RD)
+ | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
+- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
+ if (vk) {
+ | ble cr7, ->BC_ISEQN_Z
+ } else {
+ | ble cr7, ->BC_ISNEN_Z
+ }
+ |.else
+- | lwzux TMP0, RA, BASE
+- | lwz TMP2, 0(PC)
++ | lwzux CARG1, RA, BASE
++ | lwz SAVE0, 0(PC)
+ | lfd f0, 0(RA)
+ | addi PC, PC, 4
+- | lwzux TMP1, RD, BASE
+- | checknum cr0, TMP0
+- | decode_RD4 TMP2, TMP2
++ | lwzux CARG3, RD, BASE
++ | checknum cr0, CARG1
++ | decode_RD4 SAVE0, SAVE0
+ | lfd f1, 0(RD)
+- | checknum cr1, TMP1
+- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
++ | checknum cr1, CARG3
++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
+ | bge cr0, >5
+ | bge cr1, >5
+ | fcmpu cr0, f0, f1
+ if (vk) {
+ | bne >1
+- | add PC, PC, TMP2
++ | add PC, PC, SAVE0
+ } else {
+ | beq >1
+- | add PC, PC, TMP2
++ | add PC, PC, SAVE0
+ }
+ |1:
+ | ins_next
+@@ -3058,36 +3469,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |5: // Either or both types are not numbers.
+ |.if not DUALNUM
+ | lwz CARG2, 4(RA)
+- | lwz CARG3, 4(RD)
++ | lwz CARG4, 4(RD)
+ |.endif
+ |.if FFI
+- | cmpwi cr7, TMP0, LJ_TCDATA
+- | cmpwi cr5, TMP1, LJ_TCDATA
++ | cmpwi cr7, CARG1, LJ_TCDATA
++ | cmpwi cr5, CARG3, LJ_TCDATA
+ |.endif
+- | not TMP3, TMP0
+- | cmplw TMP0, TMP1
+- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
++ | not TMP2, CARG1
++ | cmplw CARG1, CARG3
++ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
+ |.if FFI
+ | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
+ |.endif
+- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
++ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
+ |.if FFI
+ | beq cr7, ->vmeta_equal_cd
+ |.endif
+- | cmplw cr5, CARG2, CARG3
++ | cmplw cr5, CARG2, CARG4
+ | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
+ | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
+- | mr SAVE0, PC
++ | mr SAVE1, PC
+ | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
+ if (vk) {
+ | bne cr0, >6
+- | add PC, PC, TMP2
++ | add PC, PC, SAVE0
+ |6:
+ } else {
+ | beq cr0, >6
+- | add PC, PC, TMP2
++ | add PC, PC, SAVE0
+ |6:
+ }
+ |.if DUALNUM
+@@ -3102,6 +3513,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
++ | mr CARG3, CARG4
+ | lwz TAB:TMP2, TAB:CARG2->metatable
+ | li CARG4, 1-vk // ne = 0 or 1.
+ | cmplwi TAB:TMP2, 0
+@@ -3109,7 +3521,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lbz TMP2, TAB:TMP2->nomm
+ | andix. TMP2, TMP2, 1<<MM_eq
+ | bne <1 // Or 'no __eq' flag set?
+- | mr PC, SAVE0 // Restore old PC.
++ | mr PC, SAVE1 // Restore old PC.
+ | b ->vmeta_equal // Handle __eq metamethod.
+ break;
+
+@@ -3150,16 +3562,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ vk = op == BC_ISEQN;
+ | // RA = src*8, RD = num_const*8, JMP with RD = target
+ |.if DUALNUM
+- | lwzux TMP0, RA, BASE
++ | lwzux CARG1, RA, BASE
+ | addi PC, PC, 4
+ | lwz CARG2, 4(RA)
+- | lwzux TMP1, RD, KBASE
+- | checknum cr0, TMP0
+- | lwz TMP2, -4(PC)
+- | checknum cr1, TMP1
+- | decode_RD4 TMP2, TMP2
+- | lwz CARG3, 4(RD)
+- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
++ | lwzux CARG3, RD, KBASE
++ | checknum cr0, CARG1
++ | lwz SAVE0, -4(PC)
++ | checknum cr1, CARG3
++ | decode_RD4 SAVE0, SAVE0
++ | lwz CARG4, 4(RD)
++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+@@ -3167,7 +3579,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ }
+ | bne cr0, >7
+ | bne cr1, >8
+- | cmpw CARG2, CARG3
++ | cmpw CARG2, CARG4
+ |4:
+ |.else
+ if (vk) {
+@@ -3175,20 +3587,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ } else {
+ |->BC_ISNEN_Z: // Dummy label.
+ }
+- | lwzx TMP0, BASE, RA
++ | lwzx CARG1, BASE, RA
+ | addi PC, PC, 4
+ | lfdx f0, BASE, RA
+- | lwz TMP2, -4(PC)
++ | lwz SAVE0, -4(PC)
+ | lfdx f1, KBASE, RD
+- | decode_RD4 TMP2, TMP2
+- | checknum TMP0
+- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
++ | decode_RD4 SAVE0, SAVE0
++ | checknum CARG1
++ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
+ | bge >3
+ | fcmpu cr0, f0, f1
+ |.endif
+ if (vk) {
+ | bne >1
+- | add PC, PC, TMP2
++ | add PC, PC, SAVE0
+ |1:
+ |.if not FFI
+ |3:
+@@ -3199,13 +3611,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.if not FFI
+ |3:
+ |.endif
+- | add PC, PC, TMP2
++ | add PC, PC, SAVE0
+ |2:
+ }
+ | ins_next
+ |.if FFI
+ |3:
+- | cmpwi TMP0, LJ_TCDATA
++ | cmpwi CARG1, LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ | b <1
+ |.endif
+@@ -3213,18 +3625,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |7: // RA is not an integer.
+ | bge cr0, <3
+ | // RA is a number.
+- | lfd f0, 0(RA)
++ | .FPU lfd f0, 0(RA)
+ | blt cr1, >1
+ | // RA is a number, RD is an integer.
+- | tonum_i f1, CARG3
++ |.if FPU
++ | tonum_i f1, CARG4
++ |.else
++ | bl ->vm_sfi2d_2
++ |.endif
+ | b >2
+ |
+ |8: // RA is an integer, RD is a number.
++ |.if FPU
+ | tonum_i f0, CARG2
++ |.else
++ | bl ->vm_sfi2d_1
++ |.endif
+ |1:
+- | lfd f1, 0(RD)
++ | .FPU lfd f1, 0(RD)
+ |2:
++ |.if FPU
+ | fcmpu cr0, f0, f1
++ |.else
++ | blex __ledf2
++ | cmpwi CRET1, 0
++ |.endif
+ | b <4
+ |.endif
+ break;
+@@ -3279,7 +3704,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | add PC, PC, TMP2
+ } else {
+ | li TMP1, LJ_TFALSE
++ |.if FPU
+ | lfdx f0, BASE, RD
++ |.else
++ | lwzux CARG1, RD, BASE
++ | lwz CARG2, 4(RD)
++ |.endif
+ | cmplw TMP0, TMP1
+ if (op == BC_ISTC) {
+ | bge >1
+@@ -3288,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ }
+ | addis PC, PC, -(BCBIAS_J*4 >> 16)
+ | decode_RD4 TMP2, INS
++ |.if FPU
+ | stfdx f0, BASE, RA
++ |.else
++ | stwux CARG1, RA, BASE
++ | stw CARG2, 4(RA)
++ |.endif
+ | add PC, PC, TMP2
+ |1:
+ }
+@@ -3323,8 +3758,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_MOV:
+ | // RA = dst*8, RD = src*8
+ | ins_next1
++ |.if FPU
+ | lfdx f0, BASE, RD
+ | stfdx f0, BASE, RA
++ |.else
++ | lwzux TMP0, RD, BASE
++ | lwz TMP1, 4(RD)
++ | stwux TMP0, RA, BASE
++ | stw TMP1, 4(RA)
++ |.endif
+ | ins_next2
+ break;
+ case BC_NOT:
+@@ -3426,44 +3868,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+- | lwzx TMP1, BASE, RB
++ | lwzx CARG1, BASE, RB
+ | .if DUALNUM
+- | lwzx TMP2, KBASE, RC
++ | lwzx CARG3, KBASE, RC
+ | .endif
++ | .if FPU
+ | lfdx f14, BASE, RB
+ | lfdx f15, KBASE, RC
++ | .else
++ | add TMP1, BASE, RB
++ | add TMP2, KBASE, RC
++ | lwz CARG2, 4(TMP1)
++ | lwz CARG4, 4(TMP2)
++ | .endif
+ | .if DUALNUM
+- | checknum cr0, TMP1
+- | checknum cr1, TMP2
++ | checknum cr0, CARG1
++ | checknum cr1, CARG3
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | bge ->vmeta_arith_vn
+ | .else
+- | checknum TMP1; bge ->vmeta_arith_vn
++ | checknum CARG1; bge ->vmeta_arith_vn
+ | .endif
+ || break;
+ ||case 1:
+- | lwzx TMP1, BASE, RB
++ | lwzx CARG1, BASE, RB
+ | .if DUALNUM
+- | lwzx TMP2, KBASE, RC
++ | lwzx CARG3, KBASE, RC
+ | .endif
++ | .if FPU
+ | lfdx f15, BASE, RB
+ | lfdx f14, KBASE, RC
++ | .else
++ | add TMP1, BASE, RB
++ | add TMP2, KBASE, RC
++ | lwz CARG2, 4(TMP1)
++ | lwz CARG4, 4(TMP2)
++ | .endif
+ | .if DUALNUM
+- | checknum cr0, TMP1
+- | checknum cr1, TMP2
++ | checknum cr0, CARG1
++ | checknum cr1, CARG3
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | bge ->vmeta_arith_nv
+ | .else
+- | checknum TMP1; bge ->vmeta_arith_nv
++ | checknum CARG1; bge ->vmeta_arith_nv
+ | .endif
+ || break;
+ ||default:
+- | lwzx TMP1, BASE, RB
+- | lwzx TMP2, BASE, RC
++ | lwzx CARG1, BASE, RB
++ | lwzx CARG3, BASE, RC
++ | .if FPU
+ | lfdx f14, BASE, RB
+ | lfdx f15, BASE, RC
+- | checknum cr0, TMP1
+- | checknum cr1, TMP2
++ | .else
++ | add TMP1, BASE, RB
++ | add TMP2, BASE, RC
++ | lwz CARG2, 4(TMP1)
++ | lwz CARG4, 4(TMP2)
++ | .endif
++ | checknum cr0, CARG1
++ | checknum cr1, CARG3
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | bge ->vmeta_arith_vv
+ || break;
+@@ -3497,48 +3960,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | fsub a, b, a // b - floor(b/c)*c
+ |.endmacro
+ |
++ |.macro sfpmod
++ |->BC_MODVN_Z:
++ | stw CARG1, SFSAVE_1
++ | stw CARG2, SFSAVE_2
++ | mr SAVE0, CARG3
++ | mr SAVE1, CARG4
++ | blex __divdf3
++ | blex floor
++ | mr CARG3, SAVE0
++ | mr CARG4, SAVE1
++ | blex __muldf3
++ | mr CARG3, CRET1
++ | mr CARG4, CRET2
++ | lwz CARG1, SFSAVE_1
++ | lwz CARG2, SFSAVE_2
++ | blex __subdf3
++ |.endmacro
++ |
+ |.macro ins_arithfp, fpins
+ | ins_arithpre
+ |.if "fpins" == "fpmod_"
+ | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
+- |.else
++ |.elif FPU
+ | fpins f0, f14, f15
+ | ins_next1
+ | stfdx f0, BASE, RA
+ | ins_next2
++ |.else
++ | blex __divdf3 // Only soft-float div uses this macro.
++ | ins_next1
++ | stwux CRET1, RA, BASE
++ | stw CRET2, 4(RA)
++ | ins_next2
+ |.endif
+ |.endmacro
+ |
+- |.macro ins_arithdn, intins, fpins
++ |.macro ins_arithdn, intins, fpins, fpcall
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+- | lwzux TMP1, RB, BASE
+- | lwzux TMP2, RC, KBASE
+- | lwz CARG1, 4(RB)
+- | checknum cr0, TMP1
+- | lwz CARG2, 4(RC)
++ | lwzux CARG1, RB, BASE
++ | lwzux CARG3, RC, KBASE
++ | lwz CARG2, 4(RB)
++ | checknum cr0, CARG1
++ | lwz CARG4, 4(RC)
++ | checknum cr1, CARG3
+ || break;
+ ||case 1:
+- | lwzux TMP1, RB, BASE
+- | lwzux TMP2, RC, KBASE
+- | lwz CARG2, 4(RB)
+- | checknum cr0, TMP1
+- | lwz CARG1, 4(RC)
++ | lwzux CARG3, RB, BASE
++ | lwzux CARG1, RC, KBASE
++ | lwz CARG4, 4(RB)
++ | checknum cr0, CARG3
++ | lwz CARG2, 4(RC)
++ | checknum cr1, CARG1
+ || break;
+ ||default:
+- | lwzux TMP1, RB, BASE
+- | lwzux TMP2, RC, BASE
+- | lwz CARG1, 4(RB)
+- | checknum cr0, TMP1
+- | lwz CARG2, 4(RC)
++ | lwzux CARG1, RB, BASE
++ | lwzux CARG3, RC, BASE
++ | lwz CARG2, 4(RB)
++ | checknum cr0, CARG1
++ | lwz CARG4, 4(RC)
++ | checknum cr1, CARG3
+ || break;
+ ||}
+- | checknum cr1, TMP2
+ | bne >5
+ | bne cr1, >5
+- | intins CARG1, CARG1, CARG2
++ |.if "intins" == "intmod"
++ | mr CARG1, CARG2
++ | mr CARG2, CARG4
++ |.endif
++ | intins CARG1, CARG2, CARG4
+ | bso >4
+ |1:
+ | ins_next1
+@@ -3550,29 +4043,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | checkov TMP0, <1 // Ignore unrelated overflow.
+ | ins_arithfallback b
+ |5: // FP variant.
++ |.if FPU
+ ||if (vk == 1) {
+ | lfd f15, 0(RB)
+- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | lfd f14, 0(RC)
+ ||} else {
+ | lfd f14, 0(RB)
+- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | lfd f15, 0(RC)
+ ||}
++ |.endif
++ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | ins_arithfallback bge
+ |.if "fpins" == "fpmod_"
+ | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
+ |.else
++ |.if FPU
+ | fpins f0, f14, f15
+- | ins_next1
+ | stfdx f0, BASE, RA
++ |.else
++ |.if "fpcall" == "sfpmod"
++ | sfpmod
++ |.else
++ | blex fpcall
++ |.endif
++ | stwux CRET1, RA, BASE
++ | stw CRET2, 4(RA)
++ |.endif
++ | ins_next1
+ | b <2
+ |.endif
+ |.endmacro
+ |
+- |.macro ins_arith, intins, fpins
++ |.macro ins_arith, intins, fpins, fpcall
+ |.if DUALNUM
+- | ins_arithdn intins, fpins
++ | ins_arithdn intins, fpins, fpcall
+ |.else
+ | ins_arithfp fpins
+ |.endif
+@@ -3583,13 +4087,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.macro addo32., y, a, b
+ | // Need to check overflow for (a<<32) + (b<<32).
+ | rldicr TMP0, a, 32, 31
+- | rldicr TMP3, b, 32, 31
+- | addo. TMP0, TMP0, TMP3
++ | rldicr TMP1, b, 32, 31
++ | addo. TMP0, TMP0, TMP1
+ | add y, a, b
+ |.endmacro
+- | ins_arith addo32., fadd
++ | ins_arith addo32., fadd, __adddf3
+ |.else
+- | ins_arith addo., fadd
++ | ins_arith addo., fadd, __adddf3
+ |.endif
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+@@ -3597,40 +4101,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.macro subo32., y, a, b
+ | // Need to check overflow for (a<<32) - (b<<32).
+ | rldicr TMP0, a, 32, 31
+- | rldicr TMP3, b, 32, 31
+- | subo. TMP0, TMP0, TMP3
++ | rldicr TMP1, b, 32, 31
++ | subo. TMP0, TMP0, TMP1
+ | sub y, a, b
+ |.endmacro
+- | ins_arith subo32., fsub
++ | ins_arith subo32., fsub, __subdf3
+ |.else
+- | ins_arith subo., fsub
++ | ins_arith subo., fsub, __subdf3
+ |.endif
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+- | ins_arith mullwo., fmul
++ | ins_arith mullwo., fmul, __muldf3
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithfp fdiv
+ break;
+ case BC_MODVN:
+- | ins_arith intmod, fpmod
++ | ins_arith intmod, fpmod, sfpmod
+ break;
+ case BC_MODNV: case BC_MODVV:
+- | ins_arith intmod, fpmod_
++ | ins_arith intmod, fpmod_, sfpmod
+ break;
+ case BC_POW:
+ | // NYI: (partial) integer arithmetic.
+- | lwzx TMP1, BASE, RB
++ | lwzx CARG1, BASE, RB
++ | lwzx CARG3, BASE, RC
++ |.if FPU
+ | lfdx FARG1, BASE, RB
+- | lwzx TMP2, BASE, RC
+ | lfdx FARG2, BASE, RC
+- | checknum cr0, TMP1
+- | checknum cr1, TMP2
++ |.else
++ | add TMP1, BASE, RB
++ | add TMP2, BASE, RC
++ | lwz CARG2, 4(TMP1)
++ | lwz CARG4, 4(TMP2)
++ |.endif
++ | checknum cr0, CARG1
++ | checknum cr1, CARG3
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | bge ->vmeta_arith_vv
+ | blex pow
+ | ins_next1
++ |.if FPU
+ | stfdx FARG1, BASE, RA
++ |.else
++ | stwux CARG1, RA, BASE
++ | stw CARG2, 4(RA)
++ |.endif
+ | ins_next2
+ break;
+
+@@ -3650,8 +4166,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lp BASE, L->base
+ | bne ->vmeta_binop
+ | ins_next1
++ |.if FPU
+ | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
+ | stfdx f0, BASE, RA
++ |.else
++ | lwzux TMP0, SAVE0, BASE
++ | lwz TMP1, 4(SAVE0)
++ | stwux TMP0, RA, BASE
++ | stw TMP1, 4(RA)
++ |.endif
+ | ins_next2
+ break;
+
+@@ -3714,8 +4237,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_KNUM:
+ | // RA = dst*8, RD = num_const*8
+ | ins_next1
++ |.if FPU
+ | lfdx f0, KBASE, RD
+ | stfdx f0, BASE, RA
++ |.else
++ | lwzux TMP0, RD, KBASE
++ | lwz TMP1, 4(RD)
++ | stwux TMP0, RA, BASE
++ | stw TMP1, 4(RA)
++ |.endif
+ | ins_next2
+ break;
+ case BC_KPRI:
+@@ -3748,8 +4278,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lwzx UPVAL:RB, LFUNC:RB, RD
+ | ins_next1
+ | lwz TMP1, UPVAL:RB->v
++ |.if FPU
+ | lfd f0, 0(TMP1)
+ | stfdx f0, BASE, RA
++ |.else
++ | lwz TMP2, 0(TMP1)
++ | lwz TMP3, 4(TMP1)
++ | stwux TMP2, RA, BASE
++ | stw TMP3, 4(RA)
++ |.endif
+ | ins_next2
+ break;
+ case BC_USETV:
+@@ -3757,14 +4294,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lwz LFUNC:RB, FRAME_FUNC(BASE)
+ | srwi RA, RA, 1
+ | addi RA, RA, offsetof(GCfuncL, uvptr)
++ |.if FPU
+ | lfdux f0, RD, BASE
++ |.else
++ | lwzux CARG1, RD, BASE
++ | lwz CARG3, 4(RD)
++ |.endif
+ | lwzx UPVAL:RB, LFUNC:RB, RA
+ | lbz TMP3, UPVAL:RB->marked
+ | lwz CARG2, UPVAL:RB->v
+ | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
+ | lbz TMP0, UPVAL:RB->closed
+ | lwz TMP2, 0(RD)
++ |.if FPU
+ | stfd f0, 0(CARG2)
++ |.else
++ | stw CARG1, 0(CARG2)
++ | stw CARG3, 4(CARG2)
++ |.endif
+ | cmplwi cr1, TMP0, 0
+ | lwz TMP1, 4(RD)
+ | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
+@@ -3820,11 +4367,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lwz LFUNC:RB, FRAME_FUNC(BASE)
+ | srwi RA, RA, 1
+ | addi RA, RA, offsetof(GCfuncL, uvptr)
++ |.if FPU
+ | lfdx f0, KBASE, RD
++ |.else
++ | lwzux TMP2, RD, KBASE
++ | lwz TMP3, 4(RD)
++ |.endif
+ | lwzx UPVAL:RB, LFUNC:RB, RA
+ | ins_next1
+ | lwz TMP1, UPVAL:RB->v
++ |.if FPU
+ | stfd f0, 0(TMP1)
++ |.else
++ | stw TMP2, 0(TMP1)
++ | stw TMP3, 4(TMP1)
++ |.endif
+ | ins_next2
+ break;
+ case BC_USETP:
+@@ -3972,11 +4529,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.endif
+ | ble ->vmeta_tgetv // Integer key and in array part?
+ | lwzx TMP0, TMP1, TMP2
++ |.if FPU
+ | lfdx f14, TMP1, TMP2
++ |.else
++ | lwzux SAVE0, TMP1, TMP2
++ | lwz SAVE1, 4(TMP1)
++ |.endif
+ | checknil TMP0; beq >2
+ |1:
+ | ins_next1
++ |.if FPU
+ | stfdx f14, BASE, RA
++ |.else
++ | stwux SAVE0, RA, BASE
++ | stw SAVE1, 4(RA)
++ |.endif
+ | ins_next2
+ |
+ |2: // Check for __index if table value is nil.
+@@ -4007,9 +4574,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TGETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | lwz TMP0, TAB:RB->hmask
+- | lwz TMP1, STR:RC->hash
++ | lwz TMP1, STR:RC->sid
+ | lwz NODE:TMP2, TAB:RB->node
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | slwi TMP0, TMP1, 5
+ | slwi TMP1, TMP1, 3
+ | sub TMP1, TMP0, TMP1
+@@ -4052,12 +4619,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lwz TMP1, TAB:RB->asize
+ | lwz TMP2, TAB:RB->array
+ | cmplw TMP0, TMP1; bge ->vmeta_tgetb
++ |.if FPU
+ | lwzx TMP1, TMP2, RC
+ | lfdx f0, TMP2, RC
++ |.else
++ | lwzux TMP1, TMP2, RC
++ | lwz TMP3, 4(TMP2)
++ |.endif
+ | checknil TMP1; beq >5
+ |1:
+ | ins_next1
++ |.if FPU
+ | stfdx f0, BASE, RA
++ |.else
++ | stwux TMP1, RA, BASE
++ | stw TMP3, 4(RA)
++ |.endif
+ | ins_next2
+ |
+ |5: // Check for __index if table value is nil.
+@@ -4087,10 +4664,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cmplw TMP0, CARG2
+ | slwi TMP2, CARG2, 3
+ | ble ->vmeta_tgetr // In array part?
++ |.if FPU
+ | lfdx f14, TMP1, TMP2
++ |.else
++ | lwzux SAVE0, TMP2, TMP1
++ | lwz SAVE1, 4(TMP2)
++ |.endif
+ |->BC_TGETR_Z:
+ | ins_next1
++ |.if FPU
+ | stfdx f14, BASE, RA
++ |.else
++ | stwux SAVE0, RA, BASE
++ | stw SAVE1, 4(RA)
++ |.endif
+ | ins_next2
+ break;
+
+@@ -4131,11 +4718,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | ble ->vmeta_tsetv // Integer key and in array part?
+ | lwzx TMP2, TMP1, TMP0
+ | lbz TMP3, TAB:RB->marked
++ |.if FPU
+ | lfdx f14, BASE, RA
++ |.else
++ | add SAVE1, BASE, RA
++ | lwz SAVE0, 0(SAVE1)
++ | lwz SAVE1, 4(SAVE1)
++ |.endif
+ | checknil TMP2; beq >3
+ |1:
+ | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
++ |.if FPU
+ | stfdx f14, TMP1, TMP0
++ |.else
++ | stwux SAVE0, TMP1, TMP0
++ | stw SAVE1, 4(TMP1)
++ |.endif
+ | bne >7
+ |2:
+ | ins_next
+@@ -4172,11 +4770,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |->BC_TSETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
+ | lwz TMP0, TAB:RB->hmask
+- | lwz TMP1, STR:RC->hash
++ | lwz TMP1, STR:RC->sid
+ | lwz NODE:TMP2, TAB:RB->node
+ | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
+- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
++ |.if FPU
+ | lfdx f14, BASE, RA
++ |.else
++ | add CARG2, BASE, RA
++ | lwz SAVE0, 0(CARG2)
++ | lwz SAVE1, 4(CARG2)
++ |.endif
+ | slwi TMP0, TMP1, 5
+ | slwi TMP1, TMP1, 3
+ | sub TMP1, TMP0, TMP1
+@@ -4192,7 +4796,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | checknil CARG2; beq >4 // Key found, but nil value?
+ |2:
+ | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
++ |.if FPU
+ | stfd f14, NODE:TMP2->val
++ |.else
++ | stw SAVE0, NODE:TMP2->val.u32.hi
++ | stw SAVE1, NODE:TMP2->val.u32.lo
++ |.endif
+ | bne >7
+ |3:
+ | ins_next
+@@ -4231,7 +4840,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Returns TValue *.
+ | lp BASE, L->base
++ |.if FPU
+ | stfd f14, 0(CRET1)
++ |.else
++ | stw SAVE0, 0(CRET1)
++ | stw SAVE1, 4(CRET1)
++ |.endif
+ | b <3 // No 2nd write barrier needed.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+@@ -4248,13 +4862,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | lwz TMP2, TAB:RB->array
+ | lbz TMP3, TAB:RB->marked
+ | cmplw TMP0, TMP1
++ |.if FPU
+ | lfdx f14, BASE, RA
++ |.else
++ | add CARG2, BASE, RA
++ | lwz SAVE0, 0(CARG2)
++ | lwz SAVE1, 4(CARG2)
++ |.endif
+ | bge ->vmeta_tsetb
+ | lwzx TMP1, TMP2, RC
+ | checknil TMP1; beq >5
+ |1:
+ | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
++ |.if FPU
+ | stfdx f14, TMP2, RC
++ |.else
++ | stwux SAVE0, RC, TMP2
++ | stw SAVE1, 4(RC)
++ |.endif
+ | bne >7
+ |2:
+ | ins_next
+@@ -4294,10 +4919,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |2:
+ | cmplw TMP0, CARG3
+ | slwi TMP2, CARG3, 3
++ |.if FPU
+ | lfdx f14, BASE, RA
++ |.else
++ | lwzux SAVE0, RA, BASE
++ | lwz SAVE1, 4(RA)
++ |.endif
+ | ble ->vmeta_tsetr // In array part?
+ | ins_next1
++ |.if FPU
+ | stfdx f14, TMP1, TMP2
++ |.else
++ | stwux SAVE0, TMP1, TMP2
++ | stw SAVE1, 4(TMP1)
++ |.endif
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+@@ -4327,10 +4962,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | add TMP1, TMP1, TMP0
+ | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |3: // Copy result slots to table.
++ |.if FPU
+ | lfd f0, 0(RA)
++ |.else
++ | lwz SAVE0, 0(RA)
++ | lwz SAVE1, 4(RA)
++ |.endif
+ | addi RA, RA, 8
+ | cmpw cr1, RA, TMP2
++ |.if FPU
+ | stfd f0, 0(TMP1)
++ |.else
++ | stw SAVE0, 0(TMP1)
++ | stw SAVE1, 4(TMP1)
++ |.endif
+ | addi TMP1, TMP1, 8
+ | blt cr1, <3
+ | bne >7
+@@ -4397,9 +5042,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | beq cr1, >3
+ |2:
+ | addi TMP3, TMP2, 8
++ |.if FPU
+ | lfdx f0, RA, TMP2
++ |.else
++ | add CARG3, RA, TMP2
++ | lwz CARG1, 0(CARG3)
++ | lwz CARG2, 4(CARG3)
++ |.endif
+ | cmplw cr1, TMP3, NARGS8:RC
++ |.if FPU
+ | stfdx f0, BASE, TMP2
++ |.else
++ | stwux CARG1, TMP2, BASE
++ | stw CARG2, 4(TMP2)
++ |.endif
+ | mr TMP2, TMP3
+ | bne cr1, <2
+ |3:
+@@ -4432,14 +5088,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | add BASE, BASE, RA
+ | lwz TMP1, -24(BASE)
+ | lwz LFUNC:RB, -20(BASE)
++ |.if FPU
+ | lfd f1, -8(BASE)
+ | lfd f0, -16(BASE)
++ |.else
++ | lwz CARG1, -8(BASE)
++ | lwz CARG2, -4(BASE)
++ | lwz CARG3, -16(BASE)
++ | lwz CARG4, -12(BASE)
++ |.endif
+ | stw TMP1, 0(BASE) // Copy callable.
+ | stw LFUNC:RB, 4(BASE)
+ | checkfunc TMP1
+- | stfd f1, 16(BASE) // Copy control var.
+ | li NARGS8:RC, 16 // Iterators get 2 arguments.
++ |.if FPU
++ | stfd f1, 16(BASE) // Copy control var.
+ | stfdu f0, 8(BASE) // Copy state.
++ |.else
++ | stw CARG1, 16(BASE) // Copy control var.
++ | stw CARG2, 20(BASE)
++ | stwu CARG3, 8(BASE) // Copy state.
++ | stw CARG4, 4(BASE)
++ |.endif
+ | bne ->vmeta_call
+ | ins_call
+ break;
+@@ -4447,8 +5117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ case BC_ITERN:
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+ |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
++ | // NYI on big-endian
+ |.endif
++ |->vm_IITERN:
+ | add RA, BASE, RA
+ | lwz TAB:RB, -12(RA)
+ | lwz RC, -4(RA) // Get index from control var.
+@@ -4460,7 +5131,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | slwi TMP3, RC, 3
+ | bge >5 // Index points after array part?
+ | lwzx TMP2, TMP1, TMP3
++ |.if FPU
+ | lfdx f0, TMP1, TMP3
++ |.else
++ | lwzux CARG1, TMP3, TMP1
++ | lwz CARG2, 4(TMP3)
++ |.endif
+ | checknil TMP2
+ | lwz INS, -4(PC)
+ | beq >4
+@@ -4472,7 +5148,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |.endif
+ | addi RC, RC, 1
+ | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
++ |.if FPU
+ | stfd f0, 8(RA)
++ |.else
++ | stw CARG1, 8(RA)
++ | stw CARG2, 12(RA)
++ |.endif
+ | decode_RD4 TMP1, INS
+ | stw RC, -4(RA) // Update control var.
+ | add PC, TMP1, TMP3
+@@ -4497,17 +5178,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | slwi RB, RC, 3
+ | sub TMP3, TMP3, RB
+ | lwzx RB, TMP2, TMP3
++ |.if FPU
+ | lfdx f0, TMP2, TMP3
++ |.else
++ | add CARG3, TMP2, TMP3
++ | lwz CARG1, 0(CARG3)
++ | lwz CARG2, 4(CARG3)
++ |.endif
+ | add NODE:TMP3, TMP2, TMP3
+ | checknil RB
+ | lwz INS, -4(PC)
+ | beq >7
++ |.if FPU
+ | lfd f1, NODE:TMP3->key
++ |.else
++ | lwz CARG3, NODE:TMP3->key.u32.hi
++ | lwz CARG4, NODE:TMP3->key.u32.lo
++ |.endif
+ | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
++ |.if FPU
+ | stfd f0, 8(RA)
++ |.else
++ | stw CARG1, 8(RA)
++ | stw CARG2, 12(RA)
++ |.endif
+ | add RC, RC, TMP0
+ | decode_RD4 TMP1, INS
++ |.if FPU
+ | stfd f1, 0(RA)
++ |.else
++ | stw CARG3, 0(RA)
++ | stw CARG4, 4(RA)
++ |.endif
+ | addi RC, RC, 1
+ | add PC, TMP1, TMP2
+ | stw RC, -4(RA) // Update control var.
+@@ -4536,8 +5238,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
+ | add TMP3, PC, TMP0
+ | bne cr0, >5
+- | lus TMP1, 0xfffe
+- | ori TMP1, TMP1, 0x7fff
++ | lus TMP1, (LJ_KEYINDEX >> 16)
++ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
+ | stw ZERO, -4(RA) // Initialize control var.
+ | stw TMP1, -8(RA)
+ | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
+@@ -4548,6 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | li TMP1, BC_ITERC
+ | stb TMP0, -1(PC)
+ | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
++ | // NYI on big-endian: unpatch JLOOP.
+ | stb TMP1, 3(PC)
+ | b <1
+ break;
+@@ -4573,9 +5276,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | subi TMP2, TMP2, 16
+ | ble >2 // No vararg slots?
+ |1: // Copy vararg slots to destination slots.
++ |.if FPU
+ | lfd f0, 0(RC)
++ |.else
++ | lwz CARG1, 0(RC)
++ | lwz CARG2, 4(RC)
++ |.endif
+ | addi RC, RC, 8
++ |.if FPU
+ | stfd f0, 0(RA)
++ |.else
++ | stw CARG1, 0(RA)
++ | stw CARG2, 4(RA)
++ |.endif
+ | cmplw RA, TMP2
+ | cmplw cr1, RC, TMP3
+ | bge >3 // All destination slots filled?
+@@ -4598,9 +5311,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | addi MULTRES, TMP1, 8
+ | bgt >7
+ |6:
++ |.if FPU
+ | lfd f0, 0(RC)
++ |.else
++ | lwz CARG1, 0(RC)
++ | lwz CARG2, 4(RC)
++ |.endif
+ | addi RC, RC, 8
++ |.if FPU
+ | stfd f0, 0(RA)
++ |.else
++ | stw CARG1, 0(RA)
++ | stw CARG2, 4(RA)
++ |.endif
+ | cmplw RC, TMP3
+ | addi RA, RA, 8
+ | blt <6 // More vararg slots?
+@@ -4651,14 +5374,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | li TMP1, 0
+ |2:
+ | addi TMP3, TMP1, 8
++ |.if FPU
+ | lfdx f0, RA, TMP1
++ |.else
++ | add CARG3, RA, TMP1
++ | lwz CARG1, 0(CARG3)
++ | lwz CARG2, 4(CARG3)
++ |.endif
+ | cmpw TMP3, RC
++ |.if FPU
+ | stfdx f0, TMP2, TMP1
++ |.else
++ | add CARG3, TMP2, TMP1
++ | stw CARG1, 0(CARG3)
++ | stw CARG2, 4(CARG3)
++ |.endif
+ | beq >3
+ | addi TMP1, TMP3, 8
++ |.if FPU
+ | lfdx f1, RA, TMP3
++ |.else
++ | add CARG3, RA, TMP3
++ | lwz CARG1, 0(CARG3)
++ | lwz CARG2, 4(CARG3)
++ |.endif
+ | cmpw TMP1, RC
++ |.if FPU
+ | stfdx f1, TMP2, TMP3
++ |.else
++ | add CARG3, TMP2, TMP3
++ | stw CARG1, 0(CARG3)
++ | stw CARG2, 4(CARG3)
++ |.endif
+ | bne <2
+ |3:
+ |5:
+@@ -4700,8 +5447,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | subi TMP2, BASE, 8
+ | decode_RB8 RB, INS
+ if (op == BC_RET1) {
++ |.if FPU
+ | lfd f0, 0(RA)
+ | stfd f0, 0(TMP2)
++ |.else
++ | lwz CARG1, 0(RA)
++ | lwz CARG2, 4(RA)
++ | stw CARG1, 0(TMP2)
++ | stw CARG2, 4(TMP2)
++ |.endif
+ }
+ |5:
+ | cmplw RB, RD
+@@ -4762,11 +5516,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ |4:
+ | stw CARG1, FORL_IDX*8+4(RA)
+ } else {
+- | lwz TMP3, FORL_STEP*8(RA)
++ | lwz SAVE0, FORL_STEP*8(RA)
+ | lwz CARG3, FORL_STEP*8+4(RA)
+ | lwz TMP2, FORL_STOP*8(RA)
+ | lwz CARG2, FORL_STOP*8+4(RA)
+- | cmplw cr7, TMP3, TISNUM
++ | cmplw cr7, SAVE0, TISNUM
+ | cmplw cr1, TMP2, TISNUM
+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
+ | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
+@@ -4809,41 +5563,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ if (vk) {
+ |.if DUALNUM
+ |9: // FP loop.
++ |.if FPU
+ | lfd f1, FORL_IDX*8(RA)
+ |.else
++ | lwz CARG1, FORL_IDX*8(RA)
++ | lwz CARG2, FORL_IDX*8+4(RA)
++ |.endif
++ |.else
+ | lfdux f1, RA, BASE
+ |.endif
++ |.if FPU
+ | lfd f3, FORL_STEP*8(RA)
+ | lfd f2, FORL_STOP*8(RA)
+- | lwz TMP3, FORL_STEP*8(RA)
+ | fadd f1, f1, f3
+ | stfd f1, FORL_IDX*8(RA)
++ |.else
++ | lwz CARG3, FORL_STEP*8(RA)
++ | lwz CARG4, FORL_STEP*8+4(RA)
++ | mr SAVE1, RD
++ | blex __adddf3
++ | mr RD, SAVE1
++ | stw CRET1, FORL_IDX*8(RA)
++ | stw CRET2, FORL_IDX*8+4(RA)
++ | lwz CARG3, FORL_STOP*8(RA)
++ | lwz CARG4, FORL_STOP*8+4(RA)
++ |.endif
++ | lwz SAVE0, FORL_STEP*8(RA)
+ } else {
+ |.if DUALNUM
+ |9: // FP loop.
+ |.else
+ | lwzux TMP1, RA, BASE
+- | lwz TMP3, FORL_STEP*8(RA)
++ | lwz SAVE0, FORL_STEP*8(RA)
+ | lwz TMP2, FORL_STOP*8(RA)
+ | cmplw cr0, TMP1, TISNUM
+- | cmplw cr7, TMP3, TISNUM
++ | cmplw cr7, SAVE0, TISNUM
+ | cmplw cr1, TMP2, TISNUM
+ |.endif
++ |.if FPU
+ | lfd f1, FORL_IDX*8(RA)
++ |.else
++ | lwz CARG1, FORL_IDX*8(RA)
++ | lwz CARG2, FORL_IDX*8+4(RA)
++ |.endif
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
++ |.if FPU
+ | lfd f2, FORL_STOP*8(RA)
++ |.else
++ | lwz CARG3, FORL_STOP*8(RA)
++ | lwz CARG4, FORL_STOP*8+4(RA)
++ |.endif
+ | bge ->vmeta_for
+ }
+- | cmpwi cr6, TMP3, 0
++ | cmpwi cr6, SAVE0, 0
+ if (op != BC_JFORL) {
+ | srwi RD, RD, 1
+ }
++ |.if FPU
+ | stfd f1, FORL_EXT*8(RA)
++ |.else
++ | stw CARG1, FORL_EXT*8(RA)
++ | stw CARG2, FORL_EXT*8+4(RA)
++ |.endif
+ if (op != BC_JFORL) {
+ | add RD, PC, RD
+ }
++ |.if FPU
+ | fcmpu cr0, f1, f2
++ |.else
++ | mr SAVE1, RD
++ | blex __ledf2
++ | cmpwi CRET1, 0
++ | mr RD, SAVE1
++ |.endif
+ if (op == BC_JFORI) {
+ | addis PC, RD, -(BCBIAS_J*4 >> 16)
+ }
+diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
+index a003fb4f..fdffd4b6 100644
+--- a/src/vm_x64.dasc
++++ b/src/vm_x64.dasc
+@@ -1,6 +1,6 @@
+ |// Low-level VM code for x64 CPUs in LJ_GC64 mode.
+ |// Bytecode interpreter, fast functions and helper functions.
+-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ |
+ |.arch x64
+ |.section code_op, code_sub
+@@ -1230,7 +1230,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | mov [BASE-16], TAB:RC // Store metatable as default result.
+ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
+ | mov RAd, TAB:RB->hmask
+- | and RAd, STR:RC->hash
++ | and RAd, STR:RC->sid
+ | settp STR:RC, LJ_TSTR
+ | imul RAd, #NODE
+ | add NODE:RA, TAB:RB->node
+@@ -1346,44 +1346,28 @@ static void build_subroutines(BuildCtx *ctx)
+ |.ffunc_1 next
+ | je >2 // Missing 2nd arg?
+ |1:
+- |.if X64WIN
+- | mov RA, [BASE]
+- | checktab RA, ->fff_fallback
+- |.else
+- | mov CARG2, [BASE]
+- | checktab CARG2, ->fff_fallback
+- |.endif
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Add frame since C call can throw.
+- | mov L:RB->top, BASE // Dummy frame length is ok.
++ | mov CARG1, [BASE]
+ | mov PC, [BASE-8]
++ | checktab CARG1, ->fff_fallback
++ | mov RB, BASE // Save BASE.
+ |.if X64WIN
+- | lea CARG3, [BASE+8]
+- | mov CARG2, RA // Caveat: CARG2 == BASE.
+- | mov CARG1, L:RB
++ | lea CARG3, [BASE-16]
++ | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE.
+ |.else
+- | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
+- | mov CARG1, L:RB
++ | lea CARG2, [BASE+8]
++ | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE.
+ |.endif
+- | mov SAVE_PC, PC // Needed for ITERN fallback.
+- | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- | // Flag returned in eax (RD).
+- | mov BASE, L:RB->base
+- | test RDd, RDd; jz >3 // End of traversal?
+- | // Copy key and value to results.
+- | mov RB, [BASE+8]
+- | mov RD, [BASE+16]
+- | mov [BASE-16], RB
+- | mov [BASE-8], RD
+- |->fff_res2:
+- | mov RDd, 1+2
+- | jmp ->fff_res
++ | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ | // 1=found, 0=end, -1=error returned in eax (RD).
++ | mov BASE, RB // Restore BASE.
++ | test RDd, RDd; jg ->fff_res2 // Found key/value.
++ | js ->fff_fallback_2 // Invalid key.
++ | // End of traversal: return nil.
++ | mov aword [BASE-16], LJ_TNIL
++ | jmp ->fff_res1
+ |2: // Set missing 2nd arg to nil.
+ | mov aword [BASE+8], LJ_TNIL
+ | jmp <1
+- |3: // End of traversal: return nil.
+- | mov aword [BASE-16], LJ_TNIL
+- | jmp ->fff_res1
+ |
+ |.ffunc_1 pairs
+ | mov TAB:RB, [BASE]
+@@ -1432,7 +1416,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | // Copy array slot.
+ | mov RB, [RD]
+ | mov [BASE-8], RB
+- | jmp ->fff_res2
++ |->fff_res2:
++ | mov RDd, 1+2
++ | jmp ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
+ |.if X64WIN
+@@ -1840,7 +1826,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | jmp ->fff_res
+ |
+ |.macro math_minmax, name, cmovop, sseop
+- | .ffunc name
++ | .ffunc_1 name
+ | mov RAd, 2
+ |.if DUALNUM
+ | mov RB, [BASE]
+@@ -2011,7 +1997,7 @@ static void build_subroutines(BuildCtx *ctx)
+ |.endif
+ | mov RC, SBUF:CARG1->b
+ | mov SBUF:CARG1->L, L:RB
+- | mov SBUF:CARG1->p, RC
++ | mov SBUF:CARG1->w, RC
+ | mov SAVE_PC, PC
+ | call extern lj_buf_putstr_ .. name
+ | mov CARG1, rax
+@@ -2509,10 +2495,10 @@ static void build_subroutines(BuildCtx *ctx)
+ | jmp <2
+ |
+ |9: // Rethrow error from the right C frame.
+- | neg RD
++ | mov CARG2d, RDd
+ | mov CARG1, L:RB
+- | mov CARG2, RD
+- | call extern lj_err_throw // (lua_State *L, int errcode)
++ | neg CARG2d
++ | call extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+@@ -2647,6 +2633,67 @@ static void build_subroutines(BuildCtx *ctx)
+ | .if X64WIN; pop rsi; .endif
+ | ret
+ |
++ |.define NEXT_TAB, TAB:CARG1
++ |.define NEXT_IDX, CARG2d
++ |.define NEXT_IDXa, CARG2
++ |.define NEXT_PTR, RC
++ |.define NEXT_PTRd, RCd
++ |.define NEXT_TMP, CARG3
++ |.define NEXT_ASIZE, CARG4d
++ |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
++ |.if X64WIN
++ |.define NEXT_RES_PTR, [rsp+aword*5]
++ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
++ |.else
++ |.define NEXT_RES_PTR, [rsp+aword*1]
++ |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
++ |.endif
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in edx.
++ |->vm_next:
++ |.if JIT
++ | mov NEXT_ASIZE, NEXT_TAB->asize
++ |1: // Traverse array part.
++ | cmp NEXT_IDX, NEXT_ASIZE; jae >5
++ | mov NEXT_TMP, NEXT_TAB->array
++ | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8]
++ | cmp NEXT_TMP, LJ_TNIL; je >2
++ | lea NEXT_PTR, NEXT_RES_PTR
++ | mov qword [NEXT_PTR], NEXT_TMP
++ |.if DUALNUM
++ | setint NEXT_TMP, NEXT_IDXa
++ | mov qword [NEXT_PTR+qword*1], NEXT_TMP
++ |.else
++ | cvtsi2sd xmm0, NEXT_IDX
++ | movsd qword [NEXT_PTR+qword*1], xmm0
++ |.endif
++ | NEXT_RES_IDX 1
++ | ret
++ |2: // Skip holes in array part.
++ | add NEXT_IDX, 1
++ | jmp <1
++ |
++ |5: // Traverse hash part.
++ | sub NEXT_IDX, NEXT_ASIZE
++ |6:
++ | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
++ | imul NEXT_PTRd, NEXT_IDX, #NODE
++ | add NODE:NEXT_PTR, NEXT_TAB->node
++ | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
++ | NEXT_RES_IDXL NEXT_ASIZE+1
++ | ret
++ |7: // Skip holes in hash part.
++ | add NEXT_IDX, 1
++ | jmp <6
++ |
++ |9: // End of iteration. Set the key to nil (not the value).
++ | NEXT_RES_IDX NEXT_ASIZE
++ | lea NEXT_PTR, NEXT_RES_PTR
++ | mov qword [NEXT_PTR+qword*1], LJ_TNIL
++ | ret
++ |.endif
++ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -3674,7 +3721,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+ | mov TMPRd, TAB:RB->hmask
+- | and TMPRd, STR:RC->hash
++ | and TMPRd, STR:RC->sid
+ | imul TMPRd, #NODE
+ | add NODE:TMPR, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR
+@@ -3806,7 +3853,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | checktab TAB:RB, ->vmeta_tsets
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
+ | mov TMPRd, TAB:RB->hmask
+- | and TMPRd, STR:RC->hash
++ | and TMPRd, STR:RC->sid
+ | imul TMPRd, #NODE
+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
+ | add NODE:TMPR, TAB:RB->node
+@@ -4058,10 +4105,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERN:
+- | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
++ | hotloop RBd
+ |.endif
++ |->vm_IITERN:
++ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | mov TAB:RB, [BASE+RA*8-16]
+ | cleartp TAB:RB
+ | mov RCd, [BASE+RA*8-8] // Get index from control var.
+@@ -4125,15 +4173,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
+ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
+ | branchPC RD
+- | mov64 TMPR, U64x(fffe7fff, 00000000)
++ | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32)
+ | mov [BASE+RA*8-8], TMPR // Initialize control var.
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+ | mov PC_OP, BC_JMP
+ | branchPC RD
++ |.if JIT
++ | cmp byte [PC], BC_ITERN
++ | jne >6
++ |.endif
+ | mov byte [PC], BC_ITERC
+ | jmp <1
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | mov RA, [DISPATCH+DISPATCH_J(trace)]
++ | movzx RCd, word [PC+2]
++ | mov TRACE:RA, [RA+RC*8]
++ | mov eax, TRACE:RA->startins
++ | mov al, BC_ITERC
++ | mov dword [PC], eax
++ | jmp <1
++ |.endif
+ break;
+
+ case BC_VARG:
+@@ -4734,7 +4796,7 @@ static void emit_asm_debug(BuildCtx *ctx)
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+ #endif
+ #if !LJ_NO_UNWIND
+-#if (defined(__sun__) && defined(__svr4__))
++#if LJ_TARGET_SOLARIS
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
+ #else
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
+index 211ae7b9..cbf0810c 100644
+--- a/src/vm_x86.dasc
++++ b/src/vm_x86.dasc
+@@ -1,6 +1,6 @@
+ |// Low-level VM code for x86 CPUs.
+ |// Bytecode interpreter, fast functions and helper functions.
+-|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ |
+ |.if P64
+ |.arch x64
+@@ -1372,7 +1372,11 @@ static void build_subroutines(BuildCtx *ctx)
+ | mov LFUNC:RB, [RA-8]
+ | add NARGS:RD, 1
+ | // This is fragile. L->base must not move, KBASE must always be defined.
++ |.if x64
++ | cmp KBASEa, rdx // Continue with CALLT if flag set.
++ |.else
+ | cmp KBASE, BASE // Continue with CALLT if flag set.
++ |.endif
+ | je ->BC_CALLT_Z
+ | mov BASE, RA
+ | ins_call // Otherwise call resolved metamethod.
+@@ -1522,7 +1526,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
+ | mov [BASE-8], TAB:RB
+ | mov RA, TAB:RB->hmask
+- | and RA, STR:RC->hash
++ | and RA, STR:RC->sid
+ | imul RA, #NODE
+ | add NODE:RA, TAB:RB->node
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+@@ -1669,55 +1673,35 @@ static void build_subroutines(BuildCtx *ctx)
+ | je >2 // Missing 2nd arg?
+ |1:
+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
+- | mov L:RB, SAVE_L
+- | mov L:RB->base, BASE // Add frame since C call can throw.
+- | mov L:RB->top, BASE // Dummy frame length is ok.
+ | mov PC, [BASE-4]
++ | mov RB, BASE // Save BASE.
+ |.if X64WIN
+- | lea CARG3d, [BASE+8]
+- | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
+- | mov CARG1d, L:RB
++ | mov CARG1d, [BASE]
++ | lea CARG3d, [BASE-8]
++ | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE.
+ |.elif X64
+- | mov CARG2d, [BASE]
+- | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
+- | mov CARG1d, L:RB
++ | mov CARG1d, [BASE]
++ | lea CARG2d, [BASE+8]
++ | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE.
+ |.else
+ | mov TAB:RD, [BASE]
+- | mov ARG2, TAB:RD
+- | mov ARG1, L:RB
++ | mov ARG1, TAB:RD
+ | add BASE, 8
++ | mov ARG2, BASE
++ | sub BASE, 8+8
+ | mov ARG3, BASE
+ |.endif
+- | mov SAVE_PC, PC // Needed for ITERN fallback.
+- | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+- | // Flag returned in eax (RD).
+- | mov BASE, L:RB->base
+- | test RD, RD; jz >3 // End of traversal?
+- | // Copy key and value to results.
+- |.if X64
+- | mov RBa, [BASE+8]
+- | mov RDa, [BASE+16]
+- | mov [BASE-8], RBa
+- | mov [BASE], RDa
+- |.else
+- | mov RB, [BASE+8]
+- | mov RD, [BASE+12]
+- | mov [BASE-8], RB
+- | mov [BASE-4], RD
+- | mov RB, [BASE+16]
+- | mov RD, [BASE+20]
+- | mov [BASE], RB
+- | mov [BASE+4], RD
+- |.endif
+- |->fff_res2:
+- | mov RD, 1+2
+- | jmp ->fff_res
++ | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
++ | // 1=found, 0=end, -1=error returned in eax (RD).
++ | mov BASE, RB // Restore BASE.
++ | test RD, RD; jg ->fff_res2 // Found key/value.
++ | js ->fff_fallback_2 // Invalid key.
++ | // End of traversal: return nil.
++ | mov dword [BASE-4], LJ_TNIL
++ | jmp ->fff_res1
+ |2: // Set missing 2nd arg to nil.
+ | mov dword [BASE+12], LJ_TNIL
+ | jmp <1
+- |3: // End of traversal: return nil.
+- | mov dword [BASE-4], LJ_TNIL
+- | jmp ->fff_res1
+ |
+ |.ffunc_1 pairs
+ | mov TAB:RB, [BASE]
+@@ -1771,7 +1755,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | mov [BASE], RB
+ | mov [BASE+4], RD
+ |.endif
+- | jmp ->fff_res2
++ |->fff_res2:
++ | mov RD, 1+2
++ | jmp ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
+ | mov FCARG1, TAB:RB
+@@ -2233,7 +2219,7 @@ static void build_subroutines(BuildCtx *ctx)
+ | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
+ |
+ |.macro math_minmax, name, cmovop, sseop
+- | .ffunc name
++ | .ffunc_1 name
+ | mov RA, 2
+ | cmp dword [BASE+4], LJ_TISNUM
+ |.if DUALNUM
+@@ -2419,9 +2405,9 @@ static void build_subroutines(BuildCtx *ctx)
+ | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
+ | mov L:RB->base, BASE
+ | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
+- | mov RC, SBUF:FCARG1->b
++ | mov RCa, SBUF:FCARG1->b
+ | mov SBUF:FCARG1->L, L:RB
+- | mov SBUF:FCARG1->p, RC
++ | mov SBUF:FCARG1->w, RCa
+ | mov SAVE_PC, PC
+ | call extern lj_buf_putstr_ .. name .. @8
+ | mov FCARG1, eax
+@@ -2960,10 +2946,10 @@ static void build_subroutines(BuildCtx *ctx)
+ | jmp <2
+ |
+ |9: // Rethrow error from the right C frame.
+- | neg RD
+- | mov FCARG1, L:RB
+ | mov FCARG2, RD
+- | call extern lj_err_throw@8 // (lua_State *L, int errcode)
++ | mov FCARG1, L:RB
++ | neg FCARG2
++ | call extern lj_err_trace@8 // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+@@ -3134,6 +3120,86 @@ static void build_subroutines(BuildCtx *ctx)
+ | ret
+ |.endif
+ |
++ |.define NEXT_TAB, TAB:FCARG1
++ |.define NEXT_IDX, FCARG2
++ |.define NEXT_PTR, RCa
++ |.define NEXT_PTRd, RC
++ |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
++ |.if X64
++ |.define NEXT_TMP, CARG3d
++ |.define NEXT_TMPq, CARG3
++ |.define NEXT_ASIZE, CARG4d
++ |.macro NEXT_ENTER; .endmacro
++ |.macro NEXT_LEAVE; ret; .endmacro
++ |.if X64WIN
++ |.define NEXT_RES_PTR, [rsp+aword*5]
++ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
++ |.else
++ |.define NEXT_RES_PTR, [rsp+aword*1]
++ |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
++ |.endif
++ |.else
++ |.define NEXT_ASIZE, esi
++ |.define NEXT_TMP, edi
++ |.macro NEXT_ENTER; push esi; push edi; .endmacro
++ |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro
++ |.define NEXT_RES_PTR, [esp+dword*3]
++ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
++ |.endif
++ |
++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
++ |// Next idx returned in edx.
++ |->vm_next:
++ |.if JIT
++ | NEXT_ENTER
++ | mov NEXT_ASIZE, NEXT_TAB->asize
++ |1: // Traverse array part.
++ | cmp NEXT_IDX, NEXT_ASIZE; jae >5
++ | mov NEXT_TMP, NEXT_TAB->array
++ | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2
++ | lea NEXT_PTR, NEXT_RES_PTR
++ |.if X64
++ | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8]
++ | mov qword [NEXT_PTR], NEXT_TMPq
++ |.else
++ | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4]
++ | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
++ | mov dword [NEXT_PTR+4], NEXT_ASIZE
++ | mov dword [NEXT_PTR], NEXT_TMP
++ |.endif
++ |.if DUALNUM
++ | mov dword [NEXT_PTR+dword*3], LJ_TISNUM
++ | mov dword [NEXT_PTR+dword*2], NEXT_IDX
++ |.else
++ | cvtsi2sd xmm0, NEXT_IDX
++ | movsd qword [NEXT_PTR+dword*2], xmm0
++ |.endif
++ | NEXT_RES_IDX 1
++ | NEXT_LEAVE
++ |2: // Skip holes in array part.
++ | add NEXT_IDX, 1
++ | jmp <1
++ |
++ |5: // Traverse hash part.
++ | sub NEXT_IDX, NEXT_ASIZE
++ |6:
++ | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
++ | imul NEXT_PTRd, NEXT_IDX, #NODE
++ | add NODE:NEXT_PTRd, dword NEXT_TAB->node
++ | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7
++ | NEXT_RES_IDXL NEXT_ASIZE+1
++ | NEXT_LEAVE
++ |7: // Skip holes in hash part.
++ | add NEXT_IDX, 1
++ | jmp <6
++ |
++ |9: // End of iteration. Set the key to nil (not the value).
++ | NEXT_RES_IDX NEXT_ASIZE
++ | lea NEXT_PTR, NEXT_RES_PTR
++ | mov dword [NEXT_PTR+dword*3], LJ_TNIL
++ | NEXT_LEAVE
++ |.endif
++ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+@@ -4286,7 +4352,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | mov TAB:RB, [BASE+RB*8]
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+ | mov RA, TAB:RB->hmask
+- | and RA, STR:RC->hash
++ | and RA, STR:RC->sid
+ | imul RA, #NODE
+ | add NODE:RA, TAB:RB->node
+ |1:
+@@ -4457,7 +4523,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | mov TAB:RB, [BASE+RB*8]
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+ | mov RA, TAB:RB->hmask
+- | and RA, STR:RC->hash
++ | and RA, STR:RC->sid
+ | imul RA, #NODE
+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
+ | add NODE:RA, TAB:RB->node
+@@ -4785,10 +4851,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ break;
+
+ case BC_ITERN:
+- | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ |.if JIT
+- | // NYI: add hotloop, record BC_ITERN.
++ | hotloop RB
+ |.endif
++ |->vm_IITERN:
++ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | mov TMP1, KBASE // Need two more free registers.
+ | mov TMP2, DISPATCH
+ | mov TAB:RB, [BASE+RA*8-16]
+@@ -4876,14 +4943,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
+ | branchPC RD
+ | mov dword [BASE+RA*8-8], 0 // Initialize control var.
+- | mov dword [BASE+RA*8-4], 0xfffe7fff
++ | mov dword [BASE+RA*8-4], LJ_KEYINDEX
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+ | mov PC_OP, BC_JMP
+ | branchPC RD
++ |.if JIT
++ | cmp byte [PC], BC_ITERN
++ | jne >6
++ |.endif
+ | mov byte [PC], BC_ITERC
+ | jmp <1
++ |.if JIT
++ |6: // Unpatch JLOOP.
++ | mov RA, [DISPATCH+DISPATCH_J(trace)]
++ | movzx RC, word [PC+2]
++ | mov TRACE:RA, [RA+RC*4]
++ | mov eax, TRACE:RA->startins
++ | mov al, BC_ITERC
++ | mov dword [PC], eax
++ | jmp <1
++ |.endif
+ break;
+
+ case BC_VARG:
+@@ -5548,7 +5629,7 @@ static void emit_asm_debug(BuildCtx *ctx)
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+ #endif
+ #if !LJ_NO_UNWIND
+-#if (defined(__sun__) && defined(__svr4__))
++#if LJ_TARGET_SOLARIS
+ #if LJ_64
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
+ #else
+diff --git a/src/xb1build.bat b/src/xb1build.bat
+index 847e84a5..2eb68171 100644
+--- a/src/xb1build.bat
++++ b/src/xb1build.bat
+@@ -9,12 +9,12 @@
+
+ @setlocal
+ @echo ---- Host compiler ----
+-@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64
++@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+ @set LJLINK=link /nologo
+ @set LJMT=mt /nologo
+ @set DASMDIR=..\dynasm
+ @set DASM=%DASMDIR%\dynasm.lua
+-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+ %LJCOMPILE% host\minilua.c
+ @if errorlevel 1 goto :BAD
+diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat
+index 240ec878..37322d03 100644
+--- a/src/xedkbuild.bat
++++ b/src/xedkbuild.bat
+@@ -14,7 +14,7 @@
+ @set LJMT=mt /nologo
+ @set DASMDIR=..\dynasm
+ @set DASM=%DASMDIR%\dynasm.lua
+-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
++@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+ %LJCOMPILE% host\minilua.c
+ @if errorlevel 1 goto :BAD
diff --git a/luajit-openresty-features.patch b/luajit-openresty-features.patch
deleted file mode 100644
index 8a9d90c..0000000
--- a/luajit-openresty-features.patch
+++ /dev/null
@@ -1,824 +0,0 @@
-From a6879cb3982f02744dd77b6663ae6bc14162e652 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Sat, 19 Dec 2015 10:43:32 -0800
-Subject: [PATCH 02/13] Makefile: ensure we always install the symlink for
- "luajit".
-
----
- Makefile | 7 +------
- 1 file changed, 1 insertion(+), 6 deletions(-)
-
-diff --git a/Makefile b/Makefile
-index 923bf72b..f4b84081 100644
---- a/Makefile
-+++ b/Makefile
-@@ -130,13 +130,8 @@ install: $(LUAJIT_BIN)
- $(RM) $(FILE_PC).tmp
- cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
- cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
-+ $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
- @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
-- @echo ""
-- @echo "Note: the development releases deliberately do NOT install a symlink for luajit"
-- @echo "You can do this now by running this command (with sudo):"
-- @echo ""
-- @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
-- @echo ""
-
-
- uninstall:
---
-2.21.0
-
-
-From e29e78dd64573947777e8ca7741d46d1c0ba2f7b Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Tue, 14 Mar 2017 14:26:48 -0700
-Subject: [PATCH 03/13] optimize: lj_str_new: tests the full hash value before
- doing the full string comparison on hash collisions. thanks Shuxin Yang for
- the patch.
-
----
- src/lj_str.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_str.c b/src/lj_str.c
-index 264dedc1..f1b5fb5d 100644
---- a/src/lj_str.c
-+++ b/src/lj_str.c
-@@ -152,7 +152,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
- if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
- while (o != NULL) {
- GCstr *sx = gco2str(o);
-- if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
-+ if (sx->len == len && sx->hash == h && str_fastcmp(str, strdata(sx), len) == 0) {
- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
- if (isdead(g, o)) flipwhite(o);
- return sx; /* Return existing string. */
-@@ -162,7 +162,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
- } else { /* Slow path: end of string is too close to a page boundary. */
- while (o != NULL) {
- GCstr *sx = gco2str(o);
-- if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
-+ if (sx->len == len && sx->hash == h && memcmp(str, strdata(sx), len) == 0) {
- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
- if (isdead(g, o)) flipwhite(o);
- return sx; /* Return existing string. */
---
-2.21.0
-
-
-From 555ee4e814f799937ca505423fc05c0b0402f81c Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <yichun(a)openresty.com>
-Date: Tue, 15 Jan 2019 12:17:50 -0800
-Subject: [PATCH 04/13] bugfix: fixed assertion failure "lj_record.c:92:
- rec_check_slots: Assertion `nslots <= 250' failed" found by stressing our
- edgelang compiler.
-
----
- src/lj_record.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/lj_record.c b/src/lj_record.c
-index 7f37d6c6..4a50de1b 100644
---- a/src/lj_record.c
-+++ b/src/lj_record.c
-@@ -1860,6 +1860,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
- lj_trace_err_info(J, LJ_TRERR_NYIBC);
- }
- }
-+ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
-+ lj_trace_err(J, LJ_TRERR_STACKOV);
- }
-
- /* -- Record allocations -------------------------------------------------- */
---
-2.21.0
-
-
-From 58e9941b6268202f7953a5534e0c662ad90b2510 Mon Sep 17 00:00:00 2001
-From: doujiang24 <doujiang24(a)gmail.com>
-Date: Sun, 12 Mar 2017 21:04:50 +0800
-Subject: [PATCH 05/13] feature: added the bytecode option `L` to display lua
- source line numbers.
-
-Signed-off-by: Yichun Zhang (agentzh) <agentzh(a)gmail.com>
----
- src/jit/bc.lua | 20 +++++++++++++-------
- src/jit/bcsave.lua | 11 ++++++++---
- src/lib_jit.c | 6 ++++++
- 3 files changed, 27 insertions(+), 10 deletions(-)
-
-diff --git a/src/jit/bc.lua b/src/jit/bc.lua
-index 193cf01f..80f92689 100644
---- a/src/jit/bc.lua
-+++ b/src/jit/bc.lua
-@@ -63,15 +63,21 @@ local function ctlsub(c)
- end
-
- -- Return one bytecode line.
--local function bcline(func, pc, prefix)
-- local ins, m = funcbc(func, pc)
-+local function bcline(func, pc, prefix, lineinfo)
-+ local ins, m, l = funcbc(func, pc, lineinfo and 1 or 0)
- if not ins then return end
- local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
- local a = band(shr(ins, 8), 0xff)
- local oidx = 6*band(ins, 0xff)
- local op = sub(bcnames, oidx+1, oidx+6)
-- local s = format("%04d %s %-6s %3s ",
-- pc, prefix or " ", op, ma == 0 and "" or a)
-+ local s
-+ if lineinfo then
-+ s = format("%04d %7s %s %-6s %3s ",
-+ pc, "["..l.."]", prefix or " ", op, ma == 0 and "" or a)
-+ else
-+ s = format("%04d %s %-6s %3s ",
-+ pc, prefix or " ", op, ma == 0 and "" or a)
-+ end
- local d = shr(ins, 16)
- if mc == 13*128 then -- BCMjump
- return format("%s=> %04d\n", s, pc+d-0x7fff)
-@@ -124,20 +130,20 @@ local function bctargets(func)
- end
-
- -- Dump bytecode instructions of a function.
--local function bcdump(func, out, all)
-+local function bcdump(func, out, all, lineinfo)
- if not out then out = stdout end
- local fi = funcinfo(func)
- if all and fi.children then
- for n=-1,-1000000000,-1 do
- local k = funck(func, n)
- if not k then break end
-- if type(k) == "proto" then bcdump(k, out, true) end
-+ if type(k) == "proto" then bcdump(k, out, true, lineinfo) end
- end
- end
- out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined))
- local target = bctargets(func)
- for pc=1,1000000000 do
-- local s = bcline(func, pc, target[pc] and "=>")
-+ local s = bcline(func, pc, target[pc] and "=>", lineinfo)
- if not s then break end
- out:write(s)
- end
-diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
-index 2553d97e..9c6146c2 100644
---- a/src/jit/bcsave.lua
-+++ b/src/jit/bcsave.lua
-@@ -23,6 +23,7 @@ local function usage()
- io.stderr:write[[
- Save LuaJIT bytecode: luajit -b[options] input output
- -l Only list bytecode.
-+ -L Only list bytecode with lineinfo.
- -s Strip debug info (default).
- -g Keep debug info.
- -n name Set module name (default: auto-detect from input name).
-@@ -575,9 +576,9 @@ end
-
- ------------------------------------------------------------------------------
-
--local function bclist(input, output)
-+local function bclist(input, output, lineinfo)
- local f = readfile(input)
-- require("jit.bc").dump(f, savefile(output, "w"), true)
-+ require("jit.bc").dump(f, savefile(output, "w"), true, lineinfo)
- end
-
- local function bcsave(ctx, input, output)
-@@ -604,6 +605,7 @@ local function docmd(...)
- local arg = {...}
- local n = 1
- local list = false
-+ local lineinfo = false
- local ctx = {
- strip = true, arch = jit.arch, os = string.lower(jit.os),
- type = false, modname = false,
-@@ -617,6 +619,9 @@ local function docmd(...)
- local opt = string.sub(a, m, m)
- if opt == "l" then
- list = true
-+ elseif opt == "L" then
-+ list = true
-+ lineinfo = true
- elseif opt == "s" then
- ctx.strip = true
- elseif opt == "g" then
-@@ -645,7 +650,7 @@ local function docmd(...)
- end
- if list then
- if #arg == 0 or #arg > 2 then usage() end
-- bclist(arg[1], arg[2] or "-")
-+ bclist(arg[1], arg[2] or "-", lineinfo)
- else
- if #arg ~= 2 then usage() end
- bcsave(ctx, arg[1], arg[2])
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index 6e265fdb..6972550b 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -224,6 +224,7 @@ LJLIB_CF(jit_util_funcbc)
- {
- GCproto *pt = check_Lproto(L, 0);
- BCPos pc = (BCPos)lj_lib_checkint(L, 2);
-+ int lineinfo = lj_lib_optint(L, 3, 0);
- if (pc < pt->sizebc) {
- BCIns ins = proto_bc(pt)[pc];
- BCOp op = bc_op(ins);
-@@ -231,6 +232,11 @@ LJLIB_CF(jit_util_funcbc)
- setintV(L->top, ins);
- setintV(L->top+1, lj_bc_mode[op]);
- L->top += 2;
-+ if (lineinfo) {
-+ setintV(L->top, lj_debug_line(pt, pc));
-+ L->top += 1;
-+ return 3;
-+ }
- return 2;
- }
- return 0;
---
-2.21.0
-
-
-From a61c93d0784c532db4ec0797475a0e0ad93dda4c Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <yichun(a)openresty.com>
-Date: Wed, 27 Feb 2019 17:20:19 -0800
-Subject: [PATCH 06/13] bugfix: ffi.C.FUNC(): it lacked a write barrier which
- might lead to use-after-free issues and memory corruptions.
-
-Fix #42.
----
- src/lj_clib.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/lj_clib.c b/src/lj_clib.c
-index f016b06b..a8672052 100644
---- a/src/lj_clib.c
-+++ b/src/lj_clib.c
-@@ -384,6 +384,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
- cd = lj_cdata_new(cts, id, CTSIZE_PTR);
- *(void **)cdataptr(cd) = p;
- setcdataV(L, tv, cd);
-+ lj_gc_anybarriert(L, cl->cache);
- }
- }
- return tv;
---
-2.21.0
-
-
-From 3086b483e76ad12ae0a0dfab60960c1175b69dab Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Thu, 15 May 2014 16:03:29 -0700
-Subject: [PATCH 07/13] feature: added internal memory-buffer-based trace
- entry/exit/start-recording event logging, mainly for debugging bugs in the
- JIT compiler. it requires -DLUA_USE_TRACE_LOGS when building.
-
----
- src/lj_debug.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++
- src/lj_debug.h | 11 +++++
- src/lj_trace.c | 9 ++++
- src/vm_x86.dasc | 24 ++++++++++
- 4 files changed, 169 insertions(+)
-
-diff --git a/src/lj_debug.c b/src/lj_debug.c
-index 959dc289..7f4f793a 100644
---- a/src/lj_debug.c
-+++ b/src/lj_debug.c
-@@ -697,3 +697,128 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
- lua_concat(L, (int)(L->top - L->base) - top);
- }
-
-+#ifdef LUA_USE_TRACE_LOGS
-+
-+#include "lj_dispatch.h"
-+
-+#define MAX_TRACE_EVENTS 64
-+
-+enum {
-+ LJ_TRACE_EVENT_ENTER,
-+ LJ_TRACE_EVENT_EXIT,
-+ LJ_TRACE_EVENT_START
-+};
-+
-+typedef struct {
-+ int event;
-+ unsigned traceno;
-+ unsigned exitno;
-+ int directexit;
-+ const BCIns *ins;
-+ lua_State *thread;
-+ GCfunc *fn;
-+} lj_trace_event_record_t;
-+
-+static lj_trace_event_record_t lj_trace_events[MAX_TRACE_EVENTS];
-+
-+static int rb_start = 0;
-+static int rb_end = 0;
-+static int rb_full = 0;
-+
-+static void
-+lj_trace_log_event(lj_trace_event_record_t *rec)
-+{
-+ lj_trace_events[rb_end] = *rec;
-+
-+ if (rb_full) {
-+ rb_end++;
-+ if (rb_end == MAX_TRACE_EVENTS) {
-+ rb_end = 0;
-+ }
-+ rb_start = rb_end;
-+
-+ } else {
-+ rb_end++;
-+ if (rb_end == MAX_TRACE_EVENTS) {
-+ rb_end = 0;
-+ rb_full = MAX_TRACE_EVENTS;
-+ }
-+ }
-+}
-+
-+static GCfunc*
-+lj_debug_top_frame_fn(lua_State *L, const BCIns *pc)
-+{
-+ int size;
-+ cTValue *frame;
-+
-+ frame = lj_debug_frame(L, 0, &size);
-+ if (frame == NULL) {
-+ return NULL;
-+ }
-+
-+ return frame_func(frame);
-+}
-+
-+void
-+lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc,
-+ GCfunc *fn)
-+{
-+ lj_trace_event_record_t r;
-+
-+ r.event = LJ_TRACE_EVENT_START;
-+ r.thread = L;
-+ r.ins = pc;
-+ r.traceno = traceno;
-+ r.fn = fn;
-+
-+ lj_trace_log_event(&r);
-+}
-+
-+void
-+lj_log_trace_entry(lua_State *L, unsigned traceno, const BCIns *pc)
-+{
-+ lj_trace_event_record_t r;
-+
-+ r.event = LJ_TRACE_EVENT_ENTER;
-+ r.thread = L;
-+ r.ins = pc;
-+ r.traceno = traceno;
-+ r.fn = lj_debug_top_frame_fn(L, pc);
-+
-+ lj_trace_log_event(&r);
-+}
-+
-+static void
-+lj_log_trace_exit_helper(lua_State *L, int vmstate, const BCIns *pc, int direct)
-+{
-+ if (vmstate >= 0) {
-+ lj_trace_event_record_t r;
-+
-+ jit_State *J = L2J(L);
-+
-+ r.event = LJ_TRACE_EVENT_EXIT;
-+ r.thread = L;
-+ r.ins = pc;
-+ r.traceno = vmstate;
-+ r.exitno = J->exitno;
-+ r.directexit = direct;
-+ r.fn = lj_debug_top_frame_fn(L, pc);
-+
-+ lj_trace_log_event(&r);
-+ }
-+}
-+
-+void
-+lj_log_trace_normal_exit(lua_State *L, int vmstate, const BCIns *pc)
-+{
-+ lj_log_trace_exit_helper(L, vmstate, pc, 0);
-+}
-+
-+void
-+lj_log_trace_direct_exit(lua_State *L, int vmstate, const BCIns *pc)
-+{
-+ lj_log_trace_exit_helper(L, vmstate, pc, 1);
-+}
-+
-+#endif /* LUA_USE_TRACE_LOGS */
-diff --git a/src/lj_debug.h b/src/lj_debug.h
-index 5917c00b..82f53bda 100644
---- a/src/lj_debug.h
-+++ b/src/lj_debug.h
-@@ -62,4 +62,15 @@ enum {
- VARNAME__MAX
- };
-
-+#ifdef LUA_USE_TRACE_LOGS
-+LJ_FUNC void LJ_FASTCALL lj_log_trace_direct_exit(lua_State *L,
-+ int vmstate, const BCIns *pc);
-+LJ_FUNC void LJ_FASTCALL lj_log_trace_normal_exit(lua_State *L,
-+ int vmstate, const BCIns *pc);
-+LJ_FUNC void LJ_FASTCALL lj_log_trace_entry(lua_State *L,
-+ unsigned traceno, const BCIns *pc);
-+LJ_FUNC void LJ_FASTCALL lj_log_trace_start_record(lua_State *L, unsigned traceno,
-+ const BCIns *pc, GCfunc *fn);
-+#endif
-+
- #endif
-diff --git a/src/lj_trace.c b/src/lj_trace.c
-index d85b47f8..c2f0d8cf 100644
---- a/src/lj_trace.c
-+++ b/src/lj_trace.c
-@@ -404,6 +404,9 @@ static void trace_start(jit_State *J)
- {
- lua_State *L;
- TraceNo traceno;
-+#ifdef LUA_USE_TRACE_LOGS
-+ const BCIns *pc = J->pc;
-+#endif
-
- if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
- if (J->parent == 0 && J->exitno == 0) {
-@@ -462,6 +465,9 @@ static void trace_start(jit_State *J)
- }
- );
- lj_record_setup(J);
-+#ifdef LUA_USE_TRACE_LOGS
-+ lj_log_trace_start_record(L, (unsigned) J->cur.traceno, pc, J->fn);
-+#endif
- }
-
- /* Stop tracing. */
-@@ -890,6 +896,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
- }
- }
- }
-+#ifdef LUA_USE_TRACE_LOGS
-+ lj_log_trace_normal_exit(L, (int) T->traceno, pc);
-+#endif
- /* Return MULTRES or 0. */
- ERRNO_RESTORE
- switch (bc_op(*pc)) {
-diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
-index 211ae7b9..66377cd5 100644
---- a/src/vm_x86.dasc
-+++ b/src/vm_x86.dasc
-@@ -2919,6 +2919,19 @@ static void build_subroutines(BuildCtx *ctx)
- | mov r13, TMPa
- | mov r12, TMPQ
- |.endif
-+#ifdef LUA_USE_TRACE_LOGS
-+ | mov FCARG1, SAVE_L
-+ | mov L:FCARG1->base, BASE
-+ | mov RB, RD // Save RD
-+ | mov TMP1, PC // Save PC
-+ | mov CARG3d, PC // CARG3d == BASE
-+ | mov FCARG2, dword [DISPATCH+DISPATCH_GL(vmstate)]
-+ | call extern lj_log_trace_direct_exit@8
-+ | mov PC, TMP1
-+ | mov RD, RB
-+ | mov RB, SAVE_L
-+ | mov BASE, L:RB->base
-+#endif
- | test RD, RD; js >9 // Check for error from exit.
- | mov L:RB, SAVE_L
- | mov MULTRES, RD
-@@ -5260,6 +5273,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_JLOOP:
- |.if JIT
- | ins_AD // RA = base (ignored), RD = traceno
-+#ifdef LUA_USE_TRACE_LOGS
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Save BASE
-+ | mov TMP1, RD // Save RD
-+ | mov CARG3d, PC // CARG3d == BASE
-+ | mov FCARG2, RD
-+ | mov FCARG1, RB
-+ | call extern lj_log_trace_entry@8
-+ | mov RD, TMP1
-+ | mov BASE, L:RB->base
-+#endif
- | mov RA, [DISPATCH+DISPATCH_J(trace)]
- | mov TRACE:RD, [RA+RD*4]
- | mov RDa, TRACE:RD->mcode
---
-2.21.0
-
-
-From 00a5957d632f1715fdc88c1a3fe7cc355f5a13cb Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Wed, 21 May 2014 16:05:13 -0700
-Subject: [PATCH 08/13] bugfix: fixed build regression on i386 introduced by
- the LUA_USE_TRACE_LOGS feature.
-
----
- src/vm_x86.dasc | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
-index 66377cd5..50210010 100644
---- a/src/vm_x86.dasc
-+++ b/src/vm_x86.dasc
-@@ -2920,6 +2920,7 @@ static void build_subroutines(BuildCtx *ctx)
- | mov r12, TMPQ
- |.endif
- #ifdef LUA_USE_TRACE_LOGS
-+ |.if X64
- | mov FCARG1, SAVE_L
- | mov L:FCARG1->base, BASE
- | mov RB, RD // Save RD
-@@ -2931,6 +2932,7 @@ static void build_subroutines(BuildCtx *ctx)
- | mov RD, RB
- | mov RB, SAVE_L
- | mov BASE, L:RB->base
-+ |.endif
- #endif
- | test RD, RD; js >9 // Check for error from exit.
- | mov L:RB, SAVE_L
-@@ -5274,6 +5276,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.if JIT
- | ins_AD // RA = base (ignored), RD = traceno
- #ifdef LUA_USE_TRACE_LOGS
-+ |.if X64
- | mov L:RB, SAVE_L
- | mov L:RB->base, BASE // Save BASE
- | mov TMP1, RD // Save RD
-@@ -5283,6 +5286,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | call extern lj_log_trace_entry@8
- | mov RD, TMP1
- | mov BASE, L:RB->base
-+ |.endif
- #endif
- | mov RA, [DISPATCH+DISPATCH_J(trace)]
- | mov TRACE:RD, [RA+RD*4]
---
-2.21.0
-
-
-From 7950afe36eadad8b529f4aa90b303861619a2322 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Sat, 7 Jun 2014 13:41:24 -0700
-Subject: [PATCH 09/13] fixed compilation errors on Solaris when
- -DLUA_USE_TRACE_LOGS is enabled.
-
----
- src/lj_debug.c | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/src/lj_debug.c b/src/lj_debug.c
-index 7f4f793a..b93b69d3 100644
---- a/src/lj_debug.c
-+++ b/src/lj_debug.c
-@@ -760,7 +760,7 @@ lj_debug_top_frame_fn(lua_State *L, const BCIns *pc)
- return frame_func(frame);
- }
-
--void
-+LJ_FUNC void LJ_FASTCALL
- lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc,
- GCfunc *fn)
- {
-@@ -775,7 +775,7 @@ lj_log_trace_start_record(lua_State *L, unsigned traceno, const BCIns *pc,
- lj_trace_log_event(&r);
- }
-
--void
-+LJ_FUNC void LJ_FASTCALL
- lj_log_trace_entry(lua_State *L, unsigned traceno, const BCIns *pc)
- {
- lj_trace_event_record_t r;
-@@ -809,13 +809,13 @@ lj_log_trace_exit_helper(lua_State *L, int vmstate, const BCIns *pc, int direct)
- }
- }
-
--void
-+LJ_FUNC void LJ_FASTCALL
- lj_log_trace_normal_exit(lua_State *L, int vmstate, const BCIns *pc)
- {
- lj_log_trace_exit_helper(L, vmstate, pc, 0);
- }
-
--void
-+LJ_FUNC void LJ_FASTCALL
- lj_log_trace_direct_exit(lua_State *L, int vmstate, const BCIns *pc)
- {
- lj_log_trace_exit_helper(L, vmstate, pc, 1);
---
-2.21.0
-
-
-From bd304a366be2ffb10eec6aeba390595232958320 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Tue, 27 May 2014 12:37:13 -0700
-Subject: [PATCH 10/13] feature: jit.dump: output Lua source location after
- every BC.
-
----
- src/jit/dump.lua | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/src/jit/dump.lua b/src/jit/dump.lua
-index 2bea652b..ef0dca61 100644
---- a/src/jit/dump.lua
-+++ b/src/jit/dump.lua
-@@ -591,6 +591,9 @@ local function dump_record(tr, func, pc, depth, callee)
- if pc >= 0 then
- line = bcline(func, pc, recprefix)
- if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
-+ if pc > 0 then
-+ line = sub(line, 1, -2) .. " (" .. fmtfunc(func, pc) .. ")\n"
-+ end
- else
- line = "0000 "..recprefix.." FUNCC \n"
- callee = func
---
-2.21.0
-
-
-From cce112ca4fdde7d1ca5963c50d0621fb2e526524 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <yichun(a)openresty.com>
-Date: Fri, 5 Apr 2019 12:38:40 -0700
-Subject: [PATCH 11/13] feature: luajit -bl: dump the constant tables (KGC and
- KN) for each lua proto object as well.
-
----
- src/jit/bc.lua | 32 ++++++++++++++++++++++++++++++++
- 1 file changed, 32 insertions(+)
-
-diff --git a/src/jit/bc.lua b/src/jit/bc.lua
-index 80f92689..9fee4cda 100644
---- a/src/jit/bc.lua
-+++ b/src/jit/bc.lua
-@@ -141,6 +141,38 @@ local function bcdump(func, out, all, lineinfo)
- end
- end
- out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined))
-+
-+ for n=-1,-1000000000,-1 do
-+ local kc = funck(func, n)
-+ if not kc then break end
-+
-+ local typ = type(kc)
-+ if typ == "string" then
-+ kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
-+ out:write(format("KGC %d %s\n", -(n + 1), kc))
-+ elseif typ == "proto" then
-+ local fi = funcinfo(kc)
-+ if fi.ffid then
-+ kc = vmdef.ffnames[fi.ffid]
-+ else
-+ kc = fi.loc
-+ end
-+ out:write(format("KGC %d %s\n", -(n + 1), kc))
-+ elseif typ == "table" then
-+ out:write(format("KGC %d table\n", -(n + 1)))
-+ else
-+ -- error("unknown KGC type: " .. typ)
-+ end
-+ end
-+
-+ for n=1,1000000000 do
-+ local kc = funck(func, n)
-+ if not kc then break end
-+ if type(kc) == "number" then
-+ out:write(format("KN %d %s\n", n, kc))
-+ end
-+ end
-+
- local target = bctargets(func)
- for pc=1,1000000000 do
- local s = bcline(func, pc, target[pc] and "=>", lineinfo)
---
-2.21.0
-
-
-From 7d5f5be581ed392059601168a95068e026765aa0 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <yichun(a)openresty.com>
-Date: Fri, 17 May 2019 14:49:48 -0700
-Subject: [PATCH 13/13] bugfix: thanks Julien Desgats for the report and Peter
- Cawley for the patch.
-
-The test covering this bug was submitted to the openresty/luajit2-test-suite
-repo as commit ce2c916d55.
----
- src/lj_tab.c | 81 ++++++++++++++++++++++++++++++++++------------------
- 1 file changed, 53 insertions(+), 28 deletions(-)
-
-diff --git a/src/lj_tab.c b/src/lj_tab.c
-index c51666d3..ff216f3c 100644
---- a/src/lj_tab.c
-+++ b/src/lj_tab.c
-@@ -474,6 +474,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
- lua_assert(freenode != &G(L)->nilnode);
- collide = hashkey(t, &n->key);
- if (collide != n) { /* Colliding node not the main node? */
-+ Node *nn;
- while (noderef(collide->next) != n) /* Find predecessor. */
- collide = nextnode(collide);
- setmref(collide->next, freenode); /* Relink chain. */
-@@ -483,39 +484,63 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
- freenode->next = n->next;
- setmref(n->next, NULL);
- setnilV(&n->val);
-- /* Rechain pseudo-resurrected string keys with colliding hashes. */
-- while (nextnode(freenode)) {
-- Node *nn = nextnode(freenode);
-- if (tvisstr(&nn->key) && !tvisnil(&nn->val) &&
-- hashstr(t, strV(&nn->key)) == n) {
-- freenode->next = nn->next;
-- nn->next = n->next;
-- setmref(n->next, nn);
-- /*
-- ** Rechaining a resurrected string key creates a new dilemma:
-- ** Another string key may have originally been resurrected via
-- ** _any_ of the previous nodes as a chain anchor. Including
-- ** a node that had to be moved, which makes them unreachable.
-- ** It's not feasible to check for all previous nodes, so rechain
-- ** any string key that's currently in a non-main positions.
-- */
-- while ((nn = nextnode(freenode))) {
-- if (tvisstr(&nn->key) && !tvisnil(&nn->val)) {
-- Node *mn = hashstr(t, strV(&nn->key));
-- if (mn != freenode) {
-- freenode->next = nn->next;
-- nn->next = mn->next;
-- setmref(mn->next, nn);
-+ /*
-+ ** Nodes after n might have n as their main node, and need rechaining
-+ ** back onto n. We make use of the following property of tables: for all
-+ ** nodes m, at least one of the following four statements is true:
-+ ** 1. tvisnil(&m->key) NB: tvisnil(&m->val) is a stronger statement
-+ ** 2. tvisstr(&m->key)
-+ ** 3. tvisstr(&main(m)->key)
-+ ** 4. main(m) == main(main(m))
-+ ** Initially, we need to rechain any nn which has main(nn) == n. As
-+ ** main(n) != n (because collide != n earlier), main(nn) == n requires
-+ ** either statement 2 or statement 3 to be true about nn.
-+ */
-+ if (!tvisstr(&n->key)) {
-+ /* Statement 3 is not true, so only need to consider string keys. */
-+ while ((nn = nextnode(freenode))) {
-+ if (tvisstr(&nn->key) && !tvisnil(&nn->val) &&
-+ hashstr(t, strV(&nn->key)) == n) {
-+ goto rechain;
-+ }
-+ freenode = nn;
-+ }
-+ } else {
-+ /* Statement 3 is true, so need to consider all types of key. */
-+ while ((nn = nextnode(freenode))) {
-+ if (!tvisnil(&nn->val) && hashkey(t, &nn->key) == n) {
-+ rechain:
-+ freenode->next = nn->next;
-+ nn->next = n->next;
-+ setmref(n->next, nn);
-+ /*
-+ ** Rechaining one node onto n creates a new dilemma: we now need
-+ ** to rechain any nn which has main(nn) == n OR has main(nn) equal
-+ ** to any node which has already been rechained. Furthermore, at
-+ ** least one of n and n->next will have a string key, so all types
-+ ** of nn key need to be considered. Rather than testing whether
-+ ** main(nn) definitely _is_ in the new chain, we test whether it
-+ ** might _not_ be in the old chain, and if so re-link it into
-+ ** the correct chain.
-+ */
-+ while ((nn = nextnode(freenode))) {
-+ if (!tvisnil(&nn->val)) {
-+ Node *mn = hashkey(t, &nn->key);
-+ if (mn != freenode && mn != nn) {
-+ freenode->next = nn->next;
-+ nn->next = mn->next;
-+ setmref(mn->next, nn);
-+ } else {
-+ freenode = nn;
-+ }
- } else {
- freenode = nn;
- }
-- } else {
-- freenode = nn;
- }
-+ break;
-+ } else {
-+ freenode = nn;
- }
-- break;
-- } else {
-- freenode = nn;
- }
- }
- } else { /* Otherwise use free node. */
---
-2.21.0
-
diff --git a/luajit-s390x.patch b/luajit-s390x.patch
deleted file mode 100644
index 2b10d06..0000000
--- a/luajit-s390x.patch
+++ /dev/null
@@ -1,44523 +0,0 @@
-From 0b8f9ea1080a6b2c4beab991ab7736fd0a7896a1 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 10 Nov 2016 10:33:16 +0530
-Subject: [PATCH 001/247] Create lj_target_s390.h
-
-Adding file lj_target_s390.h
-Few arm based instructions are changed with equivalent s390x instructions
----
- src/lj_target_s390.h | 287 +++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 287 insertions(+)
- create mode 100644 src/lj_target_s390.h
-
-diff --git a/src/lj_target_s390.h b/src/lj_target_s390.h
-new file mode 100644
-index 0000000..7da2063
---- /dev/null
-+++ b/src/lj_target_s390.h
-@@ -0,0 +1,287 @@
-+/*
-+** Definitions for S390 CPUs.
-+** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
-+*/
-+
-+#ifndef _LJ_TARGET_S390_H
-+#define _LJ_TARGET_S390_H
-+
-+/* -- Registers IDs ------------------------------------------------------- */
-+
-+#define GPRDEF(_) \
-+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \
-+#if LJ_SOFTFP
-+#define FPRDEF(_)
-+#else
-+#define FPRDEF(_) \
-+ _(F0) _(F2) _(F4) _(F6)
-+#endif
-+#define VRIDDEF(_)
-+
-+#define RIDENUM(name) RID_##name,
-+
-+enum {
-+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
-+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
-+ RID_MAX,
-+ RID_TMP = RID_LR,
-+
-+ /* Calling conventions. */
-+ RID_RET = RID_R0,
-+ RID_RETLO = RID_R0,
-+ RID_RETHI = RID_R1,
-+#if LJ_SOFTFP
-+ RID_FPRET = RID_R0,
-+#else
-+ RID_FPRET = RID_D0,
-+#endif
-+
-+ /* These definitions must match with the *.dasc file(s): */
-+ RID_BASE = RID_R9, /* Interpreter BASE. */
-+ RID_LPC = RID_R6, /* Interpreter PC. */
-+ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
-+ RID_LREG = RID_R8, /* Interpreter L. */
-+
-+ /* Register ranges [min, max) and number of registers. */
-+ RID_MIN_GPR = RID_R0,
-+ RID_MAX_GPR = RID_PC+1,
-+ RID_MIN_FPR = RID_MAX_GPR,
-+#if LJ_SOFTFP
-+ RID_MAX_FPR = RID_MIN_FPR,
-+#else
-+ RID_MAX_FPR = RID_D15+1,
-+#endif
-+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
-+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
-+};
-+
-+#define RID_NUM_KREF RID_NUM_GPR
-+#define RID_MIN_KREF RID_R0
-+
-+/* -- Register sets ------------------------------------------------------- */
-+
-+/* Make use of all registers, except sp, lr and pc. */
-+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
-+#define RSET_GPREVEN \
-+ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
-+ RID2RSET(RID_R8)|RID2RSET(RID_R10))
-+#define RSET_GPRODD \
-+ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
-+ RID2RSET(RID_R9)|RID2RSET(RID_R11))
-+#if LJ_SOFTFP
-+#define RSET_FPR 0
-+#else
-+#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
-+#endif
-+#define RSET_ALL (RSET_GPR|RSET_FPR)
-+#define RSET_INIT RSET_ALL
-+
-+/* ABI-specific register sets. lr is an implicit scratch register. */
-+#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
-+#ifdef __APPLE__
-+#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
-+#else
-+#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
-+#endif
-+#if LJ_SOFTFP
-+#define RSET_SCRATCH_FPR 0
-+#else
-+#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
-+#endif
-+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
-+#define REGARG_FIRSTGPR RID_R0
-+#define REGARG_LASTGPR RID_R3
-+#define REGARG_NUMGPR 4
-+#if LJ_ABI_SOFTFP
-+#define REGARG_FIRSTFPR 0
-+#define REGARG_LASTFPR 0
-+#define REGARG_NUMFPR 0
-+#else
-+#define REGARG_FIRSTFPR RID_D0
-+#define REGARG_LASTFPR RID_D7
-+#define REGARG_NUMFPR 8
-+#endif
-+
-+/* -- Spill slots --------------------------------------------------------- */
-+
-+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
-+**
-+** SPS_FIXED: Available fixed spill slots in interpreter frame.
-+** This definition must match with the *.dasc file(s).
-+**
-+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
-+*/
-+#define SPS_FIXED 2
-+#define SPS_FIRST 2
-+
-+#define SPOFS_TMP 0
-+
-+#define sps_scale(slot) (4 * (int32_t)(slot))
-+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
-+
-+/* -- Exit state ---------------------------------------------------------- */
-+
-+/* This definition must match with the *.dasc file(s). */
-+typedef struct {
-+#if !LJ_SOFTFP
-+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
-+#endif
-+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
-+ int32_t spill[256]; /* Spill slots. */
-+} ExitState;
-+
-+/* PC after instruction that caused an exit. Used to find the trace number. */
-+#define EXITSTATE_PCREG RID_PC
-+/* Highest exit + 1 indicates stack check. */
-+#define EXITSTATE_CHECKEXIT 1
-+
-+#define EXITSTUB_SPACING 4
-+#define EXITSTUBS_PER_GROUP 32
-+
-+/* -- Instructions -------------------------------------------------------- */
-+
-+/* Instruction fields. */
-+#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
-+#define ARMF_N(r) ((r) << 16)
-+#define ARMF_D(r) ((r) << 12)
-+#define ARMF_S(r) ((r) << 8)
-+#define ARMF_M(r) (r)
-+#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
-+#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-+
-+typedef enum S390Ins {
-+
-+ // Unsupported in S390
-+ #ARMI_LDRSB = 0xe01000d0,
-+ #ARMI_S = 0x000100000,
-+ #ARMI_LDRD = 0xe00000d0,
-+ #ARMI_ADC = 0xe0a00000,
-+ #ARMI_SBC = 0xe0c00000,
-+ #ARMI_STRB = 0xe4400000,
-+ #ARMI_STRH = 0xe00000b0,
-+ #ARMI_STRD = 0xe00000f0,
-+ #ARMI_BL = 0xeb000000,
-+ #ARMI_BLX = 0xfa000000,
-+ #ARMI_BLXr = 0xe12fff30,
-+ #ARMI_BIC = 0xe1c00000,
-+ #ARMI_ORR = 0xe1800000,
-+ #ARMI_LDRB = 0xe4500000,
-+ #ARMI_MVN = 0xe1e00000,
-+ #ARMI_LDRSH = 0xe01000f0,
-+ #ARMI_NOP = 0xe1a00000,
-+ #ARMI_PUSH = 0xe92d0000,
-+ #ARMI_RSB = 0xe0600000,
-+ #ARMI_RSC = 0xe0e00000,
-+ #ARMI_TEQ = 0xe1300000,
-+ #ARMI_CCAL = 0xe0000000,
-+ #ARMI_K12 = 0x02000000,
-+ #ARMI_KNEG = 0x00200000,
-+ #ARMI_LS_W = 0x00200000,
-+ #ARMI_LS_U = 0x00800000,
-+ #ARMI_LS_P = 0x01000000,
-+ #ARMI_LS_R = 0x02000000,
-+ #ARMI_LSX_I = 0x00400000,
-+
-+
-+ #ARMI_SUB = 0xe0400000,
-+ #ARMI_ADD = 0xe0800000,
-+ #ARMI_AND = 0xe0000000,
-+ #ARMI_EOR = 0xe0200000,
-+ #ARMI_MUL = 0xe0000090,
-+ #ARMI_LDR = 0xe4100000,
-+ #ARMI_CMP = 0xe1500000,
-+ #ARMI_LDRH = 0xe01000b0,
-+ #ARMI_B = 0xea000000,
-+ #ARMI_MOV = 0xe1a00000,
-+ #ARMI_STR = 0xe4000000,
-+ #ARMI_TST = 0xe1100000,
-+ #ARMI_SMULL = 0xe0c00090,
-+ #ARMI_CMN = 0xe1700000,
-+ S390I_SR = 0x1B000000,
-+ S390I_AR = 0x1A000000,
-+ S390I_NR = 0x14000000,
-+ S390I_XR = 0x17000000,
-+ S390I_MR = 0x1C000000,
-+ S390I_LR = 0x18000000,
-+ S390I_C = 0x59000000,
-+ S390I_LH = 0x48000000,
-+ S390I_BASR = 0x0D000000,
-+ S390I_MVCL = 0x0e000000,
-+ S390I_ST = 0x50000000,
-+ S390I_TM = 0x91000000,
-+ S390I_MP = 0xbd000090,
-+ S390I_CLR = 0x15000000,
-+
-+ /* ARMv6 */
-+ #ARMI_REV = 0xe6bf0f30,
-+ #ARMI_SXTB = 0xe6af0070,
-+ #ARMI_SXTH = 0xe6bf0070,
-+ #ARMI_UXTB = 0xe6ef0070,
-+ #ARMI_UXTH = 0xe6ff0070,
-+
-+ /* ARMv6T2 */
-+ #ARMI_MOVW = 0xe3000000,
-+ #ARMI_MOVT = 0xe3400000,
-+
-+ /* VFP */
-+ ARMI_VMOV_D = 0xeeb00b40,
-+ ARMI_VMOV_S = 0xeeb00a40,
-+ ARMI_VMOVI_D = 0xeeb00b00,
-+
-+ ARMI_VMOV_R_S = 0xee100a10,
-+ ARMI_VMOV_S_R = 0xee000a10,
-+ ARMI_VMOV_RR_D = 0xec500b10,
-+ ARMI_VMOV_D_RR = 0xec400b10,
-+
-+ ARMI_VADD_D = 0xee300b00,
-+ ARMI_VSUB_D = 0xee300b40,
-+ ARMI_VMUL_D = 0xee200b00,
-+ ARMI_VMLA_D = 0xee000b00,
-+ ARMI_VMLS_D = 0xee000b40,
-+ ARMI_VNMLS_D = 0xee100b00,
-+ ARMI_VDIV_D = 0xee800b00,
-+
-+ ARMI_VABS_D = 0xeeb00bc0,
-+ ARMI_VNEG_D = 0xeeb10b40,
-+ ARMI_VSQRT_D = 0xeeb10bc0,
-+
-+ ARMI_VCMP_D = 0xeeb40b40,
-+ ARMI_VCMPZ_D = 0xeeb50b40,
-+
-+ ARMI_VMRS = 0xeef1fa10,
-+
-+ ARMI_VCVT_S32_F32 = 0xeebd0ac0,
-+ ARMI_VCVT_S32_F64 = 0xeebd0bc0,
-+ ARMI_VCVT_U32_F32 = 0xeebc0ac0,
-+ ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-+ ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-+ ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-+ ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-+ ARMI_VCVTR_U32_F64 = 0xeebc0b40,
-+ ARMI_VCVT_F32_S32 = 0xeeb80ac0,
-+ ARMI_VCVT_F64_S32 = 0xeeb80bc0,
-+ ARMI_VCVT_F32_U32 = 0xeeb80a40,
-+ ARMI_VCVT_F64_U32 = 0xeeb80b40,
-+ ARMI_VCVT_F32_F64 = 0xeeb70bc0,
-+ ARMI_VCVT_F64_F32 = 0xeeb70ac0,
-+
-+ ARMI_VLDR_S = 0xed100a00,
-+ ARMI_VLDR_D = 0xed100b00,
-+ ARMI_VSTR_S = 0xed000a00,
-+ ARMI_VSTR_D = 0xed000b00,
-+} S390Ins;
-+
-+typedef enum S390Shift {
-+ S390SH_SLL, S390SH_SRL, S390SH_SRA
-+ # Adjustment needed for ROR
-+} S390Shift;
-+
-+/* ARM condition codes. */
-+typedef enum ARMCC {
-+ CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
-+ CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
-+ CC_HS = CC_CS, CC_LO = CC_CC
-+} ARMCC;
-+
-+#endif
---
-2.20.1
-
-
-From db6a5d23dcab4e4cffd70e8d8284306ea2cd3891 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 10 Nov 2016 10:35:35 +0530
-Subject: [PATCH 002/247] Update Makefile
-
-Added condition for s390 in Makefile
----
- src/Makefile | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/src/Makefile b/src/Makefile
-index d22eb73..f388db1 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -245,6 +245,9 @@ else
- ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm
- else
-+ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= s390
-+else
- ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
- ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
- TARGET_ARCH= -D__AARCH64EB__=1
---
-2.20.1
-
-
-From 8623a84d2984441f4b58f001ee829abb7edb7b85 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 10 Nov 2016 10:42:51 +0530
-Subject: [PATCH 003/247] Update lj_arch.h
-
-Added supporting lines for s390
-Lines added using arm lines as reference
----
- src/lj_arch.h | 24 ++++++++++++++++++++++++
- 1 file changed, 24 insertions(+)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 31a1159..6421545 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -29,6 +29,7 @@
- #define LUAJIT_ARCH_mips32 6
- #define LUAJIT_ARCH_MIPS64 7
- #define LUAJIT_ARCH_mips64 7
-+#define LUAJIT_ARCH_S390 8
-
- /* Target OS. */
- #define LUAJIT_OS_OTHER 0
-@@ -49,6 +50,8 @@
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM
- #elif defined(__aarch64__)
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
-+#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) ||
defined(S390)
-+#define LUAJIT_TARGET LUAJIT_ARCH_S390
- #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) ||
defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC)
|| defined(_M_PPC)
- #define LUAJIT_TARGET LUAJIT_ARCH_PPC
- #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) ||
defined(__MIPS64)
-@@ -241,6 +244,23 @@
-
- #define LJ_ARCH_VERSION 80
-
-+#elif LUAJIT_TARGET == LUAJIT_ARCH_S390
-+
-+ #define LJ_ARCH_NAME "s390"
-+ #define LJ_ARCH_BITS 64
-+ #define LJ_ARCH_ENDIAN LUAJIT_BE
-+ #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
-+ #define LJ_ARCH_HASFPU 1
-+ #endif
-+ #define LJ_ABI_EABI 1
-+ #define LJ_TARGET_S390 1
-+ #define LJ_TARGET_EHRETREG 0
-+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
-+ #define LJ_TARGET_MASKSHIFT 0
-+ #define LJ_TARGET_MASKROT 1
-+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
-+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-+
- #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-
- #ifndef LJ_ARCH_ENDIAN
-@@ -410,6 +430,10 @@
- #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ <
5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
- #error "Need at least Clang 3.5 or newer"
- #endif
-+#elif LJ_TARGET_S390
-+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
-+#error "Need at least GCC 4.2 or newer"
-+#endif
- #else
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
- #error "Need at least GCC 4.8 or newer"
---
-2.20.1
-
-
-From bfb48077af7cd8f8fcc863b2cd79de24d99420cf Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Thu, 10 Nov 2016 19:00:41 +0530
-Subject: [PATCH 004/247] Copy of dasm_arm64.lua file, with few changes
-
-Have changed few sections of file, other part is common across architectures
----
- dynasm/dasm_s390x.lua | 1177 +++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 1177 insertions(+)
- create mode 100644 dynasm/dasm_s390x.lua
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-new file mode 100644
-index 0000000..a0a50e1
---- /dev/null
-+++ b/dynasm/dasm_s390x.lua
-@@ -0,0 +1,1177 @@
-+------------------------------------------------------------------------------
-+-- DynASM s390x module.
-+--
-+-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-+-- See dynasm.lua for full copyright notice.
-+------------------------------------------------------------------------------
-+
-+-- Module information:
-+local _info = {
-+ arch = "s390x",
-+ description = "DynASM s390x module",
-+ version = "1.4.0",
-+ vernum = 10400,
-+ release = "2015-10-18",
-+ author = "Mike Pall",
-+ license = "MIT",
-+}
-+
-+-- Exported glue functions for the arch-specific module.
-+local _M = { _info = _info }
-+
-+-- Cache library functions.
-+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
-+local assert, setmetatable, rawget = assert, setmetatable, rawget
-+local _s = string
-+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
-+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
-+local concat, sort, insert = table.concat, table.sort, table.insert
-+local bit = bit or require("bit")
-+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
-+local ror, tohex = bit.ror, bit.tohex
-+
-+-- Inherited tables and callbacks.
-+local g_opt, g_arch
-+local wline, werror, wfatal, wwarn
-+
-+-- Action name list.
-+-- CHECK: Keep this in sync with the C code!
-+local action_names = {
-+ "STOP", "SECTION", "ESC", "REL_EXT",
-+ "ALIGN", "REL_LG", "LABEL_LG",
-+ "REL_PC", "LABEL_PC", "IMM", "IMM6",
"IMM12", "IMM13W", "IMM13X", "IMML",
-+}
-+
-+-- Maximum number of section buffer positions for dasm_put().
-+-- CHECK: Keep this in sync with the C code!
-+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
-+
-+-- Action name -> action number.
-+local map_action = {}
-+for n,name in ipairs(action_names) do
-+ map_action[name] = n-1
-+end
-+
-+-- Action list buffer.
-+local actlist = {}
-+
-+-- Argument list for next dasm_put(). Start with offset 0 into action list.
-+local actargs = { 0 }
-+
-+-- Current number of section buffer positions for dasm_put().
-+local secpos = 1
-+
-+------------------------------------------------------------------------------
-+
-+-- Dump action names and numbers.
-+local function dumpactions(out)
-+ out:write("DynASM encoding engine action codes:\n")
-+ for n,name in ipairs(action_names) do
-+ local num = map_action[name]
-+ out:write(format(" %-10s %02X %d\n", name, num, num))
-+ end
-+ out:write("\n")
-+end
-+
-+-- Write action list buffer as a huge static C array.
-+local function writeactions(out, name)
-+ local nn = #actlist
-+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
-+ out:write("static const unsigned int ", name, "[", nn, "] =
{\n")
-+ for i = 1,nn-1 do
-+ assert(out:write("0x", tohex(actlist[i]), ",\n"))
-+ end
-+ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Add word to action list.
-+local function wputxw(n)
-+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of
range")
-+ actlist[#actlist+1] = n
-+end
-+
-+-- Add action to list with optional arg. Advance buffer pos, too.
-+local function waction(action, val, a, num)
-+ local w = assert(map_action[action], "bad action name
`"..action.."'")
-+ wputxw(w * 0x10000 + (val or 0))
-+ if a then actargs[#actargs+1] = a end
-+ if a or num then secpos = secpos + (num or 1) end
-+end
-+
-+-- Flush action list (intervening C code or buffer pos overflow).
-+local function wflush(term)
-+ if #actlist == actargs[1] then return end -- Nothing to flush.
-+ if not term then waction("STOP") end -- Terminate action list.
-+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
-+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
-+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
-+end
-+
-+-- Put escaped word.
-+local function wputw(n)
-+ if n <= 0x000fffff then waction("ESC") end
-+ wputxw(n)
-+end
-+
-+-- Reserve position for word.
-+local function wpos()
-+ local pos = #actlist+1
-+ actlist[pos] = ""
-+ return pos
-+end
-+
-+-- Store word to reserved position.
-+local function wputpos(pos, n)
-+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of
range")
-+ if n <= 0x000fffff then
-+ insert(actlist, pos+1, n)
-+ n = map_action.ESC * 0x10000
-+ end
-+ actlist[pos] = n
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Global label name -> global label number. With auto assignment on 1st use.
-+local next_global = 20
-+local map_global = setmetatable({}, { __index = function(t, name)
-+ if not match(name, "^[%a_][%w_]*$") then werror("bad global
label") end
-+ local n = next_global
-+ if n > 2047 then werror("too many global labels") end
-+ next_global = n + 1
-+ t[name] = n
-+ return n
-+end})
-+
-+-- Dump global labels.
-+local function dumpglobals(out, lvl)
-+ local t = {}
-+ for name, n in pairs(map_global) do t[n] = name end
-+ out:write("Global labels:\n")
-+ for i=20,next_global-1 do
-+ out:write(format(" %s\n", t[i]))
-+ end
-+ out:write("\n")
-+end
-+
-+-- Write global label enum.
-+local function writeglobals(out, prefix)
-+ local t = {}
-+ for name, n in pairs(map_global) do t[n] = name end
-+ out:write("enum {\n")
-+ for i=20,next_global-1 do
-+ out:write(" ", prefix, t[i], ",\n")
-+ end
-+ out:write(" ", prefix, "_MAX\n};\n")
-+end
-+
-+-- Write global label names.
-+local function writeglobalnames(out, name)
-+ local t = {}
-+ for name, n in pairs(map_global) do t[n] = name end
-+ out:write("static const char *const ", name, "[] = {\n")
-+ for i=20,next_global-1 do
-+ out:write(" \"", t[i], "\",\n")
-+ end
-+ out:write(" (const char *)0\n};\n")
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Extern label name -> extern label number. With auto assignment on 1st use.
-+local next_extern = 0
-+local map_extern_ = {}
-+local map_extern = setmetatable({}, { __index = function(t, name)
-+ -- No restrictions on the name for now.
-+ local n = next_extern
-+ if n > 2047 then werror("too many extern labels") end
-+ next_extern = n + 1
-+ t[name] = n
-+ map_extern_[n] = name
-+ return n
-+end})
-+
-+-- Dump extern labels.
-+local function dumpexterns(out, lvl)
-+ out:write("Extern labels:\n")
-+ for i=0,next_extern-1 do
-+ out:write(format(" %s\n", map_extern_[i]))
-+ end
-+ out:write("\n")
-+end
-+
-+-- Write extern label names.
-+local function writeexternnames(out, name)
-+ out:write("static const char *const ", name, "[] = {\n")
-+ for i=0,next_extern-1 do
-+ out:write(" \"", map_extern_[i], "\",\n")
-+ end
-+ out:write(" (const char *)0\n};\n")
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Arch-specific maps.
-+-- TODO: add s390x related register names
-+-- Ext. register name -> int. name.
-+--local map_archdef = { xzr = "@x31", wzr = "@w31", lr =
"x30", }
-+local map_archdef = {}
-+
-+-- Int. register name -> ext. name.
-+-- local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] =
"wzr", x30 = "lr", }
-+local map_reg_rev = {}
-+
-+local map_type = {} -- Type name -> { ctype, reg }
-+local ctypenum = 0 -- Type number (for Dt... macros).
-+
-+-- Reverse defines for registers.
-+function _M.revdef(s)
-+ return map_reg_rev[s] or s
-+end
-+-- not sure of these
-+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
-+
-+local map_extend = {
-+ uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
-+ sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
-+}
-+
-+local map_cond = {
-+ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
-+ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
-+ hs = 2, lo = 3,
-+}
-+
-+------------------------------------------------------------------------------
-+
-+local parse_reg_type
-+
-+
-+local function parse_gpr(expr)
-+ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
-+ local tp = map_type[tname or expr]
-+ if tp then
-+ local reg = ovreg or tp.reg
-+ if not reg then
-+ werror("type `"..(tname or expr).."' needs a register
override")
-+ end
-+ expr = reg
-+ end
-+ local r = match(expr, "^r([1-3]?[0-9])$")
-+ if r then
-+ r = tonumber(r)
-+ if r <= 31 then return r, tp end
-+ end
-+ werror("bad register name `"..expr.."'")
-+end
-+
-+local function parse_fpr(expr)
-+ local r = match(expr, "^f([1-3]?[0-9])$")
-+ if r then
-+ r = tonumber(r)
-+ if r <= 31 then return r end
-+ end
-+ werror("bad register name `"..expr.."'")
-+end
-+
-+
-+
-+
-+
-+local function parse_reg_base(expr)
-+ if expr == "sp" then return 0x3e0 end
-+ local base, tp = parse_reg(expr)
-+ if parse_reg_type ~= "x" then werror("bad register type") end
-+ parse_reg_type = false
-+ return shl(base, 5), tp
-+end
-+
-+local parse_ctx = {}
-+
-+local loadenv = setfenv and function(s)
-+ local code = loadstring(s, "")
-+ if code then setfenv(code, parse_ctx) end
-+ return code
-+end or function(s)
-+ return load(s, "", nil, parse_ctx)
-+end
-+
-+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-+local function parse_number(n)
-+ local x = tonumber(n)
-+ if x then return x end
-+ local code = loadenv("return "..n)
-+ if code then
-+ local ok, y = pcall(code)
-+ if ok then return y end
-+ end
-+ return nil
-+end
-+
-+local function parse_imm(imm, bits, shift, scale, signed)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ if n then
-+ local m = sar(n, scale)
-+ if shl(m, scale) == n then
-+ if signed then
-+ local s = sar(m, bits-1)
-+ if s == 0 then return shl(m, shift)
-+ elseif s == -1 then return shl(m + shl(1, bits), shift) end
-+ else
-+ if sar(m, bits) == 0 then return shl(m, shift) end
-+ end
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
-+ return 0
-+ end
-+end
-+
-+local function parse_imm12(imm)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ if n then
-+ if shr(n, 12) == 0 then
-+ return shl(n, 10)
-+ elseif band(n, 0xff000fff) == 0 then
-+ return shr(n, 2) + 0x00400000
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ waction("IMM12", 0, imm)
-+ return 0
-+ end
-+end
-+
-+local function parse_imm13(imm)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ local r64 = parse_reg_type == "x"
-+ if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
-+ local inv = false
-+ if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
-+ local t = {}
-+ for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
-+ local b = table.concat(t)
-+ b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
-+ local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
-+ if p0 then
-+ local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
-+ if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
-+ local s = band(-2*w, 0x3f) - 1
-+ if w == 64 then s = s + 0x1000 end
-+ if inv then
-+ return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
-+ else
-+ return shl(w-#p0, 16) + shl(s+#p1, 10)
-+ end
-+ end
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ elseif r64 then
-+ waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
-+ actargs[#actargs+1] = format("(unsigned int)((unsigned long
long)(%s)>>32)", imm)
-+ return 0
-+ else
-+ waction("IMM13W", 0, imm)
-+ return 0
-+ end
-+end
-+
-+local function parse_imm6(imm)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ if n then
-+ if n >= 0 and n <= 63 then
-+ return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ waction("IMM6", 0, imm)
-+ return 0
-+ end
-+end
-+
-+local function parse_imm_load(imm, scale)
-+ local n = parse_number(imm)
-+ if n then
-+ local m = sar(n, scale)
-+ if shl(m, scale) == n and m >= 0 and m < 0x1000 then
-+ return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
-+ elseif n >= -256 and n < 256 then
-+ return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ waction("IMML", 0, imm)
-+ return 0
-+ end
-+end
-+
-+local function parse_fpimm(imm)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ if n then
-+ local m, e = math.frexp(n)
-+ local s, e2 = 0, band(e-2, 7)
-+ if m < 0 then m = -m; s = 0x00100000 end
-+ m = m*32-16
-+ if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
-+ return s + shl(e2, 17) + shl(m, 13)
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ werror("NYI fpimm action")
-+ end
-+end
-+
-+local function parse_shift(expr)
-+ local s, s2 = match(expr, "^(%S+)%s*(.*)$")
-+ s = map_shift[s]
-+ if not s then werror("expected shift operand") end
-+ return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
-+end
-+
-+local function parse_lslx16(expr)
-+ local n = match(expr, "^lsl%s*#(%d+)$")
-+ n = tonumber(n)
-+ if not n then werror("expected shift operand") end
-+ if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
-+ werror("bad shift amount")
-+ end
-+ return shl(n, 17)
-+end
-+
-+local function parse_extend(expr)
-+ local s, s2 = match(expr, "^(%S+)%s*(.*)$")
-+ if s == "lsl" then
-+ s = parse_reg_type == "x" and 3 or 2
-+ else
-+ s = map_extend[s]
-+ end
-+ if not s then werror("expected extend operand") end
-+ return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
-+end
-+
-+local function parse_cond(expr, inv)
-+ local c = map_cond[expr]
-+ if not c then werror("expected condition operand") end
-+ return shl(bit.bxor(c, inv), 12)
-+end
-+
-+local function parse_load(params, nparams, n, op)
-+ if params[n+2] then werror("too many operands") end
-+ local pn, p2 = params[n], params[n+1]
-+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-+ if not p1 then
-+ if not p2 then
-+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-+ if reg and tailr ~= "" then
-+ local base, tp = parse_reg_base(reg)
-+ if tp then
-+ waction("IMML", 0, format(tp.ctypefmt, tailr))
-+ return op + base
-+ end
-+ end
-+ end
-+ werror("expected address operand")
-+ end
-+ local scale = shr(op, 30)
-+ if p2 then
-+ if wb == "!" then werror("bad use of '!'") end
-+ op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
-+ elseif wb == "!" then
-+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-+ if not p1a then werror("bad use of '!'") end
-+ op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
-+ else
-+ local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
-+ op = op + parse_reg_base(p1a)
-+ if p2a ~= "" then
-+ local imm = match(p2a, "^,%s*#(.*)$")
-+ if imm then
-+ op = op + parse_imm_load(imm, scale)
-+ else
-+ local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
-+ op = op + shl(parse_reg(p2b), 16) + 0x00200800
-+ if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
-+ werror("bad index register type")
-+ end
-+ if p3b == "" then
-+ if parse_reg_type ~= "x" then werror("bad index register type")
end
-+ op = op + 0x6000
-+ else
-+ if p3s == "" or p3s == "#0" then
-+ elseif p3s == "#"..scale then
-+ op = op + 0x1000
-+ else
-+ werror("bad scale")
-+ end
-+ if parse_reg_type == "x" then
-+ if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
-+ elseif p3b == "sxtx" then op = op + 0xe000
-+ else
-+ werror("bad extend/shift specifier")
-+ end
-+ else
-+ if p3b == "uxtw" then op = op + 0x4000
-+ elseif p3b == "sxtw" then op = op + 0xc000
-+ else
-+ werror("bad extend/shift specifier")
-+ end
-+ end
-+ end
-+ end
-+ else
-+ if wb == "!" then werror("bad use of '!'") end
-+ op = op + 0x01000000
-+ end
-+ end
-+ return op
-+end
-+
-+local function parse_load_pair(params, nparams, n, op)
-+ if params[n+2] then werror("too many operands") end
-+ local pn, p2 = params[n], params[n+1]
-+ local scale = shr(op, 30) == 0 and 2 or 3
-+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-+ if not p1 then
-+ if not p2 then
-+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-+ if reg and tailr ~= "" then
-+ local base, tp = parse_reg_base(reg)
-+ if tp then
-+ waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
-+ return op + base + 0x01000000
-+ end
-+ end
-+ end
-+ werror("expected address operand")
-+ end
-+ if p2 then
-+ if wb == "!" then werror("bad use of '!'") end
-+ op = op + 0x00800000
-+ else
-+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-+ if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
-+ op = op + (wb == "!" and 0x01800000 or 0x01000000)
-+ end
-+ return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
-+end
-+
-+local function parse_label(label, def)
-+ local prefix = sub(label, 1, 2)
-+ -- =>label (pc label reference)
-+ if prefix == "=>" then
-+ return "PC", 0, sub(label, 3)
-+ end
-+ -- ->name (global label reference)
-+ if prefix == "->" then
-+ return "LG", map_global[sub(label, 3)]
-+ end
-+ if def then
-+ -- [1-9] (local label definition)
-+ if match(label, "^[1-9]$") then
-+ return "LG", 10+tonumber(label)
-+ end
-+ else
-+ -- [<>][1-9] (local label reference)
-+ local dir, lnum = match(label, "^([<>])([1-9])$")
-+ if dir then -- Fwd: 1-9, Bkwd: 11-19.
-+ return "LG", lnum + (dir == ">" and 0 or 10)
-+ end
-+ -- extern label (extern label reference)
-+ local extname = match(label, "^extern%s+(%S+)$")
-+ if extname then
-+ return "EXT", map_extern[extname]
-+ end
-+ end
-+ werror("bad label `"..label.."'")
-+end
-+
-+local function branch_type(op)
-+ if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
-+ elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
-+ band(op, 0x3b000000) == 0x18000000 then
-+ return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
-+ elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
-+ elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
-+ elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
-+ else
-+ assert(false, "unknown branch type")
-+ end
-+end
-+
-+------------------------------------------------------------------------------
-+
-+local map_op, op_template
-+
-+local function op_alias(opname, f)
-+ return function(params, nparams)
-+ if not params then return "-> "..opname:sub(1, -3) end
-+ f(params, nparams)
-+ op_template(params, map_op[opname], nparams)
-+ end
-+end
-+
-+local function alias_bfx(p)
-+ p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
-+end
-+
-+local function alias_bfiz(p)
-+ parse_reg(p[1])
-+ if parse_reg_type == "w" then
-+ p[3] = "#-("..p[3]:sub(2)..")%32"
-+ p[4] = "#("..p[4]:sub(2)..")-1"
-+ else
-+ p[3] = "#-("..p[3]:sub(2)..")%64"
-+ p[4] = "#("..p[4]:sub(2)..")-1"
-+ end
-+end
-+
-+local alias_lslimm = op_alias("ubfm_4", function(p)
-+ parse_reg(p[1])
-+ local sh = p[3]:sub(2)
-+ if parse_reg_type == "w" then
-+ p[3] = "#-("..sh..")%32"
-+ p[4] = "#31-("..sh..")"
-+ else
-+ p[3] = "#-("..sh..")%64"
-+ p[4] = "#63-("..sh..")"
-+ end
-+end)
-+
-+-- Template strings for ARM instructions.
-+map_op = {
-+ -- Basic data processing instructions.
-+ add_2 =
"00000000005a0000RX-a|00000000001aRR|00000000b9f80000RRF-a|00000000e35a0000RXY-a|00000000e3080000RXY-a",
-+
-+-- and has several possible ways, need to find one, currently added two type of
-+ and_2 =
"0000000000540000RX-a|00000000140000RR|00000000b9f4RRF-a|00000000e3540000RXY-a|00000000b9800000RRE|
00000000b9e40000RRF-a",
-+ and_c = "0000000000d40000SS-a",
-+ and_i = "0000000000940000SI|00000000eb540000SIY",
-+
-+and_2 =
"0000000000540000RX-a|0000000000140000RR|00000000b9f40000RRF-a|00000000e3540000RXY-a",
-+ and_3 = "00000000e3800000RXY-a|00000000b9800000RRE|00000000b9e40000RRF-a",
-+ and_c = "0000000000d40000SS-a",
-+ and_i = "0000000000940000SI",
-+ and_i4 = "00000000eb540000SIY"
-+ and_i3 =
"000000000a540000RI-a|000000000a550000RI-a|000000000c0a0000RIL-a|000000000a560000RI-a|000000000a570000RI-a|000000000c0bRIL-a"
-+ --branch related instrcutions
-+ bal = "0000000000450000RX-a",
-+ balr = "0000000000050000RR",
-+ bas = "00000000004d0000RX-a",
-+ basr = "00000000000d0000RR",
-+ bassm = "00000000000c0000RR",
-+ bsm = "00000000000b0000RR",
-+ bc = "0000000000470000Rx-b",
-+ bcr = "00000000000070000RR",
-+ bct = "0000000000460000RX-a",
-+ bctr = "0000000000060000RR",
-+ bctg = "00000000e3460000RXY-a",
-+ bctgr = "00000000b9460000RRE",
-+ bxh = "0000000000860000RS-a",
-+ bxhg = "00000000eb440000RSY-a",
-+ bxle = "0000000000870000RS-a",
-+ bxleg = "00000000eb450000RSY-a",
-+ bras = "000000000a750000RI-b",
-+ brasl = "000000000c050000RIL-b",
-+ brc = "000000000a740000RI-c",
-+ brcl = "000000000c040000RIL-c",
-+ brct = "000000000a760000RI-b",
-+ brctg = "000000000a770000RI-b",
-+ brctg = "00000000occ60000RIL-b",
-+ brxh = "0000000000840000RSI",
-+ brxhg = "00000000ec440000RIE-e",
-+ brxle = "0000000000850000RSI",
-+ brxlg = "00000000ec450000RIE-e",
-+
-+ ----subtraction (basic operation)
-+ sub = "00000000005b0000RX-a"
-+ sr = "00000000001b0000RR"
-+ srk = "00000000b9f90000RRF-a"
-+ sy = "00000000e35b0000RXY-a"
-+ sg = "00000000e3090000RXY-a"
-+ sgr = "00000000b9090000RRE"
-+ sgrk = "00000000b9e90000RRF-a"
-+ sgf = "00000000e3190000RXY-a"
-+ sgfr = "00000000b9190000RRE"
-+ sh = "00000000004b0000RX-a"
-+ shy = "00000000e37b0000RXY-a"
-+ shhhr = "00000000b9c90000RRF-a"
-+ shhlr = "00000000b9d90000RX-a"
-+ sl = "00000000005f0000RX-a"
-+ slr = "00000000001f0000RR"
-+ slrk = "00000000b9f80000RR"
-+ sly = "00000000e35f0000RXY-a",
-+ slg = "00000000e30b0000RXY-a",
-+ slgr = "00000000b9080000RRE",
-+ slgrk = "00000000b9eb0000RRF-a",
-+ slgf = "00000000e3180000RXY-a",
-+ slgfr = "00000000b91b0000RRE",
-+ slhhhr = "00000000b9cb0000RRF-a",
-+ slhhlr = "00000000b9db0000RRF-a",
-+ slfi = "000000000c250000RIL-a",
-+ slgfi = "000000000c240000RIL-a",
-+ slb = "00000000e3990000RXY-a",
-+ slbr = "00000000b9990000RRE" ,
-+ slbg = "00000000e3890000RXY-a",
-+ slbgr = "00000000b9890000RXY-a",
-+
-+ cmp_2 = "0000000000590000RX-a|0000000000190000RR|00000000e3590000RXY-a",
-+ cmp_3 = "00000000e3200000RXY-a|00000000b9200000RRE|00000000e3300000RXY-a|
00000000b9300000RRE",
-+
-+ div_2 =
"00000000005d0000RX-a|00000000001d0000RR|00000000e3970000RXY-a|00000000b9970000RRE",
-+ div_3 ="00000000e3870000RXY-a|00000000b9870000RRE",
-+ div_sing
="00000000e30d0000RXY-a|00000000b90d0000RRE|00000000e31d0000RXY-a|00000000b91d0000RRE",
-+
-+ eor_2 =
"0000000000570000RX-a|0000000000170000RR|00000000b9f70000RRF-a|00000000e3570000RXY-a",
-+ eor_3 = "00000000e3820000RXY-a|00000000b9820000RRE|00000000b9e70000RRF-a|
-+ eor_c = "0000000000d70000SS-a",
-+ eor_i = "0000000000970000SI|
00000000eb570000|000000000c060000a|000000000c070000RIL-a",
-+
-+ -- load instruction to be added and the following instructions need to be changed (are
not s390x related)
-+
-+ neg_2 = "4b0003e0DMg",
-+ neg_3 = "4b0003e0DMSg",
-+ negs_2 = "6b0003e0DMg",
-+ negs_3 = "6b0003e0DMSg",
-+ adc_3 = "1a000000DNMg",
-+ adcs_3 = "3a000000DNMg",
-+ sbc_3 = "5a000000DNMg",
-+ sbcs_3 = "7a000000DNMg",
-+ ngc_2 = "5a0003e0DMg",
-+ ngcs_2 = "7a0003e0DMg",
-+ and_3 = "0a000000DNMg|12000000pDNig",
-+ and_4 = "0a000000DNMSg",
-+ orr_3 = "2a000000DNMg|32000000pDNig",
-+ orr_4 = "2a000000DNMSg",
-+ eor_3 = "4a000000DNMg|52000000pDNig",
-+ eor_4 = "4a000000DNMSg",
-+ ands_3 = "6a000000DNMg|72000000DNig",
-+ ands_4 = "6a000000DNMSg",
-+ tst_2 = "6a00001fNMg|7200001fNig",
-+ tst_3 = "6a00001fNMSg",
-+ bic_3 = "0a200000DNMg",
-+ bic_4 = "0a200000DNMSg",
-+ orn_3 = "2a200000DNMg",
-+ orn_4 = "2a200000DNMSg",
-+ eon_3 = "4a200000DNMg",
-+ eon_4 = "4a200000DNMSg",
-+ bics_3 = "6a200000DNMg",
-+ bics_4 = "6a200000DNMSg",
-+ movn_2 = "12800000DWg",
-+ movn_3 = "12800000DWRg",
-+ movz_2 = "52800000DWg",
-+ movz_3 = "52800000DWRg",
-+ movk_2 = "72800000DWg",
-+ movk_3 = "72800000DWRg",
-+ -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
-+ mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
-+ mov_3 = "2a0003e0DMSg",
-+ mvn_2 = "2a2003e0DMg",
-+ mvn_3 = "2a2003e0DMSg",
-+ adr_2 = "10000000DBx",
-+ adrp_2 = "90000000DBx",
-+ csel_4 = "1a800000DNMCg",
-+ csinc_4 = "1a800400DNMCg",
-+ csinv_4 = "5a800000DNMCg",
-+ csneg_4 = "5a800400DNMCg",
-+ cset_2 = "1a9f07e0Dcg",
-+ csetm_2 = "5a9f03e0Dcg",
-+ cinc_3 = "1a800400DNmcg",
-+ cinv_3 = "5a800000DNmcg",
-+ cneg_3 = "5a800400DNmcg",
-+ ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
-+ ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
-+ madd_4 = "1b000000DNMAg",
-+ msub_4 = "1b008000DNMAg",
-+ mul_3 = "1b007c00DNMg",
-+ mneg_3 = "1b00fc00DNMg",
-+ smaddl_4 = "9b200000DxNMwAx",
-+ smsubl_4 = "9b208000DxNMwAx",
-+ smull_3 = "9b207c00DxNMw",
-+ smnegl_3 = "9b20fc00DxNMw",
-+ smulh_3 = "9b407c00DNMx",
-+ umaddl_4 = "9ba00000DxNMwAx",
-+ umsubl_4 = "9ba08000DxNMwAx",
-+ umull_3 = "9ba07c00DxNMw",
-+ umnegl_3 = "9ba0fc00DxNMw",
-+ umulh_3 = "9bc07c00DNMx",
-+ udiv_3 = "1ac00800DNMg",
-+ sdiv_3 = "1ac00c00DNMg",
-+ -- Bit operations.
-+ sbfm_4 = "13000000DN12w|93400000DN12x",
-+ bfm_4 = "33000000DN12w|b3400000DN12x",
-+ ubfm_4 = "53000000DN12w|d3400000DN12x",
-+ extr_4 = "13800000DNM2w|93c00000DNM2x",
-+ sxtb_2 = "13001c00DNw|93401c00DNx",
-+ sxth_2 = "13003c00DNw|93403c00DNx",
-+ sxtw_2 = "93407c00DxNw",
-+ uxtb_2 = "53001c00DNw",
-+ uxth_2 = "53003c00DNw",
-+ sbfx_4 = op_alias("sbfm_4", alias_bfx),
-+ bfxil_4 = op_alias("bfm_4", alias_bfx),
-+ ubfx_4 = op_alias("ubfm_4", alias_bfx),
-+ sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
-+ bfi_4 = op_alias("bfm_4", alias_bfiz),
-+ ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
-+ lsl_3 = function(params, nparams)
-+ if params and params[3]:byte() == 35 then
-+ return alias_lslimm(params, nparams)
-+ else
-+ return op_template(params, "1ac02000DNMg", nparams)
-+ end
-+ end,
-+ lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
-+ asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
-+ ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
-+ clz_2 = "5ac01000DNg",
-+ cls_2 = "5ac01400DNg",
-+ rbit_2 = "5ac00000DNg",
-+ rev_2 = "5ac00800DNw|dac00c00DNx",
-+ rev16_2 = "5ac00400DNg",
-+ rev32_2 = "dac00800DNx",
-+ -- Loads and stores.
-+ ["strb_*"] = "38000000DwL",
-+ ["ldrb_*"] = "38400000DwL",
-+ ["ldrsb_*"] = "38c00000DwL|38800000DxL",
-+ ["strh_*"] = "78000000DwL",
-+ ["ldrh_*"] = "78400000DwL",
-+ ["ldrsh_*"] = "78c00000DwL|78800000DxL",
-+ ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
-+ ["ldr_*"] =
"18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
-+ ["ldrsw_*"] = "98000000DxB|b8800000DxL",
-+ -- NOTE: ldur etc. are handled by ldr et al.
-+ ["stp_*"] =
"28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
-+ ["ldp_*"] =
"28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
-+ ["ldpsw_*"] = "68400000DAxP",
-+ -- Branches.
-+ b_1 = "14000000B",
-+ bl_1 = "94000000B",
-+ blr_1 = "d63f0000Nx",
-+ br_1 = "d61f0000Nx",
-+ ret_0 = "d65f03c0",
-+ ret_1 = "d65f0000Nx",
-+ -- b.cond is added below.
-+ cbz_2 = "34000000DBg",
-+ cbnz_2 = "35000000DBg",
-+ tbz_3 = "36000000DTBw|36000000DTBx",
-+ tbnz_3 = "37000000DTBw|37000000DTBx",
-+ -- Miscellaneous instructions.
-+ -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
-+ -- TODO: sys, sysl, ic, dc, at, tlbi
-+ -- TODO: hint, yield, wfe, wfi, sev, sevl
-+ -- TODO: clrex, dsb, dmb, isb
-+ nop_0 = "d503201f",
-+ brk_0 = "d4200000",
-+ brk_1 = "d4200000W",
-+ -- Floating point instructions.
-+ fmov_2 =
"1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
-+ fabs_2 = "1e20c000DNf",
-+ fneg_2 = "1e214000DNf",
-+ fsqrt_2 = "1e21c000DNf",
-+ fcvt_2 = "1e22c000DdNs|1e624000DsNd",
-+ -- TODO: half-precision and fixed-point conversions.
-+ fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
-+ fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
-+ fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
-+ fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
-+ fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
-+ fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
-+ fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
-+ fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
-+ fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
-+ fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
-+ scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
-+ ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
-+ frintn_2 = "1e244000DNf",
-+ frintp_2 = "1e24c000DNf",
-+ frintm_2 = "1e254000DNf",
-+ frintz_2 = "1e25c000DNf",
-+ frinta_2 = "1e264000DNf",
-+ frintx_2 = "1e274000DNf",
-+ frinti_2 = "1e27c000DNf",
-+ fadd_3 = "1e202800DNMf",
-+ fsub_3 = "1e203800DNMf",
-+ fmul_3 = "1e200800DNMf",
-+ fnmul_3 = "1e208800DNMf",
-+ fdiv_3 = "1e201800DNMf",
-+ fmadd_4 = "1f000000DNMAf",
-+ fmsub_4 = "1f008000DNMAf",
-+ fnmadd_4 = "1f200000DNMAf",
-+ fnmsub_4 = "1f208000DNMAf",
-+ fmax_3 = "1e204800DNMf",
-+ fmaxnm_3 = "1e206800DNMf",
-+ fmin_3 = "1e205800DNMf",
-+ fminnm_3 = "1e207800DNMf",
-+ fcmp_2 = "1e202000NMf|1e202008NZf",
-+ fcmpe_2 = "1e202010NMf|1e202018NZf",
-+ fccmp_4 = "1e200400NMVCf",
-+ fccmpe_4 = "1e200410NMVCf",
-+ fcsel_4 = "1e200c00DNMCf",
-+ -- TODO: crc32*, aes*, sha*, pmull
-+ -- TODO: SIMD instructions.
-+}
-+for cond,c in pairs(map_cond) do
-+ map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
-+end
-+------------------------------------------------------------------------------
-+-- Handle opcodes defined with template strings: parse one template alternative against params and emit the resulting opcode at pos.
-+local function parse_template(params, template, nparams, pos)
-+ local op = tonumber(sub(template, 1, 12), 16) -- 13-16 ignored since those are
trailing zeros added after the instruction
-+ -- 00000000005a0000 converts to 90
-+ local n,rs = 1,26 -- n: index of the next operand in params. NOTE(review): rs is never read in this function.
-+
-+ parse_reg_type = false -- Not declared local here; presumably updated by parse_reg(), confirm.
-+ -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
-+ for p in gmatch(sub(template, 17), ".") do
-+ local q = params[n]
-+ if p == "R" then
-+ op = op + parse_reg(q); n = n + 1
-+ elseif p == "N" then
-+ op = op + shl(parse_reg(q), 5); n = n + 1
-+ elseif p == "M" then
-+ op = op + shl(parse_reg(q), 16); n = n + 1
-+ elseif p == "A" then
-+ op = op + shl(parse_reg(q), 10); n = n + 1
-+ elseif p == "m" then
-+ op = op + shl(parse_reg(params[n-1]), 16) -- Encodes the previous operand again at bit 16; n is not advanced.
-+ elseif p == "p" then
-+ if q == "sp" then params[n] = "@x31" end -- Rewrites the operand in place; nothing is consumed.
-+ elseif p == "g" then
-+ if parse_reg_type == "x" then
-+ op = op + 0x80000000
-+ elseif parse_reg_type ~= "w" then
-+ werror("bad register type")
-+ end
-+ parse_reg_type = false
-+ elseif p == "f" then
-+ if parse_reg_type == "d" then
-+ op = op + 0x00400000
-+ elseif parse_reg_type ~= "s" then
-+ werror("bad register type")
-+ end
-+ parse_reg_type = false
-+ elseif p == "x" or p == "w" or p == "d" or p ==
"s" then
-+ if parse_reg_type ~= p then
-+ werror("register size mismatch")
-+ end
-+ parse_reg_type = false
-+ elseif p == "L" then
-+ op = parse_load(params, nparams, n, op)
-+ elseif p == "P" then
-+ op = parse_load_pair(params, nparams, n, op)
-+ elseif p == "B" then
-+ local mode, v, s = parse_label(q, false); n = n + 1
-+ local m = branch_type(op)
-+ waction("REL_"..mode, v+m, s, 1)
-+ elseif p == "I" then
-+ op = op + parse_imm12(q); n = n + 1
-+ elseif p == "i" then
-+ op = op + parse_imm13(q); n = n + 1
-+ elseif p == "W" then
-+ op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
-+ elseif p == "T" then
-+ op = op + parse_imm6(q); n = n + 1
-+ elseif p == "1" then
-+ op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
-+ elseif p == "2" then
-+ op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
-+ elseif p == "5" then
-+ op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
-+ elseif p == "V" then
-+ op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
-+ elseif p == "F" then
-+ op = op + parse_fpimm(q); n = n + 1
-+ elseif p == "Z" then
-+ if q ~= "#0" and q ~= "#0.0" then werror("expected zero
immediate") end
-+ n = n + 1
-+ elseif p == "S" then
-+ op = op + parse_shift(q); n = n + 1
-+ elseif p == "X" then
-+ op = op + parse_extend(q); n = n + 1
-+ elseif p == "R" then -- NOTE(review): unreachable, "R" is already matched by the first branch of this chain.
-+ op = op + parse_lslx16(q); n = n + 1
-+ elseif p == "C" then
-+ op = op + parse_cond(q, 0); n = n + 1
-+ elseif p == "c" then
-+ op = op + parse_cond(q, 1); n = n + 1
-+ else
-+ assert(false) -- Template character not handled by any branch above.
-+ end
-+ end
-+ wputpos(pos, op) -- Emit the assembled opcode at the caller-supplied position.
-+end
-+function op_template(params, template, nparams) -- Try each "|"-separated alternative of template until one parses.
-+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "")
end
-+ -- Limit number of section buffer positions used by a single dasm_put().
-+ -- A single opcode needs a maximum of 3 positions.
-+ if secpos+3 > maxsecpos then wflush() end
-+ local pos = wpos()
-+ local lpos, apos, spos = #actlist, #actargs, secpos -- Snapshot so a failed alternative can be rolled back.
-+ local ok, err
-+ for t in gmatch(template, "[^|]+") do
-+ ok, err = pcall(parse_template, params, t, nparams, pos)
-+ if ok then return end -- The first alternative that parses without error wins.
-+ secpos = spos -- Roll back: restore secpos and drop up to three entries the failed attempt appended.
-+ actlist[lpos+1] = nil
-+ actlist[lpos+2] = nil
-+ actlist[lpos+3] = nil
-+ actargs[apos+1] = nil
-+ actargs[apos+2] = nil
-+ actargs[apos+3] = nil
-+ end
-+ error(err, 0) -- Every alternative failed: re-raise the last error without position info.
-+end
-+map_op[".template__"] = op_template
-+------------------------------------------------------------------------------
-+-- Pseudo-opcode to mark the position where the action list is to be emitted.
-+map_op[".actionlist_1"] = function(params)
-+ if not params then return "cvar" end
-+ local name = params[1] -- No syntax check. You get to keep the pieces.
-+ wline(function(out) writeactions(out, name) end)
-+end
-+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-+map_op[".globals_1"] = function(params)
-+ if not params then return "prefix" end
-+ local prefix = params[1] -- No syntax check. You get to keep the pieces.
-+ wline(function(out) writeglobals(out, prefix) end)
-+end
-+-- Pseudo-opcode to mark the position where the global names are to be emitted.
-+map_op[".globalnames_1"] = function(params)
-+ if not params then return "cvar" end
-+ local name = params[1] -- No syntax check. You get to keep the pieces.
-+ wline(function(out) writeglobalnames(out, name) end)
-+end
-+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-+map_op[".externnames_1"] = function(params)
-+ if not params then return "cvar" end
-+ local name = params[1] -- No syntax check. You get to keep the pieces.
-+ wline(function(out) writeexternnames(out, name) end)
-+end
-+------------------------------------------------------------------------------
-+-- Label pseudo-opcode (converted from trailing colon form). Emits a LABEL_<mode> action for the parsed label.
-+map_op[".label_1"] = function(params)
-+ if not params then return "[1-9] | ->global | =>pcexpr" end
-+ if secpos+1 > maxsecpos then wflush() end
-+ local mode, n, s = parse_label(params[1], true)
-+ if mode == "EXT" then werror("bad label definition") end -- An extern name cannot be defined as a label.
-+ waction("LABEL_"..mode, n, s, 1)
-+end
-+------------------------------------------------------------------------------
-+-- Pseudo-opcodes for data storage: .long emits each numeric parameter as one word via wputw().
-+map_op[".long_*"] = function(params)
-+ if not params then return "imm..." end
-+ for _,p in ipairs(params) do
-+ local n = tonumber(p)
-+ if not n then werror("bad immediate `"..p.."'") end
-+ if n < 0 then n = n + 2^32 end -- Map a negative value to its unsigned 32 bit equivalent.
-+ wputw(n)
-+ if secpos+2 > maxsecpos then wflush() end -- Checked after every word so long lists do not overrun the section buffer.
-+ end
-+end
-+-- Alignment pseudo-opcode: emits an ALIGN action for a power-of-two argument, otherwise reports "bad alignment".
-+map_op[".align_1"] = function(params)
-+ if not params then return "numpow2" end
-+ if secpos+1 > maxsecpos then wflush() end
-+ local align = tonumber(params[1])
-+ if align then
-+ local x = align
-+ -- Must be a power of 2 in the range (2 ... 256).
-+ for i=1,8 do -- Halve up to eight times; hitting exactly 1 proves align is 2^i.
-+ x = x / 2
-+ if x == 1 then
-+ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
-+ return
-+ end
-+ end
-+ end
-+ werror("bad alignment") -- Reached for non-numeric input and for values that are not 2, 4, ..., 256.
-+end
-+------------------------------------------------------------------------------
-+-- Pseudo-opcode for (primitive) type definitions (map to C types). Registers the type in map_type and emits a DtN() offset macro.
-+map_op[".type_3"] = function(params, nparams)
-+ if not params then
-+ return nparams == 2 and "name, ctype" or "name, ctype, reg" -- Help text depends on which arity was looked up.
-+ end
-+ local name, ctype, reg = params[1], params[2], params[3] -- reg is nil for the two-parameter form.
-+ if not match(name, "^[%a_][%w_]*$") then
-+ werror("bad type name `"..name.."'")
-+ end
-+ local tp = map_type[name]
-+ if tp then
-+ werror("duplicate type `"..name.."'")
-+ end
-+ -- Add #type to defines. A bit unclean to put it in map_archdef.
-+ map_archdef["#"..name] = "sizeof("..ctype..")"
-+ -- Add new type and emit shortcut define.
-+ local num = ctypenum + 1
-+ map_type[name] = {
-+ ctype = ctype,
-+ ctypefmt = format("Dt%X(%%s)", num),
-+ reg = reg,
-+ }
-+ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num,
ctype))
-+ ctypenum = num -- Commit the new type number only after the define has been emitted.
-+end
-+map_op[".type_2"] = map_op[".type_3"] -- The two-parameter form shares the same handler.
-+-- Dump type definitions: writes one line per entry of map_type, sorted by name. The lvl argument is not used here.
-+local function dumptypes(out, lvl)
-+ local t = {}
-+ for name in pairs(map_type) do t[#t+1] = name end
-+ sort(t) -- pairs() order is unspecified, so sort for stable output.
-+ out:write("Type definitions:\n")
-+ for _,name in ipairs(t) do
-+ local tp = map_type[name]
-+ local reg = tp.reg or "" -- Types defined without a register print an empty column.
-+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
-+ end
-+ out:write("\n")
-+end
-+------------------------------------------------------------------------------
-+-- Set the current section.
-+function _M.section(num)
-+ waction("SECTION", num)
-+ wflush(true) -- SECTION is a terminal action.
-+end
-+------------------------------------------------------------------------------
-+-- Dump architecture description.
-+function _M.dumparch(out)
-+ out:write(format("DynASM %s version %s, released %s\n\n",
-+ _info.arch, _info.version, _info.release))
-+ dumpactions(out)
-+end
-+-- Dump all user defined elements.
-+function _M.dumpdef(out, lvl)
-+ dumptypes(out, lvl)
-+ dumpglobals(out, lvl)
-+ dumpexterns(out, lvl)
-+end
-+------------------------------------------------------------------------------
-+-- Pass callbacks from/to the DynASM core.
-+function _M.passcb(wl, we, wf, ww)
-+ wline, werror, wfatal, wwarn = wl, we, wf, ww
-+ return wflush
-+end
-+-- Setup the arch-specific module.
-+function _M.setup(arch, opt)
-+ g_arch, g_opt = arch, opt
-+end
-+-- Merge the core maps and the arch-specific maps.
-+function _M.mergemaps(map_coreop, map_def)
-+ setmetatable(map_op, { __index = map_coreop })
-+ setmetatable(map_def, { __index = map_archdef })
-+ return map_op, map_def
-+end
-+return _M
-+------------------------------------------------------------------------------
---
-2.20.1
-
-
-From b8253103f7e5646ad487a7f491a0ca4fc6365abb Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Thu, 10 Nov 2016 19:00:51 +0530
-Subject: [PATCH 005/247] Created s390x header file
-
-Currently a copy of ppc.h, which is the same as arm64.h, with the architecture definition added.
----
- dynasm/dasm_s390x.h | 418 ++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 418 insertions(+)
- create mode 100644 dynasm/dasm_s390x.h
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-new file mode 100644
-index 0000000..577920a
---- /dev/null
-+++ b/dynasm/dasm_s390x.h
-@@ -0,0 +1,418 @@
-+/*
-+** DynASM s390x encoding engine.
-+** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-+** Released under the MIT license. See dynasm.lua for full copyright notice.
-+*/
-+
-+#include <stddef.h>
-+#include <stdarg.h>
-+#include <string.h>
-+#include <stdlib.h>
-+
-+#define DASM_ARCH "s390"
-+
-+#ifndef DASM_EXTERN
-+#define DASM_EXTERN(a,b,c,d) 0
-+#endif
-+
-+/* Action definitions. */
-+enum {
-+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
-+ /* The following actions need a buffer position. */
-+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
-+ /* The following actions also have an argument. */
-+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
-+ DASM__MAX
-+};
-+
-+/* Maximum number of section buffer positions for a single dasm_put() call. */
-+#define DASM_MAXSECPOS 25
-+
-+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
-+#define DASM_S_OK 0x00000000
-+#define DASM_S_NOMEM 0x01000000
-+#define DASM_S_PHASE 0x02000000
-+#define DASM_S_MATCH_SEC 0x03000000
-+#define DASM_S_RANGE_I 0x11000000
-+#define DASM_S_RANGE_SEC 0x12000000
-+#define DASM_S_RANGE_LG 0x13000000
-+#define DASM_S_RANGE_PC 0x14000000
-+#define DASM_S_RANGE_REL 0x15000000
-+#define DASM_S_UNDEF_LG 0x21000000
-+#define DASM_S_UNDEF_PC 0x22000000
-+
-+/* Macros to convert positions (8 bit section + 24 bit index). */
-+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
-+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
-+#define DASM_SEC2POS(sec) ((sec)<<24)
-+#define DASM_POS2SEC(pos) ((pos)>>24)
-+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
-+
-+/* Action list type. */
-+typedef const unsigned int *dasm_ActList;
-+
-+/* Per-section structure. */
-+typedef struct dasm_Section {
-+ int *rbuf; /* Biased buffer pointer (negative section bias). */
-+ int *buf; /* True buffer pointer. */
-+ size_t bsize; /* Buffer size in bytes. */
-+ int pos; /* Biased buffer position. */
-+ int epos; /* End of biased buffer position - max single put. */
-+ int ofs; /* Byte offset into section. */
-+} dasm_Section;
-+
-+/* Core structure holding the DynASM encoding state. */
-+struct dasm_State {
-+ size_t psize; /* Allocated size of this structure. */
-+ dasm_ActList actionlist; /* Current actionlist pointer. */
-+ int *lglabels; /* Local/global chain/pos ptrs. */
-+ size_t lgsize;
-+ int *pclabels; /* PC label chains/pos ptrs. */
-+ size_t pcsize;
-+ void **globals; /* Array of globals (bias -10). */
-+ dasm_Section *section; /* Pointer to active section. */
-+ size_t codesize; /* Total size of all code sections. */
-+ int maxsection; /* 0 <= sectionidx < maxsection. */
-+ int status; /* Status code. */
-+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
-+};
-+
-+/* The size of the core structure depends on the max. number of sections. */
-+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
-+
-+
-+/* Initialize DynASM state: allocate the core structure with room for maxsection sections and reset the label tables. */
-+void dasm_init(Dst_DECL, int maxsection)
-+{
-+ dasm_State *D;
-+ size_t psz = 0;
-+ int i;
-+ Dst_REF = NULL;
-+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
-+ D = Dst_REF;
-+ D->psize = psz; /* Kept so dasm_free() can hand the size back to DASM_M_FREE. */
-+ D->lglabels = NULL;
-+ D->lgsize = 0;
-+ D->pclabels = NULL;
-+ D->pcsize = 0;
-+ D->globals = NULL;
-+ D->maxsection = maxsection;
-+ for (i = 0; i < maxsection; i++) { /* pos and ofs of each section are not set here; dasm_setup() does that. */
-+ D->sections[i].buf = NULL; /* Need this for pass3. */
-+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-+ D->sections[i].bsize = 0;
-+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
-+ }
-+}
-+
-+/* Free DynASM state: release every section buffer, both label arrays and finally the core structure itself. */
-+void dasm_free(Dst_DECL)
-+{
-+ dasm_State *D = Dst_REF;
-+ int i;
-+ for (i = 0; i < D->maxsection; i++)
-+ if (D->sections[i].buf) /* Sections that were never grown still have a NULL buffer. */
-+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
-+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
-+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
-+ DASM_M_FREE(Dst, D, D->psize); /* Must come last: D is read by every call above. */
-+}
-+
-+/* Setup global label array. Must be called before dasm_setup(). Stores gl (biased by -10) and sizes lglabels for 10+maxgl ints. */
-+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
-+{
-+ dasm_State *D = Dst_REF;
-+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
-+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
-+}
-+
-+/* Grow PC label array. Can be called after dasm_setup(), too. Only the newly added tail is zeroed; existing entries are kept. */
-+void dasm_growpc(Dst_DECL, unsigned int maxpc)
-+{
-+ dasm_State *D = Dst_REF;
-+ size_t osz = D->pcsize; /* Size in bytes before growing. */
-+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
-+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
-+}
-+
-+/* Setup encoder: bind the action list, clear the status and all labels, and rewind every section to its start. */
-+void dasm_setup(Dst_DECL, const void *actionlist)
-+{
-+ dasm_State *D = Dst_REF;
-+ int i;
-+ D->actionlist = (dasm_ActList)actionlist;
-+ D->status = DASM_S_OK;
-+ D->section = &D->sections[0]; /* Section 0 is the active section after setup. */
-+ memset((void *)D->lglabels, 0, D->lgsize); /* NOTE(review): no NULL guard here, unlike pclabels below; relies on dasm_setupglobal() having run. */
-+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
-+ for (i = 0; i < D->maxsection; i++) {
-+ D->sections[i].pos = DASM_SEC2POS(i); /* Biased start position: section index in the top 8 bits. */
-+ D->sections[i].ofs = 0;
-+ }
-+}
-+
-+
-+#ifdef DASM_CHECKS
-+#define CK(x, st) \
-+ do { if (!(x)) { \
-+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
-+#define CKPL(kind, st) \
-+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
-+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
-+#else
-+#define CK(x, st) ((void)0)
-+#define CKPL(kind, st) ((void)0)
-+#endif
-+
-+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. Walks the action list from index start until STOP or SECTION. */
-+void dasm_put(Dst_DECL, int start, ...)
-+{
-+ va_list ap;
-+ dasm_State *D = Dst_REF;
-+ dasm_ActList p = D->actionlist + start;
-+ dasm_Section *sec = D->section;
-+ int pos = sec->pos, ofs = sec->ofs;
-+ int *b;
-+
-+ if (pos >= sec->epos) { /* Section buffer nearly full: grow it and recompute the biased pointers. */
-+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
-+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
-+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
-+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
-+ }
-+
-+ b = sec->rbuf;
-+ b[pos++] = start; /* Each put begins with its action list index; dasm_link() and dasm_encode() read it back. */
-+
-+ va_start(ap, start); /* NOTE(review): with DASM_CHECKS a failing CK()/CKPL() returns without reaching va_end(). */
-+ while (1) {
-+ unsigned int ins = *p++;
-+ unsigned int action = (ins >> 16);
-+ if (action >= DASM__MAX) {
-+ ofs += 4; /* Not an action: a plain instruction word, counted as 4 bytes. */
-+ } else {
-+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; /* Actions from DASM_REL_PC upwards consume one int vararg. */
-+ switch (action) {
-+ case DASM_STOP: goto stop;
-+ case DASM_SECTION:
-+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
-+ D->section = &D->sections[n]; goto stop;
-+ case DASM_ESC: p++; ofs += 4; break;
-+ case DASM_REL_EXT: break;
-+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
-+ case DASM_REL_LG:
-+ n = (ins & 2047) - 10; pl = D->lglabels + n;
-+ /* Bkwd rel or global. */
-+ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
-+ pl += 10; n = *pl;
-+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
-+ goto linkrel;
-+ case DASM_REL_PC:
-+ pl = D->pclabels + n; CKPL(pc, PC);
-+ putrel:
-+ n = *pl;
-+ if (n < 0) { /* Label exists. Get label pos and store it. */
-+ b[pos] = -n;
-+ } else {
-+ linkrel:
-+ b[pos] = n; /* Else link to rel chain, anchored at label. */
-+ *pl = pos;
-+ }
-+ pos++;
-+ break;
-+ case DASM_LABEL_LG:
-+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
-+ case DASM_LABEL_PC:
-+ pl = D->pclabels + n; CKPL(pc, PC);
-+ putlabel:
-+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
-+ }
-+ *pl = -pos; /* Label exists now. */
-+ b[pos++] = ofs; /* Store pass1 offset estimate. */
-+ break;
-+ case DASM_IMM:
-+#ifdef DASM_CHECKS
-+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
-+#endif
-+ n >>= ((ins>>10)&31);
-+#ifdef DASM_CHECKS
-+ if (ins & 0x8000)
-+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) ==
0, RANGE_I);
-+ else
-+ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
-+#endif
-+ b[pos++] = n;
-+ break;
-+ case DASM_IMMSH:
-+ CK((n >> 6) == 0, RANGE_I); /* Value must fit in 6 bits. */
-+ b[pos++] = n;
-+ break;
-+ }
-+ }
-+ }
-+stop:
-+ va_end(ap);
-+ sec->pos = pos; /* Write the advanced position and offset estimate back to the section. */
-+ sec->ofs = ofs;
-+}
-+#undef CK
-+
-+/* Pass 2: Link sections, shrink aligns, fix label offsets. Stores the total code size in *szp and returns a DASM_S_* status. */
-+int dasm_link(Dst_DECL, size_t *szp)
-+{
-+ dasm_State *D = Dst_REF;
-+ int secnum;
-+ int ofs = 0;
-+
-+#ifdef DASM_CHECKS
-+ *szp = 0;
-+ if (D->status != DASM_S_OK) return D->status;
-+ {
-+ int pc;
-+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
-+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; /* A positive entry is a pending rel chain, i.e. the label was never defined. */
-+ }
-+#endif
-+
-+ { /* Handle globals not defined in this translation unit. */
-+ int idx;
-+ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { /* lglabels entries below index 20 are not visited here. */
-+ int n = D->lglabels[idx];
-+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
-+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
-+ }
-+ }
-+
-+ /* Combine all code sections. No support for data sections (yet). */
-+ for (secnum = 0; secnum < D->maxsection; secnum++) {
-+ dasm_Section *sec = D->sections + secnum;
-+ int *b = sec->rbuf;
-+ int pos = DASM_SEC2POS(secnum);
-+ int lastpos = sec->pos;
-+
-+ while (pos != lastpos) {
-+ dasm_ActList p = D->actionlist + b[pos++]; /* First slot of every put is its action list index (stored by dasm_put). */
-+ while (1) {
-+ unsigned int ins = *p++;
-+ unsigned int action = (ins >> 16);
-+ switch (action) {
-+ case DASM_STOP: case DASM_SECTION: goto stop;
-+ case DASM_ESC: p++; break;
-+ case DASM_REL_EXT: break;
-+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
-+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
-+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-+ case DASM_IMM: case DASM_IMMSH: pos++; break;
-+ }
-+ }
-+ stop: (void)0;
-+ }
-+ ofs += sec->ofs; /* Next section starts right after current section. */
-+ }
-+
-+ D->codesize = ofs; /* Total size of all code sections */
-+ *szp = ofs;
-+ return DASM_S_OK;
-+}
-+
-+#ifdef DASM_CHECKS
-+#define CK(x, st) \
-+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
-+#else
-+#define CK(x, st) ((void)0)
-+#endif
-+
-+/* Pass 3: Encode sections. Writes the final code into buffer and returns DASM_S_OK, or DASM_S_PHASE if the size differs from pass 2. */
-+int dasm_encode(Dst_DECL, void *buffer)
-+{
-+ dasm_State *D = Dst_REF;
-+ char *base = (char *)buffer;
-+ unsigned int *cp = (unsigned int *)buffer;
-+ int secnum;
-+
-+ /* Encode all code sections. No support for data sections (yet). */
-+ for (secnum = 0; secnum < D->maxsection; secnum++) {
-+ dasm_Section *sec = D->sections + secnum;
-+ int *b = sec->buf;
-+ int *endb = sec->rbuf + sec->pos;
-+
-+ while (b != endb) {
-+ dasm_ActList p = D->actionlist + *b++;
-+ while (1) {
-+ unsigned int ins = *p++;
-+ unsigned int action = (ins >> 16);
-+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; /* Only actions from DASM_ALIGN upwards have a stored buffer slot. */
-+ switch (action) {
-+ case DASM_STOP: case DASM_SECTION: goto stop;
-+ case DASM_ESC: *cp++ = *p++; break;
-+ case DASM_REL_EXT:
-+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
-+ goto patchrel;
-+ case DASM_ALIGN:
-+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; /* NOTE(review): 0x60000000 looks like the PPC nop carried over from the ppc engine; confirm the padding word for s390x. */
-+ break;
-+ case DASM_REL_LG:
-+ CK(n >= 0, UNDEF_LG); /* Falls through to DASM_REL_PC. */
-+ case DASM_REL_PC:
-+ CK(n >= 0, UNDEF_PC);
-+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
-+ patchrel: /* NOTE(review): the 16/26 bit displacement masks below presumably still describe PPC branch fields; verify for s390x. */
-+ CK((n & 3) == 0 &&
-+ (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
-+ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
-+ cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
-+ break;
-+ case DASM_LABEL_LG:
-+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
-+ break;
-+ case DASM_LABEL_PC: break;
-+ case DASM_IMM:
-+ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
-+ break;
-+ case DASM_IMMSH:
-+ cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) :
((n&31)<<6)|(n&32);
-+ break;
-+ default: *cp++ = ins; break; /* Plain instruction word: copy it to the output. */
-+ }
-+ }
-+ stop: (void)0;
-+ }
-+ }
-+
-+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
-+ return DASM_S_PHASE;
-+ return DASM_S_OK;
-+}
-+#undef CK
-+
-+/* Get PC label offset. Returns the stored offset of a defined label, -1 for a referenced but undefined label, -2 otherwise. */
-+int dasm_getpclabel(Dst_DECL, unsigned int pc)
-+{
-+ dasm_State *D = Dst_REF;
-+ if (pc*sizeof(int) < D->pcsize) {
-+ int pos = D->pclabels[pc];
-+ if (pos < 0) return *DASM_POS2PTR(D, -pos); /* Defined: the entry holds the negated buffer position of the label. */
-+ if (pos > 0) return -1; /* Undefined. */
-+ }
-+ return -2; /* Unused or out of range. */
-+}
-+
-+#ifdef DASM_CHECKS
-+/* Optional sanity checker to call between isolated encoding steps. Returns the (possibly updated) status; secmatch < 0 skips the section check. */
-+int dasm_checkstep(Dst_DECL, int secmatch)
-+{
-+ dasm_State *D = Dst_REF;
-+ if (D->status == DASM_S_OK) {
-+ int i;
-+ for (i = 1; i <= 9; i++) {
-+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } /* A positive entry is a still-pending reference chain. */
-+ D->lglabels[i] = 0; /* Reset the slot so the next step starts without this local label. */
-+ }
-+ }
-+ if (D->status == DASM_S_OK && secmatch >= 0 &&
-+ D->section != &D->sections[secmatch])
-+ D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
-+ return D->status;
-+}
-+#endif
---
-2.20.1
-
-
-From fa87b1a8e1ed070d0e2d980e840d1ad4abab91c9 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Fri, 11 Nov 2016 12:04:51 +0530
-Subject: [PATCH 006/247] Update lj_arch.h
-
-changed S390 to S390x
----
- src/lj_arch.h | 20 +++++---------------
- 1 file changed, 5 insertions(+), 15 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 6421545..c781eb1 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -29,7 +29,7 @@
- #define LUAJIT_ARCH_mips32 6
- #define LUAJIT_ARCH_MIPS64 7
- #define LUAJIT_ARCH_mips64 7
--#define LUAJIT_ARCH_S390 8
-+#define LUAJIT_ARCH_S390x 8
-
- /* Target OS. */
- #define LUAJIT_OS_OTHER 0
-@@ -50,8 +50,8 @@
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM
- #elif defined(__aarch64__)
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
--#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) ||
defined(S390)
--#define LUAJIT_TARGET LUAJIT_ARCH_S390
-+#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x)
|| defined(S390x)
-+#define LUAJIT_TARGET LUAJIT_ARCH_S390x
- #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) ||
defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC)
|| defined(_M_PPC)
- #define LUAJIT_TARGET LUAJIT_ARCH_PPC
- #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) ||
defined(__MIPS64)
-@@ -246,20 +246,10 @@
-
- #elif LUAJIT_TARGET == LUAJIT_ARCH_S390
-
-- #define LJ_ARCH_NAME "s390"
-+ #define LJ_ARCH_NAME "s390x"
- #define LJ_ARCH_BITS 64
- #define LJ_ARCH_ENDIAN LUAJIT_BE
-- #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
-- #define LJ_ARCH_HASFPU 1
-- #endif
-- #define LJ_ABI_EABI 1
- #define LJ_TARGET_S390 1
-- #define LJ_TARGET_EHRETREG 0
-- #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
-- #define LJ_TARGET_MASKSHIFT 0
-- #define LJ_TARGET_MASKROT 1
-- #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
-- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-
- #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-
-@@ -430,7 +420,7 @@
- #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ <
5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
- #error "Need at least Clang 3.5 or newer"
- #endif
--#elif LJ_TARGET_S390
-+#elif LJ_TARGET_S390x
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
- #error "Need at least GCC 4.2 or newer"
- #endif
---
-2.20.1
-
-
-From c585381130dd3c553986f36d7527e06f71b58f7c Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Fri, 11 Nov 2016 12:08:47 +0530
-Subject: [PATCH 007/247] Update Makefile
-
-changed S390 to S390x
----
- src/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index f388db1..2bf15d2 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -245,8 +245,8 @@ else
- ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm
- else
--ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH)))
-- TARGET_LJARCH= s390
-+ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= s390x
- else
- ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
- ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
---
-2.20.1
-
-
-From 342ac2f0a831bb12277eee09f1b6e2158bfa248e Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Fri, 11 Nov 2016 12:09:32 +0530
-Subject: [PATCH 008/247] Rename lj_target_s390.h to lj_target_s390x.h
-
-changed file name
----
- src/lj_target_s390.h | 287 ------------------------------------------
- src/lj_target_s390x.h | 287 ++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 287 insertions(+), 287 deletions(-)
- delete mode 100644 src/lj_target_s390.h
- create mode 100644 src/lj_target_s390x.h
-
-diff --git a/src/lj_target_s390.h b/src/lj_target_s390.h
-deleted file mode 100644
-index 7da2063..0000000
---- a/src/lj_target_s390.h
-+++ /dev/null
-@@ -1,287 +0,0 @@
--/*
--** Definitions for S390 CPUs.
--** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
--*/
--
--#ifndef _LJ_TARGET_S390_H
--#define _LJ_TARGET_S390_H
--
--/* -- Registers IDs ------------------------------------------------------- */
--
--#define GPRDEF(_) \
-- _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \
--#if LJ_SOFTFP
--#define FPRDEF(_)
--#else
--#define FPRDEF(_) \
-- _(F0) _(F2) _(F4) _(F6)
--#endif
--#define VRIDDEF(_)
--
--#define RIDENUM(name) RID_##name,
--
--enum {
-- GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
-- FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
-- RID_MAX,
-- RID_TMP = RID_LR,
--
-- /* Calling conventions. */
-- RID_RET = RID_R0,
-- RID_RETLO = RID_R0,
-- RID_RETHI = RID_R1,
--#if LJ_SOFTFP
-- RID_FPRET = RID_R0,
--#else
-- RID_FPRET = RID_D0,
--#endif
--
-- /* These definitions must match with the *.dasc file(s): */
-- RID_BASE = RID_R9, /* Interpreter BASE. */
-- RID_LPC = RID_R6, /* Interpreter PC. */
-- RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
-- RID_LREG = RID_R8, /* Interpreter L. */
--
-- /* Register ranges [min, max) and number of registers. */
-- RID_MIN_GPR = RID_R0,
-- RID_MAX_GPR = RID_PC+1,
-- RID_MIN_FPR = RID_MAX_GPR,
--#if LJ_SOFTFP
-- RID_MAX_FPR = RID_MIN_FPR,
--#else
-- RID_MAX_FPR = RID_D15+1,
--#endif
-- RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
-- RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
--};
--
--#define RID_NUM_KREF RID_NUM_GPR
--#define RID_MIN_KREF RID_R0
--
--/* -- Register sets ------------------------------------------------------- */
--
--/* Make use of all registers, except sp, lr and pc. */
--#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
--#define RSET_GPREVEN \
-- (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
-- RID2RSET(RID_R8)|RID2RSET(RID_R10))
--#define RSET_GPRODD \
-- (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
-- RID2RSET(RID_R9)|RID2RSET(RID_R11))
--#if LJ_SOFTFP
--#define RSET_FPR 0
--#else
--#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
--#endif
--#define RSET_ALL (RSET_GPR|RSET_FPR)
--#define RSET_INIT RSET_ALL
--
--/* ABI-specific register sets. lr is an implicit scratch register. */
--#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
--#ifdef __APPLE__
--#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
--#else
--#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
--#endif
--#if LJ_SOFTFP
--#define RSET_SCRATCH_FPR 0
--#else
--#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
--#endif
--#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
--#define REGARG_FIRSTGPR RID_R0
--#define REGARG_LASTGPR RID_R3
--#define REGARG_NUMGPR 4
--#if LJ_ABI_SOFTFP
--#define REGARG_FIRSTFPR 0
--#define REGARG_LASTFPR 0
--#define REGARG_NUMFPR 0
--#else
--#define REGARG_FIRSTFPR RID_D0
--#define REGARG_LASTFPR RID_D7
--#define REGARG_NUMFPR 8
--#endif
--
--/* -- Spill slots --------------------------------------------------------- */
--
--/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
--**
--** SPS_FIXED: Available fixed spill slots in interpreter frame.
--** This definition must match with the *.dasc file(s).
--**
--** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
--*/
--#define SPS_FIXED 2
--#define SPS_FIRST 2
--
--#define SPOFS_TMP 0
--
--#define sps_scale(slot) (4 * (int32_t)(slot))
--#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
--
--/* -- Exit state ---------------------------------------------------------- */
--
--/* This definition must match with the *.dasc file(s). */
--typedef struct {
--#if !LJ_SOFTFP
-- lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
--#endif
-- int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
-- int32_t spill[256]; /* Spill slots. */
--} ExitState;
--
--/* PC after instruction that caused an exit. Used to find the trace number. */
--#define EXITSTATE_PCREG RID_PC
--/* Highest exit + 1 indicates stack check. */
--#define EXITSTATE_CHECKEXIT 1
--
--#define EXITSTUB_SPACING 4
--#define EXITSTUBS_PER_GROUP 32
--
--/* -- Instructions -------------------------------------------------------- */
--
--/* Instruction fields. */
--#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
--#define ARMF_N(r) ((r) << 16)
--#define ARMF_D(r) ((r) << 12)
--#define ARMF_S(r) ((r) << 8)
--#define ARMF_M(r) (r)
--#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
--#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
--
--typedef enum S390Ins {
--
-- // Unsupported in S390
-- #ARMI_LDRSB = 0xe01000d0,
-- #ARMI_S = 0x000100000,
-- #ARMI_LDRD = 0xe00000d0,
-- #ARMI_ADC = 0xe0a00000,
-- #ARMI_SBC = 0xe0c00000,
-- #ARMI_STRB = 0xe4400000,
-- #ARMI_STRH = 0xe00000b0,
-- #ARMI_STRD = 0xe00000f0,
-- #ARMI_BL = 0xeb000000,
-- #ARMI_BLX = 0xfa000000,
-- #ARMI_BLXr = 0xe12fff30,
-- #ARMI_BIC = 0xe1c00000,
-- #ARMI_ORR = 0xe1800000,
-- #ARMI_LDRB = 0xe4500000,
-- #ARMI_MVN = 0xe1e00000,
-- #ARMI_LDRSH = 0xe01000f0,
-- #ARMI_NOP = 0xe1a00000,
-- #ARMI_PUSH = 0xe92d0000,
-- #ARMI_RSB = 0xe0600000,
-- #ARMI_RSC = 0xe0e00000,
-- #ARMI_TEQ = 0xe1300000,
-- #ARMI_CCAL = 0xe0000000,
-- #ARMI_K12 = 0x02000000,
-- #ARMI_KNEG = 0x00200000,
-- #ARMI_LS_W = 0x00200000,
-- #ARMI_LS_U = 0x00800000,
-- #ARMI_LS_P = 0x01000000,
-- #ARMI_LS_R = 0x02000000,
-- #ARMI_LSX_I = 0x00400000,
--
--
-- #ARMI_SUB = 0xe0400000,
-- #ARMI_ADD = 0xe0800000,
-- #ARMI_AND = 0xe0000000,
-- #ARMI_EOR = 0xe0200000,
-- #ARMI_MUL = 0xe0000090,
-- #ARMI_LDR = 0xe4100000,
-- #ARMI_CMP = 0xe1500000,
-- #ARMI_LDRH = 0xe01000b0,
-- #ARMI_B = 0xea000000,
-- #ARMI_MOV = 0xe1a00000,
-- #ARMI_STR = 0xe4000000,
-- #ARMI_TST = 0xe1100000,
-- #ARMI_SMULL = 0xe0c00090,
-- #ARMI_CMN = 0xe1700000,
-- S390I_SR = 0x1B000000,
-- S390I_AR = 0x1A000000,
-- S390I_NR = 0x14000000,
-- S390I_XR = 0x17000000,
-- S390I_MR = 0x1C000000,
-- S390I_LR = 0x18000000,
-- S390I_C = 0x59000000,
-- S390I_LH = 0x48000000,
-- S390I_BASR = 0x0D000000,
-- S390I_MVCL = 0x0e000000,
-- S390I_ST = 0x50000000,
-- S390I_TM = 0x91000000,
-- S390I_MP = 0xbd000090,
-- S390I_CLR = 0x15000000,
--
-- /* ARMv6 */
-- #ARMI_REV = 0xe6bf0f30,
-- #ARMI_SXTB = 0xe6af0070,
-- #ARMI_SXTH = 0xe6bf0070,
-- #ARMI_UXTB = 0xe6ef0070,
-- #ARMI_UXTH = 0xe6ff0070,
--
-- /* ARMv6T2 */
-- #ARMI_MOVW = 0xe3000000,
-- #ARMI_MOVT = 0xe3400000,
--
-- /* VFP */
-- ARMI_VMOV_D = 0xeeb00b40,
-- ARMI_VMOV_S = 0xeeb00a40,
-- ARMI_VMOVI_D = 0xeeb00b00,
--
-- ARMI_VMOV_R_S = 0xee100a10,
-- ARMI_VMOV_S_R = 0xee000a10,
-- ARMI_VMOV_RR_D = 0xec500b10,
-- ARMI_VMOV_D_RR = 0xec400b10,
--
-- ARMI_VADD_D = 0xee300b00,
-- ARMI_VSUB_D = 0xee300b40,
-- ARMI_VMUL_D = 0xee200b00,
-- ARMI_VMLA_D = 0xee000b00,
-- ARMI_VMLS_D = 0xee000b40,
-- ARMI_VNMLS_D = 0xee100b00,
-- ARMI_VDIV_D = 0xee800b00,
--
-- ARMI_VABS_D = 0xeeb00bc0,
-- ARMI_VNEG_D = 0xeeb10b40,
-- ARMI_VSQRT_D = 0xeeb10bc0,
--
-- ARMI_VCMP_D = 0xeeb40b40,
-- ARMI_VCMPZ_D = 0xeeb50b40,
--
-- ARMI_VMRS = 0xeef1fa10,
--
-- ARMI_VCVT_S32_F32 = 0xeebd0ac0,
-- ARMI_VCVT_S32_F64 = 0xeebd0bc0,
-- ARMI_VCVT_U32_F32 = 0xeebc0ac0,
-- ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-- ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-- ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-- ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-- ARMI_VCVTR_U32_F64 = 0xeebc0b40,
-- ARMI_VCVT_F32_S32 = 0xeeb80ac0,
-- ARMI_VCVT_F64_S32 = 0xeeb80bc0,
-- ARMI_VCVT_F32_U32 = 0xeeb80a40,
-- ARMI_VCVT_F64_U32 = 0xeeb80b40,
-- ARMI_VCVT_F32_F64 = 0xeeb70bc0,
-- ARMI_VCVT_F64_F32 = 0xeeb70ac0,
--
-- ARMI_VLDR_S = 0xed100a00,
-- ARMI_VLDR_D = 0xed100b00,
-- ARMI_VSTR_S = 0xed000a00,
-- ARMI_VSTR_D = 0xed000b00,
--} S390Ins;
--
--typedef enum S390Shift {
-- S390SH_SLL, S390SH_SRL, S390SH_SRA
-- # Adjustment needed for ROR
--} S390Shift;
--
--/* ARM condition codes. */
--typedef enum ARMCC {
-- CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
-- CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
-- CC_HS = CC_CS, CC_LO = CC_CC
--} ARMCC;
--
--#endif
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-new file mode 100644
-index 0000000..7da2063
---- /dev/null
-+++ b/src/lj_target_s390x.h
-@@ -0,0 +1,287 @@
-+/*
-+** Definitions for S390 CPUs.
-+** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
-+*/
-+
-+#ifndef _LJ_TARGET_S390_H
-+#define _LJ_TARGET_S390_H
-+
-+/* -- Registers IDs ------------------------------------------------------- */
-+
-+#define GPRDEF(_) \
-+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \
-+#if LJ_SOFTFP
-+#define FPRDEF(_)
-+#else
-+#define FPRDEF(_) \
-+ _(F0) _(F2) _(F4) _(F6)
-+#endif
-+#define VRIDDEF(_)
-+
-+#define RIDENUM(name) RID_##name,
-+
-+enum {
-+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
-+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
-+ RID_MAX,
-+ RID_TMP = RID_LR,
-+
-+ /* Calling conventions. */
-+ RID_RET = RID_R0,
-+ RID_RETLO = RID_R0,
-+ RID_RETHI = RID_R1,
-+#if LJ_SOFTFP
-+ RID_FPRET = RID_R0,
-+#else
-+ RID_FPRET = RID_D0,
-+#endif
-+
-+ /* These definitions must match with the *.dasc file(s): */
-+ RID_BASE = RID_R9, /* Interpreter BASE. */
-+ RID_LPC = RID_R6, /* Interpreter PC. */
-+ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
-+ RID_LREG = RID_R8, /* Interpreter L. */
-+
-+ /* Register ranges [min, max) and number of registers. */
-+ RID_MIN_GPR = RID_R0,
-+ RID_MAX_GPR = RID_PC+1,
-+ RID_MIN_FPR = RID_MAX_GPR,
-+#if LJ_SOFTFP
-+ RID_MAX_FPR = RID_MIN_FPR,
-+#else
-+ RID_MAX_FPR = RID_D15+1,
-+#endif
-+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
-+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
-+};
-+
-+#define RID_NUM_KREF RID_NUM_GPR
-+#define RID_MIN_KREF RID_R0
-+
-+/* -- Register sets ------------------------------------------------------- */
-+
-+/* Make use of all registers, except sp, lr and pc. */
-+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
-+#define RSET_GPREVEN \
-+ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
-+ RID2RSET(RID_R8)|RID2RSET(RID_R10))
-+#define RSET_GPRODD \
-+ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
-+ RID2RSET(RID_R9)|RID2RSET(RID_R11))
-+#if LJ_SOFTFP
-+#define RSET_FPR 0
-+#else
-+#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
-+#endif
-+#define RSET_ALL (RSET_GPR|RSET_FPR)
-+#define RSET_INIT RSET_ALL
-+
-+/* ABI-specific register sets. lr is an implicit scratch register. */
-+#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
-+#ifdef __APPLE__
-+#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
-+#else
-+#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
-+#endif
-+#if LJ_SOFTFP
-+#define RSET_SCRATCH_FPR 0
-+#else
-+#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
-+#endif
-+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
-+#define REGARG_FIRSTGPR RID_R0
-+#define REGARG_LASTGPR RID_R3
-+#define REGARG_NUMGPR 4
-+#if LJ_ABI_SOFTFP
-+#define REGARG_FIRSTFPR 0
-+#define REGARG_LASTFPR 0
-+#define REGARG_NUMFPR 0
-+#else
-+#define REGARG_FIRSTFPR RID_D0
-+#define REGARG_LASTFPR RID_D7
-+#define REGARG_NUMFPR 8
-+#endif
-+
-+/* -- Spill slots --------------------------------------------------------- */
-+
-+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
-+**
-+** SPS_FIXED: Available fixed spill slots in interpreter frame.
-+** This definition must match with the *.dasc file(s).
-+**
-+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
-+*/
-+#define SPS_FIXED 2
-+#define SPS_FIRST 2
-+
-+#define SPOFS_TMP 0
-+
-+#define sps_scale(slot) (4 * (int32_t)(slot))
-+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
-+
-+/* -- Exit state ---------------------------------------------------------- */
-+
-+/* This definition must match with the *.dasc file(s). */
-+typedef struct {
-+#if !LJ_SOFTFP
-+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
-+#endif
-+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
-+ int32_t spill[256]; /* Spill slots. */
-+} ExitState;
-+
-+/* PC after instruction that caused an exit. Used to find the trace number. */
-+#define EXITSTATE_PCREG RID_PC
-+/* Highest exit + 1 indicates stack check. */
-+#define EXITSTATE_CHECKEXIT 1
-+
-+#define EXITSTUB_SPACING 4
-+#define EXITSTUBS_PER_GROUP 32
-+
-+/* -- Instructions -------------------------------------------------------- */
-+
-+/* Instruction fields. */
-+#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
-+#define ARMF_N(r) ((r) << 16)
-+#define ARMF_D(r) ((r) << 12)
-+#define ARMF_S(r) ((r) << 8)
-+#define ARMF_M(r) (r)
-+#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
-+#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-+
-+typedef enum S390Ins {
-+
-+ // Unsupported in S390
-+ #ARMI_LDRSB = 0xe01000d0,
-+ #ARMI_S = 0x000100000,
-+ #ARMI_LDRD = 0xe00000d0,
-+ #ARMI_ADC = 0xe0a00000,
-+ #ARMI_SBC = 0xe0c00000,
-+ #ARMI_STRB = 0xe4400000,
-+ #ARMI_STRH = 0xe00000b0,
-+ #ARMI_STRD = 0xe00000f0,
-+ #ARMI_BL = 0xeb000000,
-+ #ARMI_BLX = 0xfa000000,
-+ #ARMI_BLXr = 0xe12fff30,
-+ #ARMI_BIC = 0xe1c00000,
-+ #ARMI_ORR = 0xe1800000,
-+ #ARMI_LDRB = 0xe4500000,
-+ #ARMI_MVN = 0xe1e00000,
-+ #ARMI_LDRSH = 0xe01000f0,
-+ #ARMI_NOP = 0xe1a00000,
-+ #ARMI_PUSH = 0xe92d0000,
-+ #ARMI_RSB = 0xe0600000,
-+ #ARMI_RSC = 0xe0e00000,
-+ #ARMI_TEQ = 0xe1300000,
-+ #ARMI_CCAL = 0xe0000000,
-+ #ARMI_K12 = 0x02000000,
-+ #ARMI_KNEG = 0x00200000,
-+ #ARMI_LS_W = 0x00200000,
-+ #ARMI_LS_U = 0x00800000,
-+ #ARMI_LS_P = 0x01000000,
-+ #ARMI_LS_R = 0x02000000,
-+ #ARMI_LSX_I = 0x00400000,
-+
-+
-+ #ARMI_SUB = 0xe0400000,
-+ #ARMI_ADD = 0xe0800000,
-+ #ARMI_AND = 0xe0000000,
-+ #ARMI_EOR = 0xe0200000,
-+ #ARMI_MUL = 0xe0000090,
-+ #ARMI_LDR = 0xe4100000,
-+ #ARMI_CMP = 0xe1500000,
-+ #ARMI_LDRH = 0xe01000b0,
-+ #ARMI_B = 0xea000000,
-+ #ARMI_MOV = 0xe1a00000,
-+ #ARMI_STR = 0xe4000000,
-+ #ARMI_TST = 0xe1100000,
-+ #ARMI_SMULL = 0xe0c00090,
-+ #ARMI_CMN = 0xe1700000,
-+ S390I_SR = 0x1B000000,
-+ S390I_AR = 0x1A000000,
-+ S390I_NR = 0x14000000,
-+ S390I_XR = 0x17000000,
-+ S390I_MR = 0x1C000000,
-+ S390I_LR = 0x18000000,
-+ S390I_C = 0x59000000,
-+ S390I_LH = 0x48000000,
-+ S390I_BASR = 0x0D000000,
-+ S390I_MVCL = 0x0e000000,
-+ S390I_ST = 0x50000000,
-+ S390I_TM = 0x91000000,
-+ S390I_MP = 0xbd000090,
-+ S390I_CLR = 0x15000000,
-+
-+ /* ARMv6 */
-+ #ARMI_REV = 0xe6bf0f30,
-+ #ARMI_SXTB = 0xe6af0070,
-+ #ARMI_SXTH = 0xe6bf0070,
-+ #ARMI_UXTB = 0xe6ef0070,
-+ #ARMI_UXTH = 0xe6ff0070,
-+
-+ /* ARMv6T2 */
-+ #ARMI_MOVW = 0xe3000000,
-+ #ARMI_MOVT = 0xe3400000,
-+
-+ /* VFP */
-+ ARMI_VMOV_D = 0xeeb00b40,
-+ ARMI_VMOV_S = 0xeeb00a40,
-+ ARMI_VMOVI_D = 0xeeb00b00,
-+
-+ ARMI_VMOV_R_S = 0xee100a10,
-+ ARMI_VMOV_S_R = 0xee000a10,
-+ ARMI_VMOV_RR_D = 0xec500b10,
-+ ARMI_VMOV_D_RR = 0xec400b10,
-+
-+ ARMI_VADD_D = 0xee300b00,
-+ ARMI_VSUB_D = 0xee300b40,
-+ ARMI_VMUL_D = 0xee200b00,
-+ ARMI_VMLA_D = 0xee000b00,
-+ ARMI_VMLS_D = 0xee000b40,
-+ ARMI_VNMLS_D = 0xee100b00,
-+ ARMI_VDIV_D = 0xee800b00,
-+
-+ ARMI_VABS_D = 0xeeb00bc0,
-+ ARMI_VNEG_D = 0xeeb10b40,
-+ ARMI_VSQRT_D = 0xeeb10bc0,
-+
-+ ARMI_VCMP_D = 0xeeb40b40,
-+ ARMI_VCMPZ_D = 0xeeb50b40,
-+
-+ ARMI_VMRS = 0xeef1fa10,
-+
-+ ARMI_VCVT_S32_F32 = 0xeebd0ac0,
-+ ARMI_VCVT_S32_F64 = 0xeebd0bc0,
-+ ARMI_VCVT_U32_F32 = 0xeebc0ac0,
-+ ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-+ ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-+ ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-+ ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-+ ARMI_VCVTR_U32_F64 = 0xeebc0b40,
-+ ARMI_VCVT_F32_S32 = 0xeeb80ac0,
-+ ARMI_VCVT_F64_S32 = 0xeeb80bc0,
-+ ARMI_VCVT_F32_U32 = 0xeeb80a40,
-+ ARMI_VCVT_F64_U32 = 0xeeb80b40,
-+ ARMI_VCVT_F32_F64 = 0xeeb70bc0,
-+ ARMI_VCVT_F64_F32 = 0xeeb70ac0,
-+
-+ ARMI_VLDR_S = 0xed100a00,
-+ ARMI_VLDR_D = 0xed100b00,
-+ ARMI_VSTR_S = 0xed000a00,
-+ ARMI_VSTR_D = 0xed000b00,
-+} S390Ins;
-+
-+typedef enum S390Shift {
-+ S390SH_SLL, S390SH_SRL, S390SH_SRA
-+ # Adjustment needed for ROR
-+} S390Shift;
-+
-+/* ARM condition codes. */
-+typedef enum ARMCC {
-+ CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
-+ CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
-+ CC_HS = CC_CS, CC_LO = CC_CC
-+} ARMCC;
-+
-+#endif
---
-2.20.1
-
-
-From a8fb2fa7613b5a5f314ae5c1d44fb53b9a89073e Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Fri, 11 Nov 2016 12:13:30 +0530
-Subject: [PATCH 009/247] Update lj_target_s390x.h
-
-removed un replaced arm instructions
-changed S390 to S390x
----
- src/lj_target_s390x.h | 129 ++++--------------------------------------
- 1 file changed, 12 insertions(+), 117 deletions(-)
-
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-index 7da2063..27bb349 100644
---- a/src/lj_target_s390x.h
-+++ b/src/lj_target_s390x.h
-@@ -10,12 +10,15 @@
-
- #define GPRDEF(_) \
- _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \
-+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _R(15) \
- #if LJ_SOFTFP
- #define FPRDEF(_)
- #else
- #define FPRDEF(_) \
-- _(F0) _(F2) _(F4) _(F6)
-+ _(F0) _(F1) _(F2) _(F3) \
-+ _(F4) _(F5) _(F6) _(F7) \
-+ _(F8) _(F9) _(F10) _(F11) \
-+ _(F12) _(F13) _(F14) _(F15)
- #endif
- #define VRIDDEF(_)
-
-@@ -150,54 +153,7 @@ typedef struct {
- #define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
- #define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-
--typedef enum S390Ins {
--
-- // Unsupported in S390
-- #ARMI_LDRSB = 0xe01000d0,
-- #ARMI_S = 0x000100000,
-- #ARMI_LDRD = 0xe00000d0,
-- #ARMI_ADC = 0xe0a00000,
-- #ARMI_SBC = 0xe0c00000,
-- #ARMI_STRB = 0xe4400000,
-- #ARMI_STRH = 0xe00000b0,
-- #ARMI_STRD = 0xe00000f0,
-- #ARMI_BL = 0xeb000000,
-- #ARMI_BLX = 0xfa000000,
-- #ARMI_BLXr = 0xe12fff30,
-- #ARMI_BIC = 0xe1c00000,
-- #ARMI_ORR = 0xe1800000,
-- #ARMI_LDRB = 0xe4500000,
-- #ARMI_MVN = 0xe1e00000,
-- #ARMI_LDRSH = 0xe01000f0,
-- #ARMI_NOP = 0xe1a00000,
-- #ARMI_PUSH = 0xe92d0000,
-- #ARMI_RSB = 0xe0600000,
-- #ARMI_RSC = 0xe0e00000,
-- #ARMI_TEQ = 0xe1300000,
-- #ARMI_CCAL = 0xe0000000,
-- #ARMI_K12 = 0x02000000,
-- #ARMI_KNEG = 0x00200000,
-- #ARMI_LS_W = 0x00200000,
-- #ARMI_LS_U = 0x00800000,
-- #ARMI_LS_P = 0x01000000,
-- #ARMI_LS_R = 0x02000000,
-- #ARMI_LSX_I = 0x00400000,
--
--
-- #ARMI_SUB = 0xe0400000,
-- #ARMI_ADD = 0xe0800000,
-- #ARMI_AND = 0xe0000000,
-- #ARMI_EOR = 0xe0200000,
-- #ARMI_MUL = 0xe0000090,
-- #ARMI_LDR = 0xe4100000,
-- #ARMI_CMP = 0xe1500000,
-- #ARMI_LDRH = 0xe01000b0,
-- #ARMI_B = 0xea000000,
-- #ARMI_MOV = 0xe1a00000,
-- #ARMI_STR = 0xe4000000,
-- #ARMI_TST = 0xe1100000,
-- #ARMI_SMULL = 0xe0c00090,
-- #ARMI_CMN = 0xe1700000,
-+typedef enum S390xIns {
- S390I_SR = 0x1B000000,
- S390I_AR = 0x1A000000,
- S390I_NR = 0x14000000,
-@@ -212,76 +168,15 @@ typedef enum S390Ins {
- S390I_TM = 0x91000000,
- S390I_MP = 0xbd000090,
- S390I_CLR = 0x15000000,
-+} S390xIns;
-
-- /* ARMv6 */
-- #ARMI_REV = 0xe6bf0f30,
-- #ARMI_SXTB = 0xe6af0070,
-- #ARMI_SXTH = 0xe6bf0070,
-- #ARMI_UXTB = 0xe6ef0070,
-- #ARMI_UXTH = 0xe6ff0070,
--
-- /* ARMv6T2 */
-- #ARMI_MOVW = 0xe3000000,
-- #ARMI_MOVT = 0xe3400000,
--
-- /* VFP */
-- ARMI_VMOV_D = 0xeeb00b40,
-- ARMI_VMOV_S = 0xeeb00a40,
-- ARMI_VMOVI_D = 0xeeb00b00,
--
-- ARMI_VMOV_R_S = 0xee100a10,
-- ARMI_VMOV_S_R = 0xee000a10,
-- ARMI_VMOV_RR_D = 0xec500b10,
-- ARMI_VMOV_D_RR = 0xec400b10,
--
-- ARMI_VADD_D = 0xee300b00,
-- ARMI_VSUB_D = 0xee300b40,
-- ARMI_VMUL_D = 0xee200b00,
-- ARMI_VMLA_D = 0xee000b00,
-- ARMI_VMLS_D = 0xee000b40,
-- ARMI_VNMLS_D = 0xee100b00,
-- ARMI_VDIV_D = 0xee800b00,
--
-- ARMI_VABS_D = 0xeeb00bc0,
-- ARMI_VNEG_D = 0xeeb10b40,
-- ARMI_VSQRT_D = 0xeeb10bc0,
--
-- ARMI_VCMP_D = 0xeeb40b40,
-- ARMI_VCMPZ_D = 0xeeb50b40,
--
-- ARMI_VMRS = 0xeef1fa10,
--
-- ARMI_VCVT_S32_F32 = 0xeebd0ac0,
-- ARMI_VCVT_S32_F64 = 0xeebd0bc0,
-- ARMI_VCVT_U32_F32 = 0xeebc0ac0,
-- ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-- ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-- ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-- ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-- ARMI_VCVTR_U32_F64 = 0xeebc0b40,
-- ARMI_VCVT_F32_S32 = 0xeeb80ac0,
-- ARMI_VCVT_F64_S32 = 0xeeb80bc0,
-- ARMI_VCVT_F32_U32 = 0xeeb80a40,
-- ARMI_VCVT_F64_U32 = 0xeeb80b40,
-- ARMI_VCVT_F32_F64 = 0xeeb70bc0,
-- ARMI_VCVT_F64_F32 = 0xeeb70ac0,
--
-- ARMI_VLDR_S = 0xed100a00,
-- ARMI_VLDR_D = 0xed100b00,
-- ARMI_VSTR_S = 0xed000a00,
-- ARMI_VSTR_D = 0xed000b00,
--} S390Ins;
--
--typedef enum S390Shift {
-+typedef enum S390xShift {
- S390SH_SLL, S390SH_SRL, S390SH_SRA
-- # Adjustment needed for ROR
--} S390Shift;
-+} S390xShift;
-
- /* ARM condition codes. */
--typedef enum ARMCC {
-- CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
-- CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
-- CC_HS = CC_CS, CC_LO = CC_CC
--} ARMCC;
-+typedef enum S390xCC {
-+
-+} S390xCC;
-
- #endif
---
-2.20.1
-
-
-From d60e4da56523dd9fb69317ef4bdecf85cc3f47a2 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Tue, 15 Nov 2016 10:42:11 +0530
-Subject: [PATCH 010/247] Create vm_s390x.dasc
-
-created vm_s390x.dasc file
-its a copy of vm_x86.dasc
-working on to change this specific to s390x
----
- src/vm_s390x.dasc | 5779 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 5779 insertions(+)
- create mode 100644 src/vm_s390x.dasc
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-new file mode 100644
-index 0000000..d7d618d
---- /dev/null
-+++ b/src/vm_s390x.dasc
-@@ -0,0 +1,5779 @@
-+|// Low-level VM code for x86 CPUs.
-+|// Bytecode interpreter, fast functions and helper functions.
-+|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
-+|
-+|.if P64
-+|.arch x64
-+|.else
-+|.arch x86
-+|.endif
-+|.section code_op, code_sub
-+|
-+|.actionlist build_actionlist
-+|.globals GLOB_
-+|.globalnames globnames
-+|.externnames extnames
-+|
-+|//-----------------------------------------------------------------------
-+|
-+|.if P64
-+|.define X64, 1
-+|.if WIN
-+|.define X64WIN, 1
-+|.endif
-+|.endif
-+|
-+|// Fixed register assignments for the interpreter.
-+|// This is very fragile and has many dependencies. Caveat emptor.
-+|.define BASE, edx // Not C callee-save, refetched anyway.
-+|.if not X64
-+|.define KBASE, edi // Must be C callee-save.
-+|.define KBASEa, KBASE
-+|.define PC, esi // Must be C callee-save.
-+|.define PCa, PC
-+|.define DISPATCH, ebx // Must be C callee-save.
-+|.elif X64WIN
-+|.define KBASE, edi // Must be C callee-save.
-+|.define KBASEa, rdi
-+|.define PC, esi // Must be C callee-save.
-+|.define PCa, rsi
-+|.define DISPATCH, ebx // Must be C callee-save.
-+|.else
-+|.define KBASE, r15d // Must be C callee-save.
-+|.define KBASEa, r15
-+|.define PC, ebx // Must be C callee-save.
-+|.define PCa, rbx
-+|.define DISPATCH, r14d // Must be C callee-save.
-+|.endif
-+|
-+|.define RA, ecx
-+|.define RAH, ch
-+|.define RAL, cl
-+|.define RB, ebp // Must be ebp (C callee-save).
-+|.define RC, eax // Must be eax.
-+|.define RCW, ax
-+|.define RCH, ah
-+|.define RCL, al
-+|.define OP, RB
-+|.define RD, RC
-+|.define RDW, RCW
-+|.define RDL, RCL
-+|.if X64
-+|.define RAa, rcx
-+|.define RBa, rbp
-+|.define RCa, rax
-+|.define RDa, rax
-+|.else
-+|.define RAa, RA
-+|.define RBa, RB
-+|.define RCa, RC
-+|.define RDa, RD
-+|.endif
-+|
-+|.if not X64
-+|.define FCARG1, ecx // x86 fastcall arguments.
-+|.define FCARG2, edx
-+|.elif X64WIN
-+|.define CARG1, rcx // x64/WIN64 C call arguments.
-+|.define CARG2, rdx
-+|.define CARG3, r8
-+|.define CARG4, r9
-+|.define CARG1d, ecx
-+|.define CARG2d, edx
-+|.define CARG3d, r8d
-+|.define CARG4d, r9d
-+|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
-+|.define FCARG2, CARG2d
-+|.else
-+|.define CARG1, rdi // x64/POSIX C call arguments.
-+|.define CARG2, rsi
-+|.define CARG3, rdx
-+|.define CARG4, rcx
-+|.define CARG5, r8
-+|.define CARG6, r9
-+|.define CARG1d, edi
-+|.define CARG2d, esi
-+|.define CARG3d, edx
-+|.define CARG4d, ecx
-+|.define CARG5d, r8d
-+|.define CARG6d, r9d
-+|.define FCARG1, CARG1d // Simulate x86 fastcall.
-+|.define FCARG2, CARG2d
-+|.endif
-+|
-+|// Type definitions. Some of these are only used for documentation.
-+|.type L, lua_State
-+|.type GL, global_State
-+|.type TVALUE, TValue
-+|.type GCOBJ, GCobj
-+|.type STR, GCstr
-+|.type TAB, GCtab
-+|.type LFUNC, GCfuncL
-+|.type CFUNC, GCfuncC
-+|.type PROTO, GCproto
-+|.type UPVAL, GCupval
-+|.type NODE, Node
-+|.type NARGS, int
-+|.type TRACE, GCtrace
-+|.type SBUF, SBuf
-+|
-+|// Stack layout while in interpreter. Must match with lj_frame.h.
-+|//-----------------------------------------------------------------------
-+|.if not X64 // x86 stack layout.
-+|
-+|.if WIN
-+|
-+|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
-+|.macro saveregs_
-+| push edi; push esi; push ebx
-+| push extern lj_err_unwind_win
-+| fs; push dword [0]
-+| fs; mov [0], esp
-+| sub esp, CFRAME_SPACE
-+|.endmacro
-+|.macro restoreregs
-+| add esp, CFRAME_SPACE
-+| fs; pop dword [0]
-+| pop edi // Short for esp += 4.
-+| pop ebx; pop esi; pop edi; pop ebp
-+|.endmacro
-+|
-+|.else
-+|
-+|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
-+|.macro saveregs_
-+| push edi; push esi; push ebx
-+| sub esp, CFRAME_SPACE
-+|.endmacro
-+|.macro restoreregs
-+| add esp, CFRAME_SPACE
-+| pop ebx; pop esi; pop edi; pop ebp
-+|.endmacro
-+|
-+|.endif
-+|
-+|.macro saveregs
-+| push ebp; saveregs_
-+|.endmacro
-+|
-+|.if WIN
-+|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
-+|.define SAVE_NRES, aword [esp+aword*18]
-+|.define SAVE_CFRAME, aword [esp+aword*17]
-+|.define SAVE_L, aword [esp+aword*16]
-+|//----- 16 byte aligned, ^^^ arguments from C caller
-+|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
-+|.define SAVE_R4, aword [esp+aword*14]
-+|.define SAVE_R3, aword [esp+aword*13]
-+|.define SAVE_R2, aword [esp+aword*12]
-+|//----- 16 byte aligned
-+|.define SAVE_R1, aword [esp+aword*11]
-+|.define SEH_FUNC, aword [esp+aword*10]
-+|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
-+|.define UNUSED2, aword [esp+aword*8]
-+|//----- 16 byte aligned
-+|.define UNUSED1, aword [esp+aword*7]
-+|.define SAVE_PC, aword [esp+aword*6]
-+|.define TMP2, aword [esp+aword*5]
-+|.define TMP1, aword [esp+aword*4]
-+|//----- 16 byte aligned
-+|.define ARG4, aword [esp+aword*3]
-+|.define ARG3, aword [esp+aword*2]
-+|.define ARG2, aword [esp+aword*1]
-+|.define ARG1, aword [esp] //<-- esp while in interpreter.
-+|//----- 16 byte aligned, ^^^ arguments for C callee
-+|.else
-+|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
-+|.define SAVE_NRES, aword [esp+aword*14]
-+|.define SAVE_CFRAME, aword [esp+aword*13]
-+|.define SAVE_L, aword [esp+aword*12]
-+|//----- 16 byte aligned, ^^^ arguments from C caller
-+|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
-+|.define SAVE_R4, aword [esp+aword*10]
-+|.define SAVE_R3, aword [esp+aword*9]
-+|.define SAVE_R2, aword [esp+aword*8]
-+|//----- 16 byte aligned
-+|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
-+|.define SAVE_PC, aword [esp+aword*6]
-+|.define TMP2, aword [esp+aword*5]
-+|.define TMP1, aword [esp+aword*4]
-+|//----- 16 byte aligned
-+|.define ARG4, aword [esp+aword*3]
-+|.define ARG3, aword [esp+aword*2]
-+|.define ARG2, aword [esp+aword*1]
-+|.define ARG1, aword [esp] //<-- esp while in interpreter.
-+|//----- 16 byte aligned, ^^^ arguments for C callee
-+|.endif
-+|
-+|// FPARGx overlaps ARGx and ARG(x+1) on x86.
-+|.define FPARG3, qword [esp+qword*1]
-+|.define FPARG1, qword [esp]
-+|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
-+|.define TMPQ, qword [esp+aword*4]
-+|.define TMP3, ARG4
-+|.define ARG5, TMP1
-+|.define TMPa, TMP1
-+|.define MULTRES, TMP2
-+|
-+|// Arguments for vm_call and vm_pcall.
-+|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
-+|
-+|// Arguments for vm_cpcall.
-+|.define INARG_CP_CALL, SAVE_ERRF
-+|.define INARG_CP_UD, SAVE_NRES
-+|.define INARG_CP_FUNC, SAVE_CFRAME
-+|
-+|//-----------------------------------------------------------------------
-+|.elif X64WIN // x64/Windows stack layout
-+|
-+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
-+|.macro saveregs_
-+| push rdi; push rsi; push rbx
-+| sub rsp, CFRAME_SPACE
-+|.endmacro
-+|.macro saveregs
-+| push rbp; saveregs_
-+|.endmacro
-+|.macro restoreregs
-+| add rsp, CFRAME_SPACE
-+| pop rbx; pop rsi; pop rdi; pop rbp
-+|.endmacro
-+|
-+|.define SAVE_CFRAME, aword [rsp+aword*13]
-+|.define SAVE_PC, dword [rsp+dword*25]
-+|.define SAVE_L, dword [rsp+dword*24]
-+|.define SAVE_ERRF, dword [rsp+dword*23]
-+|.define SAVE_NRES, dword [rsp+dword*22]
-+|.define TMP2, dword [rsp+dword*21]
-+|.define TMP1, dword [rsp+dword*20]
-+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
-+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
-+|.define SAVE_R4, aword [rsp+aword*8]
-+|.define SAVE_R3, aword [rsp+aword*7]
-+|.define SAVE_R2, aword [rsp+aword*6]
-+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
-+|.define ARG5, aword [rsp+aword*4]
-+|.define CSAVE_4, aword [rsp+aword*3]
-+|.define CSAVE_3, aword [rsp+aword*2]
-+|.define CSAVE_2, aword [rsp+aword*1]
-+|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
-+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
-+|
-+|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
-+|.define TMPQ, qword [rsp+aword*10]
-+|.define MULTRES, TMP2
-+|.define TMPa, ARG5
-+|.define ARG5d, dword [rsp+aword*4]
-+|.define TMP3, ARG5d
-+|
-+|//-----------------------------------------------------------------------
-+|.else // x64/POSIX stack layout
-+|
-+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
-+|.macro saveregs_
-+| push rbx; push r15; push r14
-+|.if NO_UNWIND
-+| push r13; push r12
-+|.endif
-+| sub rsp, CFRAME_SPACE
-+|.endmacro
-+|.macro saveregs
-+| push rbp; saveregs_
-+|.endmacro
-+|.macro restoreregs
-+| add rsp, CFRAME_SPACE
-+|.if NO_UNWIND
-+| pop r12; pop r13
-+|.endif
-+| pop r14; pop r15; pop rbx; pop rbp
-+|.endmacro
-+|
-+|//----- 16 byte aligned,
-+|.if NO_UNWIND
-+|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
-+|.define SAVE_R4, aword [rsp+aword*10]
-+|.define SAVE_R3, aword [rsp+aword*9]
-+|.define SAVE_R2, aword [rsp+aword*8]
-+|.define SAVE_R1, aword [rsp+aword*7]
-+|.define SAVE_RU2, aword [rsp+aword*6]
-+|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
-+|.else
-+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
-+|.define SAVE_R4, aword [rsp+aword*8]
-+|.define SAVE_R3, aword [rsp+aword*7]
-+|.define SAVE_R2, aword [rsp+aword*6]
-+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
-+|.endif
-+|.define SAVE_CFRAME, aword [rsp+aword*4]
-+|.define SAVE_PC, dword [rsp+dword*7]
-+|.define SAVE_L, dword [rsp+dword*6]
-+|.define SAVE_ERRF, dword [rsp+dword*5]
-+|.define SAVE_NRES, dword [rsp+dword*4]
-+|.define TMPa, aword [rsp+aword*1]
-+|.define TMP2, dword [rsp+dword*1]
-+|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
-+|//----- 16 byte aligned
-+|
-+|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
-+|.define TMPQ, qword [rsp]
-+|.define TMP3, dword [rsp+aword*1]
-+|.define MULTRES, TMP2
-+|
-+|.endif
-+|
-+|//-----------------------------------------------------------------------
-+|
-+|// Instruction headers.
-+|.macro ins_A; .endmacro
-+|.macro ins_AD; .endmacro
-+|.macro ins_AJ; .endmacro
-+|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
-+|.macro ins_AB_; movzx RB, RCH; .endmacro
-+|.macro ins_A_C; movzx RC, RCL; .endmacro
-+|.macro ins_AND; not RDa; .endmacro
-+|
-+|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
-+|.macro ins_NEXT
-+| mov RC, [PC]
-+| movzx RA, RCH
-+| movzx OP, RCL
-+| add PC, 4
-+| shr RC, 16
-+|.if X64
-+| jmp aword [DISPATCH+OP*8]
-+|.else
-+| jmp aword [DISPATCH+OP*4]
-+|.endif
-+|.endmacro
-+|
-+|// Instruction footer.
-+|.if 1
-+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
-+| .define ins_next, ins_NEXT
-+| .define ins_next_, ins_NEXT
-+|.else
-+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
-+| // Affects only certain kinds of benchmarks (and only with -j off).
-+| // Around 10%-30% slower on Core2, a lot more slower on P4.
-+| .macro ins_next
-+| jmp ->ins_next
-+| .endmacro
-+| .macro ins_next_
-+| ->ins_next:
-+| ins_NEXT
-+| .endmacro
-+|.endif
-+|
-+|// Call decode and dispatch.
-+|.macro ins_callt
-+| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
-+| mov PC, LFUNC:RB->pc
-+| mov RA, [PC]
-+| movzx OP, RAL
-+| movzx RA, RAH
-+| add PC, 4
-+|.if X64
-+| jmp aword [DISPATCH+OP*8]
-+|.else
-+| jmp aword [DISPATCH+OP*4]
-+|.endif
-+|.endmacro
-+|
-+|.macro ins_call
-+| // BASE = new base, RB = LFUNC, RD = nargs+1
-+| mov [BASE-4], PC
-+| ins_callt
-+|.endmacro
-+|
-+|//-----------------------------------------------------------------------
-+|
-+|// Macros to test operand types.
-+|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
-+|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
-+|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
-+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
-+|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
-+|
-+|// These operands must be used with movzx.
-+|.define PC_OP, byte [PC-4]
-+|.define PC_RA, byte [PC-3]
-+|.define PC_RB, byte [PC-1]
-+|.define PC_RC, byte [PC-2]
-+|.define PC_RD, word [PC-2]
-+|
-+|.macro branchPC, reg
-+| lea PC, [PC+reg*4-BCBIAS_J*4]
-+|.endmacro
-+|
-+|// Assumes DISPATCH is relative to GL.
-+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
-+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
-+|
-+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
-+|
-+|// Decrement hashed hotcount and trigger trace recorder if zero.
-+|.macro hotloop, reg
-+| mov reg, PC
-+| shr reg, 1
-+| and reg, HOTCOUNT_PCMASK
-+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
-+| jb ->vm_hotloop
-+|.endmacro
-+|
-+|.macro hotcall, reg
-+| mov reg, PC
-+| shr reg, 1
-+| and reg, HOTCOUNT_PCMASK
-+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
-+| jb ->vm_hotcall
-+|.endmacro
-+|
-+|// Set current VM state.
-+|.macro set_vmstate, st
-+| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
-+|.endmacro
-+|
-+|// x87 compares.
-+|.macro fcomparepp // Compare and pop st0 >< st1.
-+| fucomip st1
-+| fpop
-+|.endmacro
-+|
-+|.macro fpop1; fstp st1; .endmacro
-+|
-+|// Synthesize SSE FP constants.
-+|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
-+|.if X64
-+| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
-+|.else
-+| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
-+|.endif
-+|.endmacro
-+|
-+|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
-+|.if X64
-+| mov64 tmp, U64x(val,00000000); movd reg, tmp
-+|.else
-+| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
-+|.endif
-+|.endmacro
-+|
-+|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
-+| sseconst_hi reg, tmp, 80000000 // High dword 0x80000000, low dword 0: only bit 63 set.
-+|.endmacro
-+|.macro sseconst_1, reg, tmp // Synthesize 1.0.
-+| sseconst_hi reg, tmp, 3ff00000 // High dword of the IEEE 754 double 1.0.
-+|.endmacro
-+|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
-+| sseconst_hi reg, tmp, bff00000 // Same as 1.0 with the sign bit set.
-+|.endmacro
-+|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
-+| sseconst_hi reg, tmp, 43300000 // High dword of the IEEE 754 double 2^52.
-+|.endmacro
-+|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
-+| sseconst_hi reg, tmp, 43380000 // As 2^52, plus the top mantissa bit (2^51).
-+|.endmacro
-+|
-+|// Move table write barrier back. Overwrites reg.
-+|.macro barrierback, tab, reg
-+| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
-+| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current value of gc.grayagain.
-+| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab replaces it in gc.grayagain ...
-+| mov tab->gclist, reg // ... and links to the previous value through gclist.
-+|.endmacro
-+|
-+|//-----------------------------------------------------------------------
-+
-+/* Generate subroutines used by opcodes and other parts of the VM. */
-+/* The .code_sub section should be last to help static branch prediction. */
-+static void build_subroutines(BuildCtx *ctx)
-+{
-+ |.code_sub
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Return handling ----------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_returnp:
-+ | test PC, FRAME_P
-+ | jz ->cont_dispatch
-+ |
-+ | // Return from pcall or xpcall fast func.
-+ | and PC, -8
-+ | sub BASE, PC // Restore caller base.
-+ | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
-+ | mov PC, [BASE-4] // Fetch PC of previous frame.
-+ | // Prepending may overwrite the pcall frame, so do it at the end.
-+ | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
-+ |
-+ |->vm_returnc:
-+ | add RD, 1 // RD = nresults+1
-+ | jz ->vm_unwind_yield
-+ | mov MULTRES, RD
-+ | test PC, FRAME_TYPE
-+ | jz ->BC_RET_Z // Handle regular return to Lua.
-+ |
-+ |->vm_return:
-+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
-+ | xor PC, FRAME_C
-+ | test PC, FRAME_TYPE
-+ | jnz ->vm_returnp
-+ |
-+ | // Return to C.
-+ | set_vmstate C
-+ | and PC, -8
-+ | sub PC, BASE
-+ | neg PC // Previous base = BASE - delta.
-+ |
-+ | sub RD, 1
-+ | jz >2
-+ |1: // Move results down.
-+ |.if X64
-+ | mov RBa, [BASE+RA]
-+ | mov [BASE-8], RBa
-+ |.else
-+ | mov RB, [BASE+RA]
-+ | mov [BASE-8], RB
-+ | mov RB, [BASE+RA+4]
-+ | mov [BASE-4], RB
-+ |.endif
-+ | add BASE, 8
-+ | sub RD, 1
-+ | jnz <1
-+ |2:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, PC
-+ |3:
-+ | mov RD, MULTRES
-+ | mov RA, SAVE_NRES // RA = wanted nresults+1
-+ |4:
-+ | cmp RA, RD
-+ | jne >6 // More/less results wanted?
-+ |5:
-+ | sub BASE, 8
-+ | mov L:RB->top, BASE
-+ |
-+ |->vm_leave_cp:
-+ | mov RAa, SAVE_CFRAME // Restore previous C frame.
-+ | mov L:RB->cframe, RAa
-+ | xor eax, eax // Ok return status for vm_pcall.
-+ |
-+ |->vm_leave_unw:
-+ | restoreregs
-+ | ret
-+ |
-+ |6:
-+ | jb >7 // Less results wanted?
-+ | // More results wanted. Check stack size and fill up results with nil.
-+ | cmp BASE, L:RB->maxstack
-+ | ja >8
-+ | mov dword [BASE-4], LJ_TNIL
-+ | add BASE, 8
-+ | add RD, 1
-+ | jmp <4
-+ |
-+ |7: // Less results wanted.
-+ | test RA, RA
-+ | jz <5 // But check for LUA_MULTRET+1.
-+ | sub RA, RD // Negative result!
-+ | lea BASE, [BASE+RA*8] // Correct top.
-+ | jmp <5
-+ |
-+ |8: // Corner case: need to grow stack for filling up results.
-+ | // This can happen if:
-+ | // - A C function grows the stack (a lot).
-+ | // - The GC shrinks the stack in between.
-+ | // - A return back from a lua_call() with (high) nresults adjustment.
-+ | mov L:RB->top, BASE // Save current top held in BASE (yes).
-+ | mov MULTRES, RD // Need to fill only remainder with nil.
-+ | mov FCARG2, RA
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
-+ | jmp <3
-+ |
-+ |->vm_unwind_yield:
-+ | mov al, LUA_YIELD
-+ | jmp ->vm_unwind_c_eh
-+ |
-+ |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
-+ | // (void *cframe, int errcode)
-+ |.if X64
-+ | mov eax, CARG2d // Error return status for vm_pcall.
-+ | mov rsp, CARG1
-+ |.else
-+ | mov eax, FCARG2 // Error return status for vm_pcall.
-+ | mov esp, FCARG1
-+ |.if WIN
-+ | lea FCARG1, SEH_NEXT
-+ | fs; mov [0], FCARG1
-+ |.endif
-+ |.endif
-+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
-+ | mov L:RB, SAVE_L
-+ | mov GL:RB, L:RB->glref
-+ | mov dword GL:RB->vmstate, ~LJ_VMST_C
-+ | jmp ->vm_leave_unw
-+ |
-+ |->vm_unwind_rethrow:
-+ |.if X64 and not X64WIN
-+ | mov FCARG1, SAVE_L
-+ | mov FCARG2, eax
-+ | restoreregs
-+ | jmp extern lj_err_throw@8 // (lua_State *L, int errcode)
-+ |.endif
-+ |
-+ |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
-+ | // (void *cframe)
-+ |.if X64
-+ | and CARG1, CFRAME_RAWMASK
-+ | mov rsp, CARG1
-+ |.else
-+ | and FCARG1, CFRAME_RAWMASK
-+ | mov esp, FCARG1
-+ |.if WIN
-+ | lea FCARG1, SEH_NEXT
-+ | fs; mov [0], FCARG1
-+ |.endif
-+ |.endif
-+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
-+ | mov L:RB, SAVE_L
-+ | mov RAa, -8 // Results start at BASE+RA = BASE-8.
-+ | mov RD, 1+1 // Really 1+2 results, incr. later.
-+ | mov BASE, L:RB->base
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | add DISPATCH, GG_G2DISP
-+ | mov PC, [BASE-4] // Fetch PC of previous frame.
-+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
-+ | set_vmstate INTERP
-+ | jmp ->vm_returnc // Increments RD/MULTRES and returns.
-+ |
-+ |.if WIN and not X64
-+ |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
-+ | // (void *cframe, void *excptrec, void *unwinder, int errcode)
-+ | mov [esp], FCARG1 // Return value for RtlUnwind.
-+ | push FCARG2 // Exception record for RtlUnwind.
-+ | push 0 // Ignored by RtlUnwind.
-+ | push dword [FCARG1+CFRAME_OFS_SEH]
-+ | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
-+ | mov FCARG1, eax
-+ | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
-+ | ret // Jump to unwinder.
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Grow stack for calls -----------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_growstack_c: // Grow stack for C function.
-+ | mov FCARG2, LUA_MINSTACK
-+ | jmp >2
-+ |
-+ |->vm_growstack_v: // Grow stack for vararg Lua function.
-+ | sub RD, 8
-+ | jmp >1
-+ |
-+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
-+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ |1:
-+ | movzx RA, byte [PC-4+PC2PROTO(framesize)]
-+ | add PC, 4 // Must point after first instruction.
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RD
-+ | mov SAVE_PC, PC
-+ | mov FCARG2, RA
-+ |2:
-+ | // RB = L, L->base = new base, L->top = top
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->base
-+ | mov RD, L:RB->top
-+ | mov LFUNC:RB, [BASE-8]
-+ | sub RD, BASE
-+ | shr RD, 3
-+ | add NARGS:RD, 1
-+ | // BASE = new base, RB = LFUNC, RD = nargs+1
-+ | ins_callt // Just retry the call.
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Entry points into the assembler VM ---------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_resume: // Setup C frame and resume thread.
-+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
-+ | saveregs
-+ |.if X64
-+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-+ | mov SAVE_L, CARG1d
-+ | mov RA, CARG2d
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
-+ |.endif
-+ | mov PC, FRAME_CP
-+ | xor RD, RD
-+ | lea KBASEa, [esp+CFRAME_RESUME]
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | add DISPATCH, GG_G2DISP
-+ | mov SAVE_PC, RD // Any value outside of bytecode is ok.
-+ | mov SAVE_CFRAME, RDa
-+ |.if X64
-+ | mov SAVE_NRES, RD
-+ | mov SAVE_ERRF, RD
-+ |.endif
-+ | mov L:RB->cframe, KBASEa
-+ | cmp byte L:RB->status, RDL
-+ | je >2 // Initial resume (like a call).
-+ |
-+ | // Resume after yield (like a return).
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ | set_vmstate INTERP
-+ | mov byte L:RB->status, RDL
-+ | mov BASE, L:RB->base
-+ | mov RD, L:RB->top
-+ | sub RD, RA
-+ | shr RD, 3
-+ | add RD, 1 // RD = nresults+1
-+ | sub RA, BASE // RA = resultofs
-+ | mov PC, [BASE-4]
-+ | mov MULTRES, RD
-+ | test PC, FRAME_TYPE
-+ | jz ->BC_RET_Z
-+ | jmp ->vm_return
-+ |
-+ |->vm_pcall: // Setup protected C frame and enter VM.
-+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
-+ | saveregs
-+ | mov PC, FRAME_CP
-+ |.if X64
-+ | mov SAVE_ERRF, CARG4d
-+ |.endif
-+ | jmp >1
-+ |
-+ |->vm_call: // Setup C frame and enter VM.
-+ | // (lua_State *L, TValue *base, int nres1)
-+ | saveregs
-+ | mov PC, FRAME_C
-+ |
-+ |1: // Entry point for vm_pcall above (PC = ftype).
-+ |.if X64
-+ | mov SAVE_NRES, CARG3d
-+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-+ | mov SAVE_L, CARG1d
-+ | mov RA, CARG2d
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
-+ |.endif
-+ |
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
-+ | mov SAVE_CFRAME, KBASEa
-+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
-+ | add DISPATCH, GG_G2DISP
-+ |.if X64
-+ | mov L:RB->cframe, rsp
-+ |.else
-+ | mov L:RB->cframe, esp
-+ |.endif
-+ |
-+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ | set_vmstate INTERP
-+ | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
-+ | add PC, RA
-+ | sub PC, BASE // PC = frame delta + frame type
-+ |
-+ | mov RD, L:RB->top
-+ | sub RD, RA
-+ | shr NARGS:RD, 3
-+ | add NARGS:RD, 1 // RD = nargs+1
-+ |
-+ |->vm_call_dispatch:
-+ | mov LFUNC:RB, [RA-8]
-+ | cmp dword [RA-4], LJ_TFUNC
-+ | jne ->vmeta_call // Ensure KBASE defined and != BASE.
-+ |
-+ |->vm_call_dispatch_f:
-+ | mov BASE, RA
-+ | ins_call
-+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
-+ |
-+ |->vm_cpcall: // Setup protected C frame, call C.
-+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
-+ | saveregs
-+ |.if X64
-+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-+ | mov SAVE_L, CARG1d
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
-+ | mov RC, INARG_CP_UD // Get args before they are overwritten.
-+ | mov RA, INARG_CP_FUNC
-+ | mov BASE, INARG_CP_CALL
-+ |.endif
-+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
-+ |
-+ | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
-+ | sub KBASE, L:RB->top
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | mov SAVE_ERRF, 0 // No error function.
-+ | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
-+ | add DISPATCH, GG_G2DISP
-+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
-+ |
-+ |.if X64
-+ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
-+ | mov SAVE_CFRAME, KBASEa
-+ | mov L:RB->cframe, rsp
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ |
-+ | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
-+ |.else
-+ | mov ARG3, RC // Have to copy args downwards.
-+ | mov ARG2, RA
-+ | mov ARG1, L:RB
-+ |
-+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
-+ | mov SAVE_CFRAME, KBASE
-+ | mov L:RB->cframe, esp
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ |
-+ | call BASE // (lua_State *L, lua_CFunction func, void *ud)
-+ |.endif
-+ | // TValue * (new base) or NULL returned in eax (RC).
-+ | test RC, RC
-+ | jz ->vm_leave_cp // No base? Just remove C frame.
-+ | mov RA, RC
-+ | mov PC, FRAME_CP
-+ | jmp <2 // Else continue with the call.
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Metamethod handling ------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |//-- Continuation dispatch ----------------------------------------------
-+ |
-+ |->cont_dispatch:
-+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
-+ | add RA, BASE
-+ | and PC, -8
-+ | mov RB, BASE
-+ | sub BASE, PC // Restore caller BASE.
-+ | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
-+ | mov RC, RA // ... in [RC]
-+ | mov PC, [RB-12] // Restore PC from [cont|PC].
-+ |.if X64
-+ | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
-+ |.if FFI
-+ | cmp RA, 1
-+ | jbe >1
-+ |.endif
-+ | lea KBASEa, qword [=>0]
-+ | add RAa, KBASEa
-+ |.else
-+ | mov RA, dword [RB-16]
-+ |.if FFI
-+ | cmp RA, 1
-+ | jbe >1
-+ |.endif
-+ |.endif
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | // BASE = base, RC = result, RB = meta base
-+ | jmp RAa // Jump to continuation.
-+ |
-+ |.if FFI
-+ |1:
-+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
-+ | // cont = 0: Tail call from C function.
-+ | sub RB, BASE
-+ | shr RB, 3
-+ | lea RD, [RB-1]
-+ | jmp ->vm_call_tail
-+ |.endif
-+ |
-+ |->cont_cat: // BASE = base, RC = result, RB = mbase
-+ | movzx RA, PC_RB
-+ | sub RB, 16
-+ | lea RA, [BASE+RA*8]
-+ | sub RA, RB
-+ | je ->cont_ra
-+ | neg RA
-+ | shr RA, 3
-+ |.if X64WIN
-+ | mov CARG3d, RA
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | mov RCa, [RC]
-+ | mov [RB], RCa
-+ | mov CARG2d, RB
-+ |.elif X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | mov CARG3d, RA
-+ | mov RAa, [RC]
-+ | mov [RB], RAa
-+ | mov CARG2d, RB
-+ |.else
-+ | mov ARG3, RA
-+ | mov RA, [RC+4]
-+ | mov RC, [RC]
-+ | mov [RB+4], RA
-+ | mov [RB], RC
-+ | mov ARG2, RB
-+ |.endif
-+ | jmp ->BC_CAT_Z
-+ |
-+ |//-- Table indexing metamethods -----------------------------------------
-+ |
-+ |->vmeta_tgets:
-+ | mov TMP1, RC // RC = GCstr *
-+ | mov TMP2, LJ_TSTR
-+ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
-+ | cmp PC_OP, BC_GGET
-+ | jne >1
-+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
-+ | mov [RA], TAB:RB // RB = GCtab *
-+ | mov dword [RA+4], LJ_TTAB
-+ | mov RB, RA
-+ | jmp >2
-+ |
-+ |->vmeta_tgetb:
-+ | movzx RC, PC_RC
-+ |.if DUALNUM
-+ | mov TMP2, LJ_TISNUM
-+ | mov TMP1, RC
-+ |.else
-+ | cvtsi2sd xmm0, RC
-+ | movsd TMPQ, xmm0
-+ |.endif
-+ | lea RCa, TMPQ // Store temp. TValue in TMPQ.
-+ | jmp >1
-+ |
-+ |->vmeta_tgetv:
-+ | movzx RC, PC_RC // Reload TValue *k from RC.
-+ | lea RC, [BASE+RC*8]
-+ |1:
-+ | movzx RB, PC_RB // Reload TValue *t from RB.
-+ | lea RB, [BASE+RB*8]
-+ |2:
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RB
-+ | mov CARG3, RCa // May be 64 bit ptr to stack.
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG2, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
-+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jz >3
-+ |->cont_ra: // BASE = base, RC = result
-+ | movzx RA, PC_RA
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC+4]
-+ | mov RC, [RC]
-+ | mov [BASE+RA*8+4], RB
-+ | mov [BASE+RA*8], RC
-+ |.endif
-+ | ins_next
-+ |
-+ |3: // Call __index metamethod.
-+ | // BASE = base, L->top = new base, stack = cont/func/t/k
-+ | mov RA, L:RB->top
-+ | mov [RA-12], PC // [cont|PC]
-+ | lea PC, [RA+FRAME_CONT]
-+ | sub PC, BASE
-+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
-+ | mov NARGS:RD, 2+1 // 2 args for func(t, k).
-+ | jmp ->vm_call_dispatch_f
-+ |
-+ |->vmeta_tgetr:
-+ | mov FCARG1, TAB:RB
-+ | mov RB, BASE // Save BASE.
-+ | mov FCARG2, RC // Caveat: FCARG2 == BASE
-+ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
-+ | // cTValue * or NULL returned in eax (RC).
-+ | movzx RA, PC_RA
-+ | mov BASE, RB // Restore BASE.
-+ | test RC, RC
-+ | jnz ->BC_TGETR_Z
-+ | mov dword [BASE+RA*8+4], LJ_TNIL
-+ | jmp ->BC_TGETR2_Z
-+ |
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vmeta_tsets:
-+ | mov TMP1, RC // RC = GCstr *
-+ | mov TMP2, LJ_TSTR
-+ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
-+ | cmp PC_OP, BC_GSET
-+ | jne >1
-+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
-+ | mov [RA], TAB:RB // RB = GCtab *
-+ | mov dword [RA+4], LJ_TTAB
-+ | mov RB, RA
-+ | jmp >2
-+ |
-+ |->vmeta_tsetb:
-+ | movzx RC, PC_RC
-+ |.if DUALNUM
-+ | mov TMP2, LJ_TISNUM
-+ | mov TMP1, RC
-+ |.else
-+ | cvtsi2sd xmm0, RC
-+ | movsd TMPQ, xmm0
-+ |.endif
-+ | lea RCa, TMPQ // Store temp. TValue in TMPQ.
-+ | jmp >1
-+ |
-+ |->vmeta_tsetv:
-+ | movzx RC, PC_RC // Reload TValue *k from RC.
-+ | lea RC, [BASE+RC*8]
-+ |1:
-+ | movzx RB, PC_RB // Reload TValue *t from RB.
-+ | lea RB, [BASE+RB*8]
-+ |2:
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RB
-+ | mov CARG3, RCa // May be 64 bit ptr to stack.
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG2, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
-+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jz >3
-+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
-+ | movzx RA, PC_RA
-+ |.if X64
-+ | mov RBa, [BASE+RA*8]
-+ | mov [RC], RBa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ |->cont_nop: // BASE = base, (RC = result)
-+ | ins_next
-+ |
-+ |3: // Call __newindex metamethod.
-+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
-+ | mov RA, L:RB->top
-+ | mov [RA-12], PC // [cont|PC]
-+ | movzx RC, PC_RA
-+ | // Copy value to third argument.
-+ |.if X64
-+ | mov RBa, [BASE+RC*8]
-+ | mov [RA+16], RBa
-+ |.else
-+ | mov RB, [BASE+RC*8+4]
-+ | mov RC, [BASE+RC*8]
-+ | mov [RA+20], RB
-+ | mov [RA+16], RC
-+ |.endif
-+ | lea PC, [RA+FRAME_CONT]
-+ | sub PC, BASE
-+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
-+ | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
-+ | jmp ->vm_call_dispatch_f
-+ |
-+ |->vmeta_tsetr:
-+ |.if X64WIN
-+ | mov L:CARG1d, SAVE_L
-+ | mov CARG3d, RC
-+ | mov L:CARG1d->base, BASE
-+ | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
-+ |.elif X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov CARG2d, TAB:RB
-+ | mov L:CARG1d->base, BASE
-+ | mov RB, BASE // Save BASE.
-+ | mov CARG3d, RC // Caveat: CARG3d == BASE.
-+ |.else
-+ | mov L:RA, SAVE_L
-+ | mov ARG2, TAB:RB
-+ | mov RB, BASE // Save BASE.
-+ | mov ARG3, RC
-+ | mov ARG1, L:RA
-+ | mov L:RA->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
-+ | // TValue * returned in eax (RC).
-+ | movzx RA, PC_RA
-+ | mov BASE, RB // Restore BASE.
-+ | jmp ->BC_TSETR_Z
-+ |
-+ |//-- Comparison metamethods ---------------------------------------------
-+ |
-+ |->vmeta_comp:
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
-+ |.if X64WIN
-+ | lea CARG3d, [BASE+RD*8]
-+ | lea CARG2d, [BASE+RA*8]
-+ |.else
-+ | lea CARG2d, [BASE+RA*8]
-+ | lea CARG3d, [BASE+RD*8]
-+ |.endif
-+ | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
-+ | movzx CARG4d, PC_OP
-+ |.else
-+ | movzx RB, PC_OP
-+ | lea RD, [BASE+RD*8]
-+ | lea RA, [BASE+RA*8]
-+ | mov ARG4, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RD
-+ | mov ARG2, RA
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
-+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
-+ |3:
-+ | mov BASE, L:RB->base
-+ | cmp RC, 1
-+ | ja ->vmeta_binop
-+ |4:
-+ | lea PC, [PC+4]
-+ | jb >6
-+ |5:
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |6:
-+ | ins_next
-+ |
-+ |->cont_condt: // BASE = base, RC = result
-+ | add PC, 4
-+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
-+ | jb <5
-+ | jmp <6
-+ |
-+ |->cont_condf: // BASE = base, RC = result
-+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
-+ | jmp <4
-+ |
-+ |->vmeta_equal:
-+ | sub PC, 4
-+ |.if X64WIN
-+ | mov CARG3d, RD
-+ | mov CARG4d, RB
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
-+ | mov CARG2d, RA
-+ | mov CARG1d, L:RB // Caveat: CARG1d == RA.
-+ |.elif X64
-+ | mov CARG2d, RA
-+ | mov CARG4d, RB // Caveat: CARG4d == RA.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
-+ | mov CARG3d, RD
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov ARG4, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RD
-+ | mov ARG2, RA
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
-+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
-+ | jmp <3
-+ |
-+ |->vmeta_equal_cd:
-+ |.if FFI
-+ | sub PC, 4
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG1, L:RB
-+ | mov FCARG2, dword [PC-4]
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
-+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
-+ | jmp <3
-+ |.endif
-+ |
-+ |->vmeta_istype:
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RA
-+ | movzx CARG3d, PC_RD
-+ | mov L:CARG1d, L:RB
-+ |.else
-+ | movzx RD, PC_RD
-+ | mov ARG2, RA
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RD
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
-+ | mov BASE, L:RB->base
-+ | jmp <6
-+ |
-+ |//-- Arithmetic metamethods ---------------------------------------------
-+ |
-+ |->vmeta_arith_vno:
-+ |.if DUALNUM
-+ | movzx RB, PC_RB
-+ |.endif
-+ |->vmeta_arith_vn:
-+ | lea RC, [KBASE+RC*8]
-+ | jmp >1
-+ |
-+ |->vmeta_arith_nvo:
-+ |.if DUALNUM
-+ | movzx RC, PC_RC
-+ |.endif
-+ |->vmeta_arith_nv:
-+ | lea RC, [KBASE+RC*8]
-+ | lea RB, [BASE+RB*8]
-+ | xchg RB, RC
-+ | jmp >2
-+ |
-+ |->vmeta_unm:
-+ | lea RC, [BASE+RD*8]
-+ | mov RB, RC
-+ | jmp >2
-+ |
-+ |->vmeta_arith_vvo:
-+ |.if DUALNUM
-+ | movzx RB, PC_RB
-+ |.endif
-+ |->vmeta_arith_vv:
-+ | lea RC, [BASE+RC*8]
-+ |1:
-+ | lea RB, [BASE+RB*8]
-+ |2:
-+ | lea RA, [BASE+RA*8]
-+ |.if X64WIN
-+ | mov CARG3d, RB
-+ | mov CARG4d, RC
-+ | movzx RC, PC_OP
-+ | mov ARG5d, RC
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
-+ | mov CARG2d, RA
-+ | mov CARG1d, L:RB // Caveat: CARG1d == RA.
-+ |.elif X64
-+ | movzx CARG5d, PC_OP
-+ | mov CARG2d, RA
-+ | mov CARG4d, RC // Caveat: CARG4d == RA.
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
-+ | mov CARG3d, RB
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG3, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG4, RC
-+ | movzx RC, PC_OP
-+ | mov ARG2, RA
-+ | mov ARG5, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
-+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jz ->cont_nop
-+ |
-+ | // Call metamethod for binary op.
-+ |->vmeta_binop:
-+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
-+ | mov RA, RC
-+ | sub RC, BASE
-+ | mov [RA-12], PC // [cont|PC]
-+ | lea PC, [RC+FRAME_CONT]
-+ | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
-+ | jmp ->vm_call_dispatch
-+ |
-+ |->vmeta_len:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
-+ | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+#if LJ_52
-+ | test RC, RC
-+ | jne ->vmeta_binop // Binop call for compatibility.
-+ | movzx RD, PC_RD
-+ | mov TAB:FCARG1, [BASE+RD*8]
-+ | jmp ->BC_LEN_Z
-+#else
-+ | jmp ->vmeta_binop // Binop call for compatibility.
-+#endif
-+ |
-+ |//-- Call metamethod ----------------------------------------------------
-+ |
-+ |->vmeta_call_ra:
-+ | lea RA, [BASE+RA*8+8]
-+ |->vmeta_call: // Resolve and call __call metamethod.
-+ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
-+ | mov TMP2, RA // Save RA, RC for us.
-+ | mov TMP1, NARGS:RD
-+ | sub RA, 8
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RA
-+ | lea CARG3d, [RA+NARGS:RD*8]
-+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-+ |.else
-+ | lea RC, [RA+NARGS:RD*8]
-+ | mov L:RB, SAVE_L
-+ | mov ARG2, RA
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE // This is the callers base!
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
-+ | mov BASE, L:RB->base
-+ | mov RA, TMP2
-+ | mov NARGS:RD, TMP1
-+ | mov LFUNC:RB, [RA-8]
-+ | add NARGS:RD, 1
-+ | // This is fragile. L->base must not move, KBASE must always be defined.
-+ | cmp KBASE, BASE // Continue with CALLT if flag set.
-+ | je ->BC_CALLT_Z
-+ | mov BASE, RA
-+ | ins_call // Otherwise call resolved metamethod.
-+ |
-+ |//-- Argument coercion for 'for' statement ------------------------------
-+ |
-+ |->vmeta_for:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, RA // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
-+ | mov BASE, L:RB->base
-+ | mov RC, [PC-4]
-+ | movzx RA, RCH
-+ | movzx OP, RCL
-+ | shr RC, 16
-+ |.if X64
-+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
-+ |.else
-+ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Fast functions -----------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |.macro .ffunc, name
-+ |->ff_ .. name:
-+ |.endmacro
-+ |
-+ |.macro .ffunc_1, name
-+ |->ff_ .. name:
-+ | cmp NARGS:RD, 1+1; jb ->fff_fallback
-+ |.endmacro
-+ |
-+ |.macro .ffunc_2, name
-+ |->ff_ .. name:
-+ | cmp NARGS:RD, 2+1; jb ->fff_fallback
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nsse, name, op
-+ | .ffunc_1 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | op xmm0, qword [BASE]
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nsse, name
-+ | .ffunc_nsse name, movsd
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nnsse, name
-+ | .ffunc_2 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
-+ | movsd xmm0, qword [BASE]
-+ | movsd xmm1, qword [BASE+8]
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nnr, name
-+ | .ffunc_2 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
-+ | fld qword [BASE+8]
-+ | fld qword [BASE]
-+ |.endmacro
-+ |
-+ |// Inlined GC threshold check. Caveat: uses label 1.
-+ |.macro ffgccheck
-+ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
-+ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
-+ | jb >1
-+ | call ->fff_gcstep
-+ |1:
-+ |.endmacro
-+ |
-+ |//-- Base library: checks -----------------------------------------------
-+ |
-+ |.ffunc_1 assert
-+ | mov RB, [BASE+4]
-+ | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
-+ | mov PC, [BASE-4]
-+ | mov MULTRES, RD
-+ | mov [BASE-4], RB
-+ | mov RB, [BASE]
-+ | mov [BASE-8], RB
-+ | sub RD, 2
-+ | jz >2
-+ | mov RA, BASE
-+ |1:
-+ | add RA, 8
-+ |.if X64
-+ | mov RBa, [RA]
-+ | mov [RA-8], RBa
-+ |.else
-+ | mov RB, [RA+4]
-+ | mov [RA-4], RB
-+ | mov RB, [RA]
-+ | mov [RA-8], RB
-+ |.endif
-+ | sub RD, 1
-+ | jnz <1
-+ |2:
-+ | mov RD, MULTRES
-+ | jmp ->fff_res_
-+ |
-+ |.ffunc_1 type
-+ | mov RB, [BASE+4]
-+ |.if X64
-+ | mov RA, RB
-+ | sar RA, 15
-+ | cmp RA, -2
-+ | je >3
-+ |.endif
-+ | mov RC, ~LJ_TNUMX
-+ | not RB
-+ | cmp RC, RB
-+ | cmova RC, RB
-+ |2:
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TSTR
-+ | mov [BASE-8], STR:RC
-+ | jmp ->fff_res1
-+ |.if X64
-+ |3:
-+ | mov RC, ~LJ_TLIGHTUD
-+ | jmp <2
-+ |.endif
-+ |
-+ |//-- Base library: getters and setters ---------------------------------
-+ |
-+ |.ffunc_1 getmetatable
-+ | mov RB, [BASE+4]
-+ | mov PC, [BASE-4]
-+ | cmp RB, LJ_TTAB; jne >6
-+ |1: // Field metatable must be at same offset for GCtab and GCudata!
-+ | mov TAB:RB, [BASE]
-+ | mov TAB:RB, TAB:RB->metatable
-+ |2:
-+ | test TAB:RB, TAB:RB
-+ | mov dword [BASE-4], LJ_TNIL
-+ | jz ->fff_res1
-+ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
-+ | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
-+ | mov [BASE-8], TAB:RB
-+ | mov RA, TAB:RB->hmask
-+ | and RA, STR:RC->hash
-+ | imul RA, #NODE
-+ | add NODE:RA, TAB:RB->node
-+ |3: // Rearranged logic, because we expect _not_ to find the key.
-+ | cmp dword NODE:RA->key.it, LJ_TSTR
-+ | jne >4
-+ | cmp dword NODE:RA->key.gcr, STR:RC
-+ | je >5
-+ |4:
-+ | mov NODE:RA, NODE:RA->next
-+ | test NODE:RA, NODE:RA
-+ | jnz <3
-+ | jmp ->fff_res1 // Not found, keep default result.
-+ |5:
-+ | mov RB, [RA+4]
-+ | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
-+ | mov RC, [RA]
-+ | mov [BASE-4], RB // Return value of mt.__metatable.
-+ | mov [BASE-8], RC
-+ | jmp ->fff_res1
-+ |
-+ |6:
-+ | cmp RB, LJ_TUDATA; je <1
-+ |.if X64
-+ | cmp RB, LJ_TNUMX; ja >8
-+ | cmp RB, LJ_TISNUM; jbe >7
-+ | mov RB, LJ_TLIGHTUD
-+ | jmp >8
-+ |7:
-+ |.else
-+ | cmp RB, LJ_TISNUM; ja >8
-+ |.endif
-+ | mov RB, LJ_TNUMX
-+ |8:
-+ | not RB
-+ | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
-+ | jmp <2
-+ |
-+ |.ffunc_2 setmetatable
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+ | // Fast path: no mt for table yet and not clearing the mt.
-+ | mov TAB:RB, [BASE]
-+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
-+ | mov TAB:RC, [BASE+8]
-+ | mov TAB:RB->metatable, TAB:RC
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TTAB // Return original table.
-+ | mov [BASE-8], TAB:RB
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jz >1
-+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
-+ | barrierback TAB:RB, RC
-+ |1:
-+ | jmp ->fff_res1
-+ |
-+ |.ffunc_2 rawget
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+ |.if X64WIN
-+ | mov RB, BASE // Save BASE.
-+ | lea CARG3d, [BASE+8]
-+ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
-+ | mov CARG1d, SAVE_L
-+ |.elif X64
-+ | mov RB, BASE // Save BASE.
-+ | mov CARG2d, [BASE]
-+ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
-+ | mov CARG1d, SAVE_L
-+ |.else
-+ | mov TAB:RD, [BASE]
-+ | mov L:RB, SAVE_L
-+ | mov ARG2, TAB:RD
-+ | mov ARG1, L:RB
-+ | mov RB, BASE // Save BASE.
-+ | add BASE, 8
-+ | mov ARG3, BASE
-+ |.endif
-+ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
-+ | // cTValue * returned in eax (RD).
-+ | mov BASE, RB // Restore BASE.
-+ | // Copy table slot.
-+ |.if X64
-+ | mov RBa, [RD]
-+ | mov PC, [BASE-4]
-+ | mov [BASE-8], RBa
-+ |.else
-+ | mov RB, [RD]
-+ | mov RD, [RD+4]
-+ | mov PC, [BASE-4]
-+ | mov [BASE-8], RB
-+ | mov [BASE-4], RD
-+ |.endif
-+ | jmp ->fff_res1
-+ |
-+ |//-- Base library: conversions ------------------------------------------
-+ |
-+ |.ffunc tonumber
-+ | // Only handles the number case inline (without a base argument).
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-+ | cmp dword [BASE+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne >1
-+ | mov RB, dword [BASE]; jmp ->fff_resi
-+ |1:
-+ | ja ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
-+ |
-+ |.ffunc_1 tostring
-+ | // Only handles the string or number case inline.
-+ | mov PC, [BASE-4]
-+ | cmp dword [BASE+4], LJ_TSTR; jne >3
-+ | // A __tostring method in the string base metatable is ignored.
-+ | mov STR:RD, [BASE]
-+ |2:
-+ | mov dword [BASE-4], LJ_TSTR
-+ | mov [BASE-8], STR:RD
-+ | jmp ->fff_res1
-+ |3: // Handle numbers inline, unless a number base metatable is present.
-+ | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
-+ | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
-+ | jne ->fff_fallback
-+ | ffgccheck // Caveat: uses label 1.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Add frame since C call can throw.
-+ | mov SAVE_PC, PC // Redundant (but a defined value).
-+ |.if X64 and not X64WIN
-+ | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
-+ |.endif
-+ | mov L:FCARG1, L:RB
-+ |.if DUALNUM
-+ | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
-+ |.else
-+ | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
-+ |.endif
-+ | // GCstr returned in eax (RD).
-+ | mov BASE, L:RB->base
-+ | jmp <2
-+ |
-+ |//-- Base library: iterators -------------------------------------------
-+ |
-+ |.ffunc_1 next
-+ | je >2 // Missing 2nd arg?
-+ |1:
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Add frame since C call can throw.
-+ | mov L:RB->top, BASE // Dummy frame length is ok.
-+ | mov PC, [BASE-4]
-+ |.if X64WIN
-+ | lea CARG3d, [BASE+8]
-+ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
-+ | mov CARG1d, L:RB
-+ |.elif X64
-+ | mov CARG2d, [BASE]
-+ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov TAB:RD, [BASE]
-+ | mov ARG2, TAB:RD
-+ | mov ARG1, L:RB
-+ | add BASE, 8
-+ | mov ARG3, BASE
-+ |.endif
-+ | mov SAVE_PC, PC // Needed for ITERN fallback.
-+ | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
-+ | // Flag returned in eax (RD).
-+ | mov BASE, L:RB->base
-+ | test RD, RD; jz >3 // End of traversal?
-+ | // Copy key and value to results.
-+ |.if X64
-+ | mov RBa, [BASE+8]
-+ | mov RDa, [BASE+16]
-+ | mov [BASE-8], RBa
-+ | mov [BASE], RDa
-+ |.else
-+ | mov RB, [BASE+8]
-+ | mov RD, [BASE+12]
-+ | mov [BASE-8], RB
-+ | mov [BASE-4], RD
-+ | mov RB, [BASE+16]
-+ | mov RD, [BASE+20]
-+ | mov [BASE], RB
-+ | mov [BASE+4], RD
-+ |.endif
-+ |->fff_res2:
-+ | mov RD, 1+2
-+ | jmp ->fff_res
-+ |2: // Set missing 2nd arg to nil.
-+ | mov dword [BASE+12], LJ_TNIL
-+ | jmp <1
-+ |3: // End of traversal: return nil.
-+ | mov dword [BASE-4], LJ_TNIL
-+ | jmp ->fff_res1
-+ |
-+ |.ffunc_1 pairs
-+ | mov TAB:RB, [BASE]
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+#if LJ_52
-+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
-+#endif
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov CFUNC:RD, CFUNC:RB->upvalue[0]
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TFUNC
-+ | mov [BASE-8], CFUNC:RD
-+ | mov dword [BASE+12], LJ_TNIL
-+ | mov RD, 1+3
-+ | jmp ->fff_res
-+ |
-+ |.ffunc_2 ipairs_aux
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ | mov PC, [BASE-4]
-+ |.if DUALNUM
-+ | mov RD, dword [BASE+8]
-+ | add RD, 1
-+ | mov dword [BASE-4], LJ_TISNUM
-+ | mov dword [BASE-8], RD
-+ |.else
-+ | movsd xmm0, qword [BASE+8]
-+ | sseconst_1 xmm1, RBa
-+ | addsd xmm0, xmm1
-+ | cvttsd2si RD, xmm0
-+ | movsd qword [BASE-8], xmm0
-+ |.endif
-+ | mov TAB:RB, [BASE]
-+ | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
-+ | shl RD, 3
-+ | add RD, TAB:RB->array
-+ |1:
-+ | cmp dword [RD+4], LJ_TNIL; je ->fff_res0
-+ | // Copy array slot.
-+ |.if X64
-+ | mov RBa, [RD]
-+ | mov [BASE], RBa
-+ |.else
-+ | mov RB, [RD]
-+ | mov RD, [RD+4]
-+ | mov [BASE], RB
-+ | mov [BASE+4], RD
-+ |.endif
-+ | jmp ->fff_res2
-+ |2: // Check for empty hash part first. Otherwise call C function.
-+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
-+ | mov FCARG1, TAB:RB
-+ | mov RB, BASE // Save BASE.
-+ | mov FCARG2, RD // Caveat: FCARG2 == BASE
-+ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
-+ | // cTValue * or NULL returned in eax (RD).
-+ | mov BASE, RB
-+ | test RD, RD
-+ | jnz <1
-+ |->fff_res0:
-+ | mov RD, 1+0
-+ | jmp ->fff_res
-+ |
-+ |.ffunc_1 ipairs
-+ | mov TAB:RB, [BASE]
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+#if LJ_52
-+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
-+#endif
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov CFUNC:RD, CFUNC:RB->upvalue[0]
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TFUNC
-+ | mov [BASE-8], CFUNC:RD
-+ |.if DUALNUM
-+ | mov dword [BASE+12], LJ_TISNUM
-+ | mov dword [BASE+8], 0
-+ |.else
-+ | xorps xmm0, xmm0
-+ | movsd qword [BASE+8], xmm0
-+ |.endif
-+ | mov RD, 1+3
-+ | jmp ->fff_res
-+ |
-+ |//-- Base library: catch errors ----------------------------------------
-+ |
-+ |.ffunc_1 pcall
-+ | lea RA, [BASE+8]
-+ | sub NARGS:RD, 1
-+ | mov PC, 8+FRAME_PCALL
-+ |1:
-+ | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | shr RB, HOOK_ACTIVE_SHIFT
-+ | and RB, 1
-+ | add PC, RB // Remember active hook before pcall.
-+ | jmp ->vm_call_dispatch
-+ |
-+ |.ffunc_2 xpcall
-+ | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
-+ | mov RB, [BASE+4] // Swap function and traceback.
-+ | mov [BASE+12], RB
-+ | mov dword [BASE+4], LJ_TFUNC
-+ | mov LFUNC:RB, [BASE]
-+ | mov PC, [BASE+8]
-+ | mov [BASE+8], LFUNC:RB
-+ | mov [BASE], PC
-+ | lea RA, [BASE+16]
-+ | sub NARGS:RD, 2
-+ | mov PC, 16+FRAME_PCALL
-+ | jmp <1
-+ |
-+ |//-- Coroutine library --------------------------------------------------
-+ |
-+ |.macro coroutine_resume_wrap, resume
-+ |.if resume
-+ |.ffunc_1 coroutine_resume
-+ | mov L:RB, [BASE]
-+ |.else
-+ |.ffunc coroutine_wrap_aux
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov L:RB, CFUNC:RB->upvalue[0].gcr
-+ |.endif
-+ | mov PC, [BASE-4]
-+ | mov SAVE_PC, PC
-+ |.if X64
-+ | mov TMP1, L:RB
-+ |.else
-+ | mov ARG1, L:RB
-+ |.endif
-+ |.if resume
-+ | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
-+ |.endif
-+ | cmp aword L:RB->cframe, 0; jne ->fff_fallback
-+ | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
-+ | mov RA, L:RB->top
-+ | je >1 // Status != LUA_YIELD (i.e. 0)?
-+ | cmp RA, L:RB->base // Check for presence of initial func.
-+ | je ->fff_fallback
-+ |1:
-+ |.if resume
-+ | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
-+ |.else
-+ | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
-+ |.endif
-+ | cmp PC, L:RB->maxstack; ja ->fff_fallback
-+ | mov L:RB->top, PC
-+ |
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ |.if resume
-+ | add BASE, 8 // Keep resumed thread in stack for GC.
-+ |.endif
-+ | mov L:RB->top, BASE
-+ |.if resume
-+ | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
-+ |.else
-+ | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
-+ |.endif
-+ | sub RBa, PCa // Relative to PC.
-+ |
-+ | cmp PC, RA
-+ | je >3
-+ |2: // Move args to coroutine.
-+ |.if X64
-+ | mov RCa, [PC+RB]
-+ | mov [PC-8], RCa
-+ |.else
-+ | mov RC, [PC+RB+4]
-+ | mov [PC-4], RC
-+ | mov RC, [PC+RB]
-+ | mov [PC-8], RC
-+ |.endif
-+ | sub PC, 8
-+ | cmp PC, RA
-+ | jne <2
-+ |3:
-+ |.if X64
-+ | mov CARG2d, RA
-+ | mov CARG1d, TMP1
-+ |.else
-+ | mov ARG2, RA
-+ | xor RA, RA
-+ | mov ARG4, RA
-+ | mov ARG3, RA
-+ |.endif
-+ | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
-+ |
-+ | mov L:RB, SAVE_L
-+ |.if X64
-+ | mov L:PC, TMP1
-+ |.else
-+ | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
-+ |.endif
-+ | mov BASE, L:RB->base
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ | set_vmstate INTERP
-+ |
-+ | cmp eax, LUA_YIELD
-+ | ja >8
-+ |4:
-+ | mov RA, L:PC->base
-+ | mov KBASE, L:PC->top
-+ | mov L:PC->top, RA // Clear coroutine stack.
-+ | mov PC, KBASE
-+ | sub PC, RA
-+ | je >6 // No results?
-+ | lea RD, [BASE+PC]
-+ | shr PC, 3
-+ | cmp RD, L:RB->maxstack
-+ | ja >9 // Need to grow stack?
-+ |
-+ | mov RB, BASE
-+ | sub RBa, RAa
-+ |5: // Move results from coroutine.
-+ |.if X64
-+ | mov RDa, [RA]
-+ | mov [RA+RB], RDa
-+ |.else
-+ | mov RD, [RA]
-+ | mov [RA+RB], RD
-+ | mov RD, [RA+4]
-+ | mov [RA+RB+4], RD
-+ |.endif
-+ | add RA, 8
-+ | cmp RA, KBASE
-+ | jne <5
-+ |6:
-+ |.if resume
-+ | lea RD, [PC+2] // nresults+1 = 1 + true + results.
-+ | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
-+ |.else
-+ | lea RD, [PC+1] // nresults+1 = 1 + results.
-+ |.endif
-+ |7:
-+ | mov PC, SAVE_PC
-+ | mov MULTRES, RD
-+ |.if resume
-+ | mov RAa, -8
-+ |.else
-+ | xor RA, RA
-+ |.endif
-+ | test PC, FRAME_TYPE
-+ | jz ->BC_RET_Z
-+ | jmp ->vm_return
-+ |
-+ |8: // Coroutine returned with error (at co->top-1).
-+ |.if resume
-+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
-+ | mov RA, L:PC->top
-+ | sub RA, 8
-+ | mov L:PC->top, RA // Clear error from coroutine stack.
-+ | // Copy error message.
-+ |.if X64
-+ | mov RDa, [RA]
-+ | mov [BASE], RDa
-+ |.else
-+ | mov RD, [RA]
-+ | mov [BASE], RD
-+ | mov RD, [RA+4]
-+ | mov [BASE+4], RD
-+ |.endif
-+ | mov RD, 1+2 // nresults+1 = 1 + false + error.
-+ | jmp <7
-+ |.else
-+ | mov FCARG2, L:PC
-+ | mov FCARG1, L:RB
-+ | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
-+ | // Error function does not return.
-+ |.endif
-+ |
-+ |9: // Handle stack expansion on return from yield.
-+ |.if X64
-+ | mov L:RA, TMP1
-+ |.else
-+ | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
-+ |.endif
-+ | mov L:RA->top, KBASE // Undo coroutine stack clearing.
-+ | mov FCARG2, PC
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ |.if X64
-+ | mov L:PC, TMP1
-+ |.else
-+ | mov L:PC, ARG1
-+ |.endif
-+ | mov BASE, L:RB->base
-+ | jmp <4 // Retry the stack move.
-+ |.endmacro
-+ |
-+ | coroutine_resume_wrap 1 // coroutine.resume
-+ | coroutine_resume_wrap 0 // coroutine.wrap
-+ |
-+ |.ffunc coroutine_yield
-+ | mov L:RB, SAVE_L
-+ | test aword L:RB->cframe, CFRAME_RESUME
-+ | jz ->fff_fallback
-+ | mov L:RB->base, BASE
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ | mov L:RB->top, RD
-+ | xor RD, RD
-+ | mov aword L:RB->cframe, RDa
-+ | mov al, LUA_YIELD
-+ | mov byte L:RB->status, al
-+ | jmp ->vm_leave_unw
-+ |
-+ |//-- Math library -------------------------------------------------------
-+ |
-+ |.if not DUALNUM
-+ |->fff_resi: // Dummy.
-+ |.endif
-+ |
-+ |->fff_resn:
-+ | mov PC, [BASE-4]
-+ | fstp qword [BASE-8]
-+ | jmp ->fff_res1
-+ |
-+ | .ffunc_1 math_abs
-+ |.if DUALNUM
-+ | cmp dword [BASE+4], LJ_TISNUM; jne >2
-+ | mov RB, dword [BASE]
-+ | cmp RB, 0; jns ->fff_resi
-+ | neg RB; js >1
-+ |->fff_resbit:
-+ |->fff_resi:
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TISNUM
-+ | mov dword [BASE-8], RB
-+ | jmp ->fff_res1
-+ |1:
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], 0x41e00000 // 2^31.
-+ | mov dword [BASE-8], 0
-+ | jmp ->fff_res1
-+ |2:
-+ | ja ->fff_fallback
-+ |.else
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ |.endif
-+ | movsd xmm0, qword [BASE]
-+ | sseconst_abs xmm1, RDa
-+ | andps xmm0, xmm1
-+ |->fff_resxmm0:
-+ | mov PC, [BASE-4]
-+ | movsd qword [BASE-8], xmm0
-+ | // fallthrough
-+ |
-+ |->fff_res1:
-+ | mov RD, 1+1
-+ |->fff_res:
-+ | mov MULTRES, RD
-+ |->fff_res_:
-+ | test PC, FRAME_TYPE
-+ | jnz >7
-+ |5:
-+ | cmp PC_RB, RDL // More results expected?
-+ | ja >6
-+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
-+ | movzx RA, PC_RA
-+ | not RAa // Note: ~RA = -(RA+1)
-+ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
-+ | ins_next
-+ |
-+ |6: // Fill up results with nil.
-+ | mov dword [BASE+RD*8-12], LJ_TNIL
-+ | add RD, 1
-+ | jmp <5
-+ |
-+ |7: // Non-standard return case.
-+ | mov RAa, -8 // Results start at BASE+RA = BASE-8.
-+ | jmp ->vm_return
-+ |
-+ |.if X64
-+ |.define fff_resfp, fff_resxmm0
-+ |.else
-+ |.define fff_resfp, fff_resn
-+ |.endif
-+ |
-+ |.macro math_round, func
-+ | .ffunc math_ .. func
-+ |.if DUALNUM
-+ | cmp dword [BASE+4], LJ_TISNUM; jne >1
-+ | mov RB, dword [BASE]; jmp ->fff_resi
-+ |1:
-+ | ja ->fff_fallback
-+ |.else
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ |.endif
-+ | movsd xmm0, qword [BASE]
-+ | call ->vm_ .. func .. _sse
-+ |.if DUALNUM
-+ | cvttsd2si RB, xmm0
-+ | cmp RB, 0x80000000
-+ | jne ->fff_resi
-+ | cvtsi2sd xmm1, RB
-+ | ucomisd xmm0, xmm1
-+ | jp ->fff_resxmm0
-+ | je ->fff_resi
-+ |.endif
-+ | jmp ->fff_resxmm0
-+ |.endmacro
-+ |
-+ | math_round floor
-+ | math_round ceil
-+ |
-+ |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
-+ |
-+ |.ffunc math_log
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | movsd xmm0, qword [BASE]
-+ |.if not X64
-+ | movsd FPARG1, xmm0
-+ |.endif
-+ | mov RB, BASE
-+ | call extern log
-+ | mov BASE, RB
-+ | jmp ->fff_resfp
-+ |
-+ |.macro math_extern, func
-+ | .ffunc_nsse math_ .. func
-+ |.if not X64
-+ | movsd FPARG1, xmm0
-+ |.endif
-+ | mov RB, BASE
-+ | call extern func
-+ | mov BASE, RB
-+ | jmp ->fff_resfp
-+ |.endmacro
-+ |
-+ |.macro math_extern2, func
-+ | .ffunc_nnsse math_ .. func
-+ |.if not X64
-+ | movsd FPARG1, xmm0
-+ | movsd FPARG3, xmm1
-+ |.endif
-+ | mov RB, BASE
-+ | call extern func
-+ | mov BASE, RB
-+ | jmp ->fff_resfp
-+ |.endmacro
-+ |
-+ | math_extern log10
-+ | math_extern exp
-+ | math_extern sin
-+ | math_extern cos
-+ | math_extern tan
-+ | math_extern asin
-+ | math_extern acos
-+ | math_extern atan
-+ | math_extern sinh
-+ | math_extern cosh
-+ | math_extern tanh
-+ | math_extern2 pow
-+ | math_extern2 atan2
-+ | math_extern2 fmod
-+ |
-+ |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
-+ |
-+ |.ffunc_1 math_frexp
-+ | mov RB, [BASE+4]
-+ | cmp RB, LJ_TISNUM; jae ->fff_fallback
-+ | mov PC, [BASE-4]
-+ | mov RC, [BASE]
-+ | mov [BASE-4], RB; mov [BASE-8], RC
-+ | shl RB, 1; cmp RB, 0xffe00000; jae >3
-+ | or RC, RB; jz >3
-+ | mov RC, 1022
-+ | cmp RB, 0x00200000; jb >4
-+ |1:
-+ | shr RB, 21; sub RB, RC // Extract and unbias exponent.
-+ | cvtsi2sd xmm0, RB
-+ | mov RB, [BASE-4]
-+ | and RB, 0x800fffff // Mask off exponent.
-+ | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
-+ | mov [BASE-4], RB
-+ |2:
-+ | movsd qword [BASE], xmm0
-+ | mov RD, 1+2
-+ | jmp ->fff_res
-+ |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
-+ | xorps xmm0, xmm0; jmp <2
-+ |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
-+ | movsd xmm0, qword [BASE]
-+ | sseconst_hi xmm1, RBa, 43500000 // 2^54.
-+ | mulsd xmm0, xmm1
-+ | movsd qword [BASE-8], xmm0
-+ | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
-+ |
-+ |.ffunc_nsse math_modf
-+ | mov RB, [BASE+4]
-+ | mov PC, [BASE-4]
-+ | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
-+ | movaps xmm4, xmm0
-+ | call ->vm_trunc_sse
-+ | subsd xmm4, xmm0
-+ |1:
-+ | movsd qword [BASE-8], xmm0
-+ | movsd qword [BASE], xmm4
-+ | mov RC, [BASE-4]; mov RB, [BASE+4]
-+ | xor RC, RB; js >3 // Need to adjust sign?
-+ |2:
-+ | mov RD, 1+2
-+ | jmp ->fff_res
-+ |3:
-+ | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
-+ | jmp <2
-+ |4:
-+ | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
-+ |
-+ |.macro math_minmax, name, cmovop, sseop
-+ | .ffunc name
-+ | mov RA, 2
-+ | cmp dword [BASE+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne >4
-+ | mov RB, dword [BASE]
-+ |1: // Handle integers.
-+ | cmp RA, RD; jae ->fff_resi
-+ | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
-+ | cmp RB, dword [BASE+RA*8-8]
-+ | cmovop RB, dword [BASE+RA*8-8]
-+ | add RA, 1
-+ | jmp <1
-+ |3:
-+ | ja ->fff_fallback
-+ | // Convert intermediate result to number and continue below.
-+ | cvtsi2sd xmm0, RB
-+ | jmp >6
-+ |4:
-+ | ja ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ |
-+ | movsd xmm0, qword [BASE]
-+ |5: // Handle numbers or integers.
-+ | cmp RA, RD; jae ->fff_resxmm0
-+ | cmp dword [BASE+RA*8-4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jb >6
-+ | ja ->fff_fallback
-+ | cvtsi2sd xmm1, dword [BASE+RA*8-8]
-+ | jmp >7
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ |6:
-+ | movsd xmm1, qword [BASE+RA*8-8]
-+ |7:
-+ | sseop xmm0, xmm1
-+ | add RA, 1
-+ | jmp <5
-+ |.endmacro
-+ |
-+ | math_minmax math_min, cmovg, minsd
-+ | math_minmax math_max, cmovl, maxsd
-+ |
-+ |//-- String library -----------------------------------------------------
-+ |
-+ |.ffunc string_byte // Only handle the 1-arg case here.
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback
-+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-+ | mov STR:RB, [BASE]
-+ | mov PC, [BASE-4]
-+ | cmp dword STR:RB->len, 1
-+ | jb ->fff_res0 // Return no results for empty string.
-+ | movzx RB, byte STR:RB[1]
-+ |.if DUALNUM
-+ | jmp ->fff_resi
-+ |.else
-+ | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
-+ |.endif
-+ |
-+ |.ffunc string_char // Only handle the 1-arg case here.
-+ | ffgccheck
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
-+ | cmp dword [BASE+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ | mov RB, dword [BASE]
-+ | cmp RB, 255; ja ->fff_fallback
-+ | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | cvttsd2si RB, qword [BASE]
-+ | cmp RB, 255; ja ->fff_fallback
-+ | mov TMP2, RB
-+ |.endif
-+ |.if X64
-+ | mov TMP3, 1
-+ |.else
-+ | mov ARG3, 1
-+ |.endif
-+ | lea RDa, TMP2 // Points to stack. Little-endian.
-+ |->fff_newstr:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ |.if X64
-+ | mov CARG3d, TMP3 // Zero-extended to size_t.
-+ | mov CARG2, RDa // May be 64 bit ptr to stack.
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov ARG2, RD
-+ | mov ARG1, L:RB
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_str_new // (lua_State *L, char *str, size_t l)
-+ |->fff_resstr:
-+ | // GCstr * returned in eax (RD).
-+ | mov BASE, L:RB->base
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TSTR
-+ | mov [BASE-8], STR:RD
-+ | jmp ->fff_res1
-+ |
-+ |.ffunc string_sub
-+ | ffgccheck
-+ | mov TMP2, -1
-+ | cmp NARGS:RD, 1+2; jb ->fff_fallback
-+ | jna >1
-+ | cmp dword [BASE+20], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ | mov RB, dword [BASE+16]
-+ | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | cvttsd2si RB, qword [BASE+16]
-+ | mov TMP2, RB
-+ |.endif
-+ |1:
-+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ | mov STR:RB, [BASE]
-+ | mov TMP3, STR:RB
-+ | mov RB, STR:RB->len
-+ |.if DUALNUM
-+ | mov RA, dword [BASE+8]
-+ |.else
-+ | cvttsd2si RA, qword [BASE+8]
-+ |.endif
-+ | mov RC, TMP2
-+ | cmp RB, RC // len < end? (unsigned compare)
-+ | jb >5
-+ |2:
-+ | test RA, RA // start <= 0?
-+ | jle >7
-+ |3:
-+ | mov STR:RB, TMP3
-+ | sub RC, RA // start > end?
-+ | jl ->fff_emptystr
-+ | lea RB, [STR:RB+RA+#STR-1]
-+ | add RC, 1
-+ |4:
-+ |.if X64
-+ | mov TMP3, RC
-+ |.else
-+ | mov ARG3, RC
-+ |.endif
-+ | mov RD, RB
-+ | jmp ->fff_newstr
-+ |
-+ |5: // Negative end or overflow.
-+ | jl >6
-+ | lea RC, [RC+RB+1] // end = end+(len+1)
-+ | jmp <2
-+ |6: // Overflow.
-+ | mov RC, RB // end = len
-+ | jmp <2
-+ |
-+ |7: // Negative start or underflow.
-+ | je >8
-+ | add RA, RB // start = start+(len+1)
-+ | add RA, 1
-+ | jg <3 // start > 0?
-+ |8: // Underflow.
-+ | mov RA, 1 // start = 1
-+ | jmp <3
-+ |
-+ |->fff_emptystr: // Range underflow.
-+ | xor RC, RC // Zero length. Any ptr in RB is ok.
-+ | jmp <4
-+ |
-+ |.macro ffstring_op, name
-+ | .ffunc_1 string_ .. name
-+ | ffgccheck
-+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-+ | mov L:RB, SAVE_L
-+ | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
-+ | mov L:RB->base, BASE
-+ | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
-+ | mov RC, SBUF:FCARG1->b
-+ | mov SBUF:FCARG1->L, L:RB
-+ | mov SBUF:FCARG1->p, RC
-+ | mov SAVE_PC, PC
-+ | call extern lj_buf_putstr_ .. name .. @8
-+ | mov FCARG1, eax
-+ | call extern lj_buf_tostr@4
-+ | jmp ->fff_resstr
-+ |.endmacro
-+ |
-+ |ffstring_op reverse
-+ |ffstring_op lower
-+ |ffstring_op upper
-+ |
-+ |//-- Bit library --------------------------------------------------------
-+ |
-+ |.macro .ffunc_bit, name, kind, fdef
-+ | fdef name
-+ |.if kind == 2
-+ | sseconst_tobit xmm1, RBa
-+ |.endif
-+ | cmp dword [BASE+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne >1
-+ | mov RB, dword [BASE]
-+ |.if kind > 0
-+ | jmp >2
-+ |.else
-+ | jmp ->fff_resbit
-+ |.endif
-+ |1:
-+ | ja ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ | movsd xmm0, qword [BASE]
-+ |.if kind < 2
-+ | sseconst_tobit xmm1, RBa
-+ |.endif
-+ | addsd xmm0, xmm1
-+ | movd RB, xmm0
-+ |2:
-+ |.endmacro
-+ |
-+ |.macro .ffunc_bit, name, kind
-+ | .ffunc_bit name, kind, .ffunc_1
-+ |.endmacro
-+ |
-+ |.ffunc_bit bit_tobit, 0
-+ | jmp ->fff_resbit
-+ |
-+ |.macro .ffunc_bit_op, name, ins
-+ | .ffunc_bit name, 2
-+ | mov TMP2, NARGS:RD // Save for fallback.
-+ | lea RD, [BASE+NARGS:RD*8-16]
-+ |1:
-+ | cmp RD, BASE
-+ | jbe ->fff_resbit
-+ | cmp dword [RD+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne >2
-+ | ins RB, dword [RD]
-+ | sub RD, 8
-+ | jmp <1
-+ |2:
-+ | ja ->fff_fallback_bit_op
-+ |.else
-+ | jae ->fff_fallback_bit_op
-+ |.endif
-+ | movsd xmm0, qword [RD]
-+ | addsd xmm0, xmm1
-+ | movd RA, xmm0
-+ | ins RB, RA
-+ | sub RD, 8
-+ | jmp <1
-+ |.endmacro
-+ |
-+ |.ffunc_bit_op bit_band, and
-+ |.ffunc_bit_op bit_bor, or
-+ |.ffunc_bit_op bit_bxor, xor
-+ |
-+ |.ffunc_bit bit_bswap, 1
-+ | bswap RB
-+ | jmp ->fff_resbit
-+ |
-+ |.ffunc_bit bit_bnot, 1
-+ | not RB
-+ |.if DUALNUM
-+ | jmp ->fff_resbit
-+ |.else
-+ |->fff_resbit:
-+ | cvtsi2sd xmm0, RB
-+ | jmp ->fff_resxmm0
-+ |.endif
-+ |
-+ |->fff_fallback_bit_op:
-+ | mov NARGS:RD, TMP2 // Restore for fallback
-+ | jmp ->fff_fallback
-+ |
-+ |.macro .ffunc_bit_sh, name, ins
-+ |.if DUALNUM
-+ | .ffunc_bit name, 1, .ffunc_2
-+ | // Note: no inline conversion from number for 2nd argument!
-+ | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
-+ | mov RA, dword [BASE+8]
-+ |.else
-+ | .ffunc_nnsse name
-+ | sseconst_tobit xmm2, RBa
-+ | addsd xmm0, xmm2
-+ | addsd xmm1, xmm2
-+ | movd RB, xmm0
-+ | movd RA, xmm1
-+ |.endif
-+ | ins RB, cl // Assumes RA is ecx.
-+ | jmp ->fff_resbit
-+ |.endmacro
-+ |
-+ |.ffunc_bit_sh bit_lshift, shl
-+ |.ffunc_bit_sh bit_rshift, shr
-+ |.ffunc_bit_sh bit_arshift, sar
-+ |.ffunc_bit_sh bit_rol, rol
-+ |.ffunc_bit_sh bit_ror, ror
-+ |
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->fff_fallback_2:
-+ | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
-+ | jmp ->fff_fallback
-+ |->fff_fallback_1:
-+ | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
-+ |->fff_fallback: // Call fast function fallback handler.
-+ | // BASE = new base, RD = nargs+1
-+ | mov L:RB, SAVE_L
-+ | mov PC, [BASE-4] // Fallback may overwrite PC.
-+ | mov SAVE_PC, PC // Redundant (but a defined value).
-+ | mov L:RB->base, BASE
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
-+ | mov L:RB->top, RD
-+ | mov CFUNC:RD, [BASE-8]
-+ | cmp RA, L:RB->maxstack
-+ | ja >5 // Need to grow stack.
-+ |.if X64
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov ARG1, L:RB
-+ |.endif
-+ | call aword CFUNC:RD->f // (lua_State *L)
-+ | mov BASE, L:RB->base
-+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
-+ | test RD, RD; jg ->fff_res // Returned nresults+1?
-+ |1:
-+ | mov RA, L:RB->top
-+ | sub RA, BASE
-+ | shr RA, 3
-+ | test RD, RD
-+ | lea NARGS:RD, [RA+1]
-+ | mov LFUNC:RB, [BASE-8]
-+ | jne ->vm_call_tail // Returned -1?
-+ | ins_callt // Returned 0: retry fast path.
-+ |
-+ |// Reconstruct previous base for vmeta_call during tailcall.
-+ |->vm_call_tail:
-+ | mov RA, BASE
-+ | test PC, FRAME_TYPE
-+ | jnz >3
-+ | movzx RB, PC_RA
-+ | not RBa // Note: ~RB = -(RB+1)
-+ | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
-+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
-+ |3:
-+ | mov RB, PC
-+ | and RB, -8
-+ | sub BASE, RB
-+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
-+ |
-+ |5: // Grow stack for fallback handler.
-+ | mov FCARG2, LUA_MINSTACK
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->base
-+ | xor RD, RD // Simulate a return 0.
-+ | jmp <1 // Dumb retry (goes through ff first).
-+ |
-+ |->fff_gcstep: // Call GC step function.
-+ | // BASE = new base, RD = nargs+1
-+ | pop RBa // Must keep stack at same level.
-+ | mov TMPa, RBa // Save return address
-+ | mov L:RB, SAVE_L
-+ | mov SAVE_PC, PC // Redundant (but a defined value).
-+ | mov L:RB->base, BASE
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ | mov FCARG1, L:RB
-+ | mov L:RB->top, RD
-+ | call extern lj_gc_step@4 // (lua_State *L)
-+ | mov BASE, L:RB->base
-+ | mov RD, L:RB->top
-+ | sub RD, BASE
-+ | shr RD, 3
-+ | add NARGS:RD, 1
-+ | mov RBa, TMPa
-+ | push RBa // Restore return address.
-+ | ret
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Special dispatch targets -------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_record: // Dispatch target for recording phase.
-+ |.if JIT
-+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | test RDL, HOOK_VMEVENT // No recording while in vmevent.
-+ | jnz >5
-+ | // Decrement the hookcount for consistency, but always do the call.
-+ | test RDL, HOOK_ACTIVE
-+ | jnz >1
-+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
-+ | jz >1
-+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
-+ | jmp >1
-+ |.endif
-+ |
-+ |->vm_rethook: // Dispatch target for return hooks.
-+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | test RDL, HOOK_ACTIVE // Hook already active?
-+ | jnz >5
-+ | jmp >1
-+ |
-+ |->vm_inshook: // Dispatch target for instr/line hooks.
-+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | test RDL, HOOK_ACTIVE // Hook already active?
-+ | jnz >5
-+ |
-+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
-+ | jz >5
-+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
-+ | jz >1
-+ | test RDL, LUA_MASKLINE
-+ | jz >5
-+ |1:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, PC // Caveat: FCARG2 == BASE
-+ | mov FCARG1, L:RB
-+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
-+ | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
-+ |3:
-+ | mov BASE, L:RB->base
-+ |4:
-+ | movzx RA, PC_RA
-+ |5:
-+ | movzx OP, PC_OP
-+ | movzx RD, PC_RD
-+ |.if X64
-+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
-+ |.else
-+ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
-+ |.endif
-+ |
-+ |->cont_hook: // Continue from hook yield.
-+ | add PC, 4
-+ | mov RA, [RB-24]
-+ | mov MULTRES, RA // Restore MULTRES for *M ins.
-+ | jmp <4
-+ |
-+ |->vm_hotloop: // Hot loop counter underflow.
-+ |.if JIT
-+ | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
-+ | mov RB, LFUNC:RB->pc
-+ | movzx RD, byte [RB+PC2PROTO(framesize)]
-+ | lea RD, [BASE+RD*8]
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RD
-+ | mov FCARG2, PC
-+ | lea FCARG1, [DISPATCH+GG_DISP2J]
-+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-+ | mov SAVE_PC, PC
-+ | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
-+ | jmp <3
-+ |.endif
-+ |
-+ |->vm_callhook: // Dispatch target for call hooks.
-+ | mov SAVE_PC, PC
-+ |.if JIT
-+ | jmp >1
-+ |.endif
-+ |
-+ |->vm_hotcall: // Hot call counter underflow.
-+ |.if JIT
-+ | mov SAVE_PC, PC
-+ | or PC, 1 // Marker for hot call.
-+ |1:
-+ |.endif
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RD
-+ | mov FCARG2, PC
-+ | mov FCARG1, L:RB
-+ | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
-+ | // ASMFunction returned in eax/rax (RDa).
-+ | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
-+ |.if JIT
-+ | and PC, -2
-+ |.endif
-+ | mov BASE, L:RB->base
-+ | mov RAa, RDa
-+ | mov RD, L:RB->top
-+ | sub RD, BASE
-+ | mov RBa, RAa
-+ | movzx RA, PC_RA
-+ | shr RD, 3
-+ | add NARGS:RD, 1
-+ | jmp RBa
-+ |
-+ |->cont_stitch: // Trace stitching.
-+ |.if JIT
-+ | // BASE = base, RC = result, RB = mbase
-+ | mov TRACE:RA, [RB-24] // Save previous trace.
-+ | mov TMP1, TRACE:RA
-+ | mov TMP3, DISPATCH // Need one more register.
-+ | mov DISPATCH, MULTRES
-+ | movzx RA, PC_RA
-+ | lea RA, [BASE+RA*8] // Call base.
-+ | sub DISPATCH, 1
-+ | jz >2
-+ |1: // Move results down.
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [RA], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov [RA], RB
-+ | mov RB, [RC+4]
-+ | mov [RA+4], RB
-+ |.endif
-+ | add RC, 8
-+ | add RA, 8
-+ | sub DISPATCH, 1
-+ | jnz <1
-+ |2:
-+ | movzx RC, PC_RA
-+ | movzx RB, PC_RB
-+ | add RC, RB
-+ | lea RC, [BASE+RC*8-8]
-+ |3:
-+ | cmp RC, RA
-+ | ja >9 // More results wanted?
-+ |
-+ | mov DISPATCH, TMP3
-+ | mov TRACE:RD, TMP1 // Get previous trace.
-+ | movzx RB, word TRACE:RD->traceno
-+ | movzx RD, word TRACE:RD->link
-+ | cmp RD, RB
-+ | je ->cont_nop // Blacklisted.
-+ | test RD, RD
-+ | jne =>BC_JLOOP // Jump to stitched trace.
-+ |
-+ | // Stitch a new trace to the previous trace.
-+ | mov [DISPATCH+DISPATCH_J(exitno)], RB
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, PC
-+ | lea FCARG1, [DISPATCH+GG_DISP2J]
-+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-+ | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
-+ | mov BASE, L:RB->base
-+ | jmp ->cont_nop
-+ |
-+ |9: // Fill up results with nil.
-+ | mov dword [RA+4], LJ_TNIL
-+ | add RA, 8
-+ | jmp <3
-+ |.endif
-+ |
-+ |->vm_profhook: // Dispatch target for profiler hook.
-+#if LJ_HASPROFILE
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, PC // Caveat: FCARG2 == BASE
-+ | mov FCARG1, L:RB
-+ | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
-+ | mov BASE, L:RB->base
-+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
-+ | sub PC, 4
-+ | jmp ->cont_nop
-+#endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Trace exit handler -------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// Called from an exit stub with the exit number on the stack.
-+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
-+ |->vm_exit_handler:
-+ |.if JIT
-+ |.if X64
-+ | push r13; push r12
-+ | push r11; push r10; push r9; push r8
-+ | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
-+ | push rbx; push rdx; push rcx; push rax
-+ | movzx RC, byte [rbp-8] // Reconstruct exit number.
-+ | mov RCH, byte [rbp-16]
-+ | mov [rbp-8], r15; mov [rbp-16], r14
-+ |.else
-+ | push ebp; lea ebp, [esp+12]; push ebp
-+ | push ebx; push edx; push ecx; push eax
-+ | movzx RC, byte [ebp-4] // Reconstruct exit number.
-+ | mov RCH, byte [ebp-8]
-+ | mov [ebp-4], edi; mov [ebp-8], esi
-+ |.endif
-+ | // Caveat: DISPATCH is ebx.
-+ | mov DISPATCH, [ebp]
-+ | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
-+ | set_vmstate EXIT
-+ | mov [DISPATCH+DISPATCH_J(exitno)], RC
-+ | mov [DISPATCH+DISPATCH_J(parent)], RA
-+ |.if X64
-+ |.if X64WIN
-+ | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
-+ |.else
-+ | sub rsp, 16*8 // Room for SSE regs.
-+ |.endif
-+ | add rbp, -128
-+ | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
-+ | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
-+ | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
-+ | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
-+ | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
-+ | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
-+ | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
-+ | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
-+ |.else
-+ | sub esp, 8*8+16 // Room for SSE regs + args.
-+ | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
-+ | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
-+ | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
-+ | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
-+ |.endif
-+ | // Caveat: RB is ebp.
-+ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
-+ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
-+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-+ | mov L:RB->base, BASE
-+ |.if X64WIN
-+ | lea CARG2, [rsp+4*8]
-+ |.elif X64
-+ | mov CARG2, rsp
-+ |.else
-+ | lea FCARG2, [esp+16]
-+ |.endif
-+ | lea FCARG1, [DISPATCH+GG_DISP2J]
-+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-+ | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
-+ | // MULTRES or negated error code returned in eax (RD).
-+ | mov RAa, L:RB->cframe
-+ | and RAa, CFRAME_RAWMASK
-+ |.if X64WIN
-+ | // Reposition stack later.
-+ |.elif X64
-+ | mov rsp, RAa // Reposition stack to C frame.
-+ |.else
-+ | mov esp, RAa // Reposition stack to C frame.
-+ |.endif
-+ | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
-+ | mov BASE, L:RB->base
-+ | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC.
-+ |.if X64
-+ | jmp >1
-+ |.endif
-+ |.endif
-+ |->vm_exit_interp:
-+ | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
-+ |.if JIT
-+ |.if X64
-+ | // Restore additional callee-save registers only used in compiled code.
-+ |.if X64WIN
-+ | lea RAa, [rsp+9*16+4*8]
-+ |1:
-+ | movdqa xmm15, [RAa-9*16]
-+ | movdqa xmm14, [RAa-8*16]
-+ | movdqa xmm13, [RAa-7*16]
-+ | movdqa xmm12, [RAa-6*16]
-+ | movdqa xmm11, [RAa-5*16]
-+ | movdqa xmm10, [RAa-4*16]
-+ | movdqa xmm9, [RAa-3*16]
-+ | movdqa xmm8, [RAa-2*16]
-+ | movdqa xmm7, [RAa-1*16]
-+ | mov rsp, RAa // Reposition stack to C frame.
-+ | movdqa xmm6, [RAa]
-+ | mov r15, CSAVE_3
-+ | mov r14, CSAVE_4
-+ |.else
-+ | add rsp, 16 // Reposition stack to C frame.
-+ |1:
-+ |.endif
-+ | mov r13, TMPa
-+ | mov r12, TMPQ
-+ |.endif
-+ | test RD, RD; js >9 // Check for error from exit.
-+ | mov L:RB, SAVE_L
-+ | mov MULTRES, RD
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | mov L:RB->base, BASE
-+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-+ | set_vmstate INTERP
-+ | // Modified copy of ins_next which handles function header dispatch, too.
-+ | mov RC, [PC]
-+ | movzx RA, RCH
-+ | movzx OP, RCL
-+ | add PC, 4
-+ | shr RC, 16
-+ | cmp OP, BC_FUNCF // Function header?
-+ | jb >3
-+ | cmp OP, BC_FUNCC+2 // Fast function?
-+ | jae >4
-+ |2:
-+ | mov RC, MULTRES // RC/RD holds nres+1.
-+ |3:
-+ |.if X64
-+ | jmp aword [DISPATCH+OP*8]
-+ |.else
-+ | jmp aword [DISPATCH+OP*4]
-+ |.endif
-+ |
-+ |4: // Check frame below fast function.
-+ | mov RC, [BASE-4]
-+ | test RC, FRAME_TYPE
-+ | jnz <2 // Trace stitching continuation?
-+ | // Otherwise set KBASE for Lua function below fast function.
-+ | movzx RC, byte [RC-3]
-+ | not RCa
-+ | mov LFUNC:KBASE, [BASE+RC*8-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | jmp <2
-+ |
-+ |9: // Rethrow error from the right C frame.
-+ | neg RD
-+ | mov FCARG1, L:RB
-+ | mov FCARG2, RD
-+ | call extern lj_err_throw@8 // (lua_State *L, int errcode)
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Math helper functions ----------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// FP value rounding. Called by math.floor/math.ceil fast functions
-+ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-+ |.macro vm_round, name, mode, cond
-+ |->name:
-+ |.if not X64 and cond
-+ | movsd xmm0, qword [esp+4]
-+ | call ->name .. _sse
-+ | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
-+ | fld qword [esp+4]
-+ | ret
-+ |.endif
-+ |
-+ |->name .. _sse:
-+ | sseconst_abs xmm2, RDa
-+ | sseconst_2p52 xmm3, RDa
-+ | movaps xmm1, xmm0
-+ | andpd xmm1, xmm2 // |x|
-+ | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
-+ | jbe >1
-+ | andnpd xmm2, xmm0 // Isolate sign bit.
-+ |.if mode == 2 // trunc(x)?
-+ | movaps xmm0, xmm1
-+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
-+ | subsd xmm1, xmm3
-+ | sseconst_1 xmm3, RDa
-+ | cmpsd xmm0, xmm1, 1 // |x| < result?
-+ | andpd xmm0, xmm3
-+ | subsd xmm1, xmm0 // If yes, subtract 1.
-+ | orpd xmm1, xmm2 // Merge sign bit back in.
-+ |.else
-+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
-+ | subsd xmm1, xmm3
-+ | orpd xmm1, xmm2 // Merge sign bit back in.
-+ | .if mode == 1 // ceil(x)?
-+ | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
-+ | cmpsd xmm0, xmm1, 6 // x > result?
-+ | .else // floor(x)?
-+ | sseconst_1 xmm2, RDa
-+ | cmpsd xmm0, xmm1, 1 // x < result?
-+ | .endif
-+ | andpd xmm0, xmm2
-+ | subsd xmm1, xmm0 // If yes, subtract +-1.
-+ |.endif
-+ | movaps xmm0, xmm1
-+ |1:
-+ | ret
-+ |.endmacro
-+ |
-+ | vm_round vm_floor, 0, 1
-+ | vm_round vm_ceil, 1, JIT
-+ | vm_round vm_trunc, 2, JIT
-+ |
-+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
-+ |->vm_mod:
-+ |// Args in xmm0/xmm1, return value in xmm0.
-+ |// Caveat: xmm0-xmm5 and RC (eax) modified!
-+ | movaps xmm5, xmm0
-+ | divsd xmm0, xmm1
-+ | sseconst_abs xmm2, RDa
-+ | sseconst_2p52 xmm3, RDa
-+ | movaps xmm4, xmm0
-+ | andpd xmm4, xmm2 // |x/y|
-+ | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
-+ | jbe >1
-+ | andnpd xmm2, xmm0 // Isolate sign bit.
-+ | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
-+ | subsd xmm4, xmm3
-+ | orpd xmm4, xmm2 // Merge sign bit back in.
-+ | sseconst_1 xmm2, RDa
-+ | cmpsd xmm0, xmm4, 1 // x/y < result?
-+ | andpd xmm0, xmm2
-+ | subsd xmm4, xmm0 // If yes, subtract 1.0.
-+ | movaps xmm0, xmm5
-+ | mulsd xmm1, xmm4
-+ | subsd xmm0, xmm1
-+ | ret
-+ |1:
-+ | mulsd xmm1, xmm0
-+ | movaps xmm0, xmm5
-+ | subsd xmm0, xmm1
-+ | ret
-+ |
-+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
-+ |->vm_powi_sse:
-+ | cmp eax, 1; jle >6 // i<=1?
-+ | // Now 1 < (unsigned)i <= 0x80000000.
-+ |1: // Handle leading zeros.
-+ | test eax, 1; jnz >2
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1
-+ | jmp <1
-+ |2:
-+ | shr eax, 1; jz >5
-+ | movaps xmm1, xmm0
-+ |3: // Handle trailing bits.
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1; jz >4
-+ | jnc <3
-+ | mulsd xmm1, xmm0
-+ | jmp <3
-+ |4:
-+ | mulsd xmm0, xmm1
-+ |5:
-+ | ret
-+ |6:
-+ | je <5 // x^1 ==> x
-+ | jb >7 // x^0 ==> 1
-+ | neg eax
-+ | call <1
-+ | sseconst_1 xmm1, RDa
-+ | divsd xmm1, xmm0
-+ | movaps xmm0, xmm1
-+ | ret
-+ |7:
-+ | sseconst_1 xmm0, RDa
-+ | ret
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Miscellaneous functions --------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
-+ |->vm_cpuid:
-+ |.if X64
-+ | mov eax, CARG1d
-+ | .if X64WIN; push rsi; mov rsi, CARG2; .endif
-+ | push rbx
-+ | xor ecx, ecx
-+ | cpuid
-+ | mov [rsi], eax
-+ | mov [rsi+4], ebx
-+ | mov [rsi+8], ecx
-+ | mov [rsi+12], edx
-+ | pop rbx
-+ | .if X64WIN; pop rsi; .endif
-+ | ret
-+ |.else
-+ | pushfd
-+ | pop edx
-+ | mov ecx, edx
-+ | xor edx, 0x00200000 // Toggle ID bit in flags.
-+ | push edx
-+ | popfd
-+ | pushfd
-+ | pop edx
-+ | xor eax, eax // Zero means no features supported.
-+ | cmp ecx, edx
-+ | jz >1 // No ID toggle means no CPUID support.
-+ | mov eax, [esp+4] // Argument 1 is function number.
-+ | push edi
-+ | push ebx
-+ | xor ecx, ecx
-+ | cpuid
-+ | mov edi, [esp+16] // Argument 2 is result area.
-+ | mov [edi], eax
-+ | mov [edi+4], ebx
-+ | mov [edi+8], ecx
-+ | mov [edi+12], edx
-+ | pop ebx
-+ | pop edi
-+ |1:
-+ | ret
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Assertions ---------------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->assert_bad_for_arg_type:
-+#ifdef LUA_USE_ASSERT
-+ | int3
-+#endif
-+ | int3
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- FFI helper functions -----------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// Handler for callback functions. Callback slot number in ah/al.
-+ |->vm_ffi_callback:
-+ |.if FFI
-+ |.type CTSTATE, CTState, PC
-+ |.if not X64
-+ | sub esp, 16 // Leave room for SAVE_ERRF etc.
-+ |.endif
-+ | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
-+ | lea DISPATCH, [ebp+GG_G2DISP]
-+ | mov CTSTATE, GL:ebp->ctype_state
-+ | movzx eax, ax
-+ | mov CTSTATE->cb.slot, eax
-+ |.if X64
-+ | mov CTSTATE->cb.gpr[0], CARG1
-+ | mov CTSTATE->cb.gpr[1], CARG2
-+ | mov CTSTATE->cb.gpr[2], CARG3
-+ | mov CTSTATE->cb.gpr[3], CARG4
-+ | movsd qword CTSTATE->cb.fpr[0], xmm0
-+ | movsd qword CTSTATE->cb.fpr[1], xmm1
-+ | movsd qword CTSTATE->cb.fpr[2], xmm2
-+ | movsd qword CTSTATE->cb.fpr[3], xmm3
-+ |.if X64WIN
-+ | lea rax, [rsp+CFRAME_SIZE+4*8]
-+ |.else
-+ | lea rax, [rsp+CFRAME_SIZE]
-+ | mov CTSTATE->cb.gpr[4], CARG5
-+ | mov CTSTATE->cb.gpr[5], CARG6
-+ | movsd qword CTSTATE->cb.fpr[4], xmm4
-+ | movsd qword CTSTATE->cb.fpr[5], xmm5
-+ | movsd qword CTSTATE->cb.fpr[6], xmm6
-+ | movsd qword CTSTATE->cb.fpr[7], xmm7
-+ |.endif
-+ | mov CTSTATE->cb.stack, rax
-+ | mov CARG2, rsp
-+ |.else
-+ | lea eax, [esp+CFRAME_SIZE+16]
-+ | mov CTSTATE->cb.gpr[0], FCARG1
-+ | mov CTSTATE->cb.gpr[1], FCARG2
-+ | mov CTSTATE->cb.stack, eax
-+ | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp.
-+ | mov FCARG2, [esp+CFRAME_SIZE+8]
-+ | mov SAVE_RET, FCARG1
-+ | mov SAVE_R4, FCARG2
-+ | mov FCARG2, esp
-+ |.endif
-+ | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
-+ | mov FCARG1, CTSTATE
-+ | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
-+ | // lua_State * returned in eax (RD).
-+ | set_vmstate INTERP
-+ | mov BASE, L:RD->base
-+ | mov RD, L:RD->top
-+ | sub RD, BASE
-+ | mov LFUNC:RB, [BASE-8]
-+ | shr RD, 3
-+ | add RD, 1
-+ | ins_callt
-+ |.endif
-+ |
-+ |->cont_ffi_callback: // Return from FFI callback.
-+ |.if FFI
-+ | mov L:RA, SAVE_L
-+ | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
-+ | mov aword CTSTATE->L, L:RAa
-+ | mov L:RA->base, BASE
-+ | mov L:RA->top, RB
-+ | mov FCARG1, CTSTATE
-+ | mov FCARG2, RC
-+ | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o)
-+ |.if X64
-+ | mov rax, CTSTATE->cb.gpr[0]
-+ | movsd xmm0, qword CTSTATE->cb.fpr[0]
-+ | jmp ->vm_leave_unw
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | mov eax, CTSTATE->cb.gpr[0]
-+ | mov edx, CTSTATE->cb.gpr[1]
-+ | cmp dword CTSTATE->cb.gpr[2], 1
-+ | jb >7
-+ | je >6
-+ | fld qword CTSTATE->cb.fpr[0].d
-+ | jmp >7
-+ |6:
-+ | fld dword CTSTATE->cb.fpr[0].f
-+ |7:
-+ | mov ecx, L:RB->top
-+ | movzx ecx, word [ecx+6] // Get stack adjustment and copy up.
-+ | mov SAVE_L, ecx // Must be one slot above SAVE_RET
-+ | restoreregs
-+ | pop ecx // Move return addr from SAVE_RET.
-+ | add esp, [esp] // Adjust stack.
-+ | add esp, 16
-+ | push ecx
-+ | ret
-+ |.endif
-+ |.endif
-+ |
-+ |->vm_ffi_call@4: // Call C function via FFI.
-+ | // Caveat: needs special frame unwinding, see below.
-+ |.if FFI
-+ |.if X64
-+ | .type CCSTATE, CCallState, rbx
-+ | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
-+ |.else
-+ | .type CCSTATE, CCallState, ebx
-+ | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1
-+ |.endif
-+ |
-+ | // Readjust stack.
-+ |.if X64
-+ | mov eax, CCSTATE->spadj
-+ | sub rsp, rax
-+ |.else
-+ | sub esp, CCSTATE->spadj
-+ |.if WIN
-+ | mov CCSTATE->spadj, esp
-+ |.endif
-+ |.endif
-+ |
-+ | // Copy stack slots.
-+ | movzx ecx, byte CCSTATE->nsp
-+ | sub ecx, 1
-+ | js >2
-+ |1:
-+ |.if X64
-+ | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
-+ | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
-+ |.else
-+ | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
-+ | mov [esp+ecx*4], eax
-+ |.endif
-+ | sub ecx, 1
-+ | jns <1
-+ |2:
-+ |
-+ |.if X64
-+ | movzx eax, byte CCSTATE->nfpr
-+ | mov CARG1, CCSTATE->gpr[0]
-+ | mov CARG2, CCSTATE->gpr[1]
-+ | mov CARG3, CCSTATE->gpr[2]
-+ | mov CARG4, CCSTATE->gpr[3]
-+ |.if not X64WIN
-+ | mov CARG5, CCSTATE->gpr[4]
-+ | mov CARG6, CCSTATE->gpr[5]
-+ |.endif
-+ | test eax, eax; jz >5
-+ | movaps xmm0, CCSTATE->fpr[0]
-+ | movaps xmm1, CCSTATE->fpr[1]
-+ | movaps xmm2, CCSTATE->fpr[2]
-+ | movaps xmm3, CCSTATE->fpr[3]
-+ |.if not X64WIN
-+ | cmp eax, 4; jbe >5
-+ | movaps xmm4, CCSTATE->fpr[4]
-+ | movaps xmm5, CCSTATE->fpr[5]
-+ | movaps xmm6, CCSTATE->fpr[6]
-+ | movaps xmm7, CCSTATE->fpr[7]
-+ |.endif
-+ |5:
-+ |.else
-+ | mov FCARG1, CCSTATE->gpr[0]
-+ | mov FCARG2, CCSTATE->gpr[1]
-+ |.endif
-+ |
-+ | call aword CCSTATE->func
-+ |
-+ |.if X64
-+ | mov CCSTATE->gpr[0], rax
-+ | movaps CCSTATE->fpr[0], xmm0
-+ |.if not X64WIN
-+ | mov CCSTATE->gpr[1], rdx
-+ | movaps CCSTATE->fpr[1], xmm1
-+ |.endif
-+ |.else
-+ | mov CCSTATE->gpr[0], eax
-+ | mov CCSTATE->gpr[1], edx
-+ | cmp byte CCSTATE->resx87, 1
-+ | jb >7
-+ | je >6
-+ | fstp qword CCSTATE->fpr[0].d[0]
-+ | jmp >7
-+ |6:
-+ | fstp dword CCSTATE->fpr[0].f[0]
-+ |7:
-+ |.if WIN
-+ | sub CCSTATE->spadj, esp
-+ |.endif
-+ |.endif
-+ |
-+ |.if X64
-+ | mov rbx, [rbp-8]; leave; ret
-+ |.else
-+ | mov ebx, [ebp-4]; leave; ret
-+ |.endif
-+ |.endif
-+ |// Note: vm_ffi_call must be the last function in this object file!
-+ |
-+ |//-----------------------------------------------------------------------
-+}
-+
-+/* Generate the code for a single instruction. */
-+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-+{
-+ int vk = 0;
-+ |// Note: aligning all instructions does not pay off.
-+ |=>defop:
-+
-+ switch (op) {
-+
-+ /* -- Comparison ops ---------------------------------------------------- */
-+
-+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
-+
-+ |.macro jmp_comp, lt, ge, le, gt, target
-+ ||switch (op) {
-+ ||case BC_ISLT:
-+ | lt target
-+ ||break;
-+ ||case BC_ISGE:
-+ | ge target
-+ ||break;
-+ ||case BC_ISLE:
-+ | le target
-+ ||break;
-+ ||case BC_ISGT:
-+ | gt target
-+ ||break;
-+ ||default: break; /* Shut up GCC. */
-+ ||}
-+ |.endmacro
-+
-+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
-+ | // RA = src1, RD = src2, JMP with RD = target
-+ | ins_AD
-+ |.if DUALNUM
-+ | checkint RA, >7
-+ | checkint RD, >8
-+ | mov RB, dword [BASE+RA*8]
-+ | add PC, 4
-+ | cmp RB, dword [BASE+RD*8]
-+ | jmp_comp jge, jl, jg, jle, >9
-+ |6:
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RA is not an integer.
-+ | ja ->vmeta_comp
-+ | // RA is a number.
-+ | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
-+ | // RA is a number, RD is an integer.
-+ | cvtsi2sd xmm0, dword [BASE+RD*8]
-+ | jmp >2
-+ |
-+ |8: // RA is an integer, RD is not an integer.
-+ | ja ->vmeta_comp
-+ | // RA is an integer, RD is a number.
-+ | cvtsi2sd xmm1, dword [BASE+RA*8]
-+ | movsd xmm0, qword [BASE+RD*8]
-+ | add PC, 4
-+ | ucomisd xmm0, xmm1
-+ | jmp_comp jbe, ja, jb, jae, <9
-+ | jmp <6
-+ |.else
-+ | checknum RA, ->vmeta_comp
-+ | checknum RD, ->vmeta_comp
-+ |.endif
-+ |1:
-+ | movsd xmm0, qword [BASE+RD*8]
-+ |2:
-+ | add PC, 4
-+ | ucomisd xmm0, qword [BASE+RA*8]
-+ |3:
-+ | // Unordered: all of ZF CF PF set, ordered: PF clear.
-+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
-+ |.if DUALNUM
-+ | jmp_comp jbe, ja, jb, jae, <9
-+ | jmp <6
-+ |.else
-+ | jmp_comp jbe, ja, jb, jae, >1
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |1:
-+ | ins_next
-+ |.endif
-+ break;
-+
-+ case BC_ISEQV: case BC_ISNEV:
-+ vk = op == BC_ISEQV;
-+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
-+ | mov RB, [BASE+RD*8+4]
-+ | add PC, 4
-+ |.if DUALNUM
-+ | cmp RB, LJ_TISNUM; jne >7
-+ | checkint RA, >8
-+ | mov RB, dword [BASE+RD*8]
-+ | cmp RB, dword [BASE+RA*8]
-+ if (vk) {
-+ | jne >9
-+ } else {
-+ | je >9
-+ }
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RD is not an integer.
-+ | ja >5
-+ | // RD is a number.
-+ | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
-+ | // RD is a number, RA is an integer.
-+ | cvtsi2sd xmm0, dword [BASE+RA*8]
-+ | jmp >2
-+ |
-+ |8: // RD is an integer, RA is not an integer.
-+ | ja >5
-+ | // RD is an integer, RA is a number.
-+ | cvtsi2sd xmm0, dword [BASE+RD*8]
-+ | ucomisd xmm0, qword [BASE+RA*8]
-+ | jmp >4
-+ |
-+ |.else
-+ | cmp RB, LJ_TISNUM; jae >5
-+ | checknum RA, >5
-+ |.endif
-+ |1:
-+ | movsd xmm0, qword [BASE+RA*8]
-+ |2:
-+ | ucomisd xmm0, qword [BASE+RD*8]
-+ |4:
-+ iseqne_fp:
-+ if (vk) {
-+ | jp >2 // Unordered means not equal.
-+ | jne >2
-+ } else {
-+ | jp >2 // Unordered means not equal.
-+ | je >1
-+ }
-+ iseqne_end:
-+ if (vk) {
-+ |1: // EQ: Branch to the target.
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |2: // NE: Fallthrough to next instruction.
-+ |.if not FFI
-+ |3:
-+ |.endif
-+ } else {
-+ |.if not FFI
-+ |3:
-+ |.endif
-+ |2: // NE: Branch to the target.
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |1: // EQ: Fallthrough to next instruction.
-+ }
-+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
-+ op == BC_ISEQN || op == BC_ISNEN)) {
-+ | jmp <9
-+ } else {
-+ | ins_next
-+ }
-+ |
-+ if (op == BC_ISEQV || op == BC_ISNEV) {
-+ |5: // Either or both types are not numbers.
-+ |.if FFI
-+ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
-+ | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
-+ |.endif
-+ | checktp RA, RB // Compare types.
-+ | jne <2 // Not the same type?
-+ | cmp RB, LJ_TISPRI
-+ | jae <1 // Same type and primitive type?
-+ |
-+ | // Same types and not a primitive type. Compare GCobj or pvalue.
-+ | mov RA, [BASE+RA*8]
-+ | mov RD, [BASE+RD*8]
-+ | cmp RA, RD
-+ | je <1 // Same GCobjs or pvalues?
-+ | cmp RB, LJ_TISTABUD
-+ | ja <2 // Different objects and not table/ud?
-+ |.if X64
-+ | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
-+ | jb <2
-+ |.endif
-+ |
-+ | // Different tables or userdatas. Need to check __eq metamethod.
-+ | // Field metatable must be at same offset for GCtab and GCudata!
-+ | mov TAB:RB, TAB:RA->metatable
-+ | test TAB:RB, TAB:RB
-+ | jz <2 // No metatable?
-+ | test byte TAB:RB->nomm, 1<<MM_eq
-+ | jnz <2 // Or 'no __eq' flag set?
-+ if (vk) {
-+ | xor RB, RB // ne = 0
-+ } else {
-+ | mov RB, 1 // ne = 1
-+ }
-+ | jmp ->vmeta_equal // Handle __eq metamethod.
-+ } else {
-+ |.if FFI
-+ |3:
-+ | cmp RB, LJ_TCDATA
-+ if (LJ_DUALNUM && vk) {
-+ | jne <9
-+ } else {
-+ | jne <2
-+ }
-+ | jmp ->vmeta_equal_cd
-+ |.endif
-+ }
-+ break;
-+ case BC_ISEQS: case BC_ISNES:
-+ vk = op == BC_ISEQS;
-+ | ins_AND // RA = src, RD = str const, JMP with RD = target
-+ | mov RB, [BASE+RA*8+4]
-+ | add PC, 4
-+ | cmp RB, LJ_TSTR; jne >3
-+ | mov RA, [BASE+RA*8]
-+ | cmp RA, [KBASE+RD*4]
-+ iseqne_test:
-+ if (vk) {
-+ | jne >2
-+ } else {
-+ | je >1
-+ }
-+ goto iseqne_end;
-+ case BC_ISEQN: case BC_ISNEN:
-+ vk = op == BC_ISEQN;
-+ | ins_AD // RA = src, RD = num const, JMP with RD = target
-+ | mov RB, [BASE+RA*8+4]
-+ | add PC, 4
-+ |.if DUALNUM
-+ | cmp RB, LJ_TISNUM; jne >7
-+ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
-+ | mov RB, dword [KBASE+RD*8]
-+ | cmp RB, dword [BASE+RA*8]
-+ if (vk) {
-+ | jne >9
-+ } else {
-+ | je >9
-+ }
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RA is not an integer.
-+ | ja >3
-+ | // RA is a number.
-+ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
-+ | // RA is a number, RD is an integer.
-+ | cvtsi2sd xmm0, dword [KBASE+RD*8]
-+ | jmp >2
-+ |
-+ |8: // RA is an integer, RD is a number.
-+ | cvtsi2sd xmm0, dword [BASE+RA*8]
-+ | ucomisd xmm0, qword [KBASE+RD*8]
-+ | jmp >4
-+ |.else
-+ | cmp RB, LJ_TISNUM; jae >3
-+ |.endif
-+ |1:
-+ | movsd xmm0, qword [KBASE+RD*8]
-+ |2:
-+ | ucomisd xmm0, qword [BASE+RA*8]
-+ |4:
-+ goto iseqne_fp;
-+ case BC_ISEQP: case BC_ISNEP:
-+ vk = op == BC_ISEQP;
-+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
-+ | mov RB, [BASE+RA*8+4]
-+ | add PC, 4
-+ | cmp RB, RD
-+ if (!LJ_HASFFI) goto iseqne_test;
-+ if (vk) {
-+ | jne >3
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |2:
-+ | ins_next
-+ |3:
-+ | cmp RB, LJ_TCDATA; jne <2
-+ | jmp ->vmeta_equal_cd
-+ } else {
-+ | je >2
-+ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |2:
-+ | ins_next
-+ }
-+ break;
-+
-+ /* -- Unary test and copy ops ------------------------------------------- */
-+
-+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
-+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
-+ | mov RB, [BASE+RD*8+4]
-+ | add PC, 4
-+ | cmp RB, LJ_TISTRUECOND
-+ if (op == BC_IST || op == BC_ISTC) {
-+ | jae >1
-+ } else {
-+ | jb >1
-+ }
-+ if (op == BC_ISTC || op == BC_ISFC) {
-+ | mov [BASE+RA*8+4], RB
-+ | mov RB, [BASE+RD*8]
-+ | mov [BASE+RA*8], RB
-+ }
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |1: // Fallthrough to the next instruction.
-+ | ins_next
-+ break;
-+
-+ case BC_ISTYPE:
-+ | ins_AD // RA = src, RD = -type
-+ | add RD, [BASE+RA*8+4]
-+ | jne ->vmeta_istype
-+ | ins_next
-+ break;
-+ case BC_ISNUM:
-+ | ins_AD // RA = src, RD = -(TISNUM-1)
-+ | checknum RA, ->vmeta_istype
-+ | ins_next
-+ break;
-+
-+ /* -- Unary ops --------------------------------------------------------- */
-+
-+ case BC_MOV:
-+ | ins_AD // RA = dst, RD = src
-+ |.if X64
-+ | mov RBa, [BASE+RD*8]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [BASE+RD*8+4]
-+ | mov RD, [BASE+RD*8]
-+ | mov [BASE+RA*8+4], RB
-+ | mov [BASE+RA*8], RD
-+ |.endif
-+ | ins_next_
-+ break;
-+ case BC_NOT:
-+ | ins_AD // RA = dst, RD = src
-+ | xor RB, RB
-+ | checktp RD, LJ_TISTRUECOND
-+ | adc RB, LJ_TTRUE
-+ | mov [BASE+RA*8+4], RB
-+ | ins_next
-+ break;
-+ case BC_UNM:
-+ | ins_AD // RA = dst, RD = src
-+ |.if DUALNUM
-+ | checkint RD, >5
-+ | mov RB, [BASE+RD*8]
-+ | neg RB
-+ | jo >4
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RB
-+ |9:
-+ | ins_next
-+ |4:
-+ | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
-+ | mov dword [BASE+RA*8], 0
-+ | jmp <9
-+ |5:
-+ | ja ->vmeta_unm
-+ |.else
-+ | checknum RD, ->vmeta_unm
-+ |.endif
-+ | movsd xmm0, qword [BASE+RD*8]
-+ | sseconst_sign xmm1, RDa
-+ | xorps xmm0, xmm1
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.if DUALNUM
-+ | jmp <9
-+ |.else
-+ | ins_next
-+ |.endif
-+ break;
-+ case BC_LEN:
-+ | ins_AD // RA = dst, RD = src
-+ | checkstr RD, >2
-+ | mov STR:RD, [BASE+RD*8]
-+ |.if DUALNUM
-+ | mov RD, dword STR:RD->len
-+ |1:
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RD
-+ |.else
-+ | xorps xmm0, xmm0
-+ | cvtsi2sd xmm0, dword STR:RD->len
-+ |1:
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endif
-+ | ins_next
-+ |2:
-+ | checktab RD, ->vmeta_len
-+ | mov TAB:FCARG1, [BASE+RD*8]
-+#if LJ_52
-+ | mov TAB:RB, TAB:FCARG1->metatable
-+ | cmp TAB:RB, 0
-+ | jnz >9
-+ |3:
-+#endif
-+ |->BC_LEN_Z:
-+ | mov RB, BASE // Save BASE.
-+ | call extern lj_tab_len@4 // (GCtab *t)
-+ | // Length of table returned in eax (RD).
-+ |.if DUALNUM
-+ | // Nothing to do.
-+ |.else
-+ | cvtsi2sd xmm0, RD
-+ |.endif
-+ | mov BASE, RB // Restore BASE.
-+ | movzx RA, PC_RA
-+ | jmp <1
-+#if LJ_52
-+ |9: // Check for __len.
-+ | test byte TAB:RB->nomm, 1<<MM_len
-+ | jnz <3
-+ | jmp ->vmeta_len // 'no __len' flag NOT set: check.
-+#endif
-+ break;
-+
-+ /* -- Binary ops -------------------------------------------------------- */
-+
-+ |.macro ins_arithpre, sseins, ssereg
-+ | ins_ABC
-+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-+ ||switch (vk) {
-+ ||case 0:
-+ | checknum RB, ->vmeta_arith_vn
-+ | .if DUALNUM
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
-+ | .endif
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [KBASE+RC*8]
-+ || break;
-+ ||case 1:
-+ | checknum RB, ->vmeta_arith_nv
-+ | .if DUALNUM
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
-+ | .endif
-+ | movsd xmm0, qword [KBASE+RC*8]
-+ | sseins ssereg, qword [BASE+RB*8]
-+ || break;
-+ ||default:
-+ | checknum RB, ->vmeta_arith_vv
-+ | checknum RC, ->vmeta_arith_vv
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [BASE+RC*8]
-+ || break;
-+ ||}
-+ |.endmacro
-+ |
-+ |.macro ins_arithdn, intins
-+ | ins_ABC
-+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-+ ||switch (vk) {
-+ ||case 0:
-+ | checkint RB, ->vmeta_arith_vn
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
-+ | mov RB, [BASE+RB*8]
-+ | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
-+ || break;
-+ ||case 1:
-+ | checkint RB, ->vmeta_arith_nv
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
-+ | mov RC, [KBASE+RC*8]
-+ | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
-+ || break;
-+ ||default:
-+ | checkint RB, ->vmeta_arith_vv
-+ | checkint RC, ->vmeta_arith_vv
-+ | mov RB, [BASE+RB*8]
-+ | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
-+ || break;
-+ ||}
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ ||if (vk == 1) {
-+ | mov dword [BASE+RA*8], RC
-+ ||} else {
-+ | mov dword [BASE+RA*8], RB
-+ ||}
-+ | ins_next
-+ |.endmacro
-+ |
-+ |.macro ins_arithpost
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endmacro
-+ |
-+ |.macro ins_arith, sseins
-+ | ins_arithpre sseins, xmm0
-+ | ins_arithpost
-+ | ins_next
-+ |.endmacro
-+ |
-+ |.macro ins_arith, intins, sseins
-+ |.if DUALNUM
-+ | ins_arithdn intins
-+ |.else
-+ | ins_arith, sseins
-+ |.endif
-+ |.endmacro
-+
-+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
-+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-+ | ins_arith add, addsd
-+ break;
-+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-+ | ins_arith sub, subsd
-+ break;
-+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
-+ | ins_arith imul, mulsd
-+ break;
-+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-+ | ins_arith divsd
-+ break;
-+ case BC_MODVN:
-+ | ins_arithpre movsd, xmm1
-+ |->BC_MODVN_Z:
-+ | call ->vm_mod
-+ | ins_arithpost
-+ | ins_next
-+ break;
-+ case BC_MODNV: case BC_MODVV:
-+ | ins_arithpre movsd, xmm1
-+ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
-+ break;
-+ case BC_POW:
-+ | ins_arithpre movsd, xmm1
-+ | mov RB, BASE
-+ |.if not X64
-+ | movsd FPARG1, xmm0
-+ | movsd FPARG3, xmm1
-+ |.endif
-+ | call extern pow
-+ | movzx RA, PC_RA
-+ | mov BASE, RB
-+ |.if X64
-+ | ins_arithpost
-+ |.else
-+ | fstp qword [BASE+RA*8]
-+ |.endif
-+ | ins_next
-+ break;
-+
-+ case BC_CAT:
-+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | lea CARG2d, [BASE+RC*8]
-+ | mov CARG3d, RC
-+ | sub CARG3d, RB
-+ |->BC_CAT_Z:
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | lea RA, [BASE+RC*8]
-+ | sub RC, RB
-+ | mov ARG2, RA
-+ | mov ARG3, RC
-+ |->BC_CAT_Z:
-+ | mov L:RB, SAVE_L
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
-+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jnz ->vmeta_binop
-+ | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
-+ | movzx RA, PC_RA
-+ |.if X64
-+ | mov RCa, [BASE+RB*8]
-+ | mov [BASE+RA*8], RCa
-+ |.else
-+ | mov RC, [BASE+RB*8+4]
-+ | mov RB, [BASE+RB*8]
-+ | mov [BASE+RA*8+4], RC
-+ | mov [BASE+RA*8], RB
-+ |.endif
-+ | ins_next
-+ break;
-+
-+ /* -- Constant ops ------------------------------------------------------ */
-+
-+ case BC_KSTR:
-+ | ins_AND // RA = dst, RD = str const (~)
-+ | mov RD, [KBASE+RD*4]
-+ | mov dword [BASE+RA*8+4], LJ_TSTR
-+ | mov [BASE+RA*8], RD
-+ | ins_next
-+ break;
-+ case BC_KCDATA:
-+ |.if FFI
-+ | ins_AND // RA = dst, RD = cdata const (~)
-+ | mov RD, [KBASE+RD*4]
-+ | mov dword [BASE+RA*8+4], LJ_TCDATA
-+ | mov [BASE+RA*8], RD
-+ | ins_next
-+ |.endif
-+ break;
-+ case BC_KSHORT:
-+ | ins_AD // RA = dst, RD = signed int16 literal
-+ |.if DUALNUM
-+ | movsx RD, RDW
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RD
-+ |.else
-+ | movsx RD, RDW // Sign-extend literal.
-+ | cvtsi2sd xmm0, RD
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endif
-+ | ins_next
-+ break;
-+ case BC_KNUM:
-+ | ins_AD // RA = dst, RD = num const
-+ | movsd xmm0, qword [KBASE+RD*8]
-+ | movsd qword [BASE+RA*8], xmm0
-+ | ins_next
-+ break;
-+ case BC_KPRI:
-+ | ins_AND // RA = dst, RD = primitive type (~)
-+ | mov [BASE+RA*8+4], RD
-+ | ins_next
-+ break;
-+ case BC_KNIL:
-+ | ins_AD // RA = dst_start, RD = dst_end
-+ | lea RA, [BASE+RA*8+12]
-+ | lea RD, [BASE+RD*8+4]
-+ | mov RB, LJ_TNIL
-+ | mov [RA-8], RB // Sets minimum 2 slots.
-+ |1:
-+ | mov [RA], RB
-+ | add RA, 8
-+ | cmp RA, RD
-+ | jbe <1
-+ | ins_next
-+ break;
-+
-+ /* -- Upvalue and function ops ------------------------------------------ */
-+
-+ case BC_UGET:
-+ | ins_AD // RA = dst, RD = upvalue #
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
-+ | mov RB, UPVAL:RB->v
-+ |.if X64
-+ | mov RDa, [RB]
-+ | mov [BASE+RA*8], RDa
-+ |.else
-+ | mov RD, [RB+4]
-+ | mov RB, [RB]
-+ | mov [BASE+RA*8+4], RD
-+ | mov [BASE+RA*8], RB
-+ |.endif
-+ | ins_next
-+ break;
-+ case BC_USETV:
-+#define TV2MARKOFS \
-+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
-+ | ins_AD // RA = upvalue #, RD = src
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | cmp byte UPVAL:RB->closed, 0
-+ | mov RB, UPVAL:RB->v
-+ | mov RA, [BASE+RD*8]
-+ | mov RD, [BASE+RD*8+4]
-+ | mov [RB], RA
-+ | mov [RB+4], RD
-+ | jz >1
-+ | // Check barrier for closed upvalue.
-+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
-+ | jnz >2
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Upvalue is black. Check if new value is collectable and white.
-+ | sub RD, LJ_TISGCV
-+ | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
-+ | jbe <1
-+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
-+ | jz <1
-+ | // Crossed a write barrier. Move the barrier forward.
-+ |.if X64 and not X64WIN
-+ | mov FCARG2, RB
-+ | mov RB, BASE // Save BASE.
-+ |.else
-+ | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
-+ |.endif
-+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
-+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
-+ | mov BASE, RB // Restore BASE.
-+ | jmp <1
-+ break;
-+#undef TV2MARKOFS
-+ case BC_USETS:
-+ | ins_AND // RA = upvalue #, RD = str const (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | mov GCOBJ:RA, [KBASE+RD*4]
-+ | mov RD, UPVAL:RB->v
-+ | mov [RD], GCOBJ:RA
-+ | mov dword [RD+4], LJ_TSTR
-+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
-+ | jnz >2
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Check if string is white and ensure upvalue is closed.
-+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
-+ | jz <1
-+ | cmp byte UPVAL:RB->closed, 0
-+ | jz <1
-+ | // Crossed a write barrier. Move the barrier forward.
-+ | mov RB, BASE // Save BASE (FCARG2 == BASE).
-+ | mov FCARG2, RD
-+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
-+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
-+ | mov BASE, RB // Restore BASE.
-+ | jmp <1
-+ break;
-+ case BC_USETN:
-+ | ins_AD // RA = upvalue #, RD = num const
-+ | mov LFUNC:RB, [BASE-8]
-+ | movsd xmm0, qword [KBASE+RD*8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | mov RA, UPVAL:RB->v
-+ | movsd qword [RA], xmm0
-+ | ins_next
-+ break;
-+ case BC_USETP:
-+ | ins_AND // RA = upvalue #, RD = primitive type (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | mov RA, UPVAL:RB->v
-+ | mov [RA+4], RD
-+ | ins_next
-+ break;
-+ case BC_UCLO:
-+ | ins_AD // RA = level, RD = target
-+ | branchPC RD // Do this first to free RD.
-+ | mov L:RB, SAVE_L
-+ | cmp dword L:RB->openupval, 0
-+ | je >1
-+ | mov L:RB->base, BASE
-+ | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-+ | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
-+ | mov BASE, L:RB->base
-+ |1:
-+ | ins_next
-+ break;
-+
-+ case BC_FNEW:
-+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG3d, [BASE-8]
-+ | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov LFUNC:RA, [BASE-8]
-+ | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, LFUNC:RA
-+ | mov ARG2, PROTO:RD
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
-+ | call extern lj_func_newL_gc
-+ | // GCfuncL * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA
-+ | mov [BASE+RA*8], LFUNC:RC
-+ | mov dword [BASE+RA*8+4], LJ_TFUNC
-+ | ins_next
-+ break;
-+
-+ /* -- Table ops --------------------------------------------------------- */
-+
-+ case BC_TNEW:
-+ | ins_AD // RA = dst, RD = hbits|asize
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
-+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
-+ | mov SAVE_PC, PC
-+ | jae >5
-+ |1:
-+ |.if X64
-+ | mov CARG3d, RD
-+ | and RD, 0x7ff
-+ | shr CARG3d, 11
-+ |.else
-+ | mov RA, RD
-+ | and RD, 0x7ff
-+ | shr RA, 11
-+ | mov ARG3, RA
-+ |.endif
-+ | cmp RD, 0x7ff
-+ | je >3
-+ |2:
-+ |.if X64
-+ | mov L:CARG1d, L:RB
-+ | mov CARG2d, RD
-+ |.else
-+ | mov ARG1, L:RB
-+ | mov ARG2, RD
-+ |.endif
-+ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
-+ | // Table * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA
-+ | mov [BASE+RA*8], TAB:RC
-+ | mov dword [BASE+RA*8+4], LJ_TTAB
-+ | ins_next
-+ |3: // Turn 0x7ff into 0x801.
-+ | mov RD, 0x801
-+ | jmp <2
-+ |5:
-+ | mov L:FCARG1, L:RB
-+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
-+ | movzx RD, PC_RD
-+ | jmp <1
-+ break;
-+ case BC_TDUP:
-+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
-+ | mov L:RB, SAVE_L
-+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
-+ | mov SAVE_PC, PC
-+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
-+ | mov L:RB->base, BASE
-+ | jae >3
-+ |2:
-+ | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-+ | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
-+ | // Table * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA
-+ | mov [BASE+RA*8], TAB:RC
-+ | mov dword [BASE+RA*8+4], LJ_TTAB
-+ | ins_next
-+ |3:
-+ | mov L:FCARG1, L:RB
-+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
-+ | movzx RD, PC_RD // Need to reload RD.
-+ | not RDa
-+ | jmp <2
-+ break;
-+
-+ case BC_GGET:
-+ | ins_AND // RA = dst, RD = str const (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov TAB:RB, LFUNC:RB->env
-+ | mov STR:RC, [KBASE+RD*4]
-+ | jmp ->BC_TGETS_Z
-+ break;
-+ case BC_GSET:
-+ | ins_AND // RA = src, RD = str const (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov TAB:RB, LFUNC:RB->env
-+ | mov STR:RC, [KBASE+RD*4]
-+ | jmp ->BC_TSETS_Z
-+ break;
-+
-+ case BC_TGETV:
-+ | ins_ABC // RA = dst, RB = table, RC = key
-+ | checktab RB, ->vmeta_tgetv
-+ | mov TAB:RB, [BASE+RB*8]
-+ |
-+ | // Integer key?
-+ |.if DUALNUM
-+ | checkint RC, >5
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | // Convert number to int and back and compare.
-+ | checknum RC, >5
-+ | movsd xmm0, qword [BASE+RC*8]
-+ | cvttsd2si RC, xmm0
-+ | cvtsi2sd xmm1, RC
-+ | ucomisd xmm0, xmm1
-+ | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
-+ |.endif
-+ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-+ | jae ->vmeta_tgetv // Not in array part? Use fallback.
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-+ | je >2
-+ | // Get array slot.
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov RC, [RC+4]
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ |.endif
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Check for __index if table value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz >3
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_index
-+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ |3:
-+ | mov dword [BASE+RA*8+4], LJ_TNIL
-+ | jmp <1
-+ |
-+ |5: // String key?
-+ | checkstr RC, ->vmeta_tgetv
-+ | mov STR:RC, [BASE+RC*8]
-+ | jmp ->BC_TGETS_Z
-+ break;
-+ case BC_TGETS:
-+ | ins_ABC // RA = dst, RB = table, RC = str const (~)
-+ | not RCa
-+ | mov STR:RC, [KBASE+RC*4]
-+ | checktab RB, ->vmeta_tgets
-+ | mov TAB:RB, [BASE+RB*8]
-+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
-+ | mov RA, TAB:RB->hmask
-+ | and RA, STR:RC->hash
-+ | imul RA, #NODE
-+ | add NODE:RA, TAB:RB->node
-+ |1:
-+ | cmp dword NODE:RA->key.it, LJ_TSTR
-+ | jne >4
-+ | cmp dword NODE:RA->key.gcr, STR:RC
-+ | jne >4
-+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
-+ | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-+ | je >5 // Key found, but nil value?
-+ | movzx RC, PC_RA
-+ | // Get node value.
-+ |.if X64
-+ | mov RBa, [RA]
-+ | mov [BASE+RC*8], RBa
-+ |.else
-+ | mov RB, [RA]
-+ | mov RA, [RA+4]
-+ | mov [BASE+RC*8], RB
-+ | mov [BASE+RC*8+4], RA
-+ |.endif
-+ |2:
-+ | ins_next
-+ |
-+ |3:
-+ | movzx RC, PC_RA
-+ | mov dword [BASE+RC*8+4], LJ_TNIL
-+ | jmp <2
-+ |
-+ |4: // Follow hash chain.
-+ | mov NODE:RA, NODE:RA->next
-+ | test NODE:RA, NODE:RA
-+ | jnz <1
-+ | // End of hash chain: key not found, nil result.
-+ |
-+ |5: // Check for __index if table value is nil.
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test TAB:RA, TAB:RA
-+ | jz <3 // No metatable: done.
-+ | test byte TAB:RA->nomm, 1<<MM_index
-+ | jnz <3 // 'no __index' flag set: done.
-+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
-+ break;
-+ case BC_TGETB:
-+ | ins_ABC // RA = dst, RB = table, RC = byte literal
-+ | checktab RB, ->vmeta_tgetb
-+ | mov TAB:RB, [BASE+RB*8]
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tgetb
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-+ | je >2
-+ | // Get array slot.
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov RC, [RC+4]
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ |.endif
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Check for __index if table value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz >3
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_index
-+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ |3:
-+ | mov dword [BASE+RA*8+4], LJ_TNIL
-+ | jmp <1
-+ break;
-+ case BC_TGETR:
-+ | ins_ABC // RA = dst, RB = table, RC = key
-+ | mov TAB:RB, [BASE+RB*8]
-+ |.if DUALNUM
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | cvttsd2si RC, qword [BASE+RC*8]
-+ |.endif
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tgetr // Not in array part? Use fallback.
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | // Get array slot.
-+ |->BC_TGETR_Z:
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov RC, [RC+4]
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ |.endif
-+ |->BC_TGETR2_Z:
-+ | ins_next
-+ break;
-+
-+ case BC_TSETV:
-+ | ins_ABC // RA = src, RB = table, RC = key
-+ | checktab RB, ->vmeta_tsetv
-+ | mov TAB:RB, [BASE+RB*8]
-+ |
-+ | // Integer key?
-+ |.if DUALNUM
-+ | checkint RC, >5
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | // Convert number to int and back and compare.
-+ | checknum RC, >5
-+ | movsd xmm0, qword [BASE+RC*8]
-+ | cvttsd2si RC, xmm0
-+ | cvtsi2sd xmm1, RC
-+ | ucomisd xmm0, xmm1
-+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
-+ |.endif
-+ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-+ | jae ->vmeta_tsetv
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL
-+ | je >3 // Previous value is nil?
-+ |1:
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2: // Set array slot.
-+ |.if X64
-+ | mov RBa, [BASE+RA*8]
-+ | mov [RC], RBa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ | ins_next
-+ |
-+ |3: // Check for __newindex if previous value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz <1
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <1
-+ |
-+ |5: // String key?
-+ | checkstr RC, ->vmeta_tsetv
-+ | mov STR:RC, [BASE+RC*8]
-+ | jmp ->BC_TSETS_Z
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RA
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <2
-+ break;
-+ case BC_TSETS:
-+ | ins_ABC // RA = src, RB = table, RC = str const (~)
-+ | not RCa
-+ | mov STR:RC, [KBASE+RC*4]
-+ | checktab RB, ->vmeta_tsets
-+ | mov TAB:RB, [BASE+RB*8]
-+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
-+ | mov RA, TAB:RB->hmask
-+ | and RA, STR:RC->hash
-+ | imul RA, #NODE
-+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
-+ | add NODE:RA, TAB:RB->node
-+ |1:
-+ | cmp dword NODE:RA->key.it, LJ_TSTR
-+ | jne >5
-+ | cmp dword NODE:RA->key.gcr, STR:RC
-+ | jne >5
-+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
-+ | cmp dword [RA+4], LJ_TNIL
-+ | je >4 // Previous value is nil?
-+ |2:
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |3: // Set node value.
-+ | movzx RC, PC_RA
-+ |.if X64
-+ | mov RBa, [BASE+RC*8]
-+ | mov [RA], RBa
-+ |.else
-+ | mov RB, [BASE+RC*8+4]
-+ | mov RC, [BASE+RC*8]
-+ | mov [RA+4], RB
-+ | mov [RA], RC
-+ |.endif
-+ | ins_next
-+ |
-+ |4: // Check for __newindex if previous value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz <2
-+ | mov TMP1, RA // Save RA.
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-+ | mov RA, TMP1 // Restore RA.
-+ | jmp <2
-+ |
-+ |5: // Follow hash chain.
-+ | mov NODE:RA, NODE:RA->next
-+ | test NODE:RA, NODE:RA
-+ | jnz <1
-+ | // End of hash chain: key not found, add a new one.
-+ |
-+ | // But check for __newindex first.
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test TAB:RA, TAB:RA
-+ | jz >6 // No metatable: continue.
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-+ |6:
-+ | mov TMP1, STR:RC
-+ | mov TMP2, LJ_TSTR
-+ | mov TMP3, TAB:RB // Save TAB:RB for us.
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | lea CARG3, TMP1
-+ | mov CARG2d, TAB:RB
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
-+ | mov ARG2, TAB:RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
-+ | // Handles write barrier for the new key. TValue * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
-+ | mov RA, eax
-+ | jmp <2 // Must check write barrier for value.
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RC // Destroys STR:RC.
-+ | jmp <3
-+ break;
-+ case BC_TSETB:
-+ | ins_ABC // RA = src, RB = table, RC = byte literal
-+ | checktab RB, ->vmeta_tsetb
-+ | mov TAB:RB, [BASE+RB*8]
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tsetb
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL
-+ | je >3 // Previous value is nil?
-+ |1:
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2: // Set array slot.
-+ |.if X64
-+ | mov RAa, [BASE+RA*8]
-+ | mov [RC], RAa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ | ins_next
-+ |
-+ |3: // Check for __newindex if previous value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz <1
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <1
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RA
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <2
-+ break;
-+ case BC_TSETR:
-+ | ins_ABC // RA = src, RB = table, RC = key
-+ | mov TAB:RB, [BASE+RB*8]
-+ |.if DUALNUM
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | cvttsd2si RC, qword [BASE+RC*8]
-+ |.endif
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2:
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tsetr
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | // Set array slot.
-+ |->BC_TSETR_Z:
-+ |.if X64
-+ | mov RBa, [BASE+RA*8]
-+ | mov [RC], RBa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ | ins_next
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RA
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <2
-+ break;
-+
-+ case BC_TSETM:
-+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
-+ | mov TMP1, KBASE // Need one more free register.
-+ | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word.
-+ |1:
-+ | lea RA, [BASE+RA*8]
-+ | mov TAB:RB, [RA-8] // Guaranteed to be a table.
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2:
-+ | mov RD, MULTRES
-+ | sub RD, 1
-+ | jz >4 // Nothing to copy?
-+ | add RD, KBASE // Compute needed size.
-+ | cmp RD, TAB:RB->asize
-+ | ja >5 // Doesn't fit into array part?
-+ | sub RD, KBASE
-+ | shl KBASE, 3
-+ | add KBASE, TAB:RB->array
-+ |3: // Copy result slots to table.
-+ |.if X64
-+ | mov RBa, [RA]
-+ | add RA, 8
-+ | mov [KBASE], RBa
-+ |.else
-+ | mov RB, [RA]
-+ | mov [KBASE], RB
-+ | mov RB, [RA+4]
-+ | add RA, 8
-+ | mov [KBASE+4], RB
-+ |.endif
-+ | add KBASE, 8
-+ | sub RD, 1
-+ | jnz <3
-+ |4:
-+ | mov KBASE, TMP1
-+ | ins_next
-+ |
-+ |5: // Need to resize array part.
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, TAB:RB
-+ | mov CARG3d, RD
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG2, TAB:RB
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov ARG3, RD
-+ | mov ARG1, L:RB
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <1 // Retry.
-+ |
-+ |7: // Possible table write barrier for any value. Skip valiswhite check.
-+ | barrierback TAB:RB, RD
-+ | jmp <2
-+ break;
-+
-+ /* -- Calls and vararg handling ----------------------------------------- */
-+
-+ case BC_CALL: case BC_CALLM:
-+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
-+ if (op == BC_CALLM) {
-+ | add NARGS:RD, MULTRES
-+ }
-+ | cmp dword [BASE+RA*8+4], LJ_TFUNC
-+ | mov LFUNC:RB, [BASE+RA*8]
-+ | jne ->vmeta_call_ra
-+ | lea BASE, [BASE+RA*8+8]
-+ | ins_call
-+ break;
-+
-+ case BC_CALLMT:
-+ | ins_AD // RA = base, RD = extra_nargs
-+ | add NARGS:RD, MULTRES
-+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
-+ break;
-+ case BC_CALLT:
-+ | ins_AD // RA = base, RD = nargs+1
-+ | lea RA, [BASE+RA*8+8]
-+ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
-+ | mov LFUNC:RB, [RA-8]
-+ | cmp dword [RA-4], LJ_TFUNC
-+ | jne ->vmeta_call
-+ |->BC_CALLT_Z:
-+ | mov PC, [BASE-4]
-+ | test PC, FRAME_TYPE
-+ | jnz >7
-+ |1:
-+ | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
-+ | mov MULTRES, NARGS:RD
-+ | sub NARGS:RD, 1
-+ | jz >3
-+ |2: // Move args down.
-+ |.if X64
-+ | mov RBa, [RA]
-+ | add RA, 8
-+ | mov [KBASE], RBa
-+ |.else
-+ | mov RB, [RA]
-+ | mov [KBASE], RB
-+ | mov RB, [RA+4]
-+ | add RA, 8
-+ | mov [KBASE+4], RB
-+ |.endif
-+ | add KBASE, 8
-+ | sub NARGS:RD, 1
-+ | jnz <2
-+ |
-+ | mov LFUNC:RB, [BASE-8]
-+ |3:
-+ | mov NARGS:RD, MULTRES
-+ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
-+ | ja >5
-+ |4:
-+ | ins_callt
-+ |
-+ |5: // Tailcall to a fast function.
-+ | test PC, FRAME_TYPE // Lua frame below?
-+ | jnz <4
-+ | movzx RA, PC_RA
-+ | not RAa
-+ | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE.
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | jmp <4
-+ |
-+ |7: // Tailcall from a vararg function.
-+ | sub PC, FRAME_VARG
-+ | test PC, FRAME_TYPEP
-+ | jnz >8 // Vararg frame below?
-+ | sub BASE, PC // Need to relocate BASE/KBASE down.
-+ | mov KBASE, BASE
-+ | mov PC, [BASE-4]
-+ | jmp <1
-+ |8:
-+ | add PC, FRAME_VARG
-+ | jmp <1
-+ break;
-+
-+ case BC_ITERC:
-+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
-+ | lea RA, [BASE+RA*8+8] // fb = base+1
-+ |.if X64
-+ | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3].
-+ | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2].
-+ | mov [RA], RBa
-+ | mov [RA+8], RCa
-+ |.else
-+ | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
-+ | mov RC, [RA-20]
-+ | mov [RA], RB
-+ | mov [RA+4], RC
-+ | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
-+ | mov RC, [RA-12]
-+ | mov [RA+8], RB
-+ | mov [RA+12], RC
-+ |.endif
-+ | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
-+ | mov RC, [RA-28]
-+ | mov [RA-8], LFUNC:RB
-+ | mov [RA-4], RC
-+ | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
-+ | mov NARGS:RD, 2+1
-+ | jne ->vmeta_call
-+ | mov BASE, RA
-+ | ins_call
-+ break;
-+
-+ case BC_ITERN:
-+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
-+ |.if JIT
-+ | // NYI: add hotloop, record BC_ITERN.
-+ |.endif
-+ | mov TMP1, KBASE // Need two more free registers.
-+ | mov TMP2, DISPATCH
-+ | mov TAB:RB, [BASE+RA*8-16]
-+ | mov RC, [BASE+RA*8-8] // Get index from control var.
-+ | mov DISPATCH, TAB:RB->asize
-+ | add PC, 4
-+ | mov KBASE, TAB:RB->array
-+ |1: // Traverse array part.
-+ | cmp RC, DISPATCH; jae >5 // Index points after array part?
-+ | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
-+ |.if DUALNUM
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RC
-+ |.else
-+ | cvtsi2sd xmm0, RC
-+ |.endif
-+ | // Copy array slot to returned value.
-+ |.if X64
-+ | mov RBa, [KBASE+RC*8]
-+ | mov [BASE+RA*8+8], RBa
-+ |.else
-+ | mov RB, [KBASE+RC*8+4]
-+ | mov [BASE+RA*8+12], RB
-+ | mov RB, [KBASE+RC*8]
-+ | mov [BASE+RA*8+8], RB
-+ |.endif
-+ | add RC, 1
-+ | // Return array index as a numeric key.
-+ |.if DUALNUM
-+ | // See above.
-+ |.else
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endif
-+ | mov [BASE+RA*8-8], RC // Update control var.
-+ |2:
-+ | movzx RD, PC_RD // Get target from ITERL.
-+ | branchPC RD
-+ |3:
-+ | mov DISPATCH, TMP2
-+ | mov KBASE, TMP1
-+ | ins_next
-+ |
-+ |4: // Skip holes in array part.
-+ | add RC, 1
-+ | jmp <1
-+ |
-+ |5: // Traverse hash part.
-+ | sub RC, DISPATCH
-+ |6:
-+ | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
-+ | imul KBASE, RC, #NODE
-+ | add NODE:KBASE, TAB:RB->node
-+ | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
-+ | lea DISPATCH, [RC+DISPATCH+1]
-+ | // Copy key and value from hash slot.
-+ |.if X64
-+ | mov RBa, NODE:KBASE->key
-+ | mov RCa, NODE:KBASE->val
-+ | mov [BASE+RA*8], RBa
-+ | mov [BASE+RA*8+8], RCa
-+ |.else
-+ | mov RB, NODE:KBASE->key.gcr
-+ | mov RC, NODE:KBASE->key.it
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ | mov RB, NODE:KBASE->val.gcr
-+ | mov RC, NODE:KBASE->val.it
-+ | mov [BASE+RA*8+8], RB
-+ | mov [BASE+RA*8+12], RC
-+ |.endif
-+ | mov [BASE+RA*8-8], DISPATCH
-+ | jmp <2
-+ |
-+ |7: // Skip holes in hash part.
-+ | add RC, 1
-+ | jmp <6
-+ break;
-+
-+ case BC_ISNEXT:
-+ | ins_AD // RA = base, RD = target (points to ITERN)
-+ | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5
-+ | mov CFUNC:RB, [BASE+RA*8-24]
-+ | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5
-+ | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5
-+ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
-+ | branchPC RD
-+ | mov dword [BASE+RA*8-8], 0 // Initialize control var.
-+ | mov dword [BASE+RA*8-4], 0xfffe7fff
-+ |1:
-+ | ins_next
-+ |5: // Despecialize bytecode if any of the checks fail.
-+ | mov PC_OP, BC_JMP
-+ | branchPC RD
-+ | mov byte [PC], BC_ITERC
-+ | jmp <1
-+ break;
-+
-+ case BC_VARG:
-+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
-+ | mov TMP1, KBASE // Need one more free register.
-+ | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
-+ | lea RA, [BASE+RA*8]
-+ | sub KBASE, [BASE-4]
-+ | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
-+ | test RB, RB
-+ | jz >5 // Copy all varargs?
-+ | lea RB, [RA+RB*8-8]
-+ | cmp KBASE, BASE // No vararg slots?
-+ | jnb >2
-+ |1: // Copy vararg slots to destination slots.
-+ |.if X64
-+ | mov RCa, [KBASE-8]
-+ | add KBASE, 8
-+ | mov [RA], RCa
-+ |.else
-+ | mov RC, [KBASE-8]
-+ | mov [RA], RC
-+ | mov RC, [KBASE-4]
-+ | add KBASE, 8
-+ | mov [RA+4], RC
-+ |.endif
-+ | add RA, 8
-+ | cmp RA, RB // All destination slots filled?
-+ | jnb >3
-+ | cmp KBASE, BASE // No more vararg slots?
-+ | jb <1
-+ |2: // Fill up remainder with nil.
-+ | mov dword [RA+4], LJ_TNIL
-+ | add RA, 8
-+ | cmp RA, RB
-+ | jb <2
-+ |3:
-+ | mov KBASE, TMP1
-+ | ins_next
-+ |
-+ |5: // Copy all varargs.
-+ | mov MULTRES, 1 // MULTRES = 0+1
-+ | mov RC, BASE
-+ | sub RC, KBASE
-+ | jbe <3 // No vararg slots?
-+ | mov RB, RC
-+ | shr RB, 3
-+ | add RB, 1
-+ | mov MULTRES, RB // MULTRES = #varargs+1
-+ | mov L:RB, SAVE_L
-+ | add RC, RA
-+ | cmp RC, L:RB->maxstack
-+ | ja >7 // Need to grow stack?
-+ |6: // Copy all vararg slots.
-+ |.if X64
-+ | mov RCa, [KBASE-8]
-+ | add KBASE, 8
-+ | mov [RA], RCa
-+ |.else
-+ | mov RC, [KBASE-8]
-+ | mov [RA], RC
-+ | mov RC, [KBASE-4]
-+ | add KBASE, 8
-+ | mov [RA+4], RC
-+ |.endif
-+ | add RA, 8
-+ | cmp KBASE, BASE // No more vararg slots?
-+ | jb <6
-+ | jmp <3
-+ |
-+ |7: // Grow stack for varargs.
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RA
-+ | mov SAVE_PC, PC
-+ | sub KBASE, BASE // Need delta, because BASE may change.
-+ | mov FCARG2, MULTRES
-+ | sub FCARG2, 1
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->base
-+ | mov RA, L:RB->top
-+ | add KBASE, BASE
-+ | jmp <6
-+ break;
-+
-+ /* -- Returns ----------------------------------------------------------- */
-+
-+ case BC_RETM:
-+ | ins_AD // RA = results, RD = extra_nresults
-+ | add RD, MULTRES // MULTRES >=1, so RD >=1.
-+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
-+ break;
-+
-+ case BC_RET: case BC_RET0: case BC_RET1:
-+ | ins_AD // RA = results, RD = nresults+1
-+ if (op != BC_RET0) {
-+ | shl RA, 3
-+ }
-+ |1:
-+ | mov PC, [BASE-4]
-+ | mov MULTRES, RD // Save nresults+1.
-+ | test PC, FRAME_TYPE // Check frame type marker.
-+ | jnz >7 // Not returning to a fixarg Lua func?
-+ switch (op) {
-+ case BC_RET:
-+ |->BC_RET_Z:
-+ | mov KBASE, BASE // Use KBASE for result move.
-+ | sub RD, 1
-+ | jz >3
-+ |2: // Move results down.
-+ |.if X64
-+ | mov RBa, [KBASE+RA]
-+ | mov [KBASE-8], RBa
-+ |.else
-+ | mov RB, [KBASE+RA]
-+ | mov [KBASE-8], RB
-+ | mov RB, [KBASE+RA+4]
-+ | mov [KBASE-4], RB
-+ |.endif
-+ | add KBASE, 8
-+ | sub RD, 1
-+ | jnz <2
-+ |3:
-+ | mov RD, MULTRES // Note: MULTRES may be >255.
-+ | movzx RB, PC_RB // So cannot compare with RDL!
-+ |5:
-+ | cmp RB, RD // More results expected?
-+ | ja >6
-+ break;
-+ case BC_RET1:
-+ |.if X64
-+ | mov RBa, [BASE+RA]
-+ | mov [BASE-8], RBa
-+ |.else
-+ | mov RB, [BASE+RA+4]
-+ | mov [BASE-4], RB
-+ | mov RB, [BASE+RA]
-+ | mov [BASE-8], RB
-+ |.endif
-+ /* fallthrough */
-+ case BC_RET0:
-+ |5:
-+ | cmp PC_RB, RDL // More results expected?
-+ | ja >6
-+ default:
-+ break;
-+ }
-+ | movzx RA, PC_RA
-+ | not RAa // Note: ~RA = -(RA+1)
-+ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | ins_next
-+ |
-+ |6: // Fill up results with nil.
-+ if (op == BC_RET) {
-+ | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
-+ | add KBASE, 8
-+ } else {
-+ | mov dword [BASE+RD*8-12], LJ_TNIL
-+ }
-+ | add RD, 1
-+ | jmp <5
-+ |
-+ |7: // Non-standard return case.
-+ | lea RB, [PC-FRAME_VARG]
-+ | test RB, FRAME_TYPEP
-+ | jnz ->vm_return
-+ | // Return from vararg function: relocate BASE down and RA up.
-+ | sub BASE, RB
-+ if (op != BC_RET0) {
-+ | add RA, RB
-+ }
-+ | jmp <1
-+ break;
-+
-+ /* -- Loops and branches ------------------------------------------------ */
-+
-+ |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4]
-+ |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12]
-+ |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20]
-+ |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28]
-+
-+ case BC_FORL:
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
-+ break;
-+
-+ case BC_JFORI:
-+ case BC_JFORL:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ case BC_FORI:
-+ case BC_IFORL:
-+ vk = (op == BC_IFORL || op == BC_JFORL);
-+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
-+ | lea RA, [BASE+RA*8]
-+ if (LJ_DUALNUM) {
-+ | cmp FOR_TIDX, LJ_TISNUM; jne >9
-+ if (!vk) {
-+ | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
-+ | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
-+ | mov RB, dword FOR_IDX
-+ | cmp dword FOR_STEP, 0; jl >5
-+ } else {
-+#ifdef LUA_USE_ASSERT
-+ | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
-+ | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
-+#endif
-+ | mov RB, dword FOR_STEP
-+ | test RB, RB; js >5
-+ | add RB, dword FOR_IDX; jo >1
-+ | mov dword FOR_IDX, RB
-+ }
-+ | cmp RB, dword FOR_STOP
-+ | mov FOR_TEXT, LJ_TISNUM
-+ | mov dword FOR_EXT, RB
-+ if (op == BC_FORI) {
-+ | jle >7
-+ |1:
-+ |6:
-+ | branchPC RD
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | movzx RD, PC_RD
-+ | jle =>BC_JLOOP
-+ |1:
-+ |6:
-+ } else if (op == BC_IFORL) {
-+ | jg >7
-+ |6:
-+ | branchPC RD
-+ |1:
-+ } else {
-+ | jle =>BC_JLOOP
-+ |1:
-+ |6:
-+ }
-+ |7:
-+ | ins_next
-+ |
-+ |5: // Invert check for negative step.
-+ if (vk) {
-+ | add RB, dword FOR_IDX; jo <1
-+ | mov dword FOR_IDX, RB
-+ }
-+ | cmp RB, dword FOR_STOP
-+ | mov FOR_TEXT, LJ_TISNUM
-+ | mov dword FOR_EXT, RB
-+ if (op == BC_FORI) {
-+ | jge <7
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | movzx RD, PC_RD
-+ | jge =>BC_JLOOP
-+ } else if (op == BC_IFORL) {
-+ | jl <7
-+ } else {
-+ | jge =>BC_JLOOP
-+ }
-+ | jmp <6
-+ |9: // Fallback to FP variant.
-+ } else if (!vk) {
-+ | cmp FOR_TIDX, LJ_TISNUM
-+ }
-+ if (!vk) {
-+ | jae ->vmeta_for
-+ | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
-+ } else {
-+#ifdef LUA_USE_ASSERT
-+ | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
-+ | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
-+#endif
-+ }
-+ | mov RB, FOR_TSTEP // Load type/hiword of for step.
-+ if (!vk) {
-+ | cmp RB, LJ_TISNUM; jae ->vmeta_for
-+ }
-+ | movsd xmm0, qword FOR_IDX
-+ | movsd xmm1, qword FOR_STOP
-+ if (vk) {
-+ | addsd xmm0, qword FOR_STEP
-+ | movsd qword FOR_IDX, xmm0
-+ | test RB, RB; js >3
-+ } else {
-+ | jl >3
-+ }
-+ | ucomisd xmm1, xmm0
-+ |1:
-+ | movsd qword FOR_EXT, xmm0
-+ if (op == BC_FORI) {
-+ |.if DUALNUM
-+ | jnb <7
-+ |.else
-+ | jnb >2
-+ | branchPC RD
-+ |.endif
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | movzx RD, PC_RD
-+ | jnb =>BC_JLOOP
-+ } else if (op == BC_IFORL) {
-+ |.if DUALNUM
-+ | jb <7
-+ |.else
-+ | jb >2
-+ | branchPC RD
-+ |.endif
-+ } else {
-+ | jnb =>BC_JLOOP
-+ }
-+ |.if DUALNUM
-+ | jmp <6
-+ |.else
-+ |2:
-+ | ins_next
-+ |.endif
-+ |
-+ |3: // Invert comparison if step is negative.
-+ | ucomisd xmm0, xmm1
-+ | jmp <1
-+ break;
-+
-+ case BC_ITERL:
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
-+ break;
-+
-+ case BC_JITERL:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ case BC_IITERL:
-+ | ins_AJ // RA = base, RD = target
-+ | lea RA, [BASE+RA*8]
-+ | mov RB, [RA+4]
-+ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
-+ if (op == BC_JITERL) {
-+ | mov [RA-4], RB
-+ | mov RB, [RA]
-+ | mov [RA-8], RB
-+ | jmp =>BC_JLOOP
-+ } else {
-+ | branchPC RD // Otherwise save control var + branch.
-+ | mov RD, [RA]
-+ | mov [RA-4], RB
-+ | mov [RA-8], RD
-+ }
-+ |1:
-+ | ins_next
-+ break;
-+
-+ case BC_LOOP:
-+ | ins_A // RA = base, RD = target (loop extent)
-+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
-+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
-+ break;
-+
-+ case BC_ILOOP:
-+ | ins_A // RA = base, RD = target (loop extent)
-+ | ins_next
-+ break;
-+
-+ case BC_JLOOP:
-+ |.if JIT
-+ | ins_AD // RA = base (ignored), RD = traceno
-+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
-+ | mov TRACE:RD, [RA+RD*4]
-+ | mov RDa, TRACE:RD->mcode
-+ | mov L:RB, SAVE_L
-+ | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
-+ | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
-+ | // Save additional callee-save registers only used in compiled code.
-+ |.if X64WIN
-+ | mov TMPQ, r12
-+ | mov TMPa, r13
-+ | mov CSAVE_4, r14
-+ | mov CSAVE_3, r15
-+ | mov RAa, rsp
-+ | sub rsp, 9*16+4*8
-+ | movdqa [RAa], xmm6
-+ | movdqa [RAa-1*16], xmm7
-+ | movdqa [RAa-2*16], xmm8
-+ | movdqa [RAa-3*16], xmm9
-+ | movdqa [RAa-4*16], xmm10
-+ | movdqa [RAa-5*16], xmm11
-+ | movdqa [RAa-6*16], xmm12
-+ | movdqa [RAa-7*16], xmm13
-+ | movdqa [RAa-8*16], xmm14
-+ | movdqa [RAa-9*16], xmm15
-+ |.elif X64
-+ | mov TMPQ, r12
-+ | mov TMPa, r13
-+ | sub rsp, 16
-+ |.endif
-+ | jmp RDa
-+ |.endif
-+ break;
-+
-+ case BC_JMP:
-+ | ins_AJ // RA = unused, RD = target
-+ | branchPC RD
-+ | ins_next
-+ break;
-+
-+ /* -- Function headers -------------------------------------------------- */
-+
-+ /*
-+ ** Reminder: A function may be called with func/args above L->maxstack,
-+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
-+ ** too. This means all FUNC* ops (including fast functions) must check
-+ ** for stack overflow _before_ adding more slots!
-+ */
-+
-+ case BC_FUNCF:
-+ |.if JIT
-+ | hotcall RB
-+ |.endif
-+ case BC_FUNCV: /* NYI: compiled vararg functions. */
-+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
-+ break;
-+
-+ case BC_JFUNCF:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ case BC_IFUNCF:
-+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-+ | mov KBASE, [PC-4+PC2PROTO(k)]
-+ | mov L:RB, SAVE_L
-+ | lea RA, [BASE+RA*8] // Top of frame.
-+ | cmp RA, L:RB->maxstack
-+ | ja ->vm_growstack_f
-+ | movzx RA, byte [PC-4+PC2PROTO(numparams)]
-+ | cmp NARGS:RD, RA // Check for missing parameters.
-+ | jbe >3
-+ |2:
-+ if (op == BC_JFUNCF) {
-+ | movzx RD, PC_RD
-+ | jmp =>BC_JLOOP
-+ } else {
-+ | ins_next
-+ }
-+ |
-+ |3: // Clear missing parameters.
-+ | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
-+ | add NARGS:RD, 1
-+ | cmp NARGS:RD, RA
-+ | jbe <3
-+ | jmp <2
-+ break;
-+
-+ case BC_JFUNCV:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ | int3 // NYI: compiled vararg functions
-+ break; /* NYI: compiled vararg functions. */
-+
-+ case BC_IFUNCV:
-+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-+ | lea RB, [NARGS:RD*8+FRAME_VARG]
-+ | lea RD, [BASE+NARGS:RD*8]
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov [RD-4], RB // Store delta + FRAME_VARG.
-+ | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
-+ | mov L:RB, SAVE_L
-+ | lea RA, [RD+RA*8]
-+ | cmp RA, L:RB->maxstack
-+ | ja ->vm_growstack_v // Need to grow stack.
-+ | mov RA, BASE
-+ | mov BASE, RD
-+ | movzx RB, byte [PC-4+PC2PROTO(numparams)]
-+ | test RB, RB
-+ | jz >2
-+ |1: // Copy fixarg slots up to new frame.
-+ | add RA, 8
-+ | cmp RA, BASE
-+ | jnb >3 // Less args than parameters?
-+ | mov KBASE, [RA-8]
-+ | mov [RD], KBASE
-+ | mov KBASE, [RA-4]
-+ | mov [RD+4], KBASE
-+ | add RD, 8
-+ | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
-+ | sub RB, 1
-+ | jnz <1
-+ |2:
-+ if (op == BC_JFUNCV) {
-+ | movzx RD, PC_RD
-+ | jmp =>BC_JLOOP
-+ } else {
-+ | mov KBASE, [PC-4+PC2PROTO(k)]
-+ | ins_next
-+ }
-+ |
-+ |3: // Clear missing parameters.
-+ | mov dword [RD+4], LJ_TNIL
-+ | add RD, 8
-+ | sub RB, 1
-+ | jnz <3
-+ | jmp <2
-+ break;
-+
-+ case BC_FUNCC:
-+ case BC_FUNCCW:
-+ | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov KBASEa, CFUNC:RB->f
-+ | mov L:RB, SAVE_L
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ | mov L:RB->base, BASE
-+ | lea RA, [RD+8*LUA_MINSTACK]
-+ | cmp RA, L:RB->maxstack
-+ | mov L:RB->top, RD
-+ if (op == BC_FUNCC) {
-+ |.if X64
-+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-+ |.else
-+ | mov ARG1, L:RB
-+ |.endif
-+ } else {
-+ |.if X64
-+ | mov CARG2, KBASEa
-+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-+ |.else
-+ | mov ARG2, KBASEa
-+ | mov ARG1, L:RB
-+ |.endif
-+ }
-+ | ja ->vm_growstack_c // Need to grow stack.
-+ | set_vmstate C
-+ if (op == BC_FUNCC) {
-+ | call KBASEa // (lua_State *L)
-+ } else {
-+ | // (lua_State *L, lua_CFunction f)
-+ | call aword [DISPATCH+DISPATCH_GL(wrapf)]
-+ }
-+ | // nresults returned in eax (RD).
-+ | mov BASE, L:RB->base
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ | set_vmstate INTERP
-+ | lea RA, [BASE+RD*8]
-+ | neg RA
-+ | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
-+ | mov PC, [BASE-4] // Fetch PC of caller.
-+ | jmp ->vm_returnc
-+ break;
-+
-+ /* ---------------------------------------------------------------------- */
-+
-+ default:
-+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
-+ exit(2);
-+ break;
-+ }
-+}
-+
-+static int build_backend(BuildCtx *ctx)
-+{
-+ int op;
-+ dasm_growpc(Dst, BC__MAX);
-+ build_subroutines(ctx);
-+ |.code_op
-+ for (op = 0; op < BC__MAX; op++)
-+ build_ins(ctx, (BCOp)op, op);
-+ return BC__MAX;
-+}
-+
-+/* Emit pseudo frame-info for all assembler functions. */
-+static void emit_asm_debug(BuildCtx *ctx)
-+{
-+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-+#if LJ_64
-+#define SZPTR "8"
-+#define BSZPTR "3"
-+#define REG_SP "0x7"
-+#define REG_RA "0x10"
-+#else
-+#define SZPTR "4"
-+#define BSZPTR "2"
-+#define REG_SP "0x4"
-+#define REG_RA "0x8"
-+#endif
-+ switch (ctx->mode) {
-+ case BUILD_elfasm:
-+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
-+ fprintf(ctx->fp,
-+ ".Lframe0:\n"
-+ "\t.long .LECIE0-.LSCIE0\n"
-+ ".LSCIE0:\n"
-+ "\t.long 0xffffffff\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-+ "\t.align " SZPTR "\n"
-+ ".LECIE0:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE0:\n"
-+ "\t.long .LEFDE0-.LASFDE0\n"
-+ ".LASFDE0:\n"
-+ "\t.long .Lframe0\n"
-+#if LJ_64
-+ "\t.quad .Lbegin\n"
-+ "\t.quad %d\n"
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
-+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
-+#if LJ_NO_UNWIND
-+ "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
-+ "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
-+#endif
-+#else
-+ "\t.long .Lbegin\n"
-+ "\t.long %d\n"
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
-+ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
-+ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
-+#if LJ_HASFFI
-+ fprintf(ctx->fp,
-+ ".LSFDE1:\n"
-+ "\t.long .LEFDE1-.LASFDE1\n"
-+ ".LASFDE1:\n"
-+ "\t.long .Lframe0\n"
-+#if LJ_64
-+ "\t.quad lj_vm_ffi_call\n"
-+ "\t.quad %d\n"
-+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+#else
-+ "\t.long lj_vm_ffi_call\n"
-+ "\t.long %d\n"
-+ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
-+#endif
-+#if !LJ_NO_UNWIND
-+#if (defined(__sun__) && defined(__svr4__))
-+#if LJ_64
-+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
-+#else
-+ fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
-+#endif
-+#else
-+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
-+#endif
-+ fprintf(ctx->fp,
-+ ".Lframe1:\n"
-+ "\t.long .LECIE1-.LSCIE1\n"
-+ ".LSCIE1:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"zPR\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.uleb128 6\n" /* augmentation length */
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.long lj_err_unwind_dwarf-.\n"
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-+ "\t.align " SZPTR "\n"
-+ ".LECIE1:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE2:\n"
-+ "\t.long .LEFDE2-.LASFDE2\n"
-+ ".LASFDE2:\n"
-+ "\t.long .LASFDE2-.Lframe1\n"
-+ "\t.long .Lbegin-.\n"
-+ "\t.long %d\n"
-+ "\t.uleb128 0\n" /* augmentation length */
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+#if LJ_64
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
-+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
-+#else
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
-+ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
-+ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
-+#if LJ_HASFFI
-+ fprintf(ctx->fp,
-+ ".Lframe2:\n"
-+ "\t.long .LECIE2-.LSCIE2\n"
-+ ".LSCIE2:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"zR\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.uleb128 1\n" /* augmentation length */
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-+ "\t.align " SZPTR "\n"
-+ ".LECIE2:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE3:\n"
-+ "\t.long .LEFDE3-.LASFDE3\n"
-+ ".LASFDE3:\n"
-+ "\t.long .LASFDE3-.Lframe2\n"
-+ "\t.long lj_vm_ffi_call-.\n"
-+ "\t.long %d\n"
-+ "\t.uleb128 0\n" /* augmentation length */
-+#if LJ_64
-+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+#else
-+ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
-+#endif
-+#endif
-+ break;
-+#if !LJ_NO_UNWIND
-+ /* Mental note: never let Apple design an assembler.
-+ ** Or a linker. Or a plastic case. But I digress.
-+ */
-+ case BUILD_machasm: {
-+#if LJ_HASFFI
-+ int fcsize = 0;
-+#endif
-+ int i;
-+ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
-+ fprintf(ctx->fp,
-+ "EH_frame1:\n"
-+ "\t.set L$set$x,LECIEX-LSCIEX\n"
-+ "\t.long L$set$x\n"
-+ "LSCIEX:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.ascii \"zPR\\0\"\n"
-+ "\t.byte 0x1\n"
-+ "\t.byte 128-" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.byte 6\n" /* augmentation length */
-+ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
-+#if LJ_64
-+ "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
-+#else
-+ "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
-+#endif
-+ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-+ "\t.align " BSZPTR "\n"
-+ "LECIEX:\n\n");
-+ for (i = 0; i < ctx->nsym; i++) {
-+ const char *name = ctx->sym[i].name;
-+ int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
-+ if (size == 0) continue;
-+#if LJ_HASFFI
-+ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
-+#endif
-+ fprintf(ctx->fp,
-+ "%s.eh:\n"
-+ "LSFDE%d:\n"
-+ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
-+ "\t.long L$set$%d\n"
-+ "LASFDE%d:\n"
-+ "\t.long LASFDE%d-EH_frame1\n"
-+ "\t.long %s-.\n"
-+ "\t.long %d\n"
-+ "\t.byte 0\n" /* augmentation length */
-+ "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
-+#if LJ_64
-+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
-+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
-+ "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
-+ "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
-+#else
-+ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
-+ "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
-+ "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
-+ "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
-+#endif
-+ "\t.align " BSZPTR "\n"
-+ "LEFDE%d:\n\n",
-+ name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
-+ }
-+#if LJ_HASFFI
-+ if (fcsize) {
-+ fprintf(ctx->fp,
-+ "EH_frame2:\n"
-+ "\t.set L$set$y,LECIEY-LSCIEY\n"
-+ "\t.long L$set$y\n"
-+ "LSCIEY:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.ascii \"zR\\0\"\n"
-+ "\t.byte 0x1\n"
-+ "\t.byte 128-" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.byte 1\n" /* augmentation length */
-+#if LJ_64
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
-+#else
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
-+#endif
-+ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-+ "\t.align " BSZPTR "\n"
-+ "LECIEY:\n\n");
-+ fprintf(ctx->fp,
-+ "_lj_vm_ffi_call.eh:\n"
-+ "LSFDEY:\n"
-+ "\t.set L$set$yy,LEFDEY-LASFDEY\n"
-+ "\t.long L$set$yy\n"
-+ "LASFDEY:\n"
-+ "\t.long LASFDEY-EH_frame2\n"
-+ "\t.long _lj_vm_ffi_call-.\n"
-+ "\t.long %d\n"
-+ "\t.byte 0\n" /* augmentation length */
-+#if LJ_64
-+ "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
-+ "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
-+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
-+#else
-+ "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
-+ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
-+ "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */
-+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
-+#endif
-+ "\t.align " BSZPTR "\n"
-+ "LEFDEY:\n\n", fcsize);
-+ }
-+#endif
-+#if !LJ_64
-+ fprintf(ctx->fp,
-+ "\t.non_lazy_symbol_pointer\n"
-+ "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
-+ ".indirect_symbol _lj_err_unwind_dwarf\n"
-+ ".long 0\n\n");
-+ fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
-+ {
-+ const char *const *xn;
-+ for (xn = ctx->extnames; *xn; xn++)
-+ if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
-+ fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
-+ }
-+#endif
-+ fprintf(ctx->fp, ".subsections_via_symbols\n");
-+ }
-+ break;
-+#endif
-+ default: /* Difficult for other modes. */
-+ break;
-+ }
-+}
---
-2.20.1
-
-
-From 60d18a8d74c593fa689880a228c5e8c13fc33c9e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 15 Nov 2016 13:50:15 -0500
-Subject: [PATCH 011/247] Fix some s390x declarations.
-
-s/S390x/S390X/
----
- Makefile | 2 +-
- src/Makefile | 3 ++-
- src/lj_arch.h | 29 +++++++++++++++++++----------
- 3 files changed, 22 insertions(+), 12 deletions(-)
-
-diff --git a/Makefile b/Makefile
-index 923bf72..fc8ed61 100644
---- a/Makefile
-+++ b/Makefile
-@@ -88,7 +88,7 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
- FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
- dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
- dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
-- dis_mips64.lua dis_mips64el.lua vmdef.lua
-+ dis_mips64.lua dis_mips64el.lua dis_s390x.lua vmdef.lua
-
- ifeq (,$(findstring Windows,$(OS)))
- HOST_SYS:= $(shell uname -s)
-diff --git a/src/Makefile b/src/Makefile
-index 2bf15d2..d0f160a 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -245,7 +245,7 @@ else
- ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm
- else
--ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH)))
-+ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= s390x
- else
- ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
-@@ -279,6 +279,7 @@ endif
- endif
- endif
- endif
-+endif
-
- ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
- TARGET_SYS= PS3
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index c781eb1..32c706f 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -29,7 +29,8 @@
- #define LUAJIT_ARCH_mips32 6
- #define LUAJIT_ARCH_MIPS64 7
- #define LUAJIT_ARCH_mips64 7
--#define LUAJIT_ARCH_S390x 8
-+#define LUAJIT_ARCH_S390X 8
-+#define LUAJIT_ARCH_s390x 8
-
- /* Target OS. */
- #define LUAJIT_OS_OTHER 0
-@@ -50,8 +51,8 @@
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM
- #elif defined(__aarch64__)
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
--#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x) || defined(S390x)
--#define LUAJIT_TARGET LUAJIT_ARCH_S390x
-+#elif defined(__s390x__) || defined(__s390x)
-+#define LUAJIT_TARGET LUAJIT_ARCH_S390X
- #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
- #define LUAJIT_TARGET LUAJIT_ARCH_PPC
- #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
-@@ -244,13 +245,6 @@
-
- #define LJ_ARCH_VERSION 80
-
--#elif LUAJIT_TARGET == LUAJIT_ARCH_S390
--
-- #define LJ_ARCH_NAME "s390x"
-- #define LJ_ARCH_BITS 64
-- #define LJ_ARCH_ENDIAN LUAJIT_BE
-- #define LJ_TARGET_S390 1
--
- #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-
- #ifndef LJ_ARCH_ENDIAN
-@@ -393,6 +387,21 @@
- #define LJ_ARCH_VERSION 10
- #endif
-
-+#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X
-+
-+#define LJ_ARCH_NAME "s390x"
-+#define LJ_ARCH_BITS 64
-+#define LJ_ARCH_ENDIAN LUAJIT_BE
-+#define LJ_TARGET_S390X 1
-+#define LJ_TARGET_EHRETREG 0
-+#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
-+#define LJ_TARGET_MASKSHIFT 1
-+#define LJ_TARGET_MASKROT 1
-+#define LJ_TARGET_UNALIGNED 1
-+#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
-+#define LJ_TARGET_GC64 1
-+#define LJ_ARCH_NOJIT 1 /* NYI */
-+
- #else
- #error "No target architecture defined"
- #endif
---
-2.20.1
-
-
-From ae026b0a6988225376413fd758daadafd44098cb Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 15 Nov 2016 14:39:34 -0500
-Subject: [PATCH 012/247] Add some s390x C calling convention constants.
-
-Guesses for now based on the ELF ABI supplement for zSeries.
----
- src/host/buildvm.c | 2 ++
- src/lj_ccall.h | 11 +++++++++++
- 2 files changed, 13 insertions(+)
-
-diff --git a/src/host/buildvm.c b/src/host/buildvm.c
-index de23fab..fdd6ec6 100644
---- a/src/host/buildvm.c
-+++ b/src/host/buildvm.c
-@@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
- #include "../dynasm/dasm_ppc.h"
- #elif LJ_TARGET_MIPS
- #include "../dynasm/dasm_mips.h"
-+#elif LJ_TARGET_S390X
-+#include "../dynasm/dasm_s390x.h"
- #else
- #error "No support for this architecture (yet)"
- #endif
-diff --git a/src/lj_ccall.h b/src/lj_ccall.h
-index 6efa48c..9f023fc 100644
---- a/src/lj_ccall.h
-+++ b/src/lj_ccall.h
-@@ -126,6 +126,17 @@ typedef union FPRArg {
- struct { LJ_ENDIAN_LOHI(float f; , float g;) };
- } FPRArg;
-
-+#elif LJ_TARGET_S390X
-+
-+#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */
-+#define CCALL_NARG_FPR 4 /* FPR 0,2,4,8 */
-+#define CCALL_NRET_GPR 1 /* GPR 2 */
-+#define CCALL_NRET_FPR 1 /* FPR 0 */
-+#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */
-+#define CCALL_SPS_FREE 0
-+
-+typedef intptr_t GPRArg;
-+typedef double FPRArg;
- #else
- #error "Missing calling convention definitions for this architecture"
- #endif
---
-2.20.1
-
-
-From 1bd2f2928e00c4d90861a351406acdc3575d9d49 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 15 Nov 2016 14:53:00 -0500
-Subject: [PATCH 013/247] Delete gcc version check for now.
-
-Stick to the default until we know what we actually need.
----
- src/lj_arch.h | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 32c706f..930d4c3 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -429,10 +429,6 @@
- #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
- #error "Need at least Clang 3.5 or newer"
- #endif
--#elif LJ_TARGET_S390x
--#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
--#error "Need at least GCC 4.2 or newer"
--#endif
- #else
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
- #error "Need at least GCC 4.8 or newer"
---
-2.20.1
-
-
-From 7957ceb416ca1426f2e990c6131a61d2ceae69aa Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 16 Nov 2016 10:31:34 +0530
-Subject: [PATCH 014/247] Update lj_arch.h
-
-Added missing elif condition for s390x for GCC dependency
----
- src/lj_arch.h | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 930d4c3..33f51af 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -424,6 +424,10 @@
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
- #error "Need at least GCC 4.2 or newer"
- #endif
-+#elif LJ_TARGET_S390x
-+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
-+#error "Need at least GCC 4.2 or newer"
-+#endif
- #elif LJ_TARGET_ARM64
- #if __clang__
- #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
---
-2.20.1
-
-
-From af5fc086d5f7febd746929bfabe6d5a3a2e072c3 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 16 Nov 2016 10:32:53 +0530
-Subject: [PATCH 015/247] Update lj_arch.h
-
-Removing the gcc check for now .. missed micheal's comment earlier
----
- src/lj_arch.h | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 33f51af..930d4c3 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -424,10 +424,6 @@
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
- #error "Need at least GCC 4.2 or newer"
- #endif
--#elif LJ_TARGET_S390x
--#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
--#error "Need at least GCC 4.2 or newer"
--#endif
- #elif LJ_TARGET_ARM64
- #if __clang__
- #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
---
-2.20.1
-
-
-From b48aeb494fa7dbb18c2852235ce057edb7a6ac9b Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 16 Nov 2016 11:50:46 +0530
-Subject: [PATCH 016/247] Update lj_target_s390x.h
-
-changed instruction opcode to 64bit
----
- src/lj_target_s390x.h | 30 +++++++++++++++---------------
- 1 file changed, 15 insertions(+), 15 deletions(-)
-
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-index 27bb349..551bb7d 100644
---- a/src/lj_target_s390x.h
-+++ b/src/lj_target_s390x.h
-@@ -154,27 +154,27 @@ typedef struct {
- #define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-
- typedef enum S390xIns {
-- S390I_SR = 0x1B000000,
-- S390I_AR = 0x1A000000,
-- S390I_NR = 0x14000000,
-- S390I_XR = 0x17000000,
-- S390I_MR = 0x1C000000,
-- S390I_LR = 0x18000000,
-- S390I_C = 0x59000000,
-- S390I_LH = 0x48000000,
-- S390I_BASR = 0x0D000000,
-- S390I_MVCL = 0x0e000000,
-- S390I_ST = 0x50000000,
-- S390I_TM = 0x91000000,
-- S390I_MP = 0xbd000090,
-- S390I_CLR = 0x15000000,
-+ S390I_SR = 0x1B00000000000000,
-+ S390I_AR = 0x1A00000000000000,
-+ S390I_NR = 0x1400000000000000,
-+ S390I_XR = 0x1700000000000000,
-+ S390I_MR = 0x1C00000000000000,
-+ S390I_LR = 0x1800000000000000,
-+ S390I_C = 0x5900000000000000,
-+ S390I_LH = 0x4800000000000000,
-+ S390I_BASR = 0x0D00000000000000,
-+ S390I_MVCL = 0x0e00000000000000,
-+ S390I_ST = 0x5000000000000000,
-+ S390I_TM = 0x9100000000000000,
-+ S390I_MP = 0xbd00009000000000,
-+ S390I_CLR = 0x1500000000000000,
- } S390xIns;
-
- typedef enum S390xShift {
- S390SH_SLL, S390SH_SRL, S390SH_SRA
- } S390xShift;
-
--/* ARM condition codes. */
-+/* S390x condition codes. */
- typedef enum S390xCC {
-
- } S390xCC;
---
-2.20.1
-
-
-From ac680ad13758d2e3006f80ccce9b081591ec1e64 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 16 Nov 2016 14:44:12 +0530
-Subject: [PATCH 017/247] Changed the encoding for add,and,branch instructions
-
----
- dynasm/dasm_s390x.lua | 116 +++++++++++++++++++++++++++---------------
- 1 file changed, 76 insertions(+), 40 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index a0a50e1..3542e7e 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -653,47 +653,83 @@ end)
- -- Template strings for ARM instructions.
- map_op = {
- -- Basic data processing instructions.
-- add_2 = "00000000005a0000RX-a|00000000001aRR|00000000b9f80000RRF-a|00000000e35a0000RXY-a|00000000e3080000RXY-a",
--
---- and has several possible ways, need to find one, currently added two type of
-- and_2 = "0000000000540000RX-a|00000000140000RR|00000000b9f4RRF-a|00000000e3540000RXY-a|00000000b9800000RRE| 00000000b9e40000RRF-a",
-- and_c = "0000000000d40000SS-a",
-- and_i = "0000000000940000SI|00000000eb540000SIY",
-+ --add
-+ ar = "0000000000001a00", --RR
-+ ay = "0000e3000000005a", --RXY-a
-+ ag = "0000e30000000008",
-+ agr = "00000000b9080000", --RRE
-+ agf = "0000e30000000018",
-+ agfr = "00000000b9180000",
-+ agbr = "00000000b34a0000",
-+ adbr = "00000000b31a0000",
-+ aebr = "00000000b30a0000",
-+ ah = "000000004a000000", --RXa
-+ ahy = "0000e3000000007a",
-+ afi = "0000c20900000000", --RIL-a --pls check if this is correct
-+ agfi = "0000c20800000000",
-+ aih = "0000cc0800000000",
-+ al = "000000005e000000",
-+ alr = "0000000000001e00",
-+ aly = "0000e3000000005e", -- RXY-a
-+ alg = "0000e3000000000a",
-+ algr = "00000000b90a0000",
-+ algf = "0000e3000000001a",
-+ algfr = "00000000b91a0000",
-+ alfi = "0000c20b00000000",
-+ algfi = "0000c20a00000000",
-+ alc = "0000e30000000098",
-+ alcr = "00000000b9980000", -- RRE
-+ alcg = "0000e30000000088",
-+ alcgr = "00000000b9880000",
-+ alsih = "0000cc0a00000000",
-+ alsihn ="0000cc0b00000000",
-+ axr = "0000000000003600", -- RR
-+ ad = "000000006a000000", -- Rx-a
-+ adr = "0000000000002a00",
-+ ae = "000000007a000000",
-+ aer = "0000000000003a00",
-+ aw = "000000006e000000",
-+ awr = "0000000000002e00",
-+ au = "000000007e000000",
-+ aur = "0000000000003e00",
-+
-+-- and
-+ n = "0000000054000000",
-+ nr = "0000000000001400",
-+ ny = "0000e30000000054", -- RXY-a
-+ ng = "0000e30000000080",
-+ ngr = "00000000b9800000",
-+ nihf = "0000c00a00000000", --RIL-a
-+ nihl = "0000c00b00000000",
-
-and_2 = "0000000000540000RX-a|0000000000140000RR|00000000b9f40000RRF-a|00000000e3540000RXY-a",
-- and_3 = "00000000e3800000RXY-a|00000000b9800000RRE|00000000b9e40000RRF-a",
-- and_c = "0000000000d40000SS-a",
-- and_i = "0000000000940000SI",
-- and_i4 = "00000000eb540000SIY"
- and_i3 = "000000000a540000RI-a|000000000a550000RI-a|000000000c0a0000RIL-a|000000000a560000RI-a|000000000a570000RI-a|000000000c0bRIL-a"
-- --branch related instrcutions
-- bal = "0000000000450000RX-a",
-- balr = "0000000000050000RR",
-- bas = "00000000004d0000RX-a",
-- basr = "00000000000d0000RR",
-- bassm = "00000000000c0000RR",
-- bsm = "00000000000b0000RR",
-- bc = "0000000000470000Rx-b",
-- bcr = "00000000000070000RR",
-- bct = "0000000000460000RX-a",
-- bctr = "0000000000060000RR",
-- bctg = "00000000e3460000RXY-a",
-- bctgr = "00000000b9460000RRE",
-- bxh = "0000000000860000RS-a",
-- bxhg = "00000000eb440000RSY-a",
-- bxle = "0000000000870000RS-a",
-- bxleg = "00000000eb450000RSY-a",
-- bras = "000000000a750000RI-b",
-- brasl = "000000000c050000RIL-b",
-- brc = "000000000a740000RI-c",
-- brcl = "000000000c040000RIL-c",
-- brct = "000000000a760000RI-b",
-- brctg = "000000000a770000RI-b",
-- brctg = "00000000occ60000RIL-b",
-- brxh = "0000000000840000RSI",
-- brxhg = "00000000ec440000RIE-e",
-- brxle = "0000000000850000RSI",
-- brxlg = "00000000ec450000RIE-e",
-+ --branch related instrcutions
-+ bal = "0000000045000000", --RX-a
-+ balr = "0000000000005000", --RR
-+ bas = "000000004d000000",
-+ basr = "0000000000000d00", -- this has leading zero in the instrcution opcode: 0d, need to take into consideration
-+ bassm = "0000000000000c00",
-+ bsm = "0000000000000b00",
-+ bc = "0000000047000000",
-+ bcr = "0000000000000700",
-+ bct = "0000000046000000",
-+ bctr = "0000000000000600",
-+ bctg = "0000e30000000046",
-+ bctgr = "00000000b9460000",
-+ bxh = "0000000086000000", --RS-a
-+ bxhg = "0000eb0000000044",
-+ bxle = "0000000087000000",
-+ bxleg = "0000eb0000000045", -- RSY-a
-+ --bras = "000000000a750000RI-b",
-+ brasl = "0000c00500000000", --RIL-b
-+ --brc = "000000000a740000RI-c",
-+ brcl = "0000c00400000000", --RIL-c
-+ --brct = "000000000a760000RI-b",
-+ --brctg = "000000000a770000RI-b",
-+ brcth = "0000cc0600000000",
-+ --brxh = "0000000000840000RSI",
-+ --brxhg = "00000000ec440000RIE-e",
-+ --brxle = "0000000000850000RSI",
-+ --brxlg = "00000000ec450000RIE-e",
-
- ----subtraction (basic operation)
- sub = "00000000005b0000RX-a"
---
-2.20.1
-
-
-From 6a3f8893a0b81b127935b4c576af60cfaf74ae40 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 16 Nov 2016 15:09:59 +0530
-Subject: [PATCH 018/247] Update lj_target_s390x.h
-
-Added s390x specific condition codes
----
- src/lj_target_s390x.h | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-index 551bb7d..4e35891 100644
---- a/src/lj_target_s390x.h
-+++ b/src/lj_target_s390x.h
-@@ -176,7 +176,14 @@ typedef enum S390xShift {
-
- /* S390x condition codes. */
- typedef enum S390xCC {
--
-+ /* Z- Zero , LZ - Less thena Zero , GZ - Greater than Zero
-+ O - Overflow , NZ - Not Zero , ZC - Zero with carry
-+ NZC - No Zero with carry , ZNC - Zero with No Carry
-+ EQ - Equal , NE - Not Equal , LO - Loq , HI - High
-+ */
-+ CC_Z , CC_LZ , CC_GZ , CC_O ,
-+ CC_NZ , CC_ZC , CC_NZC ,
-+ CC_ZNC , CC_EQ , CC_NE , CC_LO , CC_HI
- } S390xCC;
-
- #endif
---
-2.20.1
-
-
-From 2f569e8a69f8debc6d66f7968323eb92f339a13f Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 16 Nov 2016 15:34:32 +0530
-Subject: [PATCH 019/247] Update vm_s390x.dasc
-
-made some changes like mentioning arch from x86 to S390x
-removed some x86 specific code
----
- src/vm_s390x.dasc | 195 +---------------------------------------------
- 1 file changed, 3 insertions(+), 192 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index d7d618d..7f12f62 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1,12 +1,9 @@
--|// Low-level VM code for x86 CPUs.
-+|// Low-level VM code for S390x CPUs.
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
--|.if P64
--|.arch x64
--|.else
--|.arch x86
--|.endif
-+
-+|.arch S390x
- |.section code_op, code_sub
- |
- |.actionlist build_actionlist
-@@ -16,13 +13,6 @@
- |
- |//-----------------------------------------------------------------------
- |
--|.if P64
--|.define X64, 1
--|.if WIN
--|.define X64WIN, 1
--|.endif
--|.endif
--|
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
- |.define BASE, edx // Not C callee-save, refetched anyway.
-@@ -119,10 +109,6 @@
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |//-----------------------------------------------------------------------
--|.if not X64 // x86 stack layout.
--|
--|.if WIN
--|
- |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
- |.macro saveregs_
- | push edi; push esi; push ebx
-@@ -138,51 +124,9 @@
- | pop ebx; pop esi; pop edi; pop ebp
- |.endmacro
- |
--|.else
--|
--|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
--|.macro saveregs_
--| push edi; push esi; push ebx
--| sub esp, CFRAME_SPACE
--|.endmacro
--|.macro restoreregs
--| add esp, CFRAME_SPACE
--| pop ebx; pop esi; pop edi; pop ebp
--|.endmacro
--|
--|.endif
--|
- |.macro saveregs
- | push ebp; saveregs_
- |.endmacro
--|
--|.if WIN
--|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
--|.define SAVE_NRES, aword [esp+aword*18]
--|.define SAVE_CFRAME, aword [esp+aword*17]
--|.define SAVE_L, aword [esp+aword*16]
--|//----- 16 byte aligned, ^^^ arguments from C caller
--|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
--|.define SAVE_R4, aword [esp+aword*14]
--|.define SAVE_R3, aword [esp+aword*13]
--|.define SAVE_R2, aword [esp+aword*12]
--|//----- 16 byte aligned
--|.define SAVE_R1, aword [esp+aword*11]
--|.define SEH_FUNC, aword [esp+aword*10]
--|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
--|.define UNUSED2, aword [esp+aword*8]
--|//----- 16 byte aligned
--|.define UNUSED1, aword [esp+aword*7]
--|.define SAVE_PC, aword [esp+aword*6]
--|.define TMP2, aword [esp+aword*5]
--|.define TMP1, aword [esp+aword*4]
--|//----- 16 byte aligned
--|.define ARG4, aword [esp+aword*3]
--|.define ARG3, aword [esp+aword*2]
--|.define ARG2, aword [esp+aword*1]
--|.define ARG1, aword [esp] //<-- esp while in interpreter.
--|//----- 16 byte aligned, ^^^ arguments for C callee
--|.else
- |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
- |.define SAVE_NRES, aword [esp+aword*14]
- |.define SAVE_CFRAME, aword [esp+aword*13]
-@@ -203,7 +147,6 @@
- |.define ARG2, aword [esp+aword*1]
- |.define ARG1, aword [esp] //<-- esp while in interpreter.
- |//----- 16 byte aligned, ^^^ arguments for C callee
--|.endif
- |
- |// FPARGx overlaps ARGx and ARG(x+1) on x86.
- |.define FPARG3, qword [esp+qword*1]
-@@ -215,112 +158,6 @@
- |.define TMPa, TMP1
- |.define MULTRES, TMP2
- |
--|// Arguments for vm_call and vm_pcall.
--|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
--|
--|// Arguments for vm_cpcall.
--|.define INARG_CP_CALL, SAVE_ERRF
--|.define INARG_CP_UD, SAVE_NRES
--|.define INARG_CP_FUNC, SAVE_CFRAME
--|
--|//-----------------------------------------------------------------------
--|.elif X64WIN // x64/Windows stack layout
--|
--|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
--|.macro saveregs_
--| push rdi; push rsi; push rbx
--| sub rsp, CFRAME_SPACE
--|.endmacro
--|.macro saveregs
--| push rbp; saveregs_
--|.endmacro
--|.macro restoreregs
--| add rsp, CFRAME_SPACE
--| pop rbx; pop rsi; pop rdi; pop rbp
--|.endmacro
--|
--|.define SAVE_CFRAME, aword [rsp+aword*13]
--|.define SAVE_PC, dword [rsp+dword*25]
--|.define SAVE_L, dword [rsp+dword*24]
--|.define SAVE_ERRF, dword [rsp+dword*23]
--|.define SAVE_NRES, dword [rsp+dword*22]
--|.define TMP2, dword [rsp+dword*21]
--|.define TMP1, dword [rsp+dword*20]
--|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
--|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
--|.define SAVE_R4, aword [rsp+aword*8]
--|.define SAVE_R3, aword [rsp+aword*7]
--|.define SAVE_R2, aword [rsp+aword*6]
--|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
--|.define ARG5, aword [rsp+aword*4]
--|.define CSAVE_4, aword [rsp+aword*3]
--|.define CSAVE_3, aword [rsp+aword*2]
--|.define CSAVE_2, aword [rsp+aword*1]
--|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
--|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
--|
--|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
--|.define TMPQ, qword [rsp+aword*10]
--|.define MULTRES, TMP2
--|.define TMPa, ARG5
--|.define ARG5d, dword [rsp+aword*4]
--|.define TMP3, ARG5d
--|
--|//-----------------------------------------------------------------------
--|.else // x64/POSIX stack layout
--|
--|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
--|.macro saveregs_
--| push rbx; push r15; push r14
--|.if NO_UNWIND
--| push r13; push r12
--|.endif
--| sub rsp, CFRAME_SPACE
--|.endmacro
--|.macro saveregs
--| push rbp; saveregs_
--|.endmacro
--|.macro restoreregs
--| add rsp, CFRAME_SPACE
--|.if NO_UNWIND
--| pop r12; pop r13
--|.endif
--| pop r14; pop r15; pop rbx; pop rbp
--|.endmacro
--|
--|//----- 16 byte aligned,
--|.if NO_UNWIND
--|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
--|.define SAVE_R4, aword [rsp+aword*10]
--|.define SAVE_R3, aword [rsp+aword*9]
--|.define SAVE_R2, aword [rsp+aword*8]
--|.define SAVE_R1, aword [rsp+aword*7]
--|.define SAVE_RU2, aword [rsp+aword*6]
--|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
--|.else
--|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
--|.define SAVE_R4, aword [rsp+aword*8]
--|.define SAVE_R3, aword [rsp+aword*7]
--|.define SAVE_R2, aword [rsp+aword*6]
--|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
--|.endif
--|.define SAVE_CFRAME, aword [rsp+aword*4]
--|.define SAVE_PC, dword [rsp+dword*7]
--|.define SAVE_L, dword [rsp+dword*6]
--|.define SAVE_ERRF, dword [rsp+dword*5]
--|.define SAVE_NRES, dword [rsp+dword*4]
--|.define TMPa, aword [rsp+aword*1]
--|.define TMP2, dword [rsp+dword*1]
--|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
--|//----- 16 byte aligned
--|
--|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
--|.define TMPQ, qword [rsp]
--|.define TMP3, dword [rsp+aword*1]
--|.define MULTRES, TMP2
--|
--|.endif
--|
- |//-----------------------------------------------------------------------
- |
- |// Instruction headers.
-@@ -339,11 +176,6 @@
- | movzx OP, RCL
- | add PC, 4
- | shr RC, 16
--|.if X64
--| jmp aword [DISPATCH+OP*8]
--|.else
--| jmp aword [DISPATCH+OP*4]
--|.endif
- |.endmacro
- |
- |// Instruction footer.
-@@ -433,30 +265,9 @@
- | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
- |.endmacro
- |
--|// x87 compares.
--|.macro fcomparepp // Compare and pop st0 >< st1.
--| fucomip st1
--| fpop
--|.endmacro
- |
- |.macro fpop1; fstp st1; .endmacro
- |
--|// Synthesize SSE FP constants.
--|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
--|.if X64
--| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
--|.else
--| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
--|.endif
--|.endmacro
--|
--|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
--|.if X64
--| mov64 tmp, U64x(val,00000000); movd reg, tmp
--|.else
--| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
--|.endif
--|.endmacro
- |
- |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
- | sseconst_hi reg, tmp, 80000000
---
-2.20.1
-
-
-From be1380a6b4e108ebc2336a64c6e8e2b8cef76b73 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 16 Nov 2016 17:19:10 +0530
-Subject: [PATCH 020/247] Update lj_frame.h
-
-Added CFrame definitions for S390X.
-Values are left unassigned as I am unsure of the correct values.
----
- src/lj_frame.h | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 04cb5a3..2d6598f 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -200,6 +200,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special
continuations. */
- #define CFRAME_OFS_MULTRES 192
- #define CFRAME_SIZE 208
- #define CFRAME_SHIFT_MULTRES 3
-+#elif LJ_TARGET_S390X
-+#define CFRAME_OFS_ERRF
-+#define CFRAME_OFS_NRES
-+#define CFRAME_OFS_PREV
-+#define CFRAME_OFS_L
-+#define CFRAME_OFS_PC
-+#define CFRAME_OFS_MULTRES
-+#define CFRAME_SIZE
-+#define CFRAME_SHIFT_MULTRES
- #elif LJ_TARGET_PPC
- #if LJ_TARGET_XBOX360
- #define CFRAME_OFS_ERRF 424
---
-2.20.1
-
-
-From 27d0afb89ad42ca053dca121845e6df7c9876561 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 17 Nov 2016 14:58:17 +0530
-Subject: [PATCH 021/247] Update vm_s390x.dasc
-
-Referring to the arm dasc file, I have created slots which I have to replace with s390x
registers and instructions
----
- src/vm_s390x.dasc | 219 +++++++++++++++++++++-------------------------
- 1 file changed, 101 insertions(+), 118 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 7f12f62..ff59947 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -15,81 +15,85 @@
- |
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
--|.define BASE, edx // Not C callee-save, refetched anyway.
--|.if not X64
--|.define KBASE, edi // Must be C callee-save.
--|.define KBASEa, KBASE
--|.define PC, esi // Must be C callee-save.
--|.define PCa, PC
--|.define DISPATCH, ebx // Must be C callee-save.
--|.elif X64WIN
--|.define KBASE, edi // Must be C callee-save.
--|.define KBASEa, rdi
--|.define PC, esi // Must be C callee-save.
--|.define PCa, rsi
--|.define DISPATCH, ebx // Must be C callee-save.
--|.else
--|.define KBASE, r15d // Must be C callee-save.
--|.define KBASEa, r15
--|.define PC, ebx // Must be C callee-save.
--|.define PCa, rbx
--|.define DISPATCH, r14d // Must be C callee-save.
--|.endif
-+.define BASE, // Base of current Lua stack frame.
-+|.define KBASE, // Constants of current Lua function.
-+|.define PC, // Next PC.
-+|.define GLREG, // Global state.
-+|.define LREG, // Register holding lua_State (also in SAVE_L).
-+|.define TISNUM, // Constant LJ_TISNUM << 47.
-+|.define TISNUMhi, // Constant LJ_TISNUM << 15.
-+|.define TISNIL, // Constant -1LL.
-+|.define fp, // Yes, we have to maintain a frame pointer.
- |
--|.define RA, ecx
--|.define RAH, ch
--|.define RAL, cl
--|.define RB, ebp // Must be ebp (C callee-save).
--|.define RC, eax // Must be eax.
--|.define RCW, ax
--|.define RCH, ah
--|.define RCL, al
--|.define OP, RB
--|.define RD, RC
--|.define RDW, RCW
--|.define RDL, RCL
--|.if X64
--|.define RAa, rcx
--|.define RBa, rbp
--|.define RCa, rax
--|.define RDa, rax
--|.else
--|.define RAa, RA
--|.define RBa, RB
--|.define RCa, RC
--|.define RDa, RD
--|.endif
-+|// The following temporaries are not saved across C calls, except for RA/RC.
-+|.define RA,
-+|.define RC,
-+|.define RB,
-+|.define RAw,
-+|.define RCw,
-+|.define RBw,
-+|.define INS,
-+|.define INSw,
-+|.define ITYPE,
-+|.define TMP0,
-+|.define TMP1,
-+|.define TMP2,
-+|.define TMP3,
-+|.define TMP0w,
-+|.define TMP1w,
-+|.define TMP2w,
-+|.define TMP3w,
- |
--|.if not X64
--|.define FCARG1, ecx // x86 fastcall arguments.
--|.define FCARG2, edx
--|.elif X64WIN
--|.define CARG1, rcx // x64/WIN64 C call arguments.
--|.define CARG2, rdx
--|.define CARG3, r8
--|.define CARG4, r9
--|.define CARG1d, ecx
--|.define CARG2d, edx
--|.define CARG3d, r8d
--|.define CARG4d, r9d
--|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
--|.define FCARG2, CARG2d
--|.else
--|.define CARG1, rdi // x64/POSIX C call arguments.
--|.define CARG2, rsi
--|.define CARG3, rdx
--|.define CARG4, rcx
--|.define CARG5, r8
--|.define CARG6, r9
--|.define CARG1d, edi
--|.define CARG2d, esi
--|.define CARG3d, edx
--|.define CARG4d, ecx
--|.define CARG5d, r8d
--|.define CARG6d, r9d
--|.define FCARG1, CARG1d // Simulate x86 fastcall.
--|.define FCARG2, CARG2d
--|.endif
-+|// Calling conventions. Also used as temporaries.
-+|.define CARG1,
-+|.define CARG2,
-+|.define CARG3,
-+|.define CARG4,
-+|.define CARG5,
-+|.define CARG1w,
-+|.define CARG2w,
-+|.define CARG3w,
-+|.define CARG4w,
-+|.define CARG5w,
-+|
-+|.define FARG1,
-+|.define FARG2,
-+|
-+|.define CRET1,
-+|.define CRET1w,
-+|// Stack layout while in interpreter. Must match with lj_frame.h.
-+|
-+|.define CFRAME_SPACE, 208
-+|//----- 16 byte aligned, <-- sp entering interpreter
-+|// Unused [sp, #204] // 32 bit values
-+|.define SAVE_NRES,
-+|.define SAVE_ERRF,
-+|.define SAVE_MULTRES,
-+|.define TMPD,
-+|.define SAVE_L,
-+|.define SAVE_PC,
-+|.define SAVE_CFRAME,
-+|.define SAVE_FPR_,
-+|.define SAVE_GPR_,
-+|.define SAVE_LR,
-+|.define SAVE_FP,
-+|//----- 16 byte aligned, <-- sp while in interpreter.
-+|
-+|.define TMPDofs,
-+|
-+|.macro save_, gpr1, gpr2, fpr1, fpr2
-+]
-+|.endmacro
-+|.macro rest_, gpr1, gpr2, fpr1, fpr2
-+]
-+|.endmacro
-+|
-+|.macro saveregs
-+
-+|.endmacro
-+|.macro restoreregs
-+
-+|.endmacro
- |
- |// Type definitions. Some of these are only used for documentation.
- |.type L, lua_State
-@@ -111,22 +115,16 @@
- |//-----------------------------------------------------------------------
- |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
- |.macro saveregs_
--| push edi; push esi; push ebx
--| push extern lj_err_unwind_win
--| fs; push dword [0]
--| fs; mov [0], esp
--| sub esp, CFRAME_SPACE
-+
- |.endmacro
- |.macro restoreregs
--| add esp, CFRAME_SPACE
--| fs; pop dword [0]
--| pop edi // Short for esp += 4.
--| pop ebx; pop esi; pop edi; pop ebp
-+
- |.endmacro
- |
- |.macro saveregs
--| push ebp; saveregs_
-+
- |.endmacro
-+
- |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
- |.define SAVE_NRES, aword [esp+aword*14]
- |.define SAVE_CFRAME, aword [esp+aword*13]
-@@ -164,18 +162,14 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
--|.macro ins_AB_; movzx RB, RCH; .endmacro
--|.macro ins_A_C; movzx RC, RCL; .endmacro
--|.macro ins_AND; not RDa; .endmacro
-+|.macro ins_ABC; .endmacro
-+|.macro ins_AB_; .endmacro
-+|.macro ins_A_C; .endmacro
-+|.macro ins_AND; .endmacro
- |
- |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
- |.macro ins_NEXT
--| mov RC, [PC]
--| movzx RA, RCH
--| movzx OP, RCL
--| add PC, 4
--| shr RC, 16
-+
- |.endmacro
- |
- |// Instruction footer.
-@@ -220,11 +214,11 @@
- |//-----------------------------------------------------------------------
- |
- |// Macros to test operand types.
--|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
--|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
--|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
--|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
--|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
-+|.macro checktp, .endmacro
-+|.macro checknum, .endmacro
-+|.macro checkint, .endmacro
-+|.macro checkstr, .endmacro
-+|.macro checktab, .endmacro
- |
- |// These operands must be used with movzx.
- |.define PC_OP, byte [PC-4]
-@@ -234,7 +228,7 @@
- |.define PC_RD, word [PC-2]
- |
- |.macro branchPC, reg
--| lea PC, [PC+reg*4-BCBIAS_J*4]
-+
- |.endmacro
- |
- |// Assumes DISPATCH is relative to GL.
-@@ -245,24 +239,16 @@
- |
- |// Decrement hashed hotcount and trigger trace recorder if zero.
- |.macro hotloop, reg
--| mov reg, PC
--| shr reg, 1
--| and reg, HOTCOUNT_PCMASK
--| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
--| jb ->vm_hotloop
-+
- |.endmacro
- |
- |.macro hotcall, reg
--| mov reg, PC
--| shr reg, 1
--| and reg, HOTCOUNT_PCMASK
--| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
--| jb ->vm_hotcall
-+
- |.endmacro
- |
- |// Set current VM state.
- |.macro set_vmstate, st
--| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
-+
- |.endmacro
- |
- |
-@@ -270,27 +256,24 @@
- |
- |
- |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
--| sseconst_hi reg, tmp, 80000000
-+|
- |.endmacro
- |.macro sseconst_1, reg, tmp // Synthesize 1.0.
--| sseconst_hi reg, tmp, 3ff00000
-+|
- |.endmacro
- |.macro sseconst_m1, reg, tmp // Synthesize -1.0.
--| sseconst_hi reg, tmp, bff00000
-+|
- |.endmacro
- |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
--| sseconst_hi reg, tmp, 43300000
-+|
- |.endmacro
- |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
--| sseconst_hi reg, tmp, 43380000
-+|
- |.endmacro
- |
- |// Move table write barrier back. Overwrites reg.
- |.macro barrierback, tab, reg
--| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
--| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
--| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
--| mov tab->gclist, reg
-+
- |.endmacro
- |
- |//-----------------------------------------------------------------------
---
-2.20.1
-
-
-From a6c99dcec0b9a7a9bd533f309ec849b005642820 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Fri, 18 Nov 2016 17:09:20 +0530
-Subject: [PATCH 022/247] Update vm_s390x.dasc
-
-Assigned general purpose register to existing macros
----
- src/vm_s390x.dasc | 19 ++++++++++---------
- 1 file changed, 10 insertions(+), 9 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index ff59947..656ed05 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -15,15 +15,15 @@
- |
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
--.define BASE, // Base of current Lua stack frame.
--|.define KBASE, // Constants of current Lua function.
--|.define PC, // Next PC.
--|.define GLREG, // Global state.
--|.define LREG, // Register holding lua_State (also in SAVE_L).
--|.define TISNUM, // Constant LJ_TISNUM << 47.
--|.define TISNUMhi, // Constant LJ_TISNUM << 15.
--|.define TISNIL, // Constant -1LL.
--|.define fp, // Yes, we have to maintain a frame pointer.
-+|.define BASE, gr0 // Base of current Lua stack frame.
-+|.define KBASE, gr1 // Constants of current Lua function.
-+|.define PC, gr14 // Next PC.
-+|.define GLREG, gr2 // Global state.
-+|.define LREG, gr3 // Register holding lua_State (also in SAVE_L).
-+|.define TISNUM, gr4 // Constant LJ_TISNUM << 47.
-+|.define TISNUMhi, gr5 // Constant LJ_TISNUM << 15.
-+|.define TISNIL, gr6 // Constant -1LL.
-+|.define fp, gr7 // Yes, we have to maintain a frame pointer.
- |
- |// The following temporaries are not saved across C calls, except for RA/RC.
- |.define RA,
-@@ -66,6 +66,7 @@
- |.define CFRAME_SPACE, 208
- |//----- 16 byte aligned, <-- sp entering interpreter
- |// Unused [sp, #204] // 32 bit values
-+|
- |.define SAVE_NRES,
- |.define SAVE_ERRF,
- |.define SAVE_MULTRES,
---
-2.20.1
-
-
-From c6a7dd8a7e7d23e7ac5abfcdf7a31f675ea6352f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 18 Nov 2016 13:06:31 -0500
-Subject: [PATCH 023/247] Look for s390x file rather than S390x file.
-
----
- src/vm_s390x.dasc | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 656ed05..4b5ae2a 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1,9 +1,9 @@
--|// Low-level VM code for S390x CPUs.
-+|// Low-level VM code for IBM z/Architecture (s390x) CPUs.
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
-
--|.arch S390x
-+|.arch s390x
- |.section code_op, code_sub
- |
- |.actionlist build_actionlist
---
-2.20.1
-
-
-From de1552219a3d573342c574f447ae9a96141b9533 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Tue, 22 Nov 2016 10:20:56 +0530
-Subject: [PATCH 024/247] Update vm_s390x.dasc
-
-Added definitions to macros savereg and restreg.
-Used Store and Load instructions
-to store and load register contents to and from memory.
----
- src/vm_s390x.dasc | 36 +++++++++++++++++-------------------
- 1 file changed, 17 insertions(+), 19 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 4b5ae2a..a9a3835 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -15,15 +15,15 @@
- |
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
--|.define BASE, gr0 // Base of current Lua stack frame.
--|.define KBASE, gr1 // Constants of current Lua function.
--|.define PC, gr14 // Next PC.
--|.define GLREG, gr2 // Global state.
--|.define LREG, gr3 // Register holding lua_State (also in SAVE_L).
--|.define TISNUM, gr4 // Constant LJ_TISNUM << 47.
--|.define TISNUMhi, gr5 // Constant LJ_TISNUM << 15.
--|.define TISNIL, gr6 // Constant -1LL.
--|.define fp, gr7 // Yes, we have to maintain a frame pointer.
-+|.define BASE, gr0
-+|.define KBASE, gr1
-+|.define PC, gr14
-+|.define GLREG, gr2
-+|.define LREG, gr3
-+|.define TISNUM, gr4
-+|.define TISNUMhi, gr5
-+|.define TISNIL, gr6
-+|.define fp, gr7
- |
- |// The following temporaries are not saved across C calls, except for RA/RC.
- |.define RA,
-@@ -82,18 +82,16 @@
- |
- |.define TMPDofs,
- |
--|.macro save_, gpr1, gpr2, fpr1, fpr2
--]
--|.endmacro
--|.macro rest_, gpr1, gpr2, fpr1, fpr2
--]
-+|.macro savereg arg1 arg2 arg3
-+| STG arg1; // Store 64bit content
-+| STG arg2; // Store 64bit content
-+| STG arg3; // Store 64bit content
- |.endmacro
- |
--|.macro saveregs
--
--|.endmacro
--|.macro restoreregs
--
-+|.macro restreg arg1 arg2 arg3
-+| LG arg1; // Load 64 bit content
-+| LG arg2; // Load 64 bit content
-+| LG arg3; // Load 64 bit content
- |.endmacro
- |
- |// Type definitions. Some of these are only used for documentation.
---
-2.20.1
-
-
-From 75a053e8da3f850f5f5d669175bb99dff0040471 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 22 Nov 2016 11:48:56 -0500
-Subject: [PATCH 025/247] Add preliminary frame offsets.
-
-These are educated guesses at this point. We might need more stack space because
-we don't have many free registers available.
----
- src/lj_asm.c | 2 +
- src/lj_frame.h | 18 +++----
- src/vm_s390x.dasc | 118 +++++++++++++++++++++++-----------------------
- 3 files changed, 70 insertions(+), 68 deletions(-)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index 992dcf5..2ac40fd 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -1601,6 +1601,8 @@ static void asm_loop(ASMState *as)
- #include "lj_asm_ppc.h"
- #elif LJ_TARGET_MIPS
- #include "lj_asm_mips.h"
-+#elif LJ_TARGET_S390X
-+#include "lj_asm_s390x.h"
- #else
- #error "Missing assembler for target CPU"
- #endif
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 2d6598f..f3b3595 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -200,15 +200,6 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special
continuations. */
- #define CFRAME_OFS_MULTRES 192
- #define CFRAME_SIZE 208
- #define CFRAME_SHIFT_MULTRES 3
--#elif LJ_TARGET_S390X
--#define CFRAME_OFS_ERRF
--#define CFRAME_OFS_NRES
--#define CFRAME_OFS_PREV
--#define CFRAME_OFS_L
--#define CFRAME_OFS_PC
--#define CFRAME_OFS_MULTRES
--#define CFRAME_SIZE
--#define CFRAME_SHIFT_MULTRES
- #elif LJ_TARGET_PPC
- #if LJ_TARGET_XBOX360
- #define CFRAME_OFS_ERRF 424
-@@ -273,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special
continuations. */
- #endif
- #define CFRAME_OFS_MULTRES 0
- #define CFRAME_SHIFT_MULTRES 3
-+#elif LJ_TARGET_S390X
-+#define CFRAME_OFS_ERRF 216
-+#define CFRAME_OFS_NRES 208
-+#define CFRAME_OFS_PREV 200
-+#define CFRAME_OFS_L 192
-+#define CFRAME_OFS_PC 168
-+#define CFRAME_OFS_MULTRES 160
-+#define CFRAME_SIZE 172
-+#define CFRAME_SHIFT_MULTRES 3
- #else
- #error "Missing CFRAME_* definitions for this architecture"
- #endif
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index a9a3835..dc30593 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2,7 +2,22 @@
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
--
-+|// ELF ABI registers:
-+|// r0,r1 | | volatile |
-+|// r2 | parameter and return value | volatile |
-+|// r3-r5 | parameter | volatile |
-+|// r6 | parameter | saved |
-+|// r7-r11 | | saved |
-+|// r12 | GOT pointer (needed?) | saved |
-+|// r13 | literal pool (needed?) | saved |
-+|// r14 | return address | volatile |
-+|// r15 | stack pointer | saved |
-+|// f0,f2,f4,f6 | parameter and return value | volatile |
-+|// f1,f3,f5,f7 | | volatile |
-+|// f8-f15 | | saved |
-+|// ar0,ar1 | TLS | volatile |
-+|// ar2-ar15 | | volatile |
-+|
- |.arch s390x
- |.section code_op, code_sub
- |
-@@ -13,72 +28,57 @@
- |
- |//-----------------------------------------------------------------------
- |
--|// Fixed register assignments for the interpreter.
--|// This is very fragile and has many dependencies. Caveat emptor.
--|.define BASE, gr0
--|.define KBASE, gr1
--|.define PC, gr14
--|.define GLREG, gr2
--|.define LREG, gr3
--|.define TISNUM, gr4
--|.define TISNUMhi, gr5
--|.define TISNIL, gr6
--|.define fp, gr7
-+|// Fixed register assignments for the interpreter, callee-saved.
-+|.define BASE, r7 // Base of current Lua stack frame.
-+|.define KBASE, r8 // Constants of current Lua function.
-+|.define PC, r9 // Next PC.
-+|.define GLREG, r10 // Global state.
-+|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
- |
--|// The following temporaries are not saved across C calls, except for RA/RC.
--|.define RA,
--|.define RC,
--|.define RB,
--|.define RAw,
--|.define RCw,
--|.define RBw,
--|.define INS,
--|.define INSw,
--|.define ITYPE,
--|.define TMP0,
--|.define TMP1,
--|.define TMP2,
--|.define TMP3,
--|.define TMP0w,
--|.define TMP1w,
--|.define TMP2w,
--|.define TMP3w,
-+|// The following temporaries are not saved across C calls, except for RD.
-+|.define RA, r0 // Cannot be dereferenced.
-+|.define RB, r1
-+|.define RC, r5 // Overlaps CARG4.
-+|.define RD, r6 // Overlaps CARG5. Callee-saved.
- |
- |// Calling conventions. Also used as temporaries.
--|.define CARG1,
--|.define CARG2,
--|.define CARG3,
--|.define CARG4,
--|.define CARG5,
--|.define CARG1w,
--|.define CARG2w,
--|.define CARG3w,
--|.define CARG4w,
--|.define CARG5w,
-+|.define CARG1, r2
-+|.define CARG2, r3
-+|.define CARG3, r4
-+|.define CARG4, r5
-+|.define CARG5, r6
-+|
-+|.define FARG1, f0
-+|.define FARG2, f2
-+|.define FARG3, f4
-+|.define FARG4, f6
- |
--|.define FARG1,
--|.define FARG2,
-+|.define CRET1, r2
-+|
-+|.define SP, r15
- |
--|.define CRET1,
--|.define CRET1w,
- |// Stack layout while in interpreter. Must match with lj_frame.h.
-+|.define CFRAME_SPACE, 176 // Delta for SP, 8 byte aligned.
-+|
-+|// Register save area.
-+|.define SAVE_FPR6, 328(SP)
-+|.define SAVE_FPR4, 320(SP)
-+|.define SAVE_FPR2, 312(SP)
-+|.define SAVE_FPR0, 304(SP)
-+|.define SAVE_GPRS, 224(SP) // Save area for r6-r15 (10*8 bytes).
- |
--|.define CFRAME_SPACE, 208
--|//----- 16 byte aligned, <-- sp entering interpreter
--|// Unused [sp, #204] // 32 bit values
-+|// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
-+|.define SAVE_ERRF, 216(SP) // Argument 4, in r5.
-+|.define SAVE_NRES, 208(SP) // Argument 3, in r4.
-+|.define SAVE_CFRAME, 200(SP) // Argument 2, in r3.
-+|.define SAVE_L, 192(SP) // Argument 1, in r2.
-+|.define RESERVED, 184(SP) // Reserved for compiler use.
-+|.define BACKCHAIN, 176(SP) // <- SP entering interpreter.
-+|.define SAVE_PC, 168(SP)
-+|.define SAVE_MULTRES, 160(SP)
- |
--|.define SAVE_NRES,
--|.define SAVE_ERRF,
--|.define SAVE_MULTRES,
--|.define TMPD,
--|.define SAVE_L,
--|.define SAVE_PC,
--|.define SAVE_CFRAME,
--|.define SAVE_FPR_,
--|.define SAVE_GPR_,
--|.define SAVE_LR,
--|.define SAVE_FP,
--|//----- 16 byte aligned, <-- sp while in interpreter.
-+|// Callee save area (allocated by interpreter).
-+|.define CALLEESAVE 000(SP) // <- SP in interpreter.
- |
- |.define TMPDofs,
- |
---
-2.20.1
-
-
-From f34b300f5a6f2c2b6f7716898ca66a1d37c456c8 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 22 Nov 2016 13:47:35 -0500
-Subject: [PATCH 026/247] Cleanup.
-
----
- src/vm_s390x.dasc | 47 -----------------------------------------------
- 1 file changed, 47 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index dc30593..44c056d 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -80,8 +80,6 @@
- |// Callee save area (allocated by interpreter).
- |.define CALLEESAVE 000(SP) // <- SP in interpreter.
- |
--|.define TMPDofs,
--|
- |.macro savereg arg1 arg2 arg3
- | STG arg1; // Store 64bit content
- | STG arg2; // Store 64bit content
-@@ -110,51 +108,6 @@
- |.type TRACE, GCtrace
- |.type SBUF, SBuf
- |
--|// Stack layout while in interpreter. Must match with lj_frame.h.
--|//-----------------------------------------------------------------------
--|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
--|.macro saveregs_
--
--|.endmacro
--|.macro restoreregs
--
--|.endmacro
--|
--|.macro saveregs
--
--|.endmacro
--
--|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
--|.define SAVE_NRES, aword [esp+aword*14]
--|.define SAVE_CFRAME, aword [esp+aword*13]
--|.define SAVE_L, aword [esp+aword*12]
--|//----- 16 byte aligned, ^^^ arguments from C caller
--|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
--|.define SAVE_R4, aword [esp+aword*10]
--|.define SAVE_R3, aword [esp+aword*9]
--|.define SAVE_R2, aword [esp+aword*8]
--|//----- 16 byte aligned
--|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
--|.define SAVE_PC, aword [esp+aword*6]
--|.define TMP2, aword [esp+aword*5]
--|.define TMP1, aword [esp+aword*4]
--|//----- 16 byte aligned
--|.define ARG4, aword [esp+aword*3]
--|.define ARG3, aword [esp+aword*2]
--|.define ARG2, aword [esp+aword*1]
--|.define ARG1, aword [esp] //<-- esp while in interpreter.
--|//----- 16 byte aligned, ^^^ arguments for C callee
--|
--|// FPARGx overlaps ARGx and ARG(x+1) on x86.
--|.define FPARG3, qword [esp+qword*1]
--|.define FPARG1, qword [esp]
--|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
--|.define TMPQ, qword [esp+aword*4]
--|.define TMP3, ARG4
--|.define ARG5, TMP1
--|.define TMPa, TMP1
--|.define MULTRES, TMP2
--|
- |//-----------------------------------------------------------------------
- |
- |// Instruction headers.
---
-2.20.1
-
-
-From e840ce40898a5426cdfa5306d0125c89f82d2f55 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 22 Nov 2016 13:58:10 -0500
-Subject: [PATCH 027/247] Fixup the save/restore register macros.
-
-I believe these macros obey the C calling convention, so we need to
-allocate our stack frame and save all callee-save registers. We
-can tune it later if it turns out we don't need all the registers.
----
- src/vm_s390x.dasc | 23 +++++++++++++++--------
- 1 file changed, 15 insertions(+), 8 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 44c056d..49ea335 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -80,16 +80,23 @@
- |// Callee save area (allocated by interpreter).
- |.define CALLEESAVE 000(SP) // <- SP in interpreter.
- |
--|.macro savereg arg1 arg2 arg3
--| STG arg1; // Store 64bit content
--| STG arg2; // Store 64bit content
--| STG arg3; // Store 64bit content
-+|.macro saveregs
-+| lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
-+| stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
-+| std f0, SAVE_FPR0
-+| std f2, SAVE_FPR2
-+| std f4, SAVE_FPR4
-+| std f6, SAVE_FPR6
- |.endmacro
- |
--|.macro restreg arg1 arg2 arg3
--| LG arg1; // Load 64 bit content
--| LG arg2; // Load 64 bit content
--| LG arg3; // Load 64 bit content
-+|.macro restoreregs
-+| la SP, CFRAME_SPACE(SP) // De-allocate stack frame.
-+| lmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
-+| ld f0, SAVE_FPR0
-+| ld f2, SAVE_FPR2
-+| ld f4, SAVE_FPR4
-+| ld f6, SAVE_FPR6
-+|// br r14 to return?
- |.endmacro
- |
- |// Type definitions. Some of these are only used for documentation.
---
-2.20.1
-
-
-From 1af277c5bad944010108666bd3ef2d84beb00374 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 23 Nov 2016 17:30:10 -0500
-Subject: [PATCH 028/247] Fix stack frame layout.
-
-f8-f15 are callee-saved (not f0,f2,f4 and f6). There isn't space
-for them in the caller's stack frame so we need to increase the
-size of the interpreter's stack frame.
----
- src/lj_frame.h | 10 ++++-----
- src/vm_s390x.dasc | 57 +++++++++++++++++++++++++++++------------------
- 2 files changed, 40 insertions(+), 27 deletions(-)
-
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index f3b3595..a30618e 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -265,13 +265,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special
continuations. */
- #define CFRAME_OFS_MULTRES 0
- #define CFRAME_SHIFT_MULTRES 3
- #elif LJ_TARGET_S390X
--#define CFRAME_OFS_ERRF 216
--#define CFRAME_OFS_NRES 208
--#define CFRAME_OFS_PREV 200
--#define CFRAME_OFS_L 192
-+#define CFRAME_OFS_ERRF 280
-+#define CFRAME_OFS_NRES 272
-+#define CFRAME_OFS_PREV 264
-+#define CFRAME_OFS_L 256
- #define CFRAME_OFS_PC 168
- #define CFRAME_OFS_MULTRES 160
--#define CFRAME_SIZE 172
-+#define CFRAME_SIZE 240
- #define CFRAME_SHIFT_MULTRES 3
- #else
- #error "Missing CFRAME_* definitions for this architecture"
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 49ea335..f547111 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -58,22 +58,28 @@
- |.define SP, r15
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
--|.define CFRAME_SPACE, 176 // Delta for SP, 8 byte aligned.
-+|.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
- |
- |// Register save area.
--|.define SAVE_FPR6, 328(SP)
--|.define SAVE_FPR4, 320(SP)
--|.define SAVE_FPR2, 312(SP)
--|.define SAVE_FPR0, 304(SP)
--|.define SAVE_GPRS, 224(SP) // Save area for r6-r15 (10*8 bytes).
-+|.define SAVE_GPRS, 288(SP) // Save area for r6-r15 (10*8 bytes).
- |
- |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
--|.define SAVE_ERRF, 216(SP) // Argument 4, in r5.
--|.define SAVE_NRES, 208(SP) // Argument 3, in r4.
--|.define SAVE_CFRAME, 200(SP) // Argument 2, in r3.
--|.define SAVE_L, 192(SP) // Argument 1, in r2.
--|.define RESERVED, 184(SP) // Reserved for compiler use.
--|.define BACKCHAIN, 176(SP) // <- SP entering interpreter.
-+|.define SAVE_ERRF, 280(SP) // Argument 4, in r5.
-+|.define SAVE_NRES, 272(SP) // Argument 3, in r4.
-+|.define SAVE_CFRAME, 264(SP) // Argument 2, in r3.
-+|.define SAVE_L, 256(SP) // Argument 1, in r2.
-+|.define RESERVED, 248(SP) // Reserved for compiler use.
-+|.define BACKCHAIN, 240(SP) // <- SP entering interpreter.
-+|
-+|// Interpreter stack frame.
-+|.define SAVE_FPR15, 232(SP)
-+|.define SAVE_FPR14, 224(SP)
-+|.define SAVE_FPR13, 216(SP)
-+|.define SAVE_FPR12, 208(SP)
-+|.define SAVE_FPR11, 200(SP)
-+|.define SAVE_FPR10, 192(SP)
-+|.define SAVE_FPR9, 184(SP)
-+|.define SAVE_FPR8, 176(SP)
- |.define SAVE_PC, 168(SP)
- |.define SAVE_MULTRES, 160(SP)
- |
-@@ -83,19 +89,26 @@
- |.macro saveregs
- | lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
- | stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
--| std f0, SAVE_FPR0
--| std f2, SAVE_FPR2
--| std f4, SAVE_FPR4
--| std f6, SAVE_FPR6
-+| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+| std f9, SAVE_FPR9
-+| std f10, SAVE_FPR10
-+| std f11, SAVE_FPR11
-+| std f12, SAVE_FPR12
-+| std f13, SAVE_FPR13
-+| std f14, SAVE_FPR14
-+| std f15, SAVE_FPR15
- |.endmacro
- |
- |.macro restoreregs
--| la SP, CFRAME_SPACE(SP) // De-allocate stack frame.
--| lmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
--| ld f0, SAVE_FPR0
--| ld f2, SAVE_FPR2
--| ld f4, SAVE_FPR4
--| ld f6, SAVE_FPR6
-+| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+| ld f9, SAVE_FPR9
-+| ld f10, SAVE_FPR10
-+| ld f11, SAVE_FPR11
-+| ld f12, SAVE_FPR12
-+| ld f13, SAVE_FPR13
-+| ld f14, SAVE_FPR14
-+| ld f15, SAVE_FPR15
-+| lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
- |// br r14 to return?
- |.endmacro
- |
---
-2.20.1
-
-
-From d8ddf722969b5d3f8c5b20ea9731253cee7951f8 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 23 Nov 2016 18:02:00 -0500
-Subject: [PATCH 029/247] Add assembly for decoding instructions.
-
-Still guessing at this point. This code will need to be changed.
----
- src/vm_s390x.dasc | 31 +++++++++++++++++++++++--------
- 1 file changed, 23 insertions(+), 8 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f547111..f6f1adb 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1,4 +1,4 @@
--|// Low-level VM code for IBM z/Architecture (s390x) CPUs.
-+|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
-@@ -32,7 +32,7 @@
- |.define BASE, r7 // Base of current Lua stack frame.
- |.define KBASE, r8 // Constants of current Lua function.
- |.define PC, r9 // Next PC.
--|.define GLREG, r10 // Global state.
-+|.define DISPATCH, r10 // Opcode dispatch table.
- |.define LREG, r11 // Register holding lua_State (also in SAVE_L).
- |
- |// The following temporaries are not saved across C calls, except for RD.
-@@ -56,6 +56,8 @@
- |.define CRET1, r2
- |
- |.define SP, r15
-+|.define OP, r2
-+|.define TMP1, r3
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
-@@ -134,14 +136,29 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; .endmacro
--|.macro ins_AB_; .endmacro
-+|.macro ins_ABC; .endmacro
-+|.macro ins_AB_; .endmacro
- |.macro ins_A_C; .endmacro
- |.macro ins_AND; .endmacro
- |
--|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
-+|// Instruction decode+dispatch.
-+| // TODO: tune this, right now we always decode RA-D even if they aren't used.
- |.macro ins_NEXT
--
-+| l RD, (PC)
-+| // 32 63
-+| // [ B | C | A | OP ]
-+| // [ D | A | OP ]
-+| llhr RA, RD
-+| srl RA, #8
-+| llcr OP, RD
-+| srl RD, #16
-+| lr RB, RD
-+| srl RB, #8
-+| llcr RC, RD
-+| la PC, 4(PC)
-+| llgfr TMP1, OP
-+| sll TMP1, #3 // TMP1=OP*8
-+| b 0(TMP1, DISPATCH)
- |.endmacro
- |
- |// Instruction footer.
-@@ -151,8 +168,6 @@
- | .define ins_next_, ins_NEXT
- |.else
- | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
--| // Affects only certain kinds of benchmarks (and only with -j off).
--| // Around 10%-30% slower on Core2, a lot more slower on P4.
- | .macro ins_next
- | jmp ->ins_next
- | .endmacro
---
-2.20.1
-
-
-From c7ec00b97366b6a69bf242f33ece00d6c1d74c07 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 24 Nov 2016 11:25:07 +0530
-Subject: [PATCH 030/247] Update vm_s390x.dasc
-
-used MOVE LONG EXTENDED in place of mov and
-MOVE LONG instead of movzx
----
- src/vm_s390x.dasc | 15 +++++----------
- 1 file changed, 5 insertions(+), 10 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f6f1adb..3758ee3 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -180,21 +180,16 @@
- |// Call decode and dispatch.
- |.macro ins_callt
- | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
--| mov PC, LFUNC:RB->pc
--| mov RA, [PC]
--| movzx OP, RAL
--| movzx RA, RAH
-+| mvcle PC, LFUNC:RB->pc
-+| mvcle RA, [PC]
-+| movcl OP, RAL
-+| movcl RA, RAH
- | add PC, 4
--|.if X64
--| jmp aword [DISPATCH+OP*8]
--|.else
--| jmp aword [DISPATCH+OP*4]
--|.endif
- |.endmacro
- |
- |.macro ins_call
- | // BASE = new base, RB = LFUNC, RD = nargs+1
--| mov [BASE-4], PC
-+| mvcle [BASE-4], PC
- | ins_callt
- |.endmacro
- |
---
-2.20.1
-
-
-From bb6ecf87a2a6631551f2dce603a5ef70da7935e7 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 24 Nov 2016 14:02:50 +0530
-Subject: [PATCH 031/247] Update vm_s390x.dasc
-
-Added instructions to macros, referring to the x86 macro definitions.
-For macro ins_AND, did not find an equivalent s390x replacement instruction for 'Not',
-hence have currently marked the place as '????'.
-
-'????' has to be replaced with s390x complement instruction
----
- src/vm_s390x.dasc | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 3758ee3..b2640e8 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -136,10 +136,10 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; .endmacro
--|.macro ins_AB_; .endmacro
--|.macro ins_A_C; .endmacro
--|.macro ins_AND; .endmacro
-+|.macro ins_ABC; mvcl RB, RCH; mvcl RC, RCL; .endmacro
-+|.macro ins_AB_; mvcl RB, RCH; .endmacro
-+|.macro ins_A_C; mvcl RC, RCL; .endmacro
-+|.macro ins_AND; ??? RD; .endmacro
- |
- |// Instruction decode+dispatch.
- | // TODO: tune this, right now we always decode RA-D even if they aren't used.
---
-2.20.1
-
-
-From b530f9c7170eb04dd9646b0ab78ccacb0341e11b Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 24 Nov 2016 14:58:52 +0530
-Subject: [PATCH 032/247] Update vm_s390x.dasc
-
-Added definitions to the macros that test operand types, referring to the x86 definitions.
-No JUMP instruction was found for s390x, so BRANCH RELATIVE ON CONDITION (brc) is used instead.
-Not sure how the condition will be checked; this needs to be discussed.
----
- src/vm_s390x.dasc | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index b2640e8..72fe5d2 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -196,11 +196,11 @@
- |//-----------------------------------------------------------------------
- |
- |// Macros to test operand types.
--|.macro checktp, .endmacro
--|.macro checknum, .endmacro
--|.macro checkint, .endmacro
--|.macro checkstr, .endmacro
--|.macro checktab, .endmacro
-+|.macro checktp, reg, tp; CG dword [BASE+reg*8+4], tp; .endmacro
-+|.macro checknum, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro //
condition to chk is result is above or equal
-+|.macro checkint, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro //
condition to chk is result is not equal
-+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; brc target; .endmacro //
condition to chk is result is nto equal
-+|.macro checktab, reg, target; checktp reg, LJ_TTAB; brc target; .endmacro //
condition to chk is result is nto equal
- |
- |// These operands must be used with movzx.
- |.define PC_OP, byte [PC-4]
---
-2.20.1
-
-
-From 43ddf3fe32c24e296842ff4fc219c4c82cbda69c Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 25 Nov 2016 19:44:04 +0530
-Subject: [PATCH 033/247] Added s390x instructions with their encoding
-
----
- dynasm/dasm_s390x.lua | 953 +++++++++++++++++++++++++-----------------
- 1 file changed, 575 insertions(+), 378 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 3542e7e..e39a27f 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -652,311 +652,565 @@ end)
-
- -- Template strings for ARM instructions.
- map_op = {
-- -- Basic data processing instructions.
-- --add
-- ar = "0000000000001a00", --RR
-- ay = "0000e3000000005a", --RXY-a
-- ag = "0000e30000000008",
-- agr = "00000000b9080000", --RRE
-- agf = "0000e30000000018",
-- agfr = "00000000b9180000",
-- agbr = "00000000b34a0000",
-- adbr = "00000000b31a0000",
-- aebr = "00000000b30a0000",
-- ah = "000000004a000000", --RXa
-- ahy = "0000e3000000007a",
-- afi = "0000c20900000000", --RIL-a --pls check if this is correct
-- agfi = "0000c20800000000",
-- aih = "0000cc0800000000",
-- al = "000000005e000000",
-- alr = "0000000000001e00",
-- aly = "0000e3000000005e", -- RXY-a
-- alg = "0000e3000000000a",
-- algr = "00000000b90a0000",
-- algf = "0000e3000000001a",
-- algfr = "00000000b91a0000",
-- alfi = "0000c20b00000000",
-- algfi = "0000c20a00000000",
-- alc = "0000e30000000098",
-- alcr = "00000000b9980000", -- RRE
-- alcg = "0000e30000000088",
-- alcgr = "00000000b9880000",
-- alsih = "0000cc0a00000000",
-- alsihn ="0000cc0b00000000",
-- axr = "0000000000003600", -- RR
-- ad = "000000006a000000", -- Rx-a
-- adr = "0000000000002a00",
-- ae = "000000007a000000",
-- aer = "0000000000003a00",
-- aw = "000000006e000000",
-- awr = "0000000000002e00",
-- au = "000000007e000000",
-- aur = "0000000000003e00",
--
---- and
-- n = "0000000054000000",
-- nr = "0000000000001400",
-- ny = "0000e30000000054", -- RXY-a
-- ng = "0000e30000000080",
-- ngr = "00000000b9800000",
-- nihf = "0000c00a00000000", --RIL-a
-- nihl = "0000c00b00000000",
--
-- --branch related instrcutions
-- bal = "0000000045000000", --RX-a
-- balr = "0000000000005000", --RR
-- bas = "000000004d000000",
-- basr = "0000000000000d00", -- this has leading zero in the instrcution
opcode: 0d, need to take into consideration
-- bassm = "0000000000000c00",
-- bsm = "0000000000000b00",
-- bc = "0000000047000000",
-- bcr = "0000000000000700",
-- bct = "0000000046000000",
-- bctr = "0000000000000600",
-- bctg = "0000e30000000046",
-- bctgr = "00000000b9460000",
-- bxh = "0000000086000000", --RS-a
-- bxhg = "0000eb0000000044",
-- bxle = "0000000087000000",
-- bxleg = "0000eb0000000045", -- RSY-a
-- --bras = "000000000a750000RI-b",
-- brasl = "0000c00500000000", --RIL-b
-- --brc = "000000000a740000RI-c",
-- brcl = "0000c00400000000", --RIL-c
-- --brct = "000000000a760000RI-b",
-- --brctg = "000000000a770000RI-b",
-- brcth = "0000cc0600000000",
-- --brxh = "0000000000840000RSI",
-- --brxhg = "00000000ec440000RIE-e",
-- --brxle = "0000000000850000RSI",
-- --brxlg = "00000000ec450000RIE-e",
--
-- ----subtraction (basic operation)
-- sub = "00000000005b0000RX-a"
-- sr = "00000000001b0000RR"
-- srk = "00000000b9f90000RRF-a"
-- sy = "00000000e35b0000RXY-a"
-- sg = "00000000e3090000RXY-a"
-- sgr = "00000000b9090000RRE"
-- sgrk = "00000000b9e90000RRF-a"
-- sgf = "00000000e3190000RXY-a"
-- sgfr = "00000000b9190000RRE"
-- sh = "00000000004b0000RX-a"
-- shy = "00000000e37b0000RXY-a"
-- shhhr = "00000000b9c90000RRF-a"
-- shhlr = "00000000b9d90000RX-a"
-- sl = "00000000005f0000RX-a"
-- slr = "00000000001f0000RR"
-- slrk = "00000000b9f80000RR"
-- sly = "00000000e35f0000RXY-a",
-- slg = "00000000e30b0000RXY-a",
-- slgr = "00000000b9080000RRE",
-- slgrk = "00000000b9eb0000RRF-a",
-- slgf = "00000000e3180000RXY-a",
-- slgfr = "00000000b91b0000RRE",
-- slhhhr = "00000000b9cb0000RRF-a",
-- slhhlr = "00000000b9db0000RRF-a",
-- slfi = "000000000c250000RIL-a",
-- slgfi = "000000000c240000RIL-a",
-- slb = "00000000e3990000RXY-a",
-- slbr = "00000000b9990000RRE" ,
-- slbg = "00000000e3890000RXY-a",
-- slbgr = "00000000b9890000RXY-a",
--
-- cmp_2 = "0000000000590000RX-a|0000000000190000RR|00000000e3590000RXY-a",
-- cmp_3 = "00000000e3200000RXY-a|00000000b9200000RRE|00000000e3300000RXY-a|
00000000b9300000RRE",
--
-- div_2 =
"00000000005d0000RX-a|00000000001d0000RR|00000000e3970000RXY-a|00000000b9970000RRE",
-- div_3 ="00000000e3870000RXY-a|00000000b9870000RRE",
-- div_sing
="00000000e30d0000RXY-a|00000000b90d0000RRE|00000000e31d0000RXY-a|00000000b91d0000RRE",
--
-- eor_2 =
"0000000000570000RX-a|0000000000170000RR|00000000b9f70000RRF-a|00000000e3570000RXY-a",
-- eor_3 = "00000000e3820000RXY-a|00000000b9820000RRE|00000000b9e70000RRF-a|
-- eor_c = "0000000000d70000SS-a",
-- eor_i = "0000000000970000SI|
00000000eb570000|000000000c060000a|000000000c070000RIL-a",
--
-- -- load instruction to be added and the following instructions need to be changed (are
not s390x related)
--
-- neg_2 = "4b0003e0DMg",
-- neg_3 = "4b0003e0DMSg",
-- negs_2 = "6b0003e0DMg",
-- negs_3 = "6b0003e0DMSg",
-- adc_3 = "1a000000DNMg",
-- adcs_3 = "3a000000DNMg",
-- sbc_3 = "5a000000DNMg",
-- sbcs_3 = "7a000000DNMg",
-- ngc_2 = "5a0003e0DMg",
-- ngcs_2 = "7a0003e0DMg",
-- and_3 = "0a000000DNMg|12000000pDNig",
-- and_4 = "0a000000DNMSg",
-- orr_3 = "2a000000DNMg|32000000pDNig",
-- orr_4 = "2a000000DNMSg",
-- eor_3 = "4a000000DNMg|52000000pDNig",
-- eor_4 = "4a000000DNMSg",
-- ands_3 = "6a000000DNMg|72000000DNig",
-- ands_4 = "6a000000DNMSg",
-- tst_2 = "6a00001fNMg|7200001fNig",
-- tst_3 = "6a00001fNMSg",
-- bic_3 = "0a200000DNMg",
-- bic_4 = "0a200000DNMSg",
-- orn_3 = "2a200000DNMg",
-- orn_4 = "2a200000DNMSg",
-- eon_3 = "4a200000DNMg",
-- eon_4 = "4a200000DNMSg",
-- bics_3 = "6a200000DNMg",
-- bics_4 = "6a200000DNMSg",
-- movn_2 = "12800000DWg",
-- movn_3 = "12800000DWRg",
-- movz_2 = "52800000DWg",
-- movz_3 = "52800000DWRg",
-- movk_2 = "72800000DWg",
-- movk_3 = "72800000DWRg",
-- -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
-- mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
-- mov_3 = "2a0003e0DMSg",
-- mvn_2 = "2a2003e0DMg",
-- mvn_3 = "2a2003e0DMSg",
-- adr_2 = "10000000DBx",
-- adrp_2 = "90000000DBx",
-- csel_4 = "1a800000DNMCg",
-- csinc_4 = "1a800400DNMCg",
-- csinv_4 = "5a800000DNMCg",
-- csneg_4 = "5a800400DNMCg",
-- cset_2 = "1a9f07e0Dcg",
-- csetm_2 = "5a9f03e0Dcg",
-- cinc_3 = "1a800400DNmcg",
-- cinv_3 = "5a800000DNmcg",
-- cneg_3 = "5a800400DNmcg",
-- ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
-- ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
-- madd_4 = "1b000000DNMAg",
-- msub_4 = "1b008000DNMAg",
-- mul_3 = "1b007c00DNMg",
-- mneg_3 = "1b00fc00DNMg",
-- smaddl_4 = "9b200000DxNMwAx",
-- smsubl_4 = "9b208000DxNMwAx",
-- smull_3 = "9b207c00DxNMw",
-- smnegl_3 = "9b20fc00DxNMw",
-- smulh_3 = "9b407c00DNMx",
-- umaddl_4 = "9ba00000DxNMwAx",
-- umsubl_4 = "9ba08000DxNMwAx",
-- umull_3 = "9ba07c00DxNMw",
-- umnegl_3 = "9ba0fc00DxNMw",
-- umulh_3 = "9bc07c00DNMx",
-- udiv_3 = "1ac00800DNMg",
-- sdiv_3 = "1ac00c00DNMg",
-- -- Bit operations.
-- sbfm_4 = "13000000DN12w|93400000DN12x",
-- bfm_4 = "33000000DN12w|b3400000DN12x",
-- ubfm_4 = "53000000DN12w|d3400000DN12x",
-- extr_4 = "13800000DNM2w|93c00000DNM2x",
-- sxtb_2 = "13001c00DNw|93401c00DNx",
-- sxth_2 = "13003c00DNw|93403c00DNx",
-- sxtw_2 = "93407c00DxNw",
-- uxtb_2 = "53001c00DNw",
-- uxth_2 = "53003c00DNw",
-- sbfx_4 = op_alias("sbfm_4", alias_bfx),
-- bfxil_4 = op_alias("bfm_4", alias_bfx),
-- ubfx_4 = op_alias("ubfm_4", alias_bfx),
-- sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
-- bfi_4 = op_alias("bfm_4", alias_bfiz),
-- ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
-- lsl_3 = function(params, nparams)
-- if params and params[3]:byte() == 35 then
-- return alias_lslimm(params, nparams)
-- else
-- return op_template(params, "1ac02000DNMg", nparams)
-- end
-- end,
-- lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
-- asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
-- ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
-- clz_2 = "5ac01000DNg",
-- cls_2 = "5ac01400DNg",
-- rbit_2 = "5ac00000DNg",
-- rev_2 = "5ac00800DNw|dac00c00DNx",
-- rev16_2 = "5ac00400DNg",
-- rev32_2 = "dac00800DNx",
-- -- Loads and stores.
-- ["strb_*"] = "38000000DwL",
-- ["ldrb_*"] = "38400000DwL",
-- ["ldrsb_*"] = "38c00000DwL|38800000DxL",
-- ["strh_*"] = "78000000DwL",
-- ["ldrh_*"] = "78400000DwL",
-- ["ldrsh_*"] = "78c00000DwL|78800000DxL",
-- ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
-- ["ldr_*"] =
"18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
-- ["ldrsw_*"] = "98000000DxB|b8800000DxL",
-- -- NOTE: ldur etc. are handled by ldr et al.
-- ["stp_*"] =
"28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
-- ["ldp_*"] =
"28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
-- ["ldpsw_*"] = "68400000DAxP",
-- -- Branches.
-- b_1 = "14000000B",
-- bl_1 = "94000000B",
-- blr_1 = "d63f0000Nx",
-- br_1 = "d61f0000Nx",
-- ret_0 = "d65f03c0",
-- ret_1 = "d65f0000Nx",
-- -- b.cond is added below.
-- cbz_2 = "34000000DBg",
-- cbnz_2 = "35000000DBg",
-- tbz_3 = "36000000DTBw|36000000DTBx",
-- tbnz_3 = "37000000DTBw|37000000DTBx",
-- -- Miscellaneous instructions.
-- -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
-- -- TODO: sys, sysl, ic, dc, at, tlbi
-- -- TODO: hint, yield, wfe, wfi, sev, sevl
-- -- TODO: clrex, dsb, dmb, isb
-- nop_0 = "d503201f",
-- brk_0 = "d4200000",
-- brk_1 = "d4200000W",
-- -- Floating point instructions.
-- fmov_2 =
"1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
-- fabs_2 = "1e20c000DNf",
-- fneg_2 = "1e214000DNf",
-- fsqrt_2 = "1e21c000DNf",
-- fcvt_2 = "1e22c000DdNs|1e624000DsNd",
-- -- TODO: half-precision and fixed-point conversions.
-- fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
-- fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
-- fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
-- fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
-- fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
-- fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
-- fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
-- fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
-- fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
-- fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
-- scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
-- ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
-- frintn_2 = "1e244000DNf",
-- frintp_2 = "1e24c000DNf",
-- frintm_2 = "1e254000DNf",
-- frintz_2 = "1e25c000DNf",
-- frinta_2 = "1e264000DNf",
-- frintx_2 = "1e274000DNf",
-- frinti_2 = "1e27c000DNf",
-- fadd_3 = "1e202800DNMf",
-- fsub_3 = "1e203800DNMf",
-- fmul_3 = "1e200800DNMf",
-- fnmul_3 = "1e208800DNMf",
-- fdiv_3 = "1e201800DNMf",
-- fmadd_4 = "1f000000DNMAf",
-- fmsub_4 = "1f008000DNMAf",
-- fnmadd_4 = "1f200000DNMAf",
-- fnmsub_4 = "1f208000DNMAf",
-- fmax_3 = "1e204800DNMf",
-- fmaxnm_3 = "1e206800DNMf",
-- fmin_3 = "1e205800DNMf",
-- fminnm_3 = "1e207800DNMf",
-- fcmp_2 = "1e202000NMf|1e202008NZf",
-- fcmpe_2 = "1e202010NMf|1e202018NZf",
-- fccmp_4 = "1e200400NMVCf",
-- fccmpe_4 = "1e200410NMVCf",
-- fcsel_4 = "1e200c00DNMCf",
-- -- TODO: crc32*, aes*, sha*, pmull
-- -- TODO: SIMD instructions.
-+ a = "000000005a000000j",
-+ar = "0000000000001a00g",
-+ay = "0000e3000000005ak",
-+ag = "0000e30000000008k",
-+agr = "00000000b9080000h",
-+agf = "0000e30000000018k",
-+agfr = "00000000b9180000h",
-+axbr = "00000000b34a0000h",
-+adbr = "00000000b31a0000h",
-+aebr = "00000000b30a0000h",
-+ah = "000000004a000000j",
-+ahy = "0000e3000000007ak",
-+afi = "0000c20900000000l",
-+agfi = "0000c20800000000l",
-+aih = "0000cc0800000000l",
-+al = "000000005e000000j",
-+alr = "0000000000001e00g",
-+aly = "0000e3000000005ek",
-+alg = "0000e3000000000ak",
-+algr = "00000000b90a0000h",
-+algf = "0000e3000000001ak",
-+algfr = "00000000b91a0000h",
-+alfi = "0000c20b00000000l",
-+algfi = "0000c20a00000000l",
-+alc = "0000e30000000098k",
-+alcr = "00000000b9980000h",
-+alcg = "0000e30000000088k",
-+alcgr = "00000000b9880000h",
-+alsih = "0000cc0a00000000l",
-+alsihn = "0000cc0b00000000l",
-+axr = "0000000000003600g",
-+ad = "000000006a000000j",
-+adr = "0000000000002a00g",
-+ae = "000000007a000000j",
-+aer = "0000000000003a00g",
-+aw = "000000006e000000j",
-+awr = "0000000000002e00g",
-+au = "000000007e000000j",
-+aur = "0000000000003e00g",
-+n = "0000000054000000j",
-+nr = "0000000000001400g",
-+ny = "0000e30000000054k",
-+ng = "0000e30000000080k",
-+ngr = "00000000b9800000h",
-+nihf = "0000c00a00000000l",
-+nilf = "0000c00b00000000l",
-+bal = "0000000045000000j",
-+balr = "000000000000500g",
-+bas = "000000004d000000j",
-+basr = "0000000000000d00g",
-+bassm = "0000000000000c00g",
-+bsa = "00000000b25a0000h",
-+bsm = "0000000000000b00g",
-+bakr = "00000000b2400000h",
-+bsg = "00000000b2580000h",
-+bc = "0000000047000000j",
-+bcr = "000000000000700g",
-+bct = "0000000046000000j",
-+bctr = "000000000000600g",
-+bctg = "0000e30000000046k",
-+bctgr = "00000000b9460000h",
-+bxh = "0000000086000000m",
-+bxhg = "0000eb0000000044n",
-+bxle = "0000000087000000m",
-+bxleg = "0000eb0000000045n",
-+brasl = "0000c00500000000l",
-+brcl = "0000c00400000000l",
-+brcth = "0000cc0600000000l",
-+cksm = "00000000b2410000h",
-+km = "00000000b92e0000h",
-+kmf = "00000000b92a0000h",
-+kmc = "00000000b92f0000h",
-+kmo = "00000000b92b0000h",
-+c = "0000000059000000j",
-+cr = "0000000000001900g",
-+cy = "0000e30000000059k",
-+cg = "0000e30000000020k",
-+cgr = "00000000b9200000h",
-+cgf = "0000e30000000030k",
-+cgfr = "00000000b9300000h",
-+cxbr = "00000000b3490000h",
-+cxtr = "00000000b3ec0000h",
-+cxr = "00000000b3690000h",
-+cdbr = "00000000b3190000h",
-+cdtr = "00000000b3e40000h",
-+cd = "0000000069000000j",
-+cdr = "0000000000002900g",
-+cebr = "00000000b3090000h",
-+ce = "0000000079000000j",
-+cer = "0000000000003900g",
-+kxbr = "00000000b3480000h",
-+kxtr = "00000000b3e80000h",
-+kdbr = "00000000b3180000h",
-+kdtr = "00000000b3e00000h",
-+kebr = "00000000b3080000h",
-+cs = "00000000ba000000m",
-+csy = "0000eb0000000014n",
-+csg = "0000eb0000000030n",
-+csp = "00000000b2500000h",
-+cspg = "00000000b98a0000h",
-+cextr = "00000000b3fc0000h",
-+cedtr = "00000000b3f40000h",
-+cds = "00000000bb000000m",
-+cdsy = "0000eb0000000031n",
-+cdsg = "0000eb000000003en",
-+ch = "0000000049000000j",
-+chy = "0000e30000000079k",
-+cgh = "0000e30000000034k",
-+chrl = "0000c60500000000l",
-+cghrl = "0000c60400000000l",
-+chf = "0000e300000000cdk",
-+chhr = "00000000b9cd0000h",
-+chlr = "00000000b9dd0000h",
-+cfi = "0000c20d00000000l",
-+cgfi = "0000c20c00000000l",
-+cih = "0000cc0d00000000l",
-+cl = "0000000055000000j",
-+clr = "0000000000001500g",
-+cly = "0000e30000000055k",
-+clg = "0000e30000000021k",
-+clgr = "00000000b9210000h",
-+clgf = "0000e30000000031k",
-+clgfr = "00000000b9310000h",
-+clmh = "0000eb0000000020n",
-+clm = "00000000bd000000m",
-+clmy = "0000eb0000000021n",
-+clhf = "0000e300000000cfk",
-+clhhr = "00000000b9cf0000h",
-+clhlr = "00000000b9df0000h",
-+clfi = "0000c20f00000000l",
-+clgfi = "0000c20e00000000l",
-+clih = "0000cc0f00000000l",
-+clcl = "0000000000000f00g",
-+clcle = "00000000a9000000m",
-+clclu = "0000eb000000008fn",
-+clrl = "0000c60f00000000l",
-+clhrl = "0000c60700000000l",
-+clgrl = "0000c60a00000000l",
-+clghrl = "0000c60600000000l",
-+clgfrl = "0000c60e00000000l",
-+clst = "00000000b25d0000h",
-+crl = "0000c60d00000000l",
-+cgrl = "0000c60800000000l",
-+cgfrl = "0000c60c00000000l",
-+ cuse = "00000000b2570000h",
-+cmpsc = "00000000b2630000h",
-+kimd = "00000000b93e0000h",
-+klmd = "00000000b93f0000h",
-+kmac = "00000000b91e0000h",
-+thdr = "00000000b3590000h",
-+thder = "00000000b3580000h",
-+cxfbr = "00000000b3960000h",
-+cxftr = "00000000b9590000h",
-+cxfr = "00000000b3b60000h",
-+cdfbr = "00000000b3950000h",
-+cdftr = "00000000b9510000h",
-+cdfr = "00000000b3b50000h",
-+cefbr = "00000000b3940000h",
-+cefr = "00000000b3b40000h",
-+cxgbr = "00000000b3a60000h",
-+cxgtr = "00000000b3f90000h",
-+cxgr = "00000000b3c60000h",
-+cdgbr = "00000000b3a50000h",
-+cdgtr = "00000000b3f10000h",
-+cdgr = "00000000b3c50000h",
-+cegbr = "00000000b3a40000h",
-+cegr = "00000000b3c40000h",
-+cxstr = "00000000b3fb0000h",
-+cdstr = "00000000b3f30000h",
-+cxutr = "00000000b3fa0000h",
-+cdutr = "00000000b3f20000h",
-+cvb = "000000004f000000j",
-+cvby = "0000e30000000006k",
-+cvbg = "0000e3000000000ek",
-+cvd = "000000004e000000j",
-+cvdy = "0000e30000000026k",
-+cvdg = "0000e3000000002ek",
-+cuxtr = "00000000b3ea0000h",
-+cudtr = "00000000b3e20000h",
-+cu42 = "00000000b9b30000h",
-+cu41 = "00000000b9b20000h",
-+cpya = "00000000b24d0000h",
-+d = "000000005d000000j",
-+dr = "0000000000001d00g",
-+dxbr = "00000000b34d0000h",
-+dxr = "00000000b22d0000h",
-+ddbr = "00000000b31d0000h",
-+dd = "000000006d000000j",
-+ddr = "0000000000002d00g",
-+debr = "00000000b30d0000h",
-+de = "000000007d000000j",
-+der = "0000000000003d00g",
-+dl = "0000e30000000097k",
-+dlr = "00000000b9970000h",
-+dlg = "0000e30000000087k",
-+dlgr = "00000000b9870000h",
-+dsg = "0000e3000000000dk",
-+dsgr = "00000000b90d0000h",
-+dsgf = "0000e3000000001dk",
-+dsgfr = "00000000b91d0000h",
-+x = "0000000057000000j",
-+xr = "0000000000001700g",
-+xy = "0000e30000000057k",
-+xg = "0000e30000000082k",
-+xgr = "00000000b9820000h",
-+xihf = "0000c00600000000l",
-+xilf = "0000c00700000000l",
-+ex = "0000000044000000j",
-+exrl = "0000c60000000000l",
-+ear = "00000000b24f0000h",
-+esea = "00000000b99d0000h",
-+eextr = "00000000b3ed0000h",
-+eedtr = "00000000b3e50000h",
-+ecag = "0000eb000000004cn",
-+efpc = "00000000b38c0000h",
-+epar = "00000000b2260000h",
-+epair = "00000000b99a0000h",
-+epsw = "00000000b98d0000h",
-+esar = "00000000b2270000h",
-+esair = "00000000b99b0000h",
-+esxtr = "00000000b3ef0000h",
-+esdtr = "00000000b3e70000h",
-+ereg = "00000000b2490000h",
-+eregg = "00000000b90e0000h",
-+esta = "00000000b24a0000h",
-+flogr = "00000000b9830000h",
-+hdr = "0000000000002400g",
-+her = "0000000000003400g",
-+iac = "00000000b2240000h",
-+ic = "0000000043000000j",
-+icy = "0000e30000000073k",
-+icmh = "0000eb0000000080n",
-+icm = "00000000bf000000m",
-+icmy = "0000eb0000000081n",
-+iihf = "0000c00800000000l",
-+iilf = "0000c00900000000l",
-+ipm = "00000000b2220000h",
-+iske = "00000000b2290000h",
-+ivsk = "00000000b2230000h",
-+l = "0000000058000000j",
-+lr = "0000000000001800g",
-+ly = "0000e30000000058k",
-+lg = "0000e30000000004k",
-+lgr = "00000000b9040000h",
-+lgf = "0000e30000000014k",
-+lgfr = "00000000b9140000h",
-+lxr = "00000000b3650000h",
-+ld = "0000000068000000j",
-+ldr = "0000000000002800g",
-+ldy = "0000ed0000000065k",
-+le = "0000000078000000j",
-+ler = "0000000000003800g",
-+ ley = "0000ed0000000064k",
-+lam = "000000009a000000m",
-+lamy = "0000eb000000009an",
-+la = "0000000041000000j",
-+lay = "0000e30000000071k",
-+lae = "0000000051000000j",
-+laey = "0000e30000000075k",
-+larl = "0000c00000000000l",
-+laa = "0000eb00000000f8n",
-+laag = "0000eb00000000e8n",
-+laal = "0000eb00000000fan",
-+laalg = "0000eb00000000ean",
-+lan = "0000eb00000000f4n",
-+lang = "0000eb00000000e4n",
-+lax = "0000eb00000000f7n",
-+laxg = "0000eb00000000e7n",
-+lao = "0000eb00000000f6n",
-+laog = "0000eb00000000e6n",
-+lt = "0000e30000000012k",
-+ltr = "0000000000001200g",
-+ltg = "0000e30000000002k",
-+ltgr = "00000000b9020000h",
-+ltgf = "0000e30000000032k",
-+ltgfr = "00000000b9120000h",
-+ltxbr = "00000000b3420000h",
-+ltxtr = "00000000b3de0000h",
-+ltxr = "00000000b3620000h",
-+ltdbr = "00000000b3120000h",
-+ltdtr = "00000000b3d60000h",
-+ltdr = "0000000000002200g",
-+ltebr = "00000000b3020000h",
-+lter = "0000000000003200g",
-+lb = "0000e30000000076k",
-+lbr = "00000000b9260000h",
-+lgb = "0000e30000000077k",
-+lgbr = "00000000b9060000h",
-+ lbh = "0000e300000000c0k",
-+lcr = "0000000000001300g",
-+lcgr = "00000000b9030000h",
-+lcgfr = "00000000b9130000h",
-+lcxbr = "00000000b3430000h",
-+lcxr = "00000000b3630000h",
-+lcdbr = "00000000b3130000h",
-+lcdr = "0000000000002300g",
-+lcdfr = "00000000b3730000h",
-+lcebr = "00000000b3030000h",
-+lcer = "0000000000003300g",
-+lctl = "00000000b7000000m",
-+lctlg = "0000eb000000002fn",
-+fixr = "00000000b3670000h",
-+fidr = "00000000b37f0000h",
-+fier = "00000000b3770000h",
-+ldgr = "00000000b3c10000h",
-+lgdr = "00000000b3cd0000h",
-+lh = "0000000048000000j",
-+lhr = "00000000b9270000h",
-+lhy = "0000e30000000078k",
-+lgh = "0000e30000000015k",
-+lghr = "00000000b9070000h",
-+lhh = "0000e300000000c4k",
-+lhrl = "0000c40500000000l",
-+lghrl = "0000c40400000000l",
-+lfh = "0000e300000000cak",
-+lgfi = "0000c00100000000l",
-+lxdbr = "00000000b3050000h",
-+lxdr = "00000000b3250000h",
-+lxebr = "00000000b3060000h",
-+lxer = "00000000b3260000h",
-+ldebr = "00000000b3040000h",
-+lder = "00000000b3240000h",
-+llgf = "0000e30000000016k",
-+llgfr = "00000000b9160000h",
-+llc = "0000e30000000094k",
-+llcr = "00000000b9940000h",
-+llgc = "0000e30000000090k",
-+llgcr = "00000000b9840000h",
-+llch = "0000e300000000c2k",
-+llh = "0000e30000000095k",
-+llhr = "00000000b9950000h",
-+llgh = "0000e30000000091k",
-+llghr = "00000000b9850000h",
-+llhh = "0000e300000000c6k",
-+llhrl = "0000c40200000000l",
-+llghrl = "0000c40600000000l",
-+llihf = "0000c00e00000000l",
-+llilf = "0000c00f00000000l",
-+llgfrl = "0000c40e00000000l",
-+llgt = "0000e30000000017k",
-+llgtr = "00000000b9170000h",
-+lm = "0000000098000000m",
-+lmy = "0000eb0000000098n",
-+lmg = "0000eb0000000004n",
-+lmh = "0000eb0000000096n",
-+lnr = "0000000000001100g",
-+lngr = "00000000b9010000h",
-+lngfr = "00000000b9110000h",
-+lnxbr = "00000000b3410000h",
-+lnxr = "00000000b3610000h",
-+lndbr = "00000000b3110000h",
-+lndr = "0000000000002100g",
-+lndfr = "00000000b3710000h",
-+lnebr = "00000000b3010000h",
-+lner = "0000000000003100g",
-+loc = "0000eb00000000f2n",
-+locg = "0000eb00000000e2n",
-+lpq = "0000e3000000008fk",
-+lpr = "0000000000001000g",
-+lpgr = "00000000b9000000h",
-+lpgfr = "00000000b9100000h",
-+lpxbr = "00000000b3400000h",
-+lpxr = "00000000b3600000h",
-+lpdbr = "00000000b3100000h",
-+lpdr = "0000000000002000g",
-+lpdfr = "00000000b3700000h",
-+lpebr = "00000000b3000000h",
-+lper = "0000000000003000g",
-+lra = "00000000b1000000j",
-+lray = "0000e30000000013k",
-+lrag = "0000e30000000003k",
-+lrl = "0000c40d00000000l",
-+lgrl = "0000c40800000000l",
-+lgfrl = "0000c40c00000000l",
-+lrvh = "0000e3000000001fk",
-+lrv = "0000e3000000001ek",
-+lrvr = "00000000b91f0000h",
-+lrvg = "0000e3000000000fk",
-+lrvgr = "00000000b90f0000h",
-+ldxbr = "00000000b3450000h",
-+ldxr = "0000000000002500g",
-+lrdr = "0000000000002500g",
-+lexbr = "00000000b3460000h",
-+lexr = "00000000b3660000h",
-+ledbr = "00000000b3440000h",
-+ledr = "0000000000003500g",
-+lrer = "0000000000003500g",
-+lura = "00000000b24b0000h",
-+lurag = "00000000b9050000h",
-+lzxr = "00000000b3760000h",
-+lzdr = "00000000b3750000h",
-+lzer = "00000000b3740000h",
-+msta = "00000000b2470000h",
-+mvcl = "0000000000000e00g",
-+mvcle = "00000000a8000000m",
-+mvclu = "0000eb000000008en",
-+mvpg = "00000000b2540000h",
-+mvst = "00000000b2550000h",
-+m = "000000005c000000j",
-+mfy = "0000e3000000005ck",
-+mr = "0000000000001c00g",
-+mxbr = "00000000b34c0000h",
-+mxr = "0000000000002600g",
-+mdbr = "00000000b31c0000h",
-+md = "000000006c000000j",
-+mdr = "0000000000002c00g",
-+mxdbr = "00000000b3070000h",
-+mxd = "0000000067000000j",
-+mxdr = "0000000000002700g",
-+meebr = "00000000b3170000h",
-+meer = "00000000b3370000h",
-+mdebr = "00000000b30c0000h",
-+mde = "000000007c000000j",
-+mder = "0000000000003c00g",
-+me = "000000007c000000j",
-+mer = "0000000000003c00g",
-+mh = "000000004c000000j",
-+mhy = "0000e3000000007ck",
-+mlg = "0000e30000000086k",
-+mlgr = "00000000b9860000h",
-+ml = "0000e30000000096k",
-+mlr = "00000000b9960000h",
-+ms = "0000000071000000j",
-+msr = "00000000b2520000h",
-+msy = "0000e30000000051k",
-+msg = "0000e3000000000ck",
-+msgr = "00000000b90c0000h",
-+msgf = "0000e3000000001ck",
-+msgfr = "00000000b91c0000h",
-+msfi = "0000c20100000000l",
-+msgfi = "0000c20000000000l",
-+o = "0000000056000000j",
-+or = "0000000000001600g",
-+oy = "0000e30000000056k",
-+og = "0000e30000000081k",
-+ogr = "00000000b9810000h",
-+oihf = "0000c00c00000000l",
-+oilf = "0000c00d00000000l",
-+pgin = "00000000b22e0000h",
-+pgout = "00000000b22f0000h",
-+pcc = "00000000b92c0000h",
-+pckmo = "00000000b9280000h",
-+pfmf = "00000000b9af0000h",
-+ptf = "00000000b9a20000h",
-+popcnt = "00000000b9e10000h",
-+pfd = "0000e30000000036k",
-+pfdrl = "0000c60200000000l",
-+pt = "00000000b2280000h",
-+pti = "00000000b99e0000h",
-+palb = "00000000b2480000h",
-+rrbe = "00000000b22a0000h",
-+rrbm = "00000000b9ae0000h",
-+rll = "0000eb000000001dn",
-+rllg = "0000eb000000001cn",
-+srst = "00000000b25e0000h",
-+srstu = "00000000b9be0000h",
-+sar = "00000000b24e0000h",
-+sfpc = "00000000b3840000h",
-+sfasr = "00000000b3850000h",
-+spm = "000000000000400g",
-+ssar = "00000000b2250000h",
-+ssair = "00000000b99f0000h",
-+slda = "000000008f000000m",
-+sldl = "000000008d000000m",
-+sla = "000000008b000000m",
-+slak = "0000eb00000000ddn",
-+slag = "0000eb000000000bn",
-+sll = "0000000089000000m",
-+sllk = "0000eb00000000dfn",
-+sllg = "0000eb000000000dn",
-+srda = "000000008e000000m",
-+srdl = "000000008c000000m",
-+sra = "000000008a000000m",
-+srak = "0000eb00000000dcn",
-+srag = "0000eb000000000an",
-+srl = "0000000088000000m",
-+srlk = "0000eb00000000den",
-+srlg = "0000eb000000000cn",
-+sqxbr = "00000000b3160000h",
-+sqxr = "00000000b3360000h",
-+sqdbr = "00000000b3150000h",
-+sqdr = "00000000b2440000h",
-+sqebr = "00000000b3140000h",
-+sqer = "00000000b2450000h",
-+st = "0000000050000000j",
-+sty = "0000e30000000050k",
-+stg = "0000e30000000024k",
-+std = "0000000060000000j",
-+stdy = "0000ed0000000067k",
-+ste = "0000000070000000j",
-+stey = "0000ed0000000066k",
-+stam = "000000009b000000m",
-+stamy = "0000eb000000009bn",
-+stc = "0000000042000000j",
-+stcy = "0000e30000000072k",
-+stch = "0000e300000000c3k",
-+stcmh = "0000eb000000002cn",
-+stcm = "00000000be000000m",
-+stcmy = "0000eb000000002dn",
-+stctl = "00000000b6000000m",
-+stctg = "0000eb0000000025n",
-+sth = "0000000040000000j",
-+sthy = "0000e30000000070k",
-+sthh = "0000e300000000c7k",
-+sthrl = "0000c40700000000l",
-+stfh = "0000e300000000cbk",
-+stm = "0000000090000000m",
-+stmy = "0000eb0000000090n",
-+stmg = "0000eb0000000024n",
-+stmh = "0000eb0000000026n",
-+stoc = "0000eb00000000f3n",
-+stocg = "0000eb00000000e3n",
-+stpq = "0000e3000000008ek",
-+strl = "0000c40f00000000l",
-+stgrl = "0000c40b00000000l",
-+strvh = "0000e3000000003fk",
-+strv = "0000e3000000003ek",
-+strvg = "0000e3000000002fk",
-+stura = "00000000b2460000h",
-+sturg = "00000000b9250000h",
-+s = "000000005b000000j",
-+sr = "0000000000001b00g",
-+sy = "0000e3000000005bk",
-+sg = "0000e30000000009k",
-+sgr = "00000000b9090000h",
-+sgf = "0000e30000000019k",
-+sgfr = "00000000b9190000h",
-+sxbr = "00000000b34b0000h",
-+sdbr = "00000000b31b0000h",
-+sebr = "00000000b30b0000h",
-+sh = "000000004b000000j",
-+shy = "0000e3000000007bk",
-+sl = "000000005f000000j",
-+slr = "0000000000001f00g",
-+sly = "0000e3000000005fk",
-+slg = "0000e3000000000bk",
-+slgr = "00000000b90b0000h",
-+slgf = "0000e3000000001bk",
-+slgfr = "00000000b91b0000h",
-+slfi = "0000c20500000000l",
-+slgfi = "0000c20400000000l",
-+slb = "0000e30000000099k",
-+slbr = "00000000b9990000h",
-+slbg = "0000e30000000089k",
-+slbgr = "00000000b9890000h",
-+sxr = "0000000000003700g",
-+sd = "000000006b000000j",
-+sdr = "0000000000002b00g",
-+se = "000000007b000000j",
-+ser = "0000000000003b00g",
-+su = "000000007f000000j",
-+sur = "0000000000003f00g",
-+sw = "000000006f000000j",
-+swr = "0000000000002f00g",
-+tar = "00000000b24c0000h",
-+tb = "00000000b22c0000h",
-+trace = "0000000099000000m",
-+tracg = "0000eb000000000fn",
-+tre = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
- map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
-@@ -964,87 +1218,30 @@ end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
- local function parse_template(params, template, nparams, pos)
-- local op = tonumber(sub(template, 1, 12), 16) -- 13-16 ignored since those are
trailing zeros added after the instruction
-+ local op = tonumber(sub(template, 1, 16), 16) --
- -- 00000000005a0000 converts to 90
- local n,rs = 1,26
-
- parse_reg_type = false
- -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
- for p in gmatch(sub(template, 17), ".") do
-- local q = params[n]
-- if p == "R" then
-- op = op + parse_reg(q); n = n + 1
-- elseif p == "N" then
-- op = op + shl(parse_reg(q), 5); n = n + 1
-- elseif p == "M" then
-- op = op + shl(parse_reg(q), 16); n = n + 1
-- elseif p == "A" then
-- op = op + shl(parse_reg(q), 10); n = n + 1
-+ local pr1,pr2,pr3
-+ if p == "g" then
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + parse_reg(pr1)+parse_reg(pr2); n = n + 1 -- not sure if we will require
n later, so keeping it as it is now
-+ elseif p == "h" then
-+
-+ elseif p == "j" then
-+
-+ elseif p == "k" then
-+
-+ elseif p == "l" then
-+
- elseif p == "m" then
-- op = op + shl(parse_reg(params[n-1]), 16)
-- elseif p == "p" then
-- if q == "sp" then params[n] = "@x31" end
-- elseif p == "g" then
-- if parse_reg_type == "x" then
-- op = op + 0x80000000
-- elseif parse_reg_type ~= "w" then
-- werror("bad register type")
-- end
-- parse_reg_type = false
-- elseif p == "f" then
-- if parse_reg_type == "d" then
-- op = op + 0x00400000
-- elseif parse_reg_type ~= "s" then
-- werror("bad register type")
-- end
-- parse_reg_type = false
-- elseif p == "x" or p == "w" or p == "d" or p ==
"s" then
-- if parse_reg_type ~= p then
-- werror("register size mismatch")
-+
-+ elseif p == "n" then
-+
- end
-- parse_reg_type = false
-- elseif p == "L" then
-- op = parse_load(params, nparams, n, op)
-- elseif p == "P" then
-- op = parse_load_pair(params, nparams, n, op)
-- elseif p == "B" then
-- local mode, v, s = parse_label(q, false); n = n + 1
-- local m = branch_type(op)
-- waction("REL_"..mode, v+m, s, 1)
-- elseif p == "I" then
-- op = op + parse_imm12(q); n = n + 1
-- elseif p == "i" then
-- op = op + parse_imm13(q); n = n + 1
-- elseif p == "W" then
-- op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
-- elseif p == "T" then
-- op = op + parse_imm6(q); n = n + 1
-- elseif p == "1" then
-- op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
-- elseif p == "2" then
-- op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
-- elseif p == "5" then
-- op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
-- elseif p == "V" then
-- op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
-- elseif p == "F" then
-- op = op + parse_fpimm(q); n = n + 1
-- elseif p == "Z" then
-- if q ~= "#0" and q ~= "#0.0" then werror("expected zero
immediate") end
-- n = n + 1
-- elseif p == "S" then
-- op = op + parse_shift(q); n = n + 1
-- elseif p == "X" then
-- op = op + parse_extend(q); n = n + 1
-- elseif p == "R" then
-- op = op + parse_lslx16(q); n = n + 1
-- elseif p == "C" then
-- op = op + parse_cond(q, 0); n = n + 1
-- elseif p == "c" then
-- op = op + parse_cond(q, 1); n = n + 1
-- else
-- assert(false)
-- end
- end
- wputpos(pos, op)
- end
---
-2.20.1
-
-
-From 1814b99302f46d6fc0f72a1398a9dde7f11262a5 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 25 Nov 2016 16:38:32 -0500
-Subject: [PATCH 034/247] Add extended mnemonics for branches.
-
----
- dynasm/dasm_s390x.lua | 22 ++++++++++++++++------
- 1 file changed, 16 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index e39a27f..76fe281 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -239,9 +239,10 @@ local map_extend = {
- }
-
- local map_cond = {
-- eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
-- hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
-- hs = 2, lo = 3,
-+ o = 1, h = 2, hle = 3, l = 4,
-+ nhe = 5, lh = 6, ne = 7, e = 8,
-+ nlh = 9, he = 10, nl = 11, le = 12,
-+ nh = 13, no = 14, [""] = 15,
- }
-
- ------------------------------------------------------------------------------
-@@ -650,7 +651,7 @@ local alias_lslimm = op_alias("ubfm_4", function(p)
- end
- end)
-
---- Template strings for ARM instructions.
-+-- Template strings for s390x instructions.
- map_op = {
- a = "000000005a000000j",
- ar = "0000000000001a00g",
-@@ -1084,7 +1085,7 @@ msgfr = "00000000b91c0000h",
- msfi = "0000c20100000000l",
- msgfi = "0000c20000000000l",
- o = "0000000056000000j",
--or = "0000000000001600g",
-+["or"] = "0000000000001600g",
- oy = "0000e30000000056k",
- og = "0000e30000000081k",
- ogr = "00000000b9810000h",
-@@ -1213,7 +1214,16 @@ tracg = "0000eb000000000fn",
- tre = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
-- map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
-+ -- Extended mnemonics for branches.
-+ -- TODO: replace 'B' with correct encoding.
-+ -- brc
-+ map_op["j"..cond.."_1"] =
"00000000"..tohex(0xa7040000+shl(c, 20)).."B"
-+ -- brcl
-+ map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c,
4)).."00000000".."B"
-+ -- bc
-+ map_op["b"..cond.."_1"] =
"00000000"..tohex(0x47000000+shl(c, 20)).."B"
-+ -- bcr
-+ map_op["b"..cond.."r_1"] =
"00000000"..tohex(0x0700+shl(c, 4)).."B"
- end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
---
-2.20.1
-
-
-From 4cc73d93652ff71249e20a4b2614277d701e6b2a Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Mon, 28 Nov 2016 13:32:30 +0530
-Subject: [PATCH 035/247] Removed the extra check in parse_reg
-
-The extra check for register is currently ignored, and trying to see what value does the
encode function return. Its still to be worked out, how this value is used later, after
decoding.
----
- dynasm/dasm_s390x.lua | 10 +---------
- 1 file changed, 1 insertion(+), 9 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 76fe281..340ad24 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -251,15 +251,7 @@ local parse_reg_type
-
-
- local function parse_gpr(expr)
-- local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
-- local tp = map_type[tname or expr]
-- if tp then
-- local reg = ovreg or tp.reg
-- if not reg then
-- werror("type `"..(tname or expr).."' needs a register
override")
-- end
-- expr = reg
-- end
-+ -- assuming we get r0-r31 for now
- local r = match(expr, "^r([1-3]?[0-9])$")
- if r then
- r = tonumber(r)
---
-2.20.1
-
-
-From e926b6eeceb68386086f054d9948a3a13f37d1ab Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Mon, 28 Nov 2016 15:29:58 +0530
-Subject: [PATCH 036/247] Updated size of the instruction word
-
-We can discuss if we need to keep it 6 bytes or 8 bytes long, Not clear enough to me as
well
----
- dynasm/dasm_s390x.lua | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 340ad24..2965034 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -88,7 +88,7 @@ end
-
- -- Add word to action list.
- local function wputxw(n)
-- assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of
range")
-+ assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of
range") -- s390x inst can be 6 bytes
- actlist[#actlist+1] = n
- end
-
-@@ -109,7 +109,7 @@ local function wflush(term)
- secpos = 1 -- The actionlist offset occupies a buffer position, too.
- end
-
---- Put escaped word.
-+-- Put escaped word. --Need to check this as well, not sure how it will work on
s390x
- local function wputw(n)
- if n <= 0x000fffff then waction("ESC") end
- wputxw(n)
-@@ -122,9 +122,9 @@ local function wpos()
- return pos
- end
-
---- Store word to reserved position.
-+-- Store word to reserved position. -- added 2 bytes more since s390x has 6 bytes inst
as well
- local function wputpos(pos, n)
-- assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of
range")
-+ assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of
range")
- if n <= 0x000fffff then
- insert(actlist, pos+1, n)
- n = map_action.ESC * 0x10000
-@@ -278,7 +278,7 @@ local function parse_reg_base(expr)
- local base, tp = parse_reg(expr)
- if parse_reg_type ~= "x" then werror("bad register type") end
- parse_reg_type = false
-- return shl(base, 5), tp
-+ return shl(base, 5), tp -- why is it shifted not able to make out
- end
-
- local parse_ctx = {}
---
-2.20.1
-
-
-From 7fbc58dba6fd2bdcacfac77cd1c7ca43974646b5 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 29 Nov 2016 19:00:28 +0530
-Subject: [PATCH 037/247] Added the required character for encoding
-
-I have added the number depending on the number of operands, pls check for the ones which
access memory.
-Also For base register and displacement, should I assume that it will be passed in the
same order as it is expected, since I dont have any means to see the output, I am confused
a bit for those add modes.
-Since we decided to test RR first, thats in progress, but would like to add others as
well.
----
- dynasm/dasm_s390x.lua | 1130 +++++++++++++++++++++--------------------
- 1 file changed, 567 insertions(+), 563 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 2965034..f1d492c 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -645,565 +645,565 @@ end)
-
- -- Template strings for s390x instructions.
- map_op = {
-- a = "000000005a000000j",
--ar = "0000000000001a00g",
--ay = "0000e3000000005ak",
--ag = "0000e30000000008k",
--agr = "00000000b9080000h",
--agf = "0000e30000000018k",
--agfr = "00000000b9180000h",
--axbr = "00000000b34a0000h",
--adbr = "00000000b31a0000h",
--aebr = "00000000b30a0000h",
--ah = "000000004a000000j",
--ahy = "0000e3000000007ak",
--afi = "0000c20900000000l",
--agfi = "0000c20800000000l",
--aih = "0000cc0800000000l",
--al = "000000005e000000j",
--alr = "0000000000001e00g",
--aly = "0000e3000000005ek",
--alg = "0000e3000000000ak",
--algr = "00000000b90a0000h",
--algf = "0000e3000000001ak",
--algfr = "00000000b91a0000h",
--alfi = "0000c20b00000000l",
--algfi = "0000c20a00000000l",
--alc = "0000e30000000098k",
--alcr = "00000000b9980000h",
--alcg = "0000e30000000088k",
--alcgr = "00000000b9880000h",
--alsih = "0000cc0a00000000l",
--alsihn = "0000cc0b00000000l",
--axr = "0000000000003600g",
--ad = "000000006a000000j",
--adr = "0000000000002a00g",
--ae = "000000007a000000j",
--aer = "0000000000003a00g",
--aw = "000000006e000000j",
--awr = "0000000000002e00g",
--au = "000000007e000000j",
--aur = "0000000000003e00g",
--n = "0000000054000000j",
--nr = "0000000000001400g",
--ny = "0000e30000000054k",
--ng = "0000e30000000080k",
--ngr = "00000000b9800000h",
--nihf = "0000c00a00000000l",
--nilf = "0000c00b00000000l",
--bal = "0000000045000000j",
--balr = "000000000000500g",
--bas = "000000004d000000j",
--basr = "0000000000000d00g",
--bassm = "0000000000000c00g",
--bsa = "00000000b25a0000h",
--bsm = "0000000000000b00g",
--bakr = "00000000b2400000h",
--bsg = "00000000b2580000h",
--bc = "0000000047000000j",
--bcr = "000000000000700g",
--bct = "0000000046000000j",
--bctr = "000000000000600g",
--bctg = "0000e30000000046k",
--bctgr = "00000000b9460000h",
--bxh = "0000000086000000m",
--bxhg = "0000eb0000000044n",
--bxle = "0000000087000000m",
--bxleg = "0000eb0000000045n",
--brasl = "0000c00500000000l",
--brcl = "0000c00400000000l",
--brcth = "0000cc0600000000l",
--cksm = "00000000b2410000h",
--km = "00000000b92e0000h",
--kmf = "00000000b92a0000h",
--kmc = "00000000b92f0000h",
--kmo = "00000000b92b0000h",
--c = "0000000059000000j",
--cr = "0000000000001900g",
--cy = "0000e30000000059k",
--cg = "0000e30000000020k",
--cgr = "00000000b9200000h",
--cgf = "0000e30000000030k",
--cgfr = "00000000b9300000h",
--cxbr = "00000000b3490000h",
--cxtr = "00000000b3ec0000h",
--cxr = "00000000b3690000h",
--cdbr = "00000000b3190000h",
--cdtr = "00000000b3e40000h",
--cd = "0000000069000000j",
--cdr = "0000000000002900g",
--cebr = "00000000b3090000h",
--ce = "0000000079000000j",
--cer = "0000000000003900g",
--kxbr = "00000000b3480000h",
--kxtr = "00000000b3e80000h",
--kdbr = "00000000b3180000h",
--kdtr = "00000000b3e00000h",
--kebr = "00000000b3080000h",
--cs = "00000000ba000000m",
--csy = "0000eb0000000014n",
--csg = "0000eb0000000030n",
--csp = "00000000b2500000h",
--cspg = "00000000b98a0000h",
--cextr = "00000000b3fc0000h",
--cedtr = "00000000b3f40000h",
--cds = "00000000bb000000m",
--cdsy = "0000eb0000000031n",
--cdsg = "0000eb000000003en",
--ch = "0000000049000000j",
--chy = "0000e30000000079k",
--cgh = "0000e30000000034k",
--chrl = "0000c60500000000l",
--cghrl = "0000c60400000000l",
--chf = "0000e300000000cdk",
--chhr = "00000000b9cd0000h",
--chlr = "00000000b9dd0000h",
--cfi = "0000c20d00000000l",
--cgfi = "0000c20c00000000l",
--cih = "0000cc0d00000000l",
--cl = "0000000055000000j",
--clr = "0000000000001500g",
--cly = "0000e30000000055k",
--clg = "0000e30000000021k",
--clgr = "00000000b9210000h",
--clgf = "0000e30000000031k",
--clgfr = "00000000b9310000h",
--clmh = "0000eb0000000020n",
--clm = "00000000bd000000m",
--clmy = "0000eb0000000021n",
--clhf = "0000e300000000cfk",
--clhhr = "00000000b9cf0000h",
--clhlr = "00000000b9df0000h",
--clfi = "0000c20f00000000l",
--clgfi = "0000c20e00000000l",
--clih = "0000cc0f00000000l",
--clcl = "0000000000000f00g",
--clcle = "00000000a9000000m",
--clclu = "0000eb000000008fn",
--clrl = "0000c60f00000000l",
--clhrl = "0000c60700000000l",
--clgrl = "0000c60a00000000l",
--clghrl = "0000c60600000000l",
--clgfrl = "0000c60e00000000l",
--clst = "00000000b25d0000h",
--crl = "0000c60d00000000l",
--cgrl = "0000c60800000000l",
--cgfrl = "0000c60c00000000l",
-- cuse = "00000000b2570000h",
--cmpsc = "00000000b2630000h",
--kimd = "00000000b93e0000h",
--klmd = "00000000b93f0000h",
--kmac = "00000000b91e0000h",
--thdr = "00000000b3590000h",
--thder = "00000000b3580000h",
--cxfbr = "00000000b3960000h",
--cxftr = "00000000b9590000h",
--cxfr = "00000000b3b60000h",
--cdfbr = "00000000b3950000h",
--cdftr = "00000000b9510000h",
--cdfr = "00000000b3b50000h",
--cefbr = "00000000b3940000h",
--cefr = "00000000b3b40000h",
--cxgbr = "00000000b3a60000h",
--cxgtr = "00000000b3f90000h",
--cxgr = "00000000b3c60000h",
--cdgbr = "00000000b3a50000h",
--cdgtr = "00000000b3f10000h",
--cdgr = "00000000b3c50000h",
--cegbr = "00000000b3a40000h",
--cegr = "00000000b3c40000h",
--cxstr = "00000000b3fb0000h",
--cdstr = "00000000b3f30000h",
--cxutr = "00000000b3fa0000h",
--cdutr = "00000000b3f20000h",
--cvb = "000000004f000000j",
--cvby = "0000e30000000006k",
--cvbg = "0000e3000000000ek",
--cvd = "000000004e000000j",
--cvdy = "0000e30000000026k",
--cvdg = "0000e3000000002ek",
--cuxtr = "00000000b3ea0000h",
--cudtr = "00000000b3e20000h",
--cu42 = "00000000b9b30000h",
--cu41 = "00000000b9b20000h",
--cpya = "00000000b24d0000h",
--d = "000000005d000000j",
--dr = "0000000000001d00g",
--dxbr = "00000000b34d0000h",
--dxr = "00000000b22d0000h",
--ddbr = "00000000b31d0000h",
--dd = "000000006d000000j",
--ddr = "0000000000002d00g",
--debr = "00000000b30d0000h",
--de = "000000007d000000j",
--der = "0000000000003d00g",
--dl = "0000e30000000097k",
--dlr = "00000000b9970000h",
--dlg = "0000e30000000087k",
--dlgr = "00000000b9870000h",
--dsg = "0000e3000000000dk",
--dsgr = "00000000b90d0000h",
--dsgf = "0000e3000000001dk",
--dsgfr = "00000000b91d0000h",
--x = "0000000057000000j",
--xr = "0000000000001700g",
--xy = "0000e30000000057k",
--xg = "0000e30000000082k",
--xgr = "00000000b9820000h",
--xihf = "0000c00600000000l",
--xilf = "0000c00700000000l",
--ex = "0000000044000000j",
--exrl = "0000c60000000000l",
--ear = "00000000b24f0000h",
--esea = "00000000b99d0000h",
--eextr = "00000000b3ed0000h",
--eedtr = "00000000b3e50000h",
--ecag = "0000eb000000004cn",
--efpc = "00000000b38c0000h",
--epar = "00000000b2260000h",
--epair = "00000000b99a0000h",
--epsw = "00000000b98d0000h",
--esar = "00000000b2270000h",
--esair = "00000000b99b0000h",
--esxtr = "00000000b3ef0000h",
--esdtr = "00000000b3e70000h",
--ereg = "00000000b2490000h",
--eregg = "00000000b90e0000h",
--esta = "00000000b24a0000h",
--flogr = "00000000b9830000h",
--hdr = "0000000000002400g",
--her = "0000000000003400g",
--iac = "00000000b2240000h",
--ic = "0000000043000000j",
--icy = "0000e30000000073k",
--icmh = "0000eb0000000080n",
--icm = "00000000bf000000m",
--icmy = "0000eb0000000081n",
--iihf = "0000c00800000000l",
--iilf = "0000c00900000000l",
--ipm = "00000000b2220000h",
--iske = "00000000b2290000h",
--ivsk = "00000000b2230000h",
--l = "0000000058000000j",
--lr = "0000000000001800g",
--ly = "0000e30000000058k",
--lg = "0000e30000000004k",
--lgr = "00000000b9040000h",
--lgf = "0000e30000000014k",
--lgfr = "00000000b9140000h",
--lxr = "00000000b3650000h",
--ld = "0000000068000000j",
--ldr = "0000000000002800g",
--ldy = "0000ed0000000065k",
--le = "0000000078000000j",
--ler = "0000000000003800g",
-- ley = "0000ed0000000064k",
--lam = "000000009a000000m",
--lamy = "0000eb000000009an",
--la = "0000000041000000j",
--lay = "0000e30000000071k",
--lae = "0000000051000000j",
--laey = "0000e30000000075k",
--larl = "0000c00000000000l",
--laa = "0000eb00000000f8n",
--laag = "0000eb00000000e8n",
--laal = "0000eb00000000fan",
--laalg = "0000eb00000000ean",
--lan = "0000eb00000000f4n",
--lang = "0000eb00000000e4n",
--lax = "0000eb00000000f7n",
--laxg = "0000eb00000000e7n",
--lao = "0000eb00000000f6n",
--laog = "0000eb00000000e6n",
--lt = "0000e30000000012k",
--ltr = "0000000000001200g",
--ltg = "0000e30000000002k",
--ltgr = "00000000b9020000h",
--ltgf = "0000e30000000032k",
--ltgfr = "00000000b9120000h",
--ltxbr = "00000000b3420000h",
--ltxtr = "00000000b3de0000h",
--ltxr = "00000000b3620000h",
--ltdbr = "00000000b3120000h",
--ltdtr = "00000000b3d60000h",
--ltdr = "0000000000002200g",
--ltebr = "00000000b3020000h",
--lter = "0000000000003200g",
--lb = "0000e30000000076k",
--lbr = "00000000b9260000h",
--lgb = "0000e30000000077k",
--lgbr = "00000000b9060000h",
-- lbh = "0000e300000000c0k",
--lcr = "0000000000001300g",
--lcgr = "00000000b9030000h",
--lcgfr = "00000000b9130000h",
--lcxbr = "00000000b3430000h",
--lcxr = "00000000b3630000h",
--lcdbr = "00000000b3130000h",
--lcdr = "0000000000002300g",
--lcdfr = "00000000b3730000h",
--lcebr = "00000000b3030000h",
--lcer = "0000000000003300g",
--lctl = "00000000b7000000m",
--lctlg = "0000eb000000002fn",
--fixr = "00000000b3670000h",
--fidr = "00000000b37f0000h",
--fier = "00000000b3770000h",
--ldgr = "00000000b3c10000h",
--lgdr = "00000000b3cd0000h",
--lh = "0000000048000000j",
--lhr = "00000000b9270000h",
--lhy = "0000e30000000078k",
--lgh = "0000e30000000015k",
--lghr = "00000000b9070000h",
--lhh = "0000e300000000c4k",
--lhrl = "0000c40500000000l",
--lghrl = "0000c40400000000l",
--lfh = "0000e300000000cak",
--lgfi = "0000c00100000000l",
--lxdbr = "00000000b3050000h",
--lxdr = "00000000b3250000h",
--lxebr = "00000000b3060000h",
--lxer = "00000000b3260000h",
--ldebr = "00000000b3040000h",
--lder = "00000000b3240000h",
--llgf = "0000e30000000016k",
--llgfr = "00000000b9160000h",
--llc = "0000e30000000094k",
--llcr = "00000000b9940000h",
--llgc = "0000e30000000090k",
--llgcr = "00000000b9840000h",
--llch = "0000e300000000c2k",
--llh = "0000e30000000095k",
--llhr = "00000000b9950000h",
--llgh = "0000e30000000091k",
--llghr = "00000000b9850000h",
--llhh = "0000e300000000c6k",
--llhrl = "0000c40200000000l",
--llghrl = "0000c40600000000l",
--llihf = "0000c00e00000000l",
--llilf = "0000c00f00000000l",
--llgfrl = "0000c40e00000000l",
--llgt = "0000e30000000017k",
--llgtr = "00000000b9170000h",
--lm = "0000000098000000m",
--lmy = "0000eb0000000098n",
--lmg = "0000eb0000000004n",
--lmh = "0000eb0000000096n",
--lnr = "0000000000001100g",
--lngr = "00000000b9010000h",
--lngfr = "00000000b9110000h",
--lnxbr = "00000000b3410000h",
--lnxr = "00000000b3610000h",
--lndbr = "00000000b3110000h",
--lndr = "0000000000002100g",
--lndfr = "00000000b3710000h",
--lnebr = "00000000b3010000h",
--lner = "0000000000003100g",
--loc = "0000eb00000000f2n",
--locg = "0000eb00000000e2n",
--lpq = "0000e3000000008fk",
--lpr = "0000000000001000g",
--lpgr = "00000000b9000000h",
--lpgfr = "00000000b9100000h",
--lpxbr = "00000000b3400000h",
--lpxr = "00000000b3600000h",
--lpdbr = "00000000b3100000h",
--lpdr = "0000000000002000g",
--lpdfr = "00000000b3700000h",
--lpebr = "00000000b3000000h",
--lper = "0000000000003000g",
--lra = "00000000b1000000j",
--lray = "0000e30000000013k",
--lrag = "0000e30000000003k",
--lrl = "0000c40d00000000l",
--lgrl = "0000c40800000000l",
--lgfrl = "0000c40c00000000l",
--lrvh = "0000e3000000001fk",
--lrv = "0000e3000000001ek",
--lrvr = "00000000b91f0000h",
--lrvg = "0000e3000000000fk",
--lrvgr = "00000000b90f0000h",
--ldxbr = "00000000b3450000h",
--ldxr = "0000000000002500g",
--lrdr = "0000000000002500g",
--lexbr = "00000000b3460000h",
--lexr = "00000000b3660000h",
--ledbr = "00000000b3440000h",
--ledr = "0000000000003500g",
--lrer = "0000000000003500g",
--lura = "00000000b24b0000h",
--lurag = "00000000b9050000h",
--lzxr = "00000000b3760000h",
--lzdr = "00000000b3750000h",
--lzer = "00000000b3740000h",
--msta = "00000000b2470000h",
--mvcl = "0000000000000e00g",
--mvcle = "00000000a8000000m",
--mvclu = "0000eb000000008en",
--mvpg = "00000000b2540000h",
--mvst = "00000000b2550000h",
--m = "000000005c000000j",
--mfy = "0000e3000000005ck",
--mr = "0000000000001c00g",
--mxbr = "00000000b34c0000h",
--mxr = "0000000000002600g",
--mdbr = "00000000b31c0000h",
--md = "000000006c000000j",
--mdr = "0000000000002c00g",
--mxdbr = "00000000b3070000h",
--mxd = "0000000067000000j",
--mxdr = "0000000000002700g",
--meebr = "00000000b3170000h",
--meer = "00000000b3370000h",
--mdebr = "00000000b30c0000h",
--mde = "000000007c000000j",
--mder = "0000000000003c00g",
--me = "000000007c000000j",
--mer = "0000000000003c00g",
--mh = "000000004c000000j",
--mhy = "0000e3000000007ck",
--mlg = "0000e30000000086k",
--mlgr = "00000000b9860000h",
--ml = "0000e30000000096k",
--mlr = "00000000b9960000h",
--ms = "0000000071000000j",
--msr = "00000000b2520000h",
--msy = "0000e30000000051k",
--msg = "0000e3000000000ck",
--msgr = "00000000b90c0000h",
--msgf = "0000e3000000001ck",
--msgfr = "00000000b91c0000h",
--msfi = "0000c20100000000l",
--msgfi = "0000c20000000000l",
--o = "0000000056000000j",
--["or"] = "0000000000001600g",
--oy = "0000e30000000056k",
--og = "0000e30000000081k",
--ogr = "00000000b9810000h",
--oihf = "0000c00c00000000l",
--oilf = "0000c00d00000000l",
--pgin = "00000000b22e0000h",
--pgout = "00000000b22f0000h",
--pcc = "00000000b92c0000h",
--pckmo = "00000000b9280000h",
--pfmf = "00000000b9af0000h",
--ptf = "00000000b9a20000h",
--popcnt = "00000000b9e10000h",
--pfd = "0000e30000000036k",
--pfdrl = "0000c60200000000l",
--pt = "00000000b2280000h",
--pti = "00000000b99e0000h",
--palb = "00000000b2480000h",
--rrbe = "00000000b22a0000h",
--rrbm = "00000000b9ae0000h",
--rll = "0000eb000000001dn",
--rllg = "0000eb000000001cn",
--srst = "00000000b25e0000h",
--srstu = "00000000b9be0000h",
--sar = "00000000b24e0000h",
--sfpc = "00000000b3840000h",
--sfasr = "00000000b3850000h",
--spm = "000000000000400g",
--ssar = "00000000b2250000h",
--ssair = "00000000b99f0000h",
--slda = "000000008f000000m",
--sldl = "000000008d000000m",
--sla = "000000008b000000m",
--slak = "0000eb00000000ddn",
--slag = "0000eb000000000bn",
--sll = "0000000089000000m",
--sllk = "0000eb00000000dfn",
--sllg = "0000eb000000000dn",
--srda = "000000008e000000m",
--srdl = "000000008c000000m",
--sra = "000000008a000000m",
--srak = "0000eb00000000dcn",
--srag = "0000eb000000000an",
--srl = "0000000088000000m",
--srlk = "0000eb00000000den",
--srlg = "0000eb000000000cn",
--sqxbr = "00000000b3160000h",
--sqxr = "00000000b3360000h",
--sqdbr = "00000000b3150000h",
--sqdr = "00000000b2440000h",
--sqebr = "00000000b3140000h",
--sqer = "00000000b2450000h",
--st = "0000000050000000j",
--sty = "0000e30000000050k",
--stg = "0000e30000000024k",
--std = "0000000060000000j",
--stdy = "0000ed0000000067k",
--ste = "0000000070000000j",
--stey = "0000ed0000000066k",
--stam = "000000009b000000m",
--stamy = "0000eb000000009bn",
--stc = "0000000042000000j",
--stcy = "0000e30000000072k",
--stch = "0000e300000000c3k",
--stcmh = "0000eb000000002cn",
--stcm = "00000000be000000m",
--stcmy = "0000eb000000002dn",
--stctl = "00000000b6000000m",
--stctg = "0000eb0000000025n",
--sth = "0000000040000000j",
--sthy = "0000e30000000070k",
--sthh = "0000e300000000c7k",
--sthrl = "0000c40700000000l",
--stfh = "0000e300000000cbk",
--stm = "0000000090000000m",
--stmy = "0000eb0000000090n",
--stmg = "0000eb0000000024n",
--stmh = "0000eb0000000026n",
--stoc = "0000eb00000000f3n",
--stocg = "0000eb00000000e3n",
--stpq = "0000e3000000008ek",
--strl = "0000c40f00000000l",
--stgrl = "0000c40b00000000l",
--strvh = "0000e3000000003fk",
--strv = "0000e3000000003ek",
--strvg = "0000e3000000002fk",
--stura = "00000000b2460000h",
--sturg = "00000000b9250000h",
--s = "000000005b000000j",
--sr = "0000000000001b00g",
--sy = "0000e3000000005bk",
--sg = "0000e30000000009k",
--sgr = "00000000b9090000h",
--sgf = "0000e30000000019k",
--sgfr = "00000000b9190000h",
--sxbr = "00000000b34b0000h",
--sdbr = "00000000b31b0000h",
--sebr = "00000000b30b0000h",
--sh = "000000004b000000j",
--shy = "0000e3000000007bk",
--sl = "000000005f000000j",
--slr = "0000000000001f00g",
--sly = "0000e3000000005fk",
--slg = "0000e3000000000bk",
--slgr = "00000000b90b0000h",
--slgf = "0000e3000000001bk",
--slgfr = "00000000b91b0000h",
--slfi = "0000c20500000000l",
--slgfi = "0000c20400000000l",
--slb = "0000e30000000099k",
--slbr = "00000000b9990000h",
--slbg = "0000e30000000089k",
--slbgr = "00000000b9890000h",
--sxr = "0000000000003700g",
--sd = "000000006b000000j",
--sdr = "0000000000002b00g",
--se = "000000007b000000j",
--ser = "0000000000003b00g",
--su = "000000007f000000j",
--sur = "0000000000003f00g",
--sw = "000000006f000000j",
--swr = "0000000000002f00g",
--tar = "00000000b24c0000h",
--tb = "00000000b22c0000h",
--trace = "0000000099000000m",
--tracg = "0000eb000000000fn",
--tre = "00000000b2a50000h",
-+a_4 = "000000005a000000j",
-+ar_2 = "0000000000001a00g",
-+ay_5 = "0000e3000000005al",
-+ag_5 = "0000e30000000008l",
-+agr_2 = "00000000b9080000h",
-+agf_5 = "0000e30000000018l",
-+agfr_2 = "00000000b9180000h",
-+axbr_2 = "00000000b34a0000h",
-+adbr_2 = "00000000b31a0000h",
-+aebr_2 = "00000000b30a0000h",
-+ah_4 = "000000004a000000j",
-+ahy_5 = "0000e3000000007al",
-+afi_3 = "0000c20900000000n",
-+agfi_3 = "0000c20800000000n",
-+aih_3 = "0000cc0800000000n",
-+al_4 = "000000005e000000j",
-+alr_2 = "0000000000001e00g",
-+aly_5 = "0000e3000000005el",
-+alg_5 = "0000e3000000000al",
-+algr_2 = "00000000b90a0000h",
-+algf_5 = "0000e3000000001al",
-+algfr_2 = "00000000b91a0000h",
-+alfi_3 = "0000c20b00000000n",
-+algfi_3 = "0000c20a00000000n",
-+alc_5 = "0000e30000000098l",
-+alcr_2 = "00000000b9980000h",
-+alcg_5 = "0000e30000000088l",
-+alcgr_2 = "00000000b9880000h",
-+alsih_3 = "0000cc0a00000000n",
-+alsihn_3 = "0000cc0b00000000n",
-+axr_2 = "0000000000003600g",
-+ad_4 = "000000006a000000j",
-+adr_2 = "0000000000002a00g",
-+ae_4 = "000000007a000000j",
-+aer_2 = "0000000000003a00g",
-+aw_4 = "000000006e000000j",
-+awr_2 = "0000000000002e00g",
-+au_4 = "000000007e000000j",
-+aur_2 = "0000000000003e00g",
-+n_4 = "0000000054000000j",
-+nr_2 = "0000000000001400g",
-+ny_5 = "0000e30000000054l",
-+ng_5 = "0000e30000000080l",
-+ngr_2 = "00000000b9800000h",
-+nihf_3 = "0000c00a00000000n",
-+nilf_3 = "0000c00b00000000n",
-+bal_4 = "0000000045000000j",
-+balr_2 = "000000000000500g",
-+bas_4 = "000000004d000000j",
-+basr_2 = "0000000000000d00g",
-+bassm_2 = "0000000000000c00g",
-+bsa_2 = "00000000b25a0000h",
-+bsm_2 = "0000000000000b00g",
-+bakr_2 = "00000000b2400000h",
-+bsg_2 = "00000000b2580000h",
-+bc_4 = "0000000047000000k",
-+bcr_2 = "000000000000700g",
-+bct_4 = "0000000046000000j",
-+bctr_2 = "000000000000600g",
-+bctg_5 = "0000e30000000046l",
-+bctgr_2 = "00000000b9460000h",
-+bxh_4 = "0000000086000000q",
-+bxhg_5 = "0000eb0000000044s",
-+bxle_4 = "0000000087000000q",
-+bxleg_5 = "0000eb0000000045s",
-+brasl_3 = "0000c00500000000o",
-+brcl_3 = "0000c00400000000p",
-+brcth_3 = "0000cc0600000000o",
-+cksm_2 = "00000000b2410000h",
-+km_2 = "00000000b92e0000h",
-+kmf_2 = "00000000b92a0000h",
-+kmc_2 = "00000000b92f0000h",
-+kmo_2 = "00000000b92b0000h",
-+c_4 = "0000000059000000j",
-+cr_2 = "0000000000001900g",
-+cy_5 = "0000e30000000059l",
-+cg_5 = "0000e30000000020l",
-+cgr_2 = "00000000b9200000h",
-+cgf_5 = "0000e30000000030l",
-+cgfr_2 = "00000000b9300000h",
-+cxbr_2 = "00000000b3490000h",
-+cxtr_2 = "00000000b3ec0000h",
-+cxr_2 = "00000000b3690000h",
-+cdbr_2 = "00000000b3190000h",
-+cdtr_2 = "00000000b3e40000h",
-+cd_4 = "0000000069000000j",
-+cdr_2 = "0000000000002900g",
-+cebr_2 = "00000000b3090000h",
-+ce_4 = "0000000079000000j",
-+cer_2 = "0000000000003900g",
-+kxbr_2 = "00000000b3480000h",
-+kxtr_2 = "00000000b3e80000h",
-+kdbr_2 = "00000000b3180000h",
-+kdtr_2 = "00000000b3e00000h",
-+kebr_2 = "00000000b3080000h",
-+cs_4 = "00000000ba000000q",
-+csy_5 = "0000eb0000000014s",
-+csg_5 = "0000eb0000000030s",
-+csp_2 = "00000000b2500000h",
-+cspg_2 = "00000000b98a0000h",
-+cextr_2 = "00000000b3fc0000h",
-+cedtr_2 = "00000000b3f40000h",
-+cds_4 = "00000000bb000000q",
-+cdsy_5 = "0000eb0000000031s",
-+cdsg_5 = "0000eb000000003es",
-+ch_4 = "0000000049000000j",
-+chy_5 = "0000e30000000079l",
-+cgh_5 = "0000e30000000034l",
-+chrl_3 = "0000c60500000000o",
-+cghrl_3 = "0000c60400000000o",
-+chf_5 = "0000e300000000cdl",
-+chhr_2 = "00000000b9cd0000h",
-+chlr_2 = "00000000b9dd0000h",
-+cfi_3 = "0000c20d00000000n",
-+cgfi_3 = "0000c20c00000000n",
-+cih_3 = "0000cc0d00000000n",
-+cl_4 = "0000000055000000j",
-+clr_2 = "0000000000001500g",
-+cly_5 = "0000e30000000055l",
-+clg_5 = "0000e30000000021l",
-+clgr_2 = "00000000b9210000h",
-+clgf_5 = "0000e30000000031l",
-+clgfr_2 = "00000000b9310000h",
-+clmh_5 = "0000eb0000000020t",
-+clm_4 = "00000000bd000000r",
-+clmy_5 = "0000eb0000000021t",
-+clhf_5 = "0000e300000000cfl",
-+clhhr_2 = "00000000b9cf0000h",
-+clhlr_2 = "00000000b9df0000h",
-+clfi_3 = "0000c20f00000000n",
-+clgfi_3 = "0000c20e00000000n",
-+clih_3 = "0000cc0f00000000n",
-+clcl_2 = "0000000000000f00g",
-+clcle_4 = "00000000a9000000q",
-+clclu_5 = "0000eb000000008fs",
-+clrl_3 = "0000c60f00000000o",
-+clhrl_3 = "0000c60700000000o",
-+clgrl_3 = "0000c60a00000000o",
-+clghrl_3 = "0000c60600000000o",
-+clgfrl_3 = "0000c60e00000000o",
-+clst_2 = "00000000b25d0000h",
-+crl_3 = "0000c60d00000000o",
-+cgrl_3 = "0000c60800000000o",
-+cgfrl_3 = "0000c60c00000000o",
-+cuse_2 = "00000000b2570000h",
-+cmpsc_2 = "00000000b2630000h",
-+kimd_2 = "00000000b93e0000h",
-+klmd_2 = "00000000b93f0000h",
-+kmac_2 = "00000000b91e0000h",
-+thdr_2 = "00000000b3590000h",
-+thder_2 = "00000000b3580000h",
-+cxfbr_2 = "00000000b3960000h",
-+cxftr_2 = "00000000b9590000h",
-+cxfr_2 = "00000000b3b60000h",
-+cdfbr_2 = "00000000b3950000h",
-+cdftr_2 = "00000000b9510000h",
-+cdfr_2 = "00000000b3b50000h",
-+cefbr_2 = "00000000b3940000h",
-+cefr_2 = "00000000b3b40000h",
-+cxgbr_2 = "00000000b3a60000h",
-+cxgtr_2 = "00000000b3f90000h",
-+cxgr_2 = "00000000b3c60000h",
-+cdgbr_2 = "00000000b3a50000h",
-+cdgtr_2 = "00000000b3f10000h",
-+cdgr_2 = "00000000b3c50000h",
-+cegbr_2 = "00000000b3a40000h",
-+cegr_2 = "00000000b3c40000h",
-+cxstr_2 = "00000000b3fb0000h",
-+cdstr_2 = "00000000b3f30000h",
-+cxutr_2 = "00000000b3fa0000h",
-+cdutr_2 = "00000000b3f20000h",
-+cvb_4 = "000000004f000000j",
-+cvby_5 = "0000e30000000006l",
-+cvbg_5 = "0000e3000000000el",
-+cvd_4 = "000000004e000000j",
-+cvdy_5 = "0000e30000000026l",
-+cvdg_5 = "0000e3000000002el",
-+cuxtr_2 = "00000000b3ea0000h",
-+cudtr_2 = "00000000b3e20000h",
-+cu42_2 = "00000000b9b30000h",
-+cu41_2 = "00000000b9b20000h",
-+cpya_2 = "00000000b24d0000h",
-+d_4 = "000000005d000000j",
-+dr_2 = "0000000000001d00g",
-+dxbr_2 = "00000000b34d0000h",
-+dxr_2 = "00000000b22d0000h",
-+ddbr_2 = "00000000b31d0000h",
-+dd_4 = "000000006d000000j",
-+ddr_2 = "0000000000002d00g",
-+debr_2 = "00000000b30d0000h",
-+de_4 = "000000007d000000j",
-+der_2 = "0000000000003d00g",
-+dl_5 = "0000e30000000097l",
-+dlr_2 = "00000000b9970000h",
-+dlg_5 = "0000e30000000087l",
-+dlgr_2 = "00000000b9870000h",
-+dsg_5 = "0000e3000000000dl",
-+dsgr_2 = "00000000b90d0000h",
-+dsgf_5 = "0000e3000000001dl",
-+dsgfr_2 = "00000000b91d0000h",
-+x_4 = "0000000057000000j",
-+xr_2 = "0000000000001700g",
-+xy_5 = "0000e30000000057l",
-+xg_5 = "0000e30000000082l",
-+xgr_2 = "00000000b9820000h",
-+xihf_3 = "0000c00600000000n",
-+xilf_3 = "0000c00700000000n",
-+ex_4 = "0000000044000000j",
-+exrl_3 = "0000c60000000000o",
-+ear_2 = "00000000b24f0000h",
-+esea_2 = "00000000b99d0000h",
-+eextr_2 = "00000000b3ed0000h",
-+eedtr_2 = "00000000b3e50000h",
-+ecag_5 = "0000eb000000004cs",
-+efpc_2 = "00000000b38c0000h",
-+epar_2 = "00000000b2260000h",
-+epair_2 = "00000000b99a0000h",
-+epsw_2 = "00000000b98d0000h",
-+esar_2 = "00000000b2270000h",
-+esair_2 = "00000000b99b0000h",
-+esxtr_2 = "00000000b3ef0000h",
-+esdtr_2 = "00000000b3e70000h",
-+ereg_2 = "00000000b2490000h",
-+eregg_2 = "00000000b90e0000h",
-+esta_2 = "00000000b24a0000h",
-+flogr_2 = "00000000b9830000h",
-+hdr_2 = "0000000000002400g",
-+her_2 = "0000000000003400g",
-+iac_2 = "00000000b2240000h",
-+ic_4 = "0000000043000000j",
-+icy_5 = "0000e30000000073l",
-+icmh_5 = "0000eb0000000080t",
-+icm_4 = "00000000bf000000r",
-+icmy_5 = "0000eb0000000081t",
-+iihf_3 = "0000c00800000000n",
-+iilf_3 = "0000c00900000000n",
-+ipm_2 = "00000000b2220000h",
-+iske_2 = "00000000b2290000h",
-+ivsk_2 = "00000000b2230000h",
-+l_4 = "0000000058000000j",
-+lr_2 = "0000000000001800g",
-+ly_5 = "0000e30000000058l",
-+lg_5 = "0000e30000000004l",
-+lgr_2 = "00000000b9040000h",
-+lgf_5 = "0000e30000000014l",
-+lgfr_2 = "00000000b9140000h",
-+lxr_2 = "00000000b3650000h",
-+ld_4 = "0000000068000000j",
-+ldr_2 = "0000000000002800g",
-+ldy_5 = "0000ed0000000065l",
-+le_4 = "0000000078000000j",
-+ler_2 = "0000000000003800g",
-+ley_5 = "0000ed0000000064l",
-+lam_4 = "000000009a000000q",
-+lamy_5 = "0000eb000000009as",
-+la_4 = "0000000041000000j",
-+lay_5 = "0000e30000000071l",
-+lae_4 = "0000000051000000j",
-+laey_5 = "0000e30000000075l",
-+larl_3 = "0000c00000000000o",
-+laa_5 = "0000eb00000000f8s",
-+laag_5 = "0000eb00000000e8s",
-+laal_5 = "0000eb00000000fas",
-+laalg_5 = "0000eb00000000eas",
-+lan_5 = "0000eb00000000f4s",
-+lang_5 = "0000eb00000000e4s",
-+lax_5 = "0000eb00000000f7s",
-+laxg_5 = "0000eb00000000e7s",
-+lao_5 = "0000eb00000000f6s",
-+laog_5 = "0000eb00000000e6s",
-+lt_5 = "0000e30000000012l",
-+ltr_2 = "0000000000001200g",
-+ltg_5 = "0000e30000000002l",
-+ltgr_2 = "00000000b9020000h",
-+ltgf_5 = "0000e30000000032l",
-+ltgfr_2 = "00000000b9120000h",
-+ltxbr_2 = "00000000b3420000h",
-+ltxtr_2 = "00000000b3de0000h",
-+ltxr_2 = "00000000b3620000h",
-+ltdbr_2 = "00000000b3120000h",
-+ltdtr_2 = "00000000b3d60000h",
-+ltdr_2 = "0000000000002200g",
-+ltebr_2 = "00000000b3020000h",
-+lter_2 = "0000000000003200g",
-+lb_5 = "0000e30000000076l",
-+lbr_2 = "00000000b9260000h",
-+lgb_5 = "0000e30000000077l",
-+lgbr_2 = "00000000b9060000h",
-+lbh_5 = "0000e300000000c0l",
-+lcr_2 = "0000000000001300g",
-+lcgr_2 = "00000000b9030000h",
-+lcgfr_2 = "00000000b9130000h",
-+lcxbr_2 = "00000000b3430000h",
-+lcxr_2 = "00000000b3630000h",
-+lcdbr_2 = "00000000b3130000h",
-+lcdr_2 = "0000000000002300g",
-+lcdfr_2 = "00000000b3730000h",
-+lcebr_2 = "00000000b3030000h",
-+lcer_2 = "0000000000003300g",
-+lctl_4 = "00000000b7000000q",
-+lctlg_5 = "0000eb000000002fs",
-+fixr_2 = "00000000b3670000h",
-+fidr_2 = "00000000b37f0000h",
-+fier_2 = "00000000b3770000h",
-+ldgr_2 = "00000000b3c10000h",
-+lgdr_2 = "00000000b3cd0000h",
-+lh_4 = "0000000048000000j",
-+lhr_2 = "00000000b9270000h",
-+lhy_5 = "0000e30000000078l",
-+lgh_5 = "0000e30000000015l",
-+lghr_2 = "00000000b9070000h",
-+lhh_5 = "0000e300000000c4l",
-+lhrl_3 = "0000c40500000000o",
-+lghrl_3 = "0000c40400000000o",
-+lfh_5 = "0000e300000000cal",
-+lgfi_3 = "0000c00100000000n",
-+lxdbr_2 = "00000000b3050000h",
-+lxdr_2 = "00000000b3250000h",
-+lxebr_2 = "00000000b3060000h",
-+lxer_2 = "00000000b3260000h",
-+ldebr_2 = "00000000b3040000h",
-+lder_2 = "00000000b3240000h",
-+llgf_5 = "0000e30000000016l",
-+llgfr_2 = "00000000b9160000h",
-+llc_5 = "0000e30000000094l",
-+llcr_2 = "00000000b9940000h",
-+llgc_5 = "0000e30000000090l",
-+llgcr_2 = "00000000b9840000h",
-+llch_5 = "0000e300000000c2l",
-+llh_5 = "0000e30000000095l",
-+llhr_2 = "00000000b9950000h",
-+llgh_5 = "0000e30000000091l",
-+llghr_2 = "00000000b9850000h",
-+llhh_5 = "0000e300000000c6l",
-+llhrl_3 = "0000c40200000000o",
-+llghrl_3 = "0000c40600000000o",
-+llihf_3 = "0000c00e00000000n",
-+llilf_3 = "0000c00f00000000n",
-+llgfrl_3 = "0000c40e00000000o",
-+llgt_5 = "0000e30000000017l",
-+llgtr_2 = "00000000b9170000h",
-+lm_4 = "0000000098000000q",
-+lmy_5 = "0000eb0000000098s",
-+lmg_5 = "0000eb0000000004s",
-+lmh_5 = "0000eb0000000096s",
-+lnr_2 = "0000000000001100g",
-+lngr_2 = "00000000b9010000h",
-+lngfr_2 = "00000000b9110000h",
-+lnxbr_2 = "00000000b3410000h",
-+lnxr_2 = "00000000b3610000h",
-+lndbr_2 = "00000000b3110000h",
-+lndr_2 = "0000000000002100g",
-+lndfr_2 = "00000000b3710000h",
-+lnebr_2 = "00000000b3010000h",
-+lner_2 = "0000000000003100g",
-+loc_5 = "0000eb00000000f2t",
-+locg_5 = "0000eb00000000e2t",
-+lpq_5 = "0000e3000000008fl",
-+lpr_2 = "0000000000001000g",
-+lpgr_2 = "00000000b9000000h",
-+lpgfr_2 = "00000000b9100000h",
-+lpxbr_2 = "00000000b3400000h",
-+lpxr_2 = "00000000b3600000h",
-+lpdbr_2 = "00000000b3100000h",
-+lpdr_2 = "0000000000002000g",
-+lpdfr_2 = "00000000b3700000h",
-+lpebr_2 = "00000000b3000000h",
-+lper_2 = "0000000000003000g",
-+lra_4 = "00000000b1000000j",
-+lray_5 = "0000e30000000013l",
-+lrag_5 = "0000e30000000003l",
-+lrl_3 = "0000c40d00000000o",
-+lgrl_3 = "0000c40800000000o",
-+lgfrl_3 = "0000c40c00000000o",
-+lrvh_5 = "0000e3000000001fl",
-+lrv_5 = "0000e3000000001el",
-+lrvr_2 = "00000000b91f0000h",
-+lrvg_5 = "0000e3000000000fl",
-+lrvgr_2 = "00000000b90f0000h",
-+ldxbr_2 = "00000000b3450000h",
-+ldxr_2 = "0000000000002500g",
-+lrdr_2 = "0000000000002500g",
-+lexbr_2 = "00000000b3460000h",
-+lexr_2 = "00000000b3660000h",
-+ledbr_2 = "00000000b3440000h",
-+ledr_2 = "0000000000003500g",
-+lrer_2 = "0000000000003500g",
-+lura_2 = "00000000b24b0000h",
-+lurag_2 = "00000000b9050000h",
-+lzxr_2 = "00000000b3760000h",
-+lzdr_2 = "00000000b3750000h",
-+lzer_2 = "00000000b3740000h",
-+msta_2 = "00000000b2470000h",
-+mvcl_2 = "0000000000000e00g",
-+mvcle_4 = "00000000a8000000q",
-+mvclu_5 = "0000eb000000008es",
-+mvpg_2 = "00000000b2540000h",
-+mvst_2 = "00000000b2550000h",
-+m_4 = "000000005c000000j",
-+mfy_5 = "0000e3000000005cl",
-+mr_2 = "0000000000001c00g",
-+mxbr_2 = "00000000b34c0000h",
-+mxr_2 = "0000000000002600g",
-+mdbr_2 = "00000000b31c0000h",
-+md_4 = "000000006c000000j",
-+mdr_2 = "0000000000002c00g",
-+mxdbr_2 = "00000000b3070000h",
-+mxd_4 = "0000000067000000j",
-+mxdr_2 = "0000000000002700g",
-+meebr_2 = "00000000b3170000h",
-+meer_2 = "00000000b3370000h",
-+mdebr_2 = "00000000b30c0000h",
-+mde_4 = "000000007c000000j",
-+mder_2 = "0000000000003c00g",
-+me_4 = "000000007c000000j",
-+mer_2 = "0000000000003c00g",
-+mh_4 = "000000004c000000j",
-+mhy_5 = "0000e3000000007cl",
-+mlg_5 = "0000e30000000086l",
-+mlgr_2 = "00000000b9860000h",
-+ml_5 = "0000e30000000096l",
-+mlr_2 = "00000000b9960000h",
-+ms_4 = "0000000071000000j",
-+msr_2 = "00000000b2520000h",
-+msy_5 = "0000e30000000051l",
-+msg_5 = "0000e3000000000cl",
-+msgr_2 = "00000000b90c0000h",
-+msgf_5 = "0000e3000000001cl",
-+msgfr_2 = "00000000b91c0000h",
-+msfi_3 = "0000c20100000000n",
-+msgfi_3 = "0000c20000000000n",
-+o_4 = "0000000056000000j",
-+["or_2"] = "0000000000001600g",
-+oy_5 = "0000e30000000056l",
-+og_5 = "0000e30000000081l",
-+ogr_2 = "00000000b9810000h",
-+oihf_3 = "0000c00c00000000n",
-+oilf_3 = "0000c00d00000000n",
-+pgin_2 = "00000000b22e0000h",
-+pgout_2 = "00000000b22f0000h",
-+pcc_2 = "00000000b92c0000h",
-+pckmo_2 = "00000000b9280000h",
-+pfmf_2 = "00000000b9af0000h",
-+ptf_2 = "00000000b9a20000h",
-+popcnt_2 = "00000000b9e10000h",
-+pfd_5 = "0000e30000000036m",
-+pfdrl_3 = "0000c60200000000p",
-+pt_2 = "00000000b2280000h",
-+pti_2 = "00000000b99e0000h",
-+palb_2 = "00000000b2480000h",
-+rrbe_2 = "00000000b22a0000h",
-+rrbm_2 = "00000000b9ae0000h",
-+rll_5 = "0000eb000000001ds",
-+rllg_5 = "0000eb000000001cs",
-+srst_2 = "00000000b25e0000h",
-+srstu_2 = "00000000b9be0000h",
-+sar_2 = "00000000b24e0000h",
-+sfpc_2 = "00000000b3840000h",
-+sfasr_2 = "00000000b3850000h",
-+spm_2 = "000000000000400g",
-+ssar_2 = "00000000b2250000h",
-+ssair_2 = "00000000b99f0000h",
-+slda_4 = "000000008f000000q",
-+sldl_4 = "000000008d000000q",
-+sla_4 = "000000008b000000q",
-+slak_5 = "0000eb00000000dds",
-+slag_5 = "0000eb000000000bs",
-+sll_4 = "0000000089000000q",
-+sllk_5 = "0000eb00000000dfs",
-+sllg_5 = "0000eb000000000ds",
-+srda_4 = "000000008e000000q",
-+srdl_4 = "000000008c000000q",
-+sra_4 = "000000008a000000q",
-+srak_5 = "0000eb00000000dcs",
-+srag_5 = "0000eb000000000as",
-+srl_4 = "0000000088000000q",
-+srlk_5 = "0000eb00000000des",
-+srlg_5 = "0000eb000000000cs",
-+sqxbr_2 = "00000000b3160000h",
-+sqxr_2 = "00000000b3360000h",
-+sqdbr_2 = "00000000b3150000h",
-+sqdr_2 = "00000000b2440000h",
-+sqebr_2 = "00000000b3140000h",
-+sqer_2 = "00000000b2450000h",
-+st_4 = "0000000050000000j",
-+sty_5 = "0000e30000000050l",
-+stg_5 = "0000e30000000024l",
-+std_4 = "0000000060000000j",
-+stdy_5 = "0000ed0000000067l",
-+ste_4 = "0000000070000000j",
-+stey_5 = "0000ed0000000066l",
-+stam_4 = "000000009b000000q",
-+stamy_5 = "0000eb000000009bs",
-+stc_4 = "0000000042000000j",
-+stcy_5 = "0000e30000000072l",
-+stch_5 = "0000e300000000c3l",
-+stcmh_5 = "0000eb000000002ct",
-+stcm_4 = "00000000be000000r",
-+stcmy_5 = "0000eb000000002dt",
-+stctl_4 = "00000000b6000000q",
-+stctg_5 = "0000eb0000000025s",
-+sth_4 = "0000000040000000j",
-+sthy_5 = "0000e30000000070l",
-+sthh_5 = "0000e300000000c7l",
-+sthrl_3 = "0000c40700000000o",
-+stfh_5 = "0000e300000000cbl",
-+stm_4 = "0000000090000000q",
-+stmy_5 = "0000eb0000000090s",
-+stmg_5 = "0000eb0000000024s",
-+stmh_5 = "0000eb0000000026s",
-+stoc_5 = "0000eb00000000f3t",
-+stocg_5 = "0000eb00000000e3t",
-+stpq_5 = "0000e3000000008el",
-+strl_3 = "0000c40f00000000o",
-+stgrl_3 = "0000c40b00000000o",
-+strvh_5 = "0000e3000000003fl",
-+strv_5 = "0000e3000000003el",
-+strvg_5 = "0000e3000000002fl",
-+stura_2 = "00000000b2460000h",
-+sturg_2 = "00000000b9250000h",
-+s_4 = "000000005b000000j",
-+sr_2 = "0000000000001b00g",
-+sy_5 = "0000e3000000005bl",
-+sg_5 = "0000e30000000009l",
-+sgr_2 = "00000000b9090000h",
-+sgf_5 = "0000e30000000019l",
-+sgfr_2 = "00000000b9190000h",
-+sxbr_2 = "00000000b34b0000h",
-+sdbr_2 = "00000000b31b0000h",
-+sebr_2 = "00000000b30b0000h",
-+sh_4 = "000000004b000000j",
-+shy_5 = "0000e3000000007bl",
-+sl_4 = "000000005f000000j",
-+slr_2 = "0000000000001f00g",
-+sly_5 = "0000e3000000005fl",
-+slg_5 = "0000e3000000000bl",
-+slgr_2 = "00000000b90b0000h",
-+slgf_5 = "0000e3000000001bl",
-+slgfr_2 = "00000000b91b0000h",
-+slfi_3 = "0000c20500000000n",
-+slgfi_3 = "0000c20400000000n",
-+slb_5 = "0000e30000000099l",
-+slbr_2 = "00000000b9990000h",
-+slbg_5 = "0000e30000000089l",
-+slbgr_2 = "00000000b9890000h",
-+sxr_2 = "0000000000003700g",
-+sd_4 = "000000006b000000j",
-+sdr_2 = "0000000000002b00g",
-+se_4 = "000000007b000000j",
-+ser_2 = "0000000000003b00g",
-+su_4 = "000000007f000000j",
-+sur_2 = "0000000000003f00g",
-+sw_4 = "000000006f000000j",
-+swr_2 = "0000000000002f00g",
-+tar_2 = "00000000b24c0000h",
-+tb_2 = "00000000b22c0000h",
-+trace_4 = "0000000099000000q",
-+tracg_5 = "0000eb000000000fs",
-+tre_2 = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1230,13 +1230,17 @@ local function parse_template(params, template, nparams, pos)
- local pr1,pr2,pr3
- if p == "g" then
- pr1,pr2=param[n],param[n+1]
-- op = op + parse_reg(pr1)+parse_reg(pr2); n = n + 1 -- not sure if we will require
n later, so keeping it as it is now
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we
will require n later, so keeping it as it is now
- elseif p == "h" then
--
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
- elseif p == "j" then
--
-+ op = op + shl(parse_reg(param[1],24) + shl(parse_reg(param[2],20) +
shl(parse_reg(param[3]),16) + parse_number(param[4])
-+ -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is
satisfied
-+
- elseif p == "k" then
--
-+ op = op + shl(parse_reg(param[1],40) + shl(parse_reg(param[2],36) +
shl(parse_reg(param[3]),32) + parse_number(param[4]) parse_number(param[5])
-+ -- assuming params are passed as (R1,X2,B2,DL2,DH2)
- elseif p == "l" then
-
- elseif p == "m" then
---
-2.20.1
-
-
-From b79c5e1ce135ad0d857c8ae2af0b453d2c44c675 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 29 Nov 2016 13:45:59 -0500
-Subject: [PATCH 038/247] Various cleanup of dasm_s390x.lua
-
- - Fix syntax errors
- - Fix whitespace (use two-space indentation to match surrounding code)
----
- dynasm/dasm_s390x.lua | 23 +++++++++++------------
- 1 file changed, 11 insertions(+), 12 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index f1d492c..2ae9e59 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1225,29 +1225,28 @@ local function parse_template(params, template, nparams, pos)
- local n,rs = 1,26
-
- parse_reg_type = false
-- -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
-+ -- Process each character.
- for p in gmatch(sub(template, 17), ".") do
- local pr1,pr2,pr3
- if p == "g" then
-- pr1,pr2=param[n],param[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we
will require n later, so keeping it as it is now
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we
will require n later, so keeping it as it is now
- elseif p == "h" then
-- pr1,pr2=param[n],param[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
- elseif p == "j" then
-- op = op + shl(parse_reg(param[1],24) + shl(parse_reg(param[2],20) +
shl(parse_reg(param[3]),16) + parse_number(param[4])
-- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is
satisfied
--
-+ op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) +
shl(parse_reg(param[3]),16) + parse_number(param[4])
-+ -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is
satisfied
- elseif p == "k" then
-- op = op + shl(parse_reg(param[1],40) + shl(parse_reg(param[2],36) +
shl(parse_reg(param[3]),32) + parse_number(param[4]) parse_number(param[5])
-- -- assuming params are passed as (R1,X2,B2,DL2,DH2)
-+ op = op + shl(parse_reg(param[1]),40) + shl(parse_reg(param[2]),36) +
shl(parse_reg(param[3]),32) + parse_number(param[4]) + parse_number(param[5])
-+ -- assuming params are passed as (R1,X2,B2,DL2,DH2)
- elseif p == "l" then
-
- elseif p == "m" then
-
- elseif p == "n" then
--
-- end
-+
-+ end
- end
- wputpos(pos, op)
- end
---
-2.20.1
-
-
-From 9c029ae17f043c98eaa93873ccdf4a8ceb54b0ea Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 29 Nov 2016 13:59:37 -0500
-Subject: [PATCH 039/247] Add sp -> r15 mapping and don't special case or_2
-
-It's convenient for sp to be a pseudonym for r15 (the stack pointer).
-'or_2' doesn't need to be special cased ('or' did because it is a
-keyword).
----
- dynasm/dasm_s390x.lua | 11 ++++-------
- 1 file changed, 4 insertions(+), 7 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 2ae9e59..0ec7893 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -214,15 +214,12 @@ end
- ------------------------------------------------------------------------------
-
- -- Arch-specific maps.
---- TODO: add s390x related register names
- -- Ext. register name -> int. name.
----local map_archdef = { xzr = "@x31", wzr = "@w31", lr =
"x30", }
--local map_archdef = {}
-+local map_archdef = { sp = "r15" }
-
- -- Int. register name -> ext. name.
---- local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] =
"wzr", x30 = "lr", }
--local map_reg_rev = {}
--
-+local map_reg_rev = { r15 = "sp" }
-+
- local map_type = {} -- Type name -> { ctype, reg }
- local ctypenum = 0 -- Type number (for Dt... macros).
-
-@@ -1077,7 +1074,7 @@ msgfr_2 = "00000000b91c0000h",
- msfi_3 = "0000c20100000000n",
- msgfi_3 = "0000c20000000000n",
- o_4 = "0000000056000000j",
--["or_2"] = "0000000000001600g",
-+or_2 = "0000000000001600g",
- oy_5 = "0000e30000000056l",
- og_5 = "0000e30000000081l",
- ogr_2 = "00000000b9810000h",
---
-2.20.1
-
-
-From 0f757313a30d4f345ead4a9f96eb75dac4413d28 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 29 Nov 2016 15:24:11 -0500
-Subject: [PATCH 040/247] Add stubs for parsing memory operands and delete
- unwanted code.
-
-Each memory operand will be a single parameter so we also need
-to update the instruction encoding nargs field.
----
- dynasm/dasm_s390x.h | 2 +-
- dynasm/dasm_s390x.lua | 333 ++++--------------------------------------
- 2 files changed, 30 insertions(+), 305 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 577920a..b327e7a 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -21,7 +21,7 @@ enum {
- /* The following actions need a buffer position. */
- DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
- /* The following actions also have an argument. */
-- DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
-+ DASM_REL_PC, DASM_LABEL_PC, DASM_DISP12, DASM_DISP20, DASM_IMM16, DASM_IMM32,
- DASM__MAX
- };
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 0ec7893..556f7fe 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
- local action_names = {
- "STOP", "SECTION", "ESC", "REL_EXT",
- "ALIGN", "REL_LG", "LABEL_LG",
-- "REL_PC", "LABEL_PC", "IMM", "IMM6",
"IMM12", "IMM13W", "IMM13X", "IMML",
-+ "REL_PC", "LABEL_PC", "DISP12", "DISP20",
"IMM16", "IMM32",
- }
-
- -- Maximum number of section buffer positions for dasm_put().
-@@ -227,13 +227,6 @@ local ctypenum = 0 -- Type number (for Dt... macros).
- function _M.revdef(s)
- return map_reg_rev[s] or s
- end
---- not sure of these
--local map_shift = { lsl = 0, lsr = 1, asr = 2, }
--
--local map_extend = {
-- uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
-- sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
--}
-
- local map_cond = {
- o = 1, h = 2, hle = 3, l = 4,
-@@ -246,13 +239,11 @@ local map_cond = {
-
- local parse_reg_type
-
--
- local function parse_gpr(expr)
-- -- assuming we get r0-r31 for now
- local r = match(expr, "^r([1-3]?[0-9])$")
- if r then
- r = tonumber(r)
-- if r <= 31 then return r, tp end
-+ if r <= 15 then return r, tp end
- end
- werror("bad register name `"..expr.."'")
- end
-@@ -261,23 +252,11 @@ local function parse_fpr(expr)
- local r = match(expr, "^f([1-3]?[0-9])$")
- if r then
- r = tonumber(r)
-- if r <= 31 then return r end
-+ if r <= 15 then return r end
- end
- werror("bad register name `"..expr.."'")
- end
-
--
--
--
--
--local function parse_reg_base(expr)
-- if expr == "sp" then return 0x3e0 end
-- local base, tp = parse_reg(expr)
-- if parse_reg_type ~= "x" then werror("bad register type") end
-- parse_reg_type = false
-- return shl(base, 5), tp -- why is it shifted not able to make out
--end
--
- local parse_ctx = {}
-
- local loadenv = setfenv and function(s)
-@@ -300,262 +279,35 @@ local function parse_number(n)
- return nil
- end
-
--local function parse_imm(imm, bits, shift, scale, signed)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- local m = sar(n, scale)
-- if shl(m, scale) == n then
-- if signed then
-- local s = sar(m, bits-1)
-- if s == 0 then return shl(m, shift)
-- elseif s == -1 then return shl(m + shl(1, bits), shift) end
-- else
-- if sar(m, bits) == 0 then return shl(m, shift) end
-- end
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
-- return 0
-- end
--end
--
--local function parse_imm12(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- if shr(n, 12) == 0 then
-- return shl(n, 10)
-- elseif band(n, 0xff000fff) == 0 then
-- return shr(n, 2) + 0x00400000
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMM12", 0, imm)
-- return 0
-- end
--end
--
--local function parse_imm13(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- local r64 = parse_reg_type == "x"
-- if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
-- local inv = false
-- if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
-- local t = {}
-- for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
-- local b = table.concat(t)
-- b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
-- local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
-- if p0 then
-- local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
-- if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
-- local s = band(-2*w, 0x3f) - 1
-- if w == 64 then s = s + 0x1000 end
-- if inv then
-- return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
-- else
-- return shl(w-#p0, 16) + shl(s+#p1, 10)
-- end
-- end
-- end
-- werror("out of range immediate `"..imm.."'")
-- elseif r64 then
-- waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
-- actargs[#actargs+1] = format("(unsigned int)((unsigned long
long)(%s)>>32)", imm)
-- return 0
-- else
-- waction("IMM13W", 0, imm)
-- return 0
-- end
--end
--
--local function parse_imm6(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- if n >= 0 and n <= 63 then
-- return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMM6", 0, imm)
-- return 0
-- end
--end
--
--local function parse_imm_load(imm, scale)
-- local n = parse_number(imm)
-- if n then
-- local m = sar(n, scale)
-- if shl(m, scale) == n and m >= 0 and m < 0x1000 then
-- return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
-- elseif n >= -256 and n < 256 then
-- return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMML", 0, imm)
-- return 0
-- end
--end
--
--local function parse_fpimm(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- local m, e = math.frexp(n)
-- local s, e2 = 0, band(e-2, 7)
-- if m < 0 then m = -m; s = 0x00100000 end
-- m = m*32-16
-- if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
-- return s + shl(e2, 17) + shl(m, 13)
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- werror("NYI fpimm action")
-- end
--end
--
--local function parse_shift(expr)
-- local s, s2 = match(expr, "^(%S+)%s*(.*)$")
-- s = map_shift[s]
-- if not s then werror("expected shift operand") end
-- return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
--end
--
--local function parse_lslx16(expr)
-- local n = match(expr, "^lsl%s*#(%d+)$")
-- n = tonumber(n)
-- if not n then werror("expected shift operand") end
-- if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
-- werror("bad shift amount")
-- end
-- return shl(n, 17)
--end
--
--local function parse_extend(expr)
-- local s, s2 = match(expr, "^(%S+)%s*(.*)$")
-- if s == "lsl" then
-- s = parse_reg_type == "x" and 3 or 2
-- else
-- s = map_extend[s]
-- end
-- if not s then werror("expected extend operand") end
-- return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
-+-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
-+-- Encoded as: bddd
-+local function parse_mem_b(arg)
-+ werror("parse_mem_b: not implemented")
-+ return nil
- end
-
--local function parse_cond(expr, inv)
-- local c = map_cond[expr]
-- if not c then werror("expected condition operand") end
-- return shl(bit.bxor(c, inv), 12)
-+-- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
-+-- are GPRs.
-+-- Encoded as: xbddd
-+local function parse_mem_bx(arg)
-+ werror("parse_mem_bx: not implemented")
-+ return nil
- end
-
--local function parse_load(params, nparams, n, op)
-- if params[n+2] then werror("too many operands") end
-- local pn, p2 = params[n], params[n+1]
-- local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-- if not p1 then
-- if not p2 then
-- local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-- if reg and tailr ~= "" then
-- local base, tp = parse_reg_base(reg)
-- if tp then
-- waction("IMML", 0, format(tp.ctypefmt, tailr))
-- return op + base
-- end
-- end
-- end
-- werror("expected address operand")
-- end
-- local scale = shr(op, 30)
-- if p2 then
-- if wb == "!" then werror("bad use of '!'") end
-- op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
-- elseif wb == "!" then
-- local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-- if not p1a then werror("bad use of '!'") end
-- op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
-- else
-- local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
-- op = op + parse_reg_base(p1a)
-- if p2a ~= "" then
-- local imm = match(p2a, "^,%s*#(.*)$")
-- if imm then
-- op = op + parse_imm_load(imm, scale)
-- else
-- local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
-- op = op + shl(parse_reg(p2b), 16) + 0x00200800
-- if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
-- werror("bad index register type")
-- end
-- if p3b == "" then
-- if parse_reg_type ~= "x" then werror("bad index register type")
end
-- op = op + 0x6000
-- else
-- if p3s == "" or p3s == "#0" then
-- elseif p3s == "#"..scale then
-- op = op + 0x1000
-- else
-- werror("bad scale")
-- end
-- if parse_reg_type == "x" then
-- if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
-- elseif p3b == "sxtx" then op = op + 0xe000
-- else
-- werror("bad extend/shift specifier")
-- end
-- else
-- if p3b == "uxtw" then op = op + 0x4000
-- elseif p3b == "sxtw" then op = op + 0xc000
-- else
-- werror("bad extend/shift specifier")
-- end
-- end
-- end
-- end
-- else
-- if wb == "!" then werror("bad use of '!'") end
-- op = op + 0x01000000
-- end
-- end
-- return op
-+-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
-+-- b is a GPR.
-+-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
-+local function parse_mem_by(arg)
-+ werror("parse_mem_by: not implemented")
-+ return nil
- end
-
--local function parse_load_pair(params, nparams, n, op)
-- if params[n+2] then werror("too many operands") end
-- local pn, p2 = params[n], params[n+1]
-- local scale = shr(op, 30) == 0 and 2 or 3
-- local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-- if not p1 then
-- if not p2 then
-- local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-- if reg and tailr ~= "" then
-- local base, tp = parse_reg_base(reg)
-- if tp then
-- waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
-- return op + base + 0x01000000
-- end
-- end
-- end
-- werror("expected address operand")
-- end
-- if p2 then
-- if wb == "!" then werror("bad use of '!'") end
-- op = op + 0x00800000
-- else
-- local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-- if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
-- op = op + (wb == "!" and 0x01800000 or 0x01000000)
-- end
-- return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
-+-- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
-+-- and b and x are GPRs.
-+-- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
-+local function parse_mem_bxy(arg)
-+ werror("parse_mem_bxy: not implemented")
-+ return nil
- end
-
- local function parse_label(label, def)
-@@ -613,33 +365,6 @@ local function op_alias(opname, f)
- end
- end
-
--local function alias_bfx(p)
-- p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
--end
--
--local function alias_bfiz(p)
-- parse_reg(p[1])
-- if parse_reg_type == "w" then
-- p[3] = "#-("..p[3]:sub(2)..")%32"
-- p[4] = "#("..p[4]:sub(2)..")-1"
-- else
-- p[3] = "#-("..p[3]:sub(2)..")%64"
-- p[4] = "#("..p[4]:sub(2)..")-1"
-- end
--end
--
--local alias_lslimm = op_alias("ubfm_4", function(p)
-- parse_reg(p[1])
-- local sh = p[3]:sub(2)
-- if parse_reg_type == "w" then
-- p[3] = "#-("..sh..")%32"
-- p[4] = "#31-("..sh..")"
-- else
-- p[3] = "#-("..sh..")%64"
-- p[4] = "#63-("..sh..")"
-- end
--end)
--
- -- Template strings for s390x instructions.
- map_op = {
- a_4 = "000000005a000000j",
-@@ -1226,11 +951,11 @@ local function parse_template(params, template, nparams, pos)
- for p in gmatch(sub(template, 17), ".") do
- local pr1,pr2,pr3
- if p == "g" then
-- pr1,pr2=param[n],param[n+1]
-+ pr1,pr2=params[n],params[n+1]
- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we
will require n later, so keeping it as it is now
- elseif p == "h" then
-- pr1,pr2=param[n],param[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ pr1,pr2=params[n],params[n+1]
-+ op = op + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
- elseif p == "j" then
- op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) +
shl(parse_reg(param[3]),16) + parse_number(param[4])
- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is
satisfied
---
-2.20.1
-
-
-From cda6dd1ace06f0dd290eae08f5524743ba069512 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 29 Nov 2016 16:29:42 -0500
-Subject: [PATCH 041/247] Add a description of how immediate actions should be
- encoded.
-
-Also sets the action list type to unsigned short (uint16_t) which
-I think is the most appropriate type for s390x (x86 uses uint8_t
-and other platforms use uint32_t).
----
- dynasm/dasm_s390x.h | 4 ++--
- dynasm/dasm_s390x.lua | 13 +++++++++++++
- 2 files changed, 15 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index b327e7a..254db8b 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -9,7 +9,7 @@
- #include <string.h>
- #include <stdlib.h>
-
--#define DASM_ARCH "s390"
-+#define DASM_ARCH "s390x"
-
- #ifndef DASM_EXTERN
- #define DASM_EXTERN(a,b,c,d) 0
-@@ -49,7 +49,7 @@ enum {
- #define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
-
- /* Action list type. */
--typedef const unsigned int *dasm_ActList;
-+typedef const unsigned short *dasm_ActList;
-
- /* Per-section structure. */
- typedef struct dasm_Section {
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 556f7fe..c73e317 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -970,6 +970,19 @@ local function parse_template(params, template, nparams, pos)
-
- end
- end
-+
-+ -- TODO
-+ -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
-+ -- one of two locations relative to the end of the instruction.
-+ -- To make decoding easier we should insert the actions for these immediately
-+ -- after the halfword they modify.
-+ -- For example, take the instruction ahik, which is laid out as follows (each
-+ -- char is 4 bits):
-+ -- o = op code, r = register, i = immediate
-+ -- oorr iiii 00oo
-+ -- This should be emitted as oorr, followed by the immediate action, followed by
-+ -- 00oo.
-+
- wputpos(pos, op)
- end
- function op_template(params, template, nparams)
---
-2.20.1
-
-
-From 027fbcd1d9eb50771f0f6953a2f798e73d8e7d37 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 29 Nov 2016 18:06:59 -0500
-Subject: [PATCH 042/247] Breakup instructions and action list into halfword
- chunks.
-
-This should allow us to encode the instructions relatively naturally
-and efficiently. For now I've escaped halfwords with a value <=
-the maximum action. This means that 0 is escaped which probably
-isn't ideal, so we may want to revisit that decision at some point.
----
- dynasm/dasm_s390x.lua | 100 ++++++++++++++++++++++--------------------
- 1 file changed, 53 insertions(+), 47 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c73e317..ef7f35e 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -48,8 +48,10 @@ local maxsecpos = 25 -- Keep this low, to avoid excessively long C
lines.
-
- -- Action name -> action number.
- local map_action = {}
-+local max_action = 0
- for n,name in ipairs(action_names) do
- map_action[name] = n-1
-+ max_action = n
- end
-
- -- Action list buffer.
-@@ -77,25 +79,35 @@ end
- local function writeactions(out, name)
- local nn = #actlist
- if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
-- out:write("static const unsigned int ", name, "[", nn, "] =
{\n")
-- for i = 1,nn-1 do
-- assert(out:write("0x", tohex(actlist[i]), ",\n"))
-+ out:write("static const unsigned short ", name, "[", nn, "] =
{")
-+ local esc = false -- also need to escape for action arguments
-+ for i = 1,nn do
-+ assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8)))
-+ if i ~= nn then assert(out:write(",")) end
-+ local name = action_names[actlist[i]+1]
-+ if not esc and name then
-+ assert(out:write(" /* ", name, " */"))
-+ esc = name == "ESC" or name == "SECTION"
-+ else
-+ esc = false
-+ end
- end
-- assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
-+ assert(out:write("\n};\n\n"))
- end
-
- ------------------------------------------------------------------------------
-
---- Add word to action list.
--local function wputxw(n)
-- assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of
range") -- s390x inst can be 6 bytes
-+-- Add halfword to action list.
-+local function wputxhw(n)
-+ assert(n >= 0 and n <= 0xffff, "halfword out of range")
- actlist[#actlist+1] = n
- end
-
- -- Add action to list with optional arg. Advance buffer pos, too.
- local function waction(action, val, a, num)
- local w = assert(map_action[action], "bad action name
`"..action.."'")
-- wputxw(w * 0x10000 + (val or 0))
-+ wputxhw(w)
-+ if val then wputxhw(val) end -- Not sure about this, do we always have one arg?
- if a then actargs[#actargs+1] = a end
- if a or num then secpos = secpos + (num or 1) end
- end
-@@ -109,29 +121,19 @@ local function wflush(term)
- secpos = 1 -- The actionlist offset occupies a buffer position, too.
- end
-
---- Put escaped word. --Need to check this as well, not sure how it will work on
s390x
--local function wputw(n)
-- if n <= 0x000fffff then waction("ESC") end
-- wputxw(n)
-+-- Put escaped halfword.
-+local function wputhw(n)
-+ if n <= max_action then waction("ESC") end
-+ wputxhw(n)
- end
-
---- Reserve position for word.
-+-- Reserve position for halfword.
- local function wpos()
- local pos = #actlist+1
- actlist[pos] = ""
- return pos
- end
-
---- Store word to reserved position. -- added 2 bytes more since s390x has 6 bytes inst
as well
--local function wputpos(pos, n)
-- assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of
range")
-- if n <= 0x000fffff then
-- insert(actlist, pos+1, n)
-- n = map_action.ESC * 0x10000
-- end
-- actlist[pos] = n
--end
--
- ------------------------------------------------------------------------------
-
- -- Global label name -> global label number. With auto assignment on 1st use.
-@@ -942,26 +944,44 @@ end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
- local function parse_template(params, template, nparams, pos)
-- local op = tonumber(sub(template, 1, 16), 16) --
-- -- 00000000005a0000 converts to 90
-+ -- Read the template in 16-bit chunks.
-+ -- Leading halfword zeroes should not be written out.
-+ local op0 = tonumber(sub(template, 5, 8), 16)
-+ local op1 = tonumber(sub(template, 9, 12), 16)
-+ local op2 = tonumber(sub(template, 13, 16), 16)
-+
- local n,rs = 1,26
-
- parse_reg_type = false
- -- Process each character.
-+ -- TODO
-+ -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
-+ -- one of two locations relative to the end of the instruction.
-+ -- To make decoding easier we should insert the actions for these immediately
-+ -- after the halfword they modify.
-+ -- For example, take the instruction ahik, which is laid out as follows (each
-+ -- char is 4 bits):
-+ -- o = op code, r = register, i = immediate
-+ -- oorr iiii 00oo
-+ -- This should be emitted as oorr, followed by the immediate action, followed by
-+ -- 00oo.
- for p in gmatch(sub(template, 17), ".") do
-- local pr1,pr2,pr3
-+ local pr1,pr2,pr3
- if p == "g" then
- pr1,pr2=params[n],params[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we
will require n later, so keeping it as it is now
-+ op2 = op2 + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ wputhw(op2)
- elseif p == "h" then
- pr1,pr2=params[n],params[n+1]
-- op = op + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
-+ op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
-+ wputhw(op1); wputhw(op2)
- elseif p == "j" then
-- op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) +
shl(parse_reg(param[3]),16) + parse_number(param[4])
-- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is
satisfied
-+ op1 = op1 + shl(parse_reg(param[1], 8))
-+ wputhw(op1); wputhw(op2)
-+ -- TODO: parse param[2] using parse_mem_bx, need to put x into op1, b and d
-+ -- into op2, emitting an action for the DISP12 afterwards if necessary.
- elseif p == "k" then
-- op = op + shl(parse_reg(param[1]),40) + shl(parse_reg(param[2]),36) +
shl(parse_reg(param[3]),32) + parse_number(param[4]) + parse_number(param[5])
-- -- assuming params are passed as (R1,X2,B2,DL2,DH2)
-+
- elseif p == "l" then
-
- elseif p == "m" then
-@@ -971,30 +991,16 @@ local function parse_template(params, template, nparams, pos)
- end
- end
-
-- -- TODO
-- -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
-- -- one of two locations relative to the end of the instruction.
-- -- To make decoding easier we should insert the actions for these immediately
-- -- after the halfword they modify.
-- -- For example, take the instruction ahik, which is laid out as follows (each
-- -- char is 4 bits):
-- -- o = op code, r = register, i = immediate
-- -- oorr iiii 00oo
-- -- This should be emitted as oorr, followed by the immediate action, followed by
-- -- 00oo.
--
-- wputpos(pos, op)
- end
- function op_template(params, template, nparams)
- if not params then return template:gsub("%x%x%x%x%x%x%x%x", "")
end
- -- Limit number of section buffer positions used by a single dasm_put().
- -- A single opcode needs a maximum of 3 positions.
- if secpos+3 > maxsecpos then wflush() end
-- local pos = wpos()
- local lpos, apos, spos = #actlist, #actargs, secpos
- local ok, err
- for t in gmatch(template, "[^|]+") do
-- ok, err = pcall(parse_template, params, t, nparams, pos)
-+ ok, err = pcall(parse_template, params, t, nparams)
- if ok then return end
- secpos = spos
- actlist[lpos+1] = nil
---
-2.20.1
-
-
-From 97face5d5fa32afbe87e0b7acda3fee55054d35f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 30 Nov 2016 14:11:01 -0500
-Subject: [PATCH 043/247] Fix C code in header file and handle br template.
-
-This means that code like this can now be generated on s390x:
-
-| ar r2, r3
-| br r14
-
-Still need to add support for immediates, memory, labels, other
-instructions and so on.
----
- dynasm/dasm_s390x.h | 56 +++++++++++++++++++------------------------
- dynasm/dasm_s390x.lua | 13 ++++++----
- 2 files changed, 33 insertions(+), 36 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 254db8b..837a2ed 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -186,10 +186,10 @@ void dasm_put(Dst_DECL, int start, ...)
-
- va_start(ap, start);
- while (1) {
-- unsigned int ins = *p++;
-- unsigned int action = (ins >> 16);
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
- if (action >= DASM__MAX) {
-- ofs += 4;
-+ ofs += 2;
- } else {
- int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
- switch (action) {
-@@ -231,22 +231,11 @@ void dasm_put(Dst_DECL, int start, ...)
- *pl = -pos; /* Label exists now. */
- b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
-- case DASM_IMM:
--#ifdef DASM_CHECKS
-- CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
--#endif
-- n >>= ((ins>>10)&31);
--#ifdef DASM_CHECKS
-- if (ins & 0x8000)
-- CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) ==
0, RANGE_I);
-- else
-- CK((n>>((ins>>5)&31)) == 0, RANGE_I);
--#endif
-- b[pos++] = n;
-- break;
-- case DASM_IMMSH:
-- CK((n >> 6) == 0, RANGE_I);
-- b[pos++] = n;
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ fprintf(stderr, "not implemented\n");
- break;
- }
- }
-@@ -294,8 +283,8 @@ int dasm_link(Dst_DECL, size_t *szp)
- while (pos != lastpos) {
- dasm_ActList p = D->actionlist + b[pos++];
- while (1) {
-- unsigned int ins = *p++;
-- unsigned int action = (ins >> 16);
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
- switch (action) {
- case DASM_STOP: case DASM_SECTION: goto stop;
- case DASM_ESC: p++; break;
-@@ -303,7 +292,12 @@ int dasm_link(Dst_DECL, size_t *szp)
- case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
- case DASM_REL_LG: case DASM_REL_PC: pos++; break;
- case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-- case DASM_IMM: case DASM_IMMSH: pos++; break;
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ fprintf(stderr, "not implemented\n");
-+ break;
- }
- }
- stop: (void)0;
-@@ -328,7 +322,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- {
- dasm_State *D = Dst_REF;
- char *base = (char *)buffer;
-- unsigned int *cp = (unsigned int *)buffer;
-+ unsigned short *cp = (unsigned short *)buffer;
- int secnum;
-
- /* Encode all code sections. No support for data sections (yet). */
-@@ -340,8 +334,8 @@ int dasm_encode(Dst_DECL, void *buffer)
- while (b != endb) {
- dasm_ActList p = D->actionlist + *b++;
- while (1) {
-- unsigned int ins = *p++;
-- unsigned int action = (ins >> 16);
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
- int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
- switch (action) {
- case DASM_STOP: case DASM_SECTION: goto stop;
-@@ -350,7 +344,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
- goto patchrel;
- case DASM_ALIGN:
-- ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
-+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x0707;
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-@@ -367,11 +361,11 @@ int dasm_encode(Dst_DECL, void *buffer)
- ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
- break;
- case DASM_LABEL_PC: break;
-- case DASM_IMM:
-- cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
-- break;
-- case DASM_IMMSH:
-- cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) :
((n&31)<<6)|(n&32);
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ fprintf(stderr, "not implemented\n");
- break;
- default: *cp++ = ins; break;
- }
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index ef7f35e..52acbdb 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -933,13 +933,13 @@ for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
- -- TODO: replace 'B' with correct encoding.
- -- brc
-- map_op["j"..cond.."_1"] =
"00000000"..tohex(0xa7040000+shl(c, 20)).."B"
-+ map_op["j"..cond.."_1"] =
"00000000"..tohex(0xa7040000+shl(c, 20)).."w"
- -- brcl
-- map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c,
4)).."00000000".."B"
-+ map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c,
4)).."00000000".."x"
- -- bc
-- map_op["b"..cond.."_1"] =
"00000000"..tohex(0x47000000+shl(c, 20)).."B"
-+ map_op["b"..cond.."_1"] =
"00000000"..tohex(0x47000000+shl(c, 20)).."y"
- -- bcr
-- map_op["b"..cond.."r_1"] =
"00000000"..tohex(0x0700+shl(c, 4)).."B"
-+ map_op["b"..cond.."r_1"] =
"00000000"..tohex(0x0700+shl(c, 4)).."z"
- end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
-@@ -969,7 +969,7 @@ local function parse_template(params, template, nparams, pos)
- local pr1,pr2,pr3
- if p == "g" then
- pr1,pr2=params[n],params[n+1]
-- op2 = op2 + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
- wputhw(op2)
- elseif p == "h" then
- pr1,pr2=params[n],params[n+1]
-@@ -988,6 +988,9 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "n" then
-
-+ elseif p == "z" then
-+ op2 = op2 + parse_gpr(params[1])
-+ wputhw(op2)
- end
- end
-
---
-2.20.1
-
-
-From e9275c57bd8c1749b755cbabe4ffa272ff599298 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 30 Nov 2016 16:05:36 -0500
-Subject: [PATCH 044/247] Add initial support for D(B,X) memory operands
- (12-bit only).
-
-Most RX instructions don't specify the correct number of operands
-so this won't work on many yet. It also won't yet emit an action
-if D is a variable rather than a constant.
----
- dynasm/dasm_s390x.lua | 60 ++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 54 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 52acbdb..eac9d60 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -281,6 +281,32 @@ local function parse_number(n)
- return nil
- end
-
-+local function is_uint12(num)
-+ return 0 <= num and num < 4096
-+end
-+
-+local function is_int20(num)
-+ return -shl(1, 19) <= num and num < shl(1, 19)
-+end
-+
-+-- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
-+-- If x is not specified then it is 0.
-+local function split_memop(arg)
-+ local reg = "r[0-1]?[0-9]"
-+ local d, x, b = match(arg, "^(.*)%(("..reg.."),
("..reg..")%)$")
-+ if d then
-+ return d, parse_gpr(x), parse_gpr(b)
-+ end
-+ local d, b = match(arg, "^(.*)%(("..reg..")%)$")
-+ if d then
-+ return d, 0, parse_gpr(b)
-+ end
-+ -- TODO: handle values without registers?
-+ -- TODO: handle registers without a displacement?
-+ werror("bad memory operand: "..arg)
-+ return nil
-+end
-+
- -- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
- -- Encoded as: bddd
- local function parse_mem_b(arg)
-@@ -292,6 +318,17 @@ end
- -- are GPRs.
- -- Encoded as: xbddd
- local function parse_mem_bx(arg)
-+ local d, x, b = split_memop(arg)
-+ local dval = tonumber(d)
-+ if dval then
-+ if not is_uint12(dval) then
-+ werror("displacement out of range: ", dval)
-+ end
-+ return dval, x, b, nil
-+ end
-+ -- TODO: handle d being a symbol.
-+ -- Action is currently the final return value (the caller needs to add it
-+ -- to the action list at a later point).
- werror("parse_mem_bx: not implemented")
- return nil
- end
-@@ -369,7 +406,7 @@ end
-
- -- Template strings for s390x instructions.
- map_op = {
--a_4 = "000000005a000000j",
-+a_2 = "000000005a000000j",
- ar_2 = "0000000000001a00g",
- ay_5 = "0000e3000000005al",
- ag_5 = "0000e30000000008l",
-@@ -853,7 +890,7 @@ sqdbr_2 = "00000000b3150000h",
- sqdr_2 = "00000000b2440000h",
- sqebr_2 = "00000000b3140000h",
- sqer_2 = "00000000b2450000h",
--st_4 = "0000000050000000j",
-+st_2 = "0000000050000000j",
- sty_5 = "0000e30000000050l",
- stg_5 = "0000e30000000024l",
- std_4 = "0000000060000000j",
-@@ -976,10 +1013,13 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
- wputhw(op1); wputhw(op2)
- elseif p == "j" then
-- op1 = op1 + shl(parse_reg(param[1], 8))
-- wputhw(op1); wputhw(op2)
-- -- TODO: parse param[2] using parse_mem_bx, need to put x into op1, b and d
-- -- into op2, emitting an action for the DISP12 afterwards if necessary.
-+ local d, x, b, a = parse_mem_bx(params[2])
-+ op1 = op1 + shl(parse_gpr(params[1]), 4) + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then
-+ werror("disp12 actions not yet implemented")
-+ end
- elseif p == "k" then
-
- elseif p == "l" then
-@@ -988,6 +1028,14 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "n" then
-
-+ elseif p == "y" then
-+ local d, x, b, a = parse_mem_bx(params[1])
-+ op1 = op1 + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then
-+ werror("disp12 actions not yet implemented")
-+ end
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
- wputhw(op2)
---
-2.20.1
-
-
-From 2fd8a561aa56f6f369e6e9e904d7ab86a1f7f546 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 30 Nov 2016 17:07:17 -0500
-Subject: [PATCH 045/247] Minor cleanup of regular expressions.
-
----
- dynasm/dasm_s390x.lua | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index eac9d60..c15719b 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -242,7 +242,7 @@ local map_cond = {
- local parse_reg_type
-
- local function parse_gpr(expr)
-- local r = match(expr, "^r([1-3]?[0-9])$")
-+ local r = match(expr, "^r(1?[0-9])$")
- if r then
- r = tonumber(r)
- if r <= 15 then return r, tp end
-@@ -251,7 +251,7 @@ local function parse_gpr(expr)
- end
-
- local function parse_fpr(expr)
-- local r = match(expr, "^f([1-3]?[0-9])$")
-+ local r = match(expr, "^f(1?[0-9])$")
- if r then
- r = tonumber(r)
- if r <= 15 then return r end
-@@ -292,7 +292,7 @@ end
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
-- local reg = "r[0-1]?[0-9]"
-+ local reg = "r1?[0-9]"
- local d, x, b = match(arg, "^(.*)%(("..reg.."),
("..reg..")%)$")
- if d then
- return d, parse_gpr(x), parse_gpr(b)
---
-2.20.1
-
-
-From 82e06e5b60553fd3c99f5cf1b0dfa9676822d32d Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Thu, 1 Dec 2016 15:15:06 +0530
-Subject: [PATCH 046/247] Changed the templates based on no of arguments
-
-Have changed the templates based on number of parameters passed, mainly the memory and
immediate ones are modified.
----
- dynasm/dasm_s390x.lua | 552 +++++++++++++++++++++---------------------
- 1 file changed, 276 insertions(+), 276 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c15719b..467e218 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -408,148 +408,148 @@ end
- map_op = {
- a_2 = "000000005a000000j",
- ar_2 = "0000000000001a00g",
--ay_5 = "0000e3000000005al",
--ag_5 = "0000e30000000008l",
-+ay_2 = "0000e3000000005al",
-+ag_2 = "0000e30000000008l",
- agr_2 = "00000000b9080000h",
--agf_5 = "0000e30000000018l",
-+agf_2 = "0000e30000000018l",
- agfr_2 = "00000000b9180000h",
- axbr_2 = "00000000b34a0000h",
- adbr_2 = "00000000b31a0000h",
- aebr_2 = "00000000b30a0000h",
--ah_4 = "000000004a000000j",
--ahy_5 = "0000e3000000007al",
--afi_3 = "0000c20900000000n",
--agfi_3 = "0000c20800000000n",
--aih_3 = "0000cc0800000000n",
--al_4 = "000000005e000000j",
-+ah_2 = "000000004a000000j",
-+ahy_2 = "0000e3000000007al",
-+afi_2 = "0000c20900000000n",
-+agfi_2 = "0000c20800000000n",
-+aih_2 = "0000cc0800000000n",
-+al_2 = "000000005e000000j",
- alr_2 = "0000000000001e00g",
--aly_5 = "0000e3000000005el",
--alg_5 = "0000e3000000000al",
-+aly_2 = "0000e3000000005el",
-+alg_2 = "0000e3000000000al",
- algr_2 = "00000000b90a0000h",
--algf_5 = "0000e3000000001al",
-+algf_2 = "0000e3000000001al",
- algfr_2 = "00000000b91a0000h",
--alfi_3 = "0000c20b00000000n",
--algfi_3 = "0000c20a00000000n",
--alc_5 = "0000e30000000098l",
-+alfi_2 = "0000c20b00000000n",
-+algfi_2 = "0000c20a00000000n",
-+alc_2 = "0000e30000000098l",
- alcr_2 = "00000000b9980000h",
--alcg_5 = "0000e30000000088l",
-+alcg_2 = "0000e30000000088l",
- alcgr_2 = "00000000b9880000h",
--alsih_3 = "0000cc0a00000000n",
--alsihn_3 = "0000cc0b00000000n",
-+alsih_2 = "0000cc0a00000000n",
-+alsihn_2 = "0000cc0b00000000n",
- axr_2 = "0000000000003600g",
--ad_4 = "000000006a000000j",
-+ad_2 = "000000006a000000j",
- adr_2 = "0000000000002a00g",
--ae_4 = "000000007a000000j",
-+ae_2 = "000000007a000000j",
- aer_2 = "0000000000003a00g",
--aw_4 = "000000006e000000j",
-+aw_2 = "000000006e000000j",
- awr_2 = "0000000000002e00g",
--au_4 = "000000007e000000j",
-+au_2 = "000000007e000000j",
- aur_2 = "0000000000003e00g",
--n_4 = "0000000054000000j",
-+n_2 = "0000000054000000j",
- nr_2 = "0000000000001400g",
--ny_5 = "0000e30000000054l",
--ng_5 = "0000e30000000080l",
-+ny_2 = "0000e30000000054l",
-+ng_2 = "0000e30000000080l",
- ngr_2 = "00000000b9800000h",
--nihf_3 = "0000c00a00000000n",
--nilf_3 = "0000c00b00000000n",
--bal_4 = "0000000045000000j",
-+nihf_2 = "0000c00a00000000n",
-+nilf_2 = "0000c00b00000000n",
-+bal_2 = "0000000045000000j",
- balr_2 = "000000000000500g",
--bas_4 = "000000004d000000j",
-+bas_2 = "000000004d000000j",
- basr_2 = "0000000000000d00g",
- bassm_2 = "0000000000000c00g",
- bsa_2 = "00000000b25a0000h",
- bsm_2 = "0000000000000b00g",
- bakr_2 = "00000000b2400000h",
- bsg_2 = "00000000b2580000h",
--bc_4 = "0000000047000000k",
-+bc_2 = "0000000047000000k",
- bcr_2 = "000000000000700g",
--bct_4 = "0000000046000000j",
-+bct_2 = "0000000046000000j",
- bctr_2 = "000000000000600g",
--bctg_5 = "0000e30000000046l",
-+bctg_2 = "0000e30000000046l",
- bctgr_2 = "00000000b9460000h",
--bxh_4 = "0000000086000000q",
--bxhg_5 = "0000eb0000000044s",
--bxle_4 = "0000000087000000q",
--bxleg_5 = "0000eb0000000045s",
--brasl_3 = "0000c00500000000o",
--brcl_3 = "0000c00400000000p",
--brcth_3 = "0000cc0600000000o",
-+bxh_3 = "0000000086000000q",
-+bxhg_3 = "0000eb0000000044s",
-+bxle_3 = "0000000087000000q",
-+bxleg_3 = "0000eb0000000045s",
-+brasl_2 = "0000c00500000000o",
-+brcl_2 = "0000c00400000000p",
-+brcth_2 = "0000cc0600000000o",
- cksm_2 = "00000000b2410000h",
- km_2 = "00000000b92e0000h",
- kmf_2 = "00000000b92a0000h",
- kmc_2 = "00000000b92f0000h",
- kmo_2 = "00000000b92b0000h",
--c_4 = "0000000059000000j",
-+c_2 = "0000000059000000j",
- cr_2 = "0000000000001900g",
--cy_5 = "0000e30000000059l",
--cg_5 = "0000e30000000020l",
-+cy_2 = "0000e30000000059l",
-+cg_2 = "0000e30000000020l",
- cgr_2 = "00000000b9200000h",
--cgf_5 = "0000e30000000030l",
-+cgf_2 = "0000e30000000030l",
- cgfr_2 = "00000000b9300000h",
- cxbr_2 = "00000000b3490000h",
- cxtr_2 = "00000000b3ec0000h",
- cxr_2 = "00000000b3690000h",
- cdbr_2 = "00000000b3190000h",
- cdtr_2 = "00000000b3e40000h",
--cd_4 = "0000000069000000j",
-+cd_2 = "0000000069000000j",
- cdr_2 = "0000000000002900g",
- cebr_2 = "00000000b3090000h",
--ce_4 = "0000000079000000j",
-+ce_2 = "0000000079000000j",
- cer_2 = "0000000000003900g",
- kxbr_2 = "00000000b3480000h",
- kxtr_2 = "00000000b3e80000h",
- kdbr_2 = "00000000b3180000h",
- kdtr_2 = "00000000b3e00000h",
- kebr_2 = "00000000b3080000h",
--cs_4 = "00000000ba000000q",
--csy_5 = "0000eb0000000014s",
--csg_5 = "0000eb0000000030s",
-+cs_3 = "00000000ba000000q",
-+csy_3 = "0000eb0000000014s",
-+csg_3 = "0000eb0000000030s",
- csp_2 = "00000000b2500000h",
- cspg_2 = "00000000b98a0000h",
- cextr_2 = "00000000b3fc0000h",
- cedtr_2 = "00000000b3f40000h",
--cds_4 = "00000000bb000000q",
--cdsy_5 = "0000eb0000000031s",
--cdsg_5 = "0000eb000000003es",
--ch_4 = "0000000049000000j",
--chy_5 = "0000e30000000079l",
--cgh_5 = "0000e30000000034l",
--chrl_3 = "0000c60500000000o",
--cghrl_3 = "0000c60400000000o",
--chf_5 = "0000e300000000cdl",
-+cds_3 = "00000000bb000000q",
-+cdsy_3 = "0000eb0000000031s",
-+cdsg_3 = "0000eb000000003es",
-+ch_2 = "0000000049000000j",
-+chy_2 = "0000e30000000079l",
-+cgh_2 = "0000e30000000034l",
-+chrl_2 = "0000c60500000000o",
-+cghrl_2 = "0000c60400000000o",
-+chf_2 = "0000e300000000cdl",
- chhr_2 = "00000000b9cd0000h",
- chlr_2 = "00000000b9dd0000h",
--cfi_3 = "0000c20d00000000n",
--cgfi_3 = "0000c20c00000000n",
--cih_3 = "0000cc0d00000000n",
--cl_4 = "0000000055000000j",
-+cfi_2 = "0000c20d00000000n",
-+cgfi_2 = "0000c20c00000000n",
-+cih_2 = "0000cc0d00000000n",
-+cl_2 = "0000000055000000j",
- clr_2 = "0000000000001500g",
--cly_5 = "0000e30000000055l",
--clg_5 = "0000e30000000021l",
-+cly_2 = "0000e30000000055l",
-+clg_2 = "0000e30000000021l",
- clgr_2 = "00000000b9210000h",
--clgf_5 = "0000e30000000031l",
-+clgf_2 = "0000e30000000031l",
- clgfr_2 = "00000000b9310000h",
--clmh_5 = "0000eb0000000020t",
--clm_4 = "00000000bd000000r",
--clmy_5 = "0000eb0000000021t",
--clhf_5 = "0000e300000000cfl",
-+clmh_3 = "0000eb0000000020t",
-+clm_3 = "00000000bd000000r",
-+clmy_3 = "0000eb0000000021t",
-+clhf_2 = "0000e300000000cfl",
- clhhr_2 = "00000000b9cf0000h",
- clhlr_2 = "00000000b9df0000h",
--clfi_3 = "0000c20f00000000n",
--clgfi_3 = "0000c20e00000000n",
--clih_3 = "0000cc0f00000000n",
-+clfi_2 = "0000c20f00000000n",
-+clgfi_2 = "0000c20e00000000n",
-+clih_2 = "0000cc0f00000000n",
- clcl_2 = "0000000000000f00g",
--clcle_4 = "00000000a9000000q",
--clclu_5 = "0000eb000000008fs",
--clrl_3 = "0000c60f00000000o",
--clhrl_3 = "0000c60700000000o",
--clgrl_3 = "0000c60a00000000o",
--clghrl_3 = "0000c60600000000o",
--clgfrl_3 = "0000c60e00000000o",
-+clcle_3 = "00000000a9000000q",
-+clclu_3 = "0000eb000000008fs",
-+clrl_2 = "0000c60f00000000o",
-+clhrl_2 = "0000c60700000000o",
-+clgrl_2 = "0000c60a00000000o",
-+clghrl_2 = "0000c60600000000o",
-+clgfrl_2 = "0000c60e00000000o",
- clst_2 = "00000000b25d0000h",
--crl_3 = "0000c60d00000000o",
--cgrl_3 = "0000c60800000000o",
--cgfrl_3 = "0000c60c00000000o",
-+crl_2 = "0000c60d00000000o",
-+cgrl_2 = "0000c60800000000o",
-+cgfrl_2 = "0000c60c00000000o",
- cuse_2 = "00000000b2570000h",
- cmpsc_2 = "00000000b2630000h",
- kimd_2 = "00000000b93e0000h",
-@@ -577,49 +577,49 @@ cxstr_2 = "00000000b3fb0000h",
- cdstr_2 = "00000000b3f30000h",
- cxutr_2 = "00000000b3fa0000h",
- cdutr_2 = "00000000b3f20000h",
--cvb_4 = "000000004f000000j",
--cvby_5 = "0000e30000000006l",
--cvbg_5 = "0000e3000000000el",
--cvd_4 = "000000004e000000j",
--cvdy_5 = "0000e30000000026l",
--cvdg_5 = "0000e3000000002el",
-+cvb_2 = "000000004f000000j",
-+cvby_2 = "0000e30000000006l",
-+cvbg_2 = "0000e3000000000el",
-+cvd_2 = "000000004e000000j",
-+cvdy_2 = "0000e30000000026l",
-+cvdg_2 = "0000e3000000002el",
- cuxtr_2 = "00000000b3ea0000h",
- cudtr_2 = "00000000b3e20000h",
- cu42_2 = "00000000b9b30000h",
- cu41_2 = "00000000b9b20000h",
- cpya_2 = "00000000b24d0000h",
--d_4 = "000000005d000000j",
-+d_2 = "000000005d000000j",
- dr_2 = "0000000000001d00g",
- dxbr_2 = "00000000b34d0000h",
- dxr_2 = "00000000b22d0000h",
- ddbr_2 = "00000000b31d0000h",
--dd_4 = "000000006d000000j",
-+dd_2 = "000000006d000000j",
- ddr_2 = "0000000000002d00g",
- debr_2 = "00000000b30d0000h",
--de_4 = "000000007d000000j",
-+de_2 = "000000007d000000j",
- der_2 = "0000000000003d00g",
--dl_5 = "0000e30000000097l",
-+dl_2 = "0000e30000000097l",
- dlr_2 = "00000000b9970000h",
--dlg_5 = "0000e30000000087l",
-+dlg_2 = "0000e30000000087l",
- dlgr_2 = "00000000b9870000h",
--dsg_5 = "0000e3000000000dl",
-+dsg_2 = "0000e3000000000dl",
- dsgr_2 = "00000000b90d0000h",
--dsgf_5 = "0000e3000000001dl",
-+dsgf_2 = "0000e3000000001dl",
- dsgfr_2 = "00000000b91d0000h",
--x_4 = "0000000057000000j",
-+x_2 = "0000000057000000j",
- xr_2 = "0000000000001700g",
--xy_5 = "0000e30000000057l",
--xg_5 = "0000e30000000082l",
-+xy_2 = "0000e30000000057l",
-+xg_2 = "0000e30000000082l",
- xgr_2 = "00000000b9820000h",
--xihf_3 = "0000c00600000000n",
--xilf_3 = "0000c00700000000n",
--ex_4 = "0000000044000000j",
--exrl_3 = "0000c60000000000o",
-+xihf_2 = "0000c00600000000n",
-+xilf_2 = "0000c00700000000n",
-+ex_2 = "0000000044000000j",
-+exrl_2 = "0000c60000000000o",
- ear_2 = "00000000b24f0000h",
- esea_2 = "00000000b99d0000h",
- eextr_2 = "00000000b3ed0000h",
- eedtr_2 = "00000000b3e50000h",
--ecag_5 = "0000eb000000004cs",
-+ecag_3 = "0000eb000000004cs",
- efpc_2 = "00000000b38c0000h",
- epar_2 = "00000000b2260000h",
- epair_2 = "00000000b99a0000h",
-@@ -635,52 +635,52 @@ flogr_2 = "00000000b9830000h",
- hdr_2 = "0000000000002400g",
- her_2 = "0000000000003400g",
- iac_2 = "00000000b2240000h",
--ic_4 = "0000000043000000j",
--icy_5 = "0000e30000000073l",
--icmh_5 = "0000eb0000000080t",
--icm_4 = "00000000bf000000r",
--icmy_5 = "0000eb0000000081t",
--iihf_3 = "0000c00800000000n",
--iilf_3 = "0000c00900000000n",
-+ic_2 = "0000000043000000j",
-+icy_2 = "0000e30000000073l",
-+icmh_3 = "0000eb0000000080t",
-+icm_3 = "00000000bf000000r",
-+icmy_3 = "0000eb0000000081t",
-+iihf_2 = "0000c00800000000n",
-+iilf_2 = "0000c00900000000n",
- ipm_2 = "00000000b2220000h",
- iske_2 = "00000000b2290000h",
- ivsk_2 = "00000000b2230000h",
--l_4 = "0000000058000000j",
-+l_2 = "0000000058000000j",
- lr_2 = "0000000000001800g",
--ly_5 = "0000e30000000058l",
--lg_5 = "0000e30000000004l",
-+ly_2 = "0000e30000000058l",
-+lg_2 = "0000e30000000004l",
- lgr_2 = "00000000b9040000h",
--lgf_5 = "0000e30000000014l",
-+lgf_2 = "0000e30000000014l",
- lgfr_2 = "00000000b9140000h",
- lxr_2 = "00000000b3650000h",
--ld_4 = "0000000068000000j",
-+ld_2 = "0000000068000000j",
- ldr_2 = "0000000000002800g",
--ldy_5 = "0000ed0000000065l",
--le_4 = "0000000078000000j",
-+ldy_2 = "0000ed0000000065l",
-+le_2 = "0000000078000000j",
- ler_2 = "0000000000003800g",
--ley_5 = "0000ed0000000064l",
--lam_4 = "000000009a000000q",
--lamy_5 = "0000eb000000009as",
--la_4 = "0000000041000000j",
--lay_5 = "0000e30000000071l",
--lae_4 = "0000000051000000j",
--laey_5 = "0000e30000000075l",
--larl_3 = "0000c00000000000o",
--laa_5 = "0000eb00000000f8s",
--laag_5 = "0000eb00000000e8s",
--laal_5 = "0000eb00000000fas",
--laalg_5 = "0000eb00000000eas",
--lan_5 = "0000eb00000000f4s",
--lang_5 = "0000eb00000000e4s",
--lax_5 = "0000eb00000000f7s",
--laxg_5 = "0000eb00000000e7s",
--lao_5 = "0000eb00000000f6s",
--laog_5 = "0000eb00000000e6s",
--lt_5 = "0000e30000000012l",
-+ley_2 = "0000ed0000000064l",
-+lam_3 = "000000009a000000q",
-+lamy_3 = "0000eb000000009as",
-+la_2 = "0000000041000000j",
-+lay_2 = "0000e30000000071l",
-+lae_2 = "0000000051000000j",
-+laey_2 = "0000e30000000075l",
-+larl_2 = "0000c00000000000o",
-+laa_3 = "0000eb00000000f8s",
-+laag_3 = "0000eb00000000e8s",
-+laal_3 = "0000eb00000000fas",
-+laalg_3 = "0000eb00000000eas",
-+lan_3 = "0000eb00000000f4s",
-+lang_3 = "0000eb00000000e4s",
-+lax_3 = "0000eb00000000f7s",
-+laxg_3 = "0000eb00000000e7s",
-+lao_3 = "0000eb00000000f6s",
-+laog_3 = "0000eb00000000e6s",
-+lt_2 = "0000e30000000012l",
- ltr_2 = "0000000000001200g",
--ltg_5 = "0000e30000000002l",
-+ltg_2 = "0000e30000000002l",
- ltgr_2 = "00000000b9020000h",
--ltgf_5 = "0000e30000000032l",
-+ltgf_2 = "0000e30000000032l",
- ltgfr_2 = "00000000b9120000h",
- ltxbr_2 = "00000000b3420000h",
- ltxtr_2 = "00000000b3de0000h",
-@@ -690,11 +690,11 @@ ltdtr_2 = "00000000b3d60000h",
- ltdr_2 = "0000000000002200g",
- ltebr_2 = "00000000b3020000h",
- lter_2 = "0000000000003200g",
--lb_5 = "0000e30000000076l",
-+lb_2 = "0000e30000000076l",
- lbr_2 = "00000000b9260000h",
--lgb_5 = "0000e30000000077l",
-+lgb_2 = "0000e30000000077l",
- lgbr_2 = "00000000b9060000h",
--lbh_5 = "0000e300000000c0l",
-+lbh_2 = "0000e300000000c0l",
- lcr_2 = "0000000000001300g",
- lcgr_2 = "00000000b9030000h",
- lcgfr_2 = "00000000b9130000h",
-@@ -705,52 +705,52 @@ lcdr_2 = "0000000000002300g",
- lcdfr_2 = "00000000b3730000h",
- lcebr_2 = "00000000b3030000h",
- lcer_2 = "0000000000003300g",
--lctl_4 = "00000000b7000000q",
--lctlg_5 = "0000eb000000002fs",
-+lctl_3 = "00000000b7000000q",
-+lctlg_3 = "0000eb000000002fs",
- fixr_2 = "00000000b3670000h",
- fidr_2 = "00000000b37f0000h",
- fier_2 = "00000000b3770000h",
- ldgr_2 = "00000000b3c10000h",
- lgdr_2 = "00000000b3cd0000h",
--lh_4 = "0000000048000000j",
-+lh_2 = "0000000048000000j",
- lhr_2 = "00000000b9270000h",
--lhy_5 = "0000e30000000078l",
--lgh_5 = "0000e30000000015l",
-+lhy_2 = "0000e30000000078l",
-+lgh_2 = "0000e30000000015l",
- lghr_2 = "00000000b9070000h",
--lhh_5 = "0000e300000000c4l",
--lhrl_3 = "0000c40500000000o",
--lghrl_3 = "0000c40400000000o",
--lfh_5 = "0000e300000000cal",
--lgfi_3 = "0000c00100000000n",
-+lhh_2 = "0000e300000000c4l",
-+lhrl_2 = "0000c40500000000o",
-+lghrl_2 = "0000c40400000000o",
-+lfh_2 = "0000e300000000cal",
-+lgfi_2 = "0000c00100000000n",
- lxdbr_2 = "00000000b3050000h",
- lxdr_2 = "00000000b3250000h",
- lxebr_2 = "00000000b3060000h",
- lxer_2 = "00000000b3260000h",
- ldebr_2 = "00000000b3040000h",
- lder_2 = "00000000b3240000h",
--llgf_5 = "0000e30000000016l",
-+llgf_2 = "0000e30000000016l",
- llgfr_2 = "00000000b9160000h",
--llc_5 = "0000e30000000094l",
-+llc_2 = "0000e30000000094l",
- llcr_2 = "00000000b9940000h",
--llgc_5 = "0000e30000000090l",
-+llgc_2 = "0000e30000000090l",
- llgcr_2 = "00000000b9840000h",
--llch_5 = "0000e300000000c2l",
--llh_5 = "0000e30000000095l",
-+llch_2 = "0000e300000000c2l",
-+llh_2 = "0000e30000000095l",
- llhr_2 = "00000000b9950000h",
--llgh_5 = "0000e30000000091l",
-+llgh_2 = "0000e30000000091l",
- llghr_2 = "00000000b9850000h",
--llhh_5 = "0000e300000000c6l",
--llhrl_3 = "0000c40200000000o",
--llghrl_3 = "0000c40600000000o",
--llihf_3 = "0000c00e00000000n",
--llilf_3 = "0000c00f00000000n",
--llgfrl_3 = "0000c40e00000000o",
--llgt_5 = "0000e30000000017l",
-+llhh_2 = "0000e300000000c6l",
-+llhrl_2 = "0000c40200000000o",
-+llghrl_2 = "0000c40600000000o",
-+llihf_2 = "0000c00e00000000n",
-+llilf_2 = "0000c00f00000000n",
-+llgfrl_2 = "0000c40e00000000o",
-+llgt_2 = "0000e30000000017l",
- llgtr_2 = "00000000b9170000h",
--lm_4 = "0000000098000000q",
--lmy_5 = "0000eb0000000098s",
--lmg_5 = "0000eb0000000004s",
--lmh_5 = "0000eb0000000096s",
-+lm_3 = "0000000098000000q",
-+lmy_3 = "0000eb0000000098s",
-+lmg_3 = "0000eb0000000004s",
-+lmh_3 = "0000eb0000000096s",
- lnr_2 = "0000000000001100g",
- lngr_2 = "00000000b9010000h",
- lngfr_2 = "00000000b9110000h",
-@@ -761,9 +761,9 @@ lndr_2 = "0000000000002100g",
- lndfr_2 = "00000000b3710000h",
- lnebr_2 = "00000000b3010000h",
- lner_2 = "0000000000003100g",
--loc_5 = "0000eb00000000f2t",
--locg_5 = "0000eb00000000e2t",
--lpq_5 = "0000e3000000008fl",
-+loc_3 = "0000eb00000000f2t",
-+locg_3 = "0000eb00000000e2t",
-+lpq_2 = "0000e3000000008fl",
- lpr_2 = "0000000000001000g",
- lpgr_2 = "00000000b9000000h",
- lpgfr_2 = "00000000b9100000h",
-@@ -774,16 +774,16 @@ lpdr_2 = "0000000000002000g",
- lpdfr_2 = "00000000b3700000h",
- lpebr_2 = "00000000b3000000h",
- lper_2 = "0000000000003000g",
--lra_4 = "00000000b1000000j",
--lray_5 = "0000e30000000013l",
--lrag_5 = "0000e30000000003l",
--lrl_3 = "0000c40d00000000o",
--lgrl_3 = "0000c40800000000o",
--lgfrl_3 = "0000c40c00000000o",
--lrvh_5 = "0000e3000000001fl",
--lrv_5 = "0000e3000000001el",
-+lra_2 = "00000000b1000000j",
-+lray_2 = "0000e30000000013l",
-+lrag_2 = "0000e30000000003l",
-+lrl_2 = "0000c40d00000000o",
-+lgrl_2 = "0000c40800000000o",
-+lgfrl_2 = "0000c40c00000000o",
-+lrvh_2 = "0000e3000000001fl",
-+lrv_2 = "0000e3000000001el",
- lrvr_2 = "00000000b91f0000h",
--lrvg_5 = "0000e3000000000fl",
-+lrvg_2 = "0000e3000000000fl",
- lrvgr_2 = "00000000b90f0000h",
- ldxbr_2 = "00000000b3450000h",
- ldxr_2 = "0000000000002500g",
-@@ -800,50 +800,50 @@ lzdr_2 = "00000000b3750000h",
- lzer_2 = "00000000b3740000h",
- msta_2 = "00000000b2470000h",
- mvcl_2 = "0000000000000e00g",
--mvcle_4 = "00000000a8000000q",
--mvclu_5 = "0000eb000000008es",
-+mvcle_3 = "00000000a8000000q",
-+mvclu_3 = "0000eb000000008es",
- mvpg_2 = "00000000b2540000h",
- mvst_2 = "00000000b2550000h",
--m_4 = "000000005c000000j",
--mfy_5 = "0000e3000000005cl",
-+m_2 = "000000005c000000j",
-+mfy_2 = "0000e3000000005cl",
- mr_2 = "0000000000001c00g",
- mxbr_2 = "00000000b34c0000h",
- mxr_2 = "0000000000002600g",
- mdbr_2 = "00000000b31c0000h",
--md_4 = "000000006c000000j",
-+md_2 = "000000006c000000j",
- mdr_2 = "0000000000002c00g",
- mxdbr_2 = "00000000b3070000h",
--mxd_4 = "0000000067000000j",
-+mxd_2 = "0000000067000000j",
- mxdr_2 = "0000000000002700g",
- meebr_2 = "00000000b3170000h",
- meer_2 = "00000000b3370000h",
- mdebr_2 = "00000000b30c0000h",
--mde_4 = "000000007c000000j",
-+mde_2 = "000000007c000000j",
- mder_2 = "0000000000003c00g",
--me_4 = "000000007c000000j",
-+me_2 = "000000007c000000j",
- mer_2 = "0000000000003c00g",
--mh_4 = "000000004c000000j",
--mhy_5 = "0000e3000000007cl",
--mlg_5 = "0000e30000000086l",
-+mh_2 = "000000004c000000j",
-+mhy_2 = "0000e3000000007cl",
-+mlg_2 = "0000e30000000086l",
- mlgr_2 = "00000000b9860000h",
--ml_5 = "0000e30000000096l",
-+ml_2 = "0000e30000000096l",
- mlr_2 = "00000000b9960000h",
--ms_4 = "0000000071000000j",
-+ms_2 = "0000000071000000j",
- msr_2 = "00000000b2520000h",
--msy_5 = "0000e30000000051l",
--msg_5 = "0000e3000000000cl",
-+msy_2 = "0000e30000000051l",
-+msg_2 = "0000e3000000000cl",
- msgr_2 = "00000000b90c0000h",
--msgf_5 = "0000e3000000001cl",
-+msgf_2 = "0000e3000000001cl",
- msgfr_2 = "00000000b91c0000h",
--msfi_3 = "0000c20100000000n",
--msgfi_3 = "0000c20000000000n",
--o_4 = "0000000056000000j",
--or_2 = "0000000000001600g",
--oy_5 = "0000e30000000056l",
--og_5 = "0000e30000000081l",
-+msfi_2 = "0000c20100000000n",
-+msgfi_2 = "0000c20000000000n",
-+o_2 = "0000000056000000j",
-+or_2 = "0000000000001600g",
-+oy_2 = "0000e30000000056l",
-+og_2 = "0000e30000000081l",
- ogr_2 = "00000000b9810000h",
--oihf_3 = "0000c00c00000000n",
--oilf_3 = "0000c00d00000000n",
-+oihf_2 = "0000c00c00000000n",
-+oilf_2 = "0000c00d00000000n",
- pgin_2 = "00000000b22e0000h",
- pgout_2 = "00000000b22f0000h",
- pcc_2 = "00000000b92c0000h",
-@@ -851,15 +851,15 @@ pckmo_2 = "00000000b9280000h",
- pfmf_2 = "00000000b9af0000h",
- ptf_2 = "00000000b9a20000h",
- popcnt_2 = "00000000b9e10000h",
--pfd_5 = "0000e30000000036m",
--pfdrl_3 = "0000c60200000000p",
-+pfd_2 = "0000e30000000036m",
-+pfdrl_2 = "0000c60200000000p",
- pt_2 = "00000000b2280000h",
- pti_2 = "00000000b99e0000h",
- palb_2 = "00000000b2480000h",
- rrbe_2 = "00000000b22a0000h",
- rrbm_2 = "00000000b9ae0000h",
--rll_5 = "0000eb000000001ds",
--rllg_5 = "0000eb000000001cs",
-+rll_3 = "0000eb000000001ds",
-+rllg_3 = "0000eb000000001cs",
- srst_2 = "00000000b25e0000h",
- srstu_2 = "00000000b9be0000h",
- sar_2 = "00000000b24e0000h",
-@@ -868,22 +868,22 @@ sfasr_2 = "00000000b3850000h",
- spm_2 = "000000000000400g",
- ssar_2 = "00000000b2250000h",
- ssair_2 = "00000000b99f0000h",
--slda_4 = "000000008f000000q",
--sldl_4 = "000000008d000000q",
--sla_4 = "000000008b000000q",
--slak_5 = "0000eb00000000dds",
--slag_5 = "0000eb000000000bs",
--sll_4 = "0000000089000000q",
--sllk_5 = "0000eb00000000dfs",
--sllg_5 = "0000eb000000000ds",
--srda_4 = "000000008e000000q",
--srdl_4 = "000000008c000000q",
--sra_4 = "000000008a000000q",
--srak_5 = "0000eb00000000dcs",
--srag_5 = "0000eb000000000as",
--srl_4 = "0000000088000000q",
--srlk_5 = "0000eb00000000des",
--srlg_5 = "0000eb000000000cs",
-+slda_3 = "000000008f000000q",
-+sldl_3 = "000000008d000000q",
-+sla_3 = "000000008b000000q",
-+slak_3 = "0000eb00000000dds",
-+slag_3 = "0000eb000000000bs",
-+sll_3 = "0000000089000000q",
-+sllk_3 = "0000eb00000000dfs",
-+sllg_3 = "0000eb000000000ds",
-+srda_3 = "000000008e000000q",
-+srdl_3 = "000000008c000000q",
-+sra_3 = "000000008a000000q",
-+srak_3 = "0000eb00000000dcs",
-+srag_3 = "0000eb000000000as",
-+srl_3 = "0000000088000000q",
-+srlk_3 = "0000eb00000000des",
-+srlg_3 = "0000eb000000000cs",
- sqxbr_2 = "00000000b3160000h",
- sqxr_2 = "00000000b3360000h",
- sqdbr_2 = "00000000b3150000h",
-@@ -891,79 +891,79 @@ sqdr_2 = "00000000b2440000h",
- sqebr_2 = "00000000b3140000h",
- sqer_2 = "00000000b2450000h",
- st_2 = "0000000050000000j",
--sty_5 = "0000e30000000050l",
--stg_5 = "0000e30000000024l",
--std_4 = "0000000060000000j",
--stdy_5 = "0000ed0000000067l",
--ste_4 = "0000000070000000j",
--stey_5 = "0000ed0000000066l",
--stam_4 = "000000009b000000q",
--stamy_5 = "0000eb000000009bs",
--stc_4 = "0000000042000000j",
--stcy_5 = "0000e30000000072l",
--stch_5 = "0000e300000000c3l",
--stcmh_5 = "0000eb000000002ct",
--stcm_4 = "00000000be000000r",
--stcmy_5 = "0000eb000000002dt",
--stctl_4 = "00000000b6000000q",
--stctg_5 = "0000eb0000000025s",
--sth_4 = "0000000040000000j",
--sthy_5 = "0000e30000000070l",
--sthh_5 = "0000e300000000c7l",
--sthrl_3 = "0000c40700000000o",
--stfh_5 = "0000e300000000cbl",
--stm_4 = "0000000090000000q",
--stmy_5 = "0000eb0000000090s",
--stmg_5 = "0000eb0000000024s",
--stmh_5 = "0000eb0000000026s",
--stoc_5 = "0000eb00000000f3t",
--stocg_5 = "0000eb00000000e3t",
--stpq_5 = "0000e3000000008el",
--strl_3 = "0000c40f00000000o",
--stgrl_3 = "0000c40b00000000o",
--strvh_5 = "0000e3000000003fl",
--strv_5 = "0000e3000000003el",
--strvg_5 = "0000e3000000002fl",
-+sty_2 = "0000e30000000050l",
-+stg_2 = "0000e30000000024l",
-+std_2 = "0000000060000000j",
-+stdy_2 = "0000ed0000000067l",
-+ste_2 = "0000000070000000j",
-+stey_2 = "0000ed0000000066l",
-+stam_3 = "000000009b000000q",
-+stamy_3 = "0000eb000000009bs",
-+stc_2 = "0000000042000000j",
-+stcy_2 = "0000e30000000072l",
-+stch_2 = "0000e300000000c3l",
-+stcmh_3 = "0000eb000000002ct",
-+stcm_3 = "00000000be000000r",
-+stcmy_3 = "0000eb000000002dt",
-+stctl_3 = "00000000b6000000q",
-+stctg_3 = "0000eb0000000025s",
-+sth_2 = "0000000040000000j",
-+sthy_2 = "0000e30000000070l",
-+sthh_2 = "0000e300000000c7l",
-+sthrl_2 = "0000c40700000000o",
-+stfh_2 = "0000e300000000cbl",
-+stm_3 = "0000000090000000q",
-+stmy_3 = "0000eb0000000090s",
-+stmg_3 = "0000eb0000000024s",
-+stmh_3 = "0000eb0000000026s",
-+stoc_3 = "0000eb00000000f3t",
-+stocg_3 = "0000eb00000000e3t",
-+stpq_2 = "0000e3000000008el",
-+strl_2 = "0000c40f00000000o",
-+stgrl_2 = "0000c40b00000000o",
-+strvh_2 = "0000e3000000003fl",
-+strv_2 = "0000e3000000003el",
-+strvg_2 = "0000e3000000002fl",
- stura_2 = "00000000b2460000h",
- sturg_2 = "00000000b9250000h",
--s_4 = "000000005b000000j",
-+s_2 = "000000005b000000j",
- sr_2 = "0000000000001b00g",
--sy_5 = "0000e3000000005bl",
--sg_5 = "0000e30000000009l",
-+sy_2 = "0000e3000000005bl",
-+sg_2 = "0000e30000000009l",
- sgr_2 = "00000000b9090000h",
--sgf_5 = "0000e30000000019l",
-+sgf_2 = "0000e30000000019l",
- sgfr_2 = "00000000b9190000h",
- sxbr_2 = "00000000b34b0000h",
- sdbr_2 = "00000000b31b0000h",
- sebr_2 = "00000000b30b0000h",
--sh_4 = "000000004b000000j",
--shy_5 = "0000e3000000007bl",
--sl_4 = "000000005f000000j",
-+sh_2 = "000000004b000000j",
-+shy_2 = "0000e3000000007bl",
-+sl_2 = "000000005f000000j",
- slr_2 = "0000000000001f00g",
--sly_5 = "0000e3000000005fl",
--slg_5 = "0000e3000000000bl",
-+sly_2 = "0000e3000000005fl",
-+slg_2 = "0000e3000000000bl",
- slgr_2 = "00000000b90b0000h",
--slgf_5 = "0000e3000000001bl",
-+slgf_2 = "0000e3000000001bl",
- slgfr_2 = "00000000b91b0000h",
--slfi_3 = "0000c20500000000n",
--slgfi_3 = "0000c20400000000n",
--slb_5 = "0000e30000000099l",
-+slfi_2 = "0000c20500000000n",
-+slgfi_2 = "0000c20400000000n",
-+slb_2 = "0000e30000000099l",
- slbr_2 = "00000000b9990000h",
--slbg_5 = "0000e30000000089l",
-+slbg_2 = "0000e30000000089l",
- slbgr_2 = "00000000b9890000h",
- sxr_2 = "0000000000003700g",
--sd_4 = "000000006b000000j",
-+sd_2 = "000000006b000000j",
- sdr_2 = "0000000000002b00g",
--se_4 = "000000007b000000j",
-+se_2 = "000000007b000000j",
- ser_2 = "0000000000003b00g",
--su_4 = "000000007f000000j",
-+su_2 = "000000007f000000j",
- sur_2 = "0000000000003f00g",
--sw_4 = "000000006f000000j",
-+sw_2 = "000000006f000000j",
- swr_2 = "0000000000002f00g",
- tar_2 = "00000000b24c0000h",
- tb_2 = "00000000b22c0000h",
--trace_4 = "0000000099000000q",
--tracg_5 = "0000eb000000000fs",
-+trace_3 = "0000000099000000q",
-+tracg_3 = "0000eb000000000fs",
- tre_2 = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
---
-2.20.1
-
-
-From 1aa38edf7d5e77424b7baa99367975ac9a3e5153 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 1 Dec 2016 14:42:42 -0500
-Subject: [PATCH 047/247] Fix indentation.
-
-I miss gofmt.
----
- dynasm/dasm_s390x.lua | 18 +++++++++---------
- 1 file changed, 9 insertions(+), 9 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 467e218..2ee9493 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1029,16 +1029,16 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "n" then
-
- elseif p == "y" then
-- local d, x, b, a = parse_mem_bx(params[1])
-- op1 = op1 + x
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2);
-- if a then
-- werror("disp12 actions not yet implemented")
-- end
-+ local d, x, b, a = parse_mem_bx(params[1])
-+ op1 = op1 + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then
-+ werror("disp12 actions not yet implemented")
-+ end
- elseif p == "z" then
-- op2 = op2 + parse_gpr(params[1])
-- wputhw(op2)
-+ op2 = op2 + parse_gpr(params[1])
-+ wputhw(op2)
- end
- end
-
---
-2.20.1
-
-
-From 01eae7153c7c0cb9ac7ebed06666357c47f19893 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 1 Dec 2016 17:09:45 -0500
-Subject: [PATCH 048/247] Allow symbols to be used for 12-bit displacements.
-
-The parse_mem_bx function now returns a function to call to add an
-action to the action list to handle the evaluation of the
-displacement. This allows us to delay adding said action until
-after we have emitted the actions for the instruction encodings
-themselves.
-
-Code like this should now work:
-
-int x = 24
-| st r1, x(sp)
----
- dynasm/dasm_s390x.h | 10 +++++++---
- dynasm/dasm_s390x.lua | 19 ++++++++++---------
- 2 files changed, 17 insertions(+), 12 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 837a2ed..8b43a78 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -234,8 +234,10 @@ void dasm_put(Dst_DECL, int start, ...)
- case DASM_IMM16:
- case DASM_IMM32:
- case DASM_DISP20:
-- case DASM_DISP12:
- fprintf(stderr, "not implemented\n");
-+ case DASM_DISP12:
-+ CK((n>>12) == 0, RANGE_I);
-+ b[pos++] = n;
- break;
- }
- }
-@@ -296,7 +298,7 @@ int dasm_link(Dst_DECL, size_t *szp)
- case DASM_IMM32:
- case DASM_DISP20:
- case DASM_DISP12:
-- fprintf(stderr, "not implemented\n");
-+ pos++;
- break;
- }
- }
-@@ -364,8 +366,10 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_IMM16:
- case DASM_IMM32:
- case DASM_DISP20:
-- case DASM_DISP12:
- fprintf(stderr, "not implemented\n");
-+ break;
-+ case DASM_DISP12:
-+ cp[-1] |= n&0xfff;
- break;
- default: *cp++ = ins; break;
- }
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 2ee9493..b306165 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -316,6 +316,8 @@ end
-
- -- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
- -- are GPRs.
-+-- If the fourth return value is not-nil then it needs to be called to
-+-- insert an action.
- -- Encoded as: xbddd
- local function parse_mem_bx(arg)
- local d, x, b = split_memop(arg)
-@@ -326,11 +328,10 @@ local function parse_mem_bx(arg)
- end
- return dval, x, b, nil
- end
-- -- TODO: handle d being a symbol.
-- -- Action is currently the final return value (the caller needs to add it
-- -- to the action list at a later point).
-- werror("parse_mem_bx: not implemented")
-- return nil
-+ if match(d, "^[rf]1?[0-9]?") then
-+ werror("expected immediate operand, got register")
-+ end
-+ return 0, x, b, function() waction("DISP12", nil, d) end
- end
-
- -- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
-@@ -1018,7 +1019,7 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
- if a then
-- werror("disp12 actions not yet implemented")
-+ a()
- end
- elseif p == "k" then
-
-@@ -1034,7 +1035,7 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
- if a then
-- werror("disp12 actions not yet implemented")
-+ a()
- end
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
-@@ -1046,8 +1047,8 @@ end
- function op_template(params, template, nparams)
- if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
- -- Limit number of section buffer positions used by a single dasm_put().
-- -- A single opcode needs a maximum of 3 positions.
-- if secpos+3 > maxsecpos then wflush() end
-+ -- A single opcode needs a maximum of 5 positions.
-+ if secpos+5 > maxsecpos then wflush() end
- local lpos, apos, spos = #actlist, #actargs, secpos
- local ok, err
- for t in gmatch(template, "[^|]+") do
---
-2.20.1
-
-
-From 733cbf18da5e491033856367ffee3624e41f7149 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 1 Dec 2016 19:25:32 -0500
-Subject: [PATCH 049/247] Add support for RXY instructions (20-bit
- displacements).
-
----
- dynasm/dasm_s390x.h | 10 ++++++++--
- dynasm/dasm_s390x.lua | 23 ++++++++++++++++++++---
- 2 files changed, 28 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 8b43a78..66dfd79 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -233,8 +233,11 @@ void dasm_put(Dst_DECL, int start, ...)
- break;
- case DASM_IMM16:
- case DASM_IMM32:
-- case DASM_DISP20:
- fprintf(stderr, "not implemented\n");
-+ case DASM_DISP20:
-+ CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
-+ b[pos++] = n;
-+ break;
- case DASM_DISP12:
- CK((n>>12) == 0, RANGE_I);
- b[pos++] = n;
-@@ -365,9 +368,12 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_LABEL_PC: break;
- case DASM_IMM16:
- case DASM_IMM32:
-- case DASM_DISP20:
- fprintf(stderr, "not implemented\n");
- break;
-+ case DASM_DISP20:
-+ cp[-2] |= n&0xfff;
-+ cp[-1] |= (n>>4)&0xff00;
-+ break;
- case DASM_DISP12:
- cp[-1] |= n&0xfff;
- break;
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index b306165..6900944 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -346,8 +346,18 @@ end
- -- and b and x are GPRs.
- -- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
- local function parse_mem_bxy(arg)
-- werror("parse_mem_bxy: not implemented")
-- return nil
-+ local d, x, b = split_memop(arg)
-+ local dval = tonumber(d)
-+ if dval then
-+ if not is_int20(dval) then
-+ werror("displacement out of range: ", dval)
-+ end
-+ return dval, x, b, nil
-+ end
-+ if match(d, "^[rf]1?[0-9]?") then
-+ werror("expected immediate operand, got register")
-+ end
-+ return 0, x, b, function() waction("DISP20", nil, d) end
- end
-
- local function parse_label(label, def)
-@@ -1024,7 +1034,14 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "k" then
-
- elseif p == "l" then
--
-+ local d, x, b, a = parse_mem_bxy(params[2])
-+ op0 = op0 + shl(parse_gpr(params[1]), 4) + x
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op0); wputhw(op1); wputhw(op2)
-+ if a then
-+ a()
-+ end
- elseif p == "m" then
-
- elseif p == "n" then
---
-2.20.1
-
-
-From db784ffda14b82387bde002cc8f0bb24c063fa45 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 1 Dec 2016 19:45:06 -0500
-Subject: [PATCH 050/247] Add support for RS-a and RSY-a instructions like stm
- and stmg.
-
----
- dynasm/dasm_s390x.lua | 57 ++++++++++++++++++++++++++-----------------
- 1 file changed, 35 insertions(+), 22 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 6900944..039681b 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -307,13 +307,6 @@ local function split_memop(arg)
- return nil
- end
-
---- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
---- Encoded as: bddd
--local function parse_mem_b(arg)
-- werror("parse_mem_b: not implemented")
-- return nil
--end
--
- -- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
- -- are GPRs.
- -- If the fourth return value is not-nil then it needs to be called to
-@@ -334,12 +327,14 @@ local function parse_mem_bx(arg)
- return 0, x, b, function() waction("DISP12", nil, d) end
- end
-
---- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
---- b is a GPR.
---- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
--local function parse_mem_by(arg)
-- werror("parse_mem_by: not implemented")
-- return nil
-+-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
-+-- Encoded as: bddd
-+local function parse_mem_b(arg)
-+ local d, x, b, a = parse_mem_bx(arg)
-+ if x ~= 0 then
-+ werror("unexpected index register")
-+ end
-+ return d, b, a
- end
-
- -- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
-@@ -360,6 +355,17 @@ local function parse_mem_bxy(arg)
- return 0, x, b, function() waction("DISP20", nil, d) end
- end
-
-+-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
-+-- b is a GPR.
-+-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
-+local function parse_mem_by(arg)
-+ local d, x, b, a = parse_mem_bxy(arg)
-+ if x ~= 0 then
-+ werror("unexpected index register")
-+ end
-+ return d, b, a
-+end
-+
- local function parse_label(label, def)
- local prefix = sub(label, 1, 2)
- -- =>label (pc label reference)
-@@ -1028,9 +1034,7 @@ local function parse_template(params, template, nparams, pos)
- op1 = op1 + shl(parse_gpr(params[1]), 4) + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
-- if a then
-- a()
-- end
-+ if a then a() end
- elseif p == "k" then
-
- elseif p == "l" then
-@@ -1039,21 +1043,30 @@ local function parse_template(params, template, nparams, pos)
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
-- if a then
-- a()
-- end
-+ if a then a() end
- elseif p == "m" then
-
- elseif p == "n" then
-
-+ elseif p == "q" then
-+ local d, b, a = parse_mem_b(params[3])
-+ op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2)
-+ if a then a() end
-+ elseif p == "s" then
-+ local d, b, a = parse_mem_by(params[3])
-+ op0 = op0 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op0); wputhw(op1); wputhw(op2)
-+ if a then a() end
- elseif p == "y" then
- local d, x, b, a = parse_mem_bx(params[1])
- op1 = op1 + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
-- if a then
-- a()
-- end
-+ if a then a() end
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
- wputhw(op2)
---
-2.20.1
-
-
-From 962bb30c7a5208028c31baf41dcbcfe30fcc37a3 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 2 Dec 2016 12:55:43 +0530
-Subject: [PATCH 051/247] Added support for Immediate addressing mode
-
-Adding support for Immediate add mode, need to check how 32 bits is returned, currently followed the displacement method.
----
- dynasm/dasm_s390x.lua | 16 +++++++++++++++-
- 1 file changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 039681b..c2c5a79 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -289,6 +289,10 @@ local function is_int20(num)
- return -shl(1, 19) <= num and num < shl(1, 19)
- end
-
-+local function is_int32(num)
-+ return -shl(1,31) <= num and num <shl(1,31)
-+end
-+
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
-@@ -366,6 +370,14 @@ local function parse_mem_by(arg)
- return d, b, a
- end
-
-+local function parse_imm(arg)
-+ local imm_val = tonumber(arg,16)
-+ if not is_int32(imm_val) then
-+ werror("Immediate value out of range: ", imm_val)
-+ end
-+ return imm_val
-+end
-+
- local function parse_label(label, def)
- local prefix = sub(label, 1, 2)
- -- =>label (pc label reference)
-@@ -1047,7 +1059,9 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "m" then
-
- elseif p == "n" then
--
-+ op0 = op0 + shl(parse_gpr(params[1], 4)
-+ local imm = parse_imm(param[2])
-+ wputhw(op0); waction("IMM32", nil, imm)
- elseif p == "q" then
- local d, b, a = parse_mem_b(params[3])
- op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
---
-2.20.1
-
-
-From 9e90a5f897ce6af799c9a0ae493aa0b011ad83f5 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 2 Dec 2016 14:13:55 +0530
-Subject: [PATCH 052/247] Minor change , missed out brace
-
----
- dynasm/dasm_s390x.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c2c5a79..536f517 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1059,7 +1059,7 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "m" then
-
- elseif p == "n" then
-- op0 = op0 + shl(parse_gpr(params[1], 4)
-+ op0 = op0 + shl(parse_gpr(params[1]), 4)
- local imm = parse_imm(param[2])
- wputhw(op0); waction("IMM32", nil, imm)
- elseif p == "q" then
---
-2.20.1
-
-
-From b624a8d0134ffc67b90b4c7c0295d4d7a2e20c55 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Fri, 2 Dec 2016 15:21:18 +0530
-Subject: [PATCH 053/247] Create test_z_inst.c
-
-Added examples folder
-Added test code to test basic instructions like add , sub and msr
-This code is in processes of further expansion and tuning
----
- dynasm/Examples/test_z_inst.c | 80 +++++++++++++++++++++++++++++++++++
- 1 file changed, 80 insertions(+)
- create mode 100644 dynasm/Examples/test_z_inst.c
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-new file mode 100644
-index 0000000..314ea0c
---- /dev/null
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -0,0 +1,80 @@
-+#include <assert.h>
-+#include <stdio.h>
-+#include <sys/mman.h>
-+
-+#include "../dynasm/dasm_proto.h"
-+#include "../dynasm/dasm_s390x.h"
-+
-+//DynASM directives.
-+ |.arch s390x
-+ |.actionlist actions
-+
-+/* Instructio modes
-+ mode 0 : RR Mode
-+ mode 1 : I Mode
-+*/
-+
-+void *jitcode(dasm_State **state);
-+void add(dasm_State * , int);
-+void sub(dasm_State * , int);
-+void mul(dasm_State * , int);
-+
-+void *jitcode(dasm_State **state)
-+{
-+ size_t size;
-+ int dasm_status = dasm_link(state, &size);
-+ assert(dasm_status == DASM_S_OK);
-+
-+ void *ret = (int *)calloc(10,sizeof(int));
-+ dasm_encode(state, ret);
-+ dasm_free(state);
-+
-+ return (int *)ret;
-+}
-+
-+void add(dasm_State *state)
-+{
-+ dasm_State ** Dst = &state;
-+
-+ | ar r2,r3
-+ | br r14
-+}
-+
-+void sub(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | sr r2,r3
-+ | br r14
-+}
-+
-+void mul(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | msr r2 , r3
-+ | br r14
-+}
-+
-+void main(int argc, char *argv[])
-+{
-+ dasm_State *state;
-+ dasm_State **Dst = &state;
-+ int num1 , num2;
-+ int *ret;
-+ size_t size;
-+
-+ int* (*fptr)(int , int) = jitcode(&state);
-+
-+ num1 = atoi(argv[1]);
-+ num2 = atoi(argv[2]);
-+
-+ dasm_init(&state, 1);
-+ dasm_setup(&state, actions);
-+
-+ /* Call respective test function */
-+ sub(state);
-+
-+ ret = fptr(num1 , num2);
-+ printf("The value is %d\n" ,ret);
-+}
---
-2.20.1
-
-
-From 33b0f3b0fbda1ecebe4e8bd8d5e203d9e786b926 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Fri, 2 Dec 2016 15:46:45 +0530
-Subject: [PATCH 054/247] Update test_z_inst.c
-
-added functionality to test different modes of same instruction type
----
- dynasm/Examples/test_z_inst.c | 36 +++++++++++++++++++++++++++--------
- 1 file changed, 28 insertions(+), 8 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 314ea0c..65ca39a 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -32,15 +32,35 @@ void *jitcode(dasm_State **state)
- return (int *)ret;
- }
-
--void add(dasm_State *state)
-+void add(dasm_State *state , int mode)
- {
- dasm_State ** Dst = &state;
--
-- | ar r2,r3
-- | br r14
-+
-+ switch(mode)
-+ {
-+ /* Case RR instruction mode */
-+ case 0:
-+ {
-+ | ar r2,r3
-+ | br r14
-+ break;
-+ }
-+ /* Case RIL instruction mode */
-+ case 1:
-+ {
-+ | ar r2,0x16
-+ | br r14
-+ break;
-+ }
-+ default:
-+ {
-+ printf( " Mode not recognised \n ");
-+ break;
-+ }
-+ }
- }
-
--void sub(dasm_State *state)
-+void sub(dasm_State *state , int mode)
- {
- dasm_State **Dst = &state;
-
-@@ -48,7 +68,7 @@ void sub(dasm_State *state)
- | br r14
- }
-
--void mul(dasm_State *state)
-+void mul(dasm_State *state, int mode)
- {
- dasm_State **Dst = &state;
-
-@@ -73,8 +93,8 @@ void main(int argc, char *argv[])
- dasm_setup(&state, actions);
-
- /* Call respective test function */
-- sub(state);
-+ add(state , 0);
-
- ret = fptr(num1 , num2);
-- printf("The value is %d\n" ,ret);
-+ printf("Result is %d\n" ,ret);
- }
---
-2.20.1
-
-
-From acda0179aef3762f7714ca91ee6be3696c25d2aa Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 2 Dec 2016 17:19:29 +0530
-Subject: [PATCH 055/247] Minor cleanup and modified 32 bit signed check
-
-Modified 32 bit signed check for the immediate value
----
- dynasm/dasm_s390x.lua | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 536f517..76d770e 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -290,7 +290,7 @@ local function is_int20(num)
- end
-
- local function is_int32(num)
-- return -shl(1,31) <= num and num <shl(1,31)
-+ return -2147483648 <= num and num < 2147483648
- end
-
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
-@@ -1060,7 +1060,7 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "n" then
- op0 = op0 + shl(parse_gpr(params[1]), 4)
-- local imm = parse_imm(param[2])
-+ local imm = parse_imm(params[2])
- wputhw(op0); waction("IMM32", nil, imm)
- elseif p == "q" then
- local d, b, a = parse_mem_b(params[3])
---
-2.20.1
-
-
-From d3fe6349775857721393699aec0070bfdea91d86 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 2 Dec 2016 17:37:20 +0530
-Subject: [PATCH 056/247] Adding support for Immediate add mode
-
-The masking in immediate mode might not be proper. I could understand that you had masked
-12bits and then 8bits to get the displacement in place for 20-bit displacement ( cp[-2] |=
-n&0xfff; cp[-1] |= (n>>4)&0xff00;) But in my case I need all the
-32bits, so not sure how to go about it. Currently I have just used "n" since no
-point in "and with 0xffff" But I am getting core dump. Please Let me know your
-comments on these.
----
- dynasm/dasm_s390x.h | 9 +++++++--
- 1 file changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 66dfd79..6314ff8 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -233,7 +233,10 @@ void dasm_put(Dst_DECL, int start, ...)
- break;
- case DASM_IMM16:
- case DASM_IMM32:
-- fprintf(stderr, "not implemented\n");
-+ CK((n>>32) == 0, RANGE_I);
-+ b[pos++]=n;
-+ break;
-+ //fprintf(stderr, "not implemented\n");
- case DASM_DISP20:
- CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
- b[pos++] = n;
-@@ -368,7 +371,9 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_LABEL_PC: break;
- case DASM_IMM16:
- case DASM_IMM32:
-- fprintf(stderr, "not implemented\n");
-+ //pintf(stderr, "not implemented\n");
-+ cp[-1] |= n
-+ cp[-2] |= n
- break;
- case DASM_DISP20:
- cp[-2] |= n&0xfff;
---
-2.20.1
-
-
-From 6999d8f649c2c2237bc10b50bae8439e16303d25 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 2 Dec 2016 17:39:00 +0530
-Subject: [PATCH 057/247] Minor change: Cleanup
-
----
- dynasm/dasm_s390x.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 6314ff8..ccfe98f 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -373,7 +373,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_IMM32:
- //pintf(stderr, "not implemented\n");
- cp[-1] |= n
-- cp[-2] |= n
-+ cp[-2] |= (n >>4)
- break;
- case DASM_DISP20:
- cp[-2] |= n&0xfff;
---
-2.20.1
-
-
-From 3a3e230e4438647cb4b89ae2228fc74d79863ebd Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 13:06:03 -0500
-Subject: [PATCH 058/247] Cleanup and fix compilation.
-
----
- dynasm/dasm_s390x.h | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index ccfe98f..d303981 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -232,11 +232,14 @@ void dasm_put(Dst_DECL, int start, ...)
- b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
- case DASM_IMM16:
-+ ofs += 2;
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
- case DASM_IMM32:
-+ ofs += 4;
- CK((n>>32) == 0, RANGE_I);
- b[pos++]=n;
- break;
-- //fprintf(stderr, "not implemented\n");
- case DASM_DISP20:
- CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
- b[pos++] = n;
-@@ -370,10 +373,11 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_LABEL_PC: break;
- case DASM_IMM16:
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
- case DASM_IMM32:
-- //pintf(stderr, "not implemented\n");
-- cp[-1] |= n
-- cp[-2] |= (n >>4)
-+ *cp++ = n >> 16;
-+ *cp++ = n;
- break;
- case DASM_DISP20:
- cp[-2] |= n&0xfff;
---
-2.20.1
-
-
-From eb8edbb3b5bfd4b69609f0e94e22a6fb35cab678 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 13:41:45 -0500
-Subject: [PATCH 059/247] Auto-format dasm_s390x.h.
-
-I did this mostly to get rid of the annoying tabs/spaces mix in this
-file. It has the side effect of forcing newlines before statements
-which I think is a better style (and not particularly inconsistent
-with the original which used both styles). Other than that I've tried
-to match the original style as closely as possible.
-
-Generated with this command:
-
-indent -i2 -brs -cli0 -br -ce -npcs -nbc -di1 -npsl -ncs dasm_s390x.h
----
- dynasm/dasm_s390x.h | 269 +++++++++++++++++++++++++++-----------------
- 1 file changed, 168 insertions(+), 101 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index d303981..a5daaa3 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -21,7 +21,9 @@ enum {
- /* The following actions need a buffer position. */
- DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
- /* The following actions also have an argument. */
-- DASM_REL_PC, DASM_LABEL_PC, DASM_DISP12, DASM_DISP20, DASM_IMM16, DASM_IMM32,
-+ DASM_REL_PC, DASM_LABEL_PC,
-+ DASM_DISP12, DASM_DISP20,
-+ DASM_IMM16, DASM_IMM32,
- DASM__MAX
- };
-
-@@ -53,12 +55,12 @@ typedef const unsigned short *dasm_ActList;
-
- /* Per-section structure. */
- typedef struct dasm_Section {
-- int *rbuf; /* Biased buffer pointer (negative section bias). */
-- int *buf; /* True buffer pointer. */
-- size_t bsize; /* Buffer size in bytes. */
-- int pos; /* Biased buffer position. */
-- int epos; /* End of biased buffer position - max single put. */
-- int ofs; /* Byte offset into section. */
-+ int *rbuf; /* Biased buffer pointer (negative section bias). */
-+ int *buf; /* True buffer pointer. */
-+ size_t bsize; /* Buffer size in bytes. */
-+ int pos; /* Biased buffer position. */
-+ int epos; /* End of biased buffer position - max single put. */
-+ int ofs; /* Byte offset into section. */
- } dasm_Section;
-
- /* Core structure holding the DynASM encoding state. */
-@@ -98,10 +100,10 @@ void dasm_init(Dst_DECL, int maxsection)
- D->globals = NULL;
- D->maxsection = maxsection;
- for (i = 0; i < maxsection; i++) {
-- D->sections[i].buf = NULL; /* Need this for pass3. */
-+ D->sections[i].buf = NULL; /* Need this for pass3. */
- D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
- D->sections[i].bsize = 0;
-- D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
-+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
- }
- }
-
-@@ -113,8 +115,10 @@ void dasm_free(Dst_DECL)
- for (i = 0; i < D->maxsection; i++)
- if (D->sections[i].buf)
- DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
-- if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
-- if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
-+ if (D->pclabels)
-+ DASM_M_FREE(Dst, D->pclabels, D->pcsize);
-+ if (D->lglabels)
-+ DASM_M_FREE(Dst, D->lglabels, D->lgsize);
- DASM_M_FREE(Dst, D, D->psize);
- }
-
-@@ -122,8 +126,8 @@ void dasm_free(Dst_DECL)
- void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
- {
- dasm_State *D = Dst_REF;
-- D->globals = gl - 10; /* Negative bias to compensate for locals. */
-- DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
-+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
-+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int));
- }
-
- /* Grow PC label array. Can be called after dasm_setup(), too. */
-@@ -131,8 +135,8 @@ void dasm_growpc(Dst_DECL, unsigned int maxpc)
- {
- dasm_State *D = Dst_REF;
- size_t osz = D->pcsize;
-- DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
-- memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
-+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int));
-+ memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz);
- }
-
- /* Setup encoder. */
-@@ -140,11 +144,12 @@ void dasm_setup(Dst_DECL, const void *actionlist)
- {
- dasm_State *D = Dst_REF;
- int i;
-- D->actionlist = (dasm_ActList)actionlist;
-+ D->actionlist = (dasm_ActList) actionlist;
- D->status = DASM_S_OK;
- D->section = &D->sections[0];
- memset((void *)D->lglabels, 0, D->lgsize);
-- if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
-+ if (D->pclabels)
-+ memset((void *)D->pclabels, 0, D->pcsize);
- for (i = 0; i < D->maxsection; i++) {
- D->sections[i].pos = DASM_SEC2POS(i);
- D->sections[i].ofs = 0;
-@@ -176,9 +181,10 @@ void dasm_put(Dst_DECL, int start, ...)
-
- if (pos >= sec->epos) {
- DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
-- sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
-+ sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
- sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
-- sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
-+ sec->epos =
-+ (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
- }
-
- b = sec->rbuf;
-@@ -193,60 +199,84 @@ void dasm_put(Dst_DECL, int start, ...)
- } else {
- int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
- switch (action) {
-- case DASM_STOP: goto stop;
-+ case DASM_STOP:
-+ goto stop;
- case DASM_SECTION:
-- n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
-- D->section = &D->sections[n]; goto stop;
-- case DASM_ESC: p++; ofs += 4; break;
-- case DASM_REL_EXT: break;
-- case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
-+ n = (ins & 255);
-+ CK(n < D->maxsection, RANGE_SEC);
-+ D->section = &D->sections[n];
-+ goto stop;
-+ case DASM_ESC:
-+ p++;
-+ ofs += 4;
-+ break;
-+ case DASM_REL_EXT:
-+ break;
-+ case DASM_ALIGN:
-+ ofs += (ins & 255);
-+ b[pos++] = ofs;
-+ break;
- case DASM_REL_LG:
-- n = (ins & 2047) - 10; pl = D->lglabels + n;
-+ n = (ins & 2047) - 10;
-+ pl = D->lglabels + n;
- /* Bkwd rel or global. */
-- if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
-- pl += 10; n = *pl;
-- if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
-+ if (n >= 0) {
-+ CK(n >= 10 || *pl < 0, RANGE_LG);
-+ CKPL(lg, LG);
-+ goto putrel;
-+ }
-+ pl += 10;
-+ n = *pl;
-+ if (n < 0)
-+ n = 0; /* Start new chain for fwd rel if label exists. */
- goto linkrel;
- case DASM_REL_PC:
-- pl = D->pclabels + n; CKPL(pc, PC);
-+ pl = D->pclabels + n;
-+ CKPL(pc, PC);
- putrel:
- n = *pl;
-- if (n < 0) { /* Label exists. Get label pos and store it. */
-+ if (n < 0) { /* Label exists. Get label pos and store it. */
- b[pos] = -n;
- } else {
-- linkrel:
-- b[pos] = n; /* Else link to rel chain, anchored at label. */
-+ linkrel:
-+ b[pos] = n; /* Else link to rel chain, anchored at label. */
- *pl = pos;
- }
- pos++;
- break;
- case DASM_LABEL_LG:
-- pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
-+ pl = D->lglabels + (ins & 2047) - 10;
-+ CKPL(lg, LG);
-+ goto putlabel;
- case DASM_LABEL_PC:
-- pl = D->pclabels + n; CKPL(pc, PC);
-+ pl = D->pclabels + n;
-+ CKPL(pc, PC);
- putlabel:
-- n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-- while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
-+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-+ while (n > 0) {
-+ int *pb = DASM_POS2PTR(D, n);
-+ n = *pb;
-+ *pb = pos;
- }
-- *pl = -pos; /* Label exists now. */
-- b[pos++] = ofs; /* Store pass1 offset estimate. */
-+ *pl = -pos; /* Label exists now. */
-+ b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
- case DASM_IMM16:
-- ofs += 2;
-- fprintf(stderr, "DASM_IMM16 not implemented\n");
-- break;
-+ ofs += 2;
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
- case DASM_IMM32:
-- ofs += 4;
-- CK((n>>32) == 0, RANGE_I);
-- b[pos++]=n;
-- break;
-+ ofs += 4;
-+ CK((n >> 32) == 0, RANGE_I);
-+ b[pos++] = n;
-+ break;
- case DASM_DISP20:
-- CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
-- b[pos++] = n;
-- break;
-+ CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
-+ b[pos++] = n;
-+ break;
- case DASM_DISP12:
-- CK((n>>12) == 0, RANGE_I);
-- b[pos++] = n;
-+ CK((n >> 12) == 0, RANGE_I);
-+ b[pos++] = n;
- break;
- }
- }
-@@ -256,10 +286,11 @@ stop:
- sec->pos = pos;
- sec->ofs = ofs;
- }
-+
- #undef CK
-
- /* Pass 2: Link sections, shrink aligns, fix label offsets. */
--int dasm_link(Dst_DECL, size_t *szp)
-+int dasm_link(Dst_DECL, size_t * szp)
- {
- dasm_State *D = Dst_REF;
- int secnum;
-@@ -267,20 +298,26 @@ int dasm_link(Dst_DECL, size_t *szp)
-
- #ifdef DASM_CHECKS
- *szp = 0;
-- if (D->status != DASM_S_OK) return D->status;
-+ if (D->status != DASM_S_OK)
-+ return D->status;
- {
- int pc;
-- for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
-- if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
-+ for (pc = 0; pc * sizeof(int) < D->pcsize; pc++)
-+ if (D->pclabels[pc] > 0)
-+ return DASM_S_UNDEF_PC | pc;
- }
- #endif
-
-- { /* Handle globals not defined in this translation unit. */
-+ { /* Handle globals not defined in this translation unit. */
- int idx;
-- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
-+ for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) {
- int n = D->lglabels[idx];
- /* Undefined label: Collapse rel chain and replace with marker (< 0). */
-- while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
-+ while (n > 0) {
-+ int *pb = DASM_POS2PTR(D, n);
-+ n = *pb;
-+ *pb = -idx;
-+ }
- }
- }
-
-@@ -297,26 +334,39 @@ int dasm_link(Dst_DECL, size_t *szp)
- unsigned short ins = *p++;
- unsigned short action = ins;
- switch (action) {
-- case DASM_STOP: case DASM_SECTION: goto stop;
-- case DASM_ESC: p++; break;
-- case DASM_REL_EXT: break;
-- case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
-- case DASM_REL_LG: case DASM_REL_PC: pos++; break;
-- case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-- case DASM_IMM16:
-- case DASM_IMM32:
-- case DASM_DISP20:
-- case DASM_DISP12:
-- pos++;
-+ case DASM_STOP:
-+ case DASM_SECTION:
-+ goto stop;
-+ case DASM_ESC:
-+ p++;
-+ break;
-+ case DASM_REL_EXT:
-+ break;
-+ case DASM_ALIGN:
-+ ofs -= (b[pos++] + ofs) & (ins & 255);
-+ break;
-+ case DASM_REL_LG:
-+ case DASM_REL_PC:
-+ pos++;
-+ break;
-+ case DASM_LABEL_LG:
-+ case DASM_LABEL_PC:
-+ b[pos++] += ofs;
-+ break;
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ pos++;
- break;
- }
- }
-- stop: (void)0;
-+ stop:(void)0;
- }
-- ofs += sec->ofs; /* Next section starts right after current section. */
-+ ofs += sec->ofs; /* Next section starts right after current section. */
- }
-
-- D->codesize = ofs; /* Total size of all code sections */
-+ D->codesize = ofs; /* Total size of all code sections */
- *szp = ofs;
- return DASM_S_OK;
- }
-@@ -349,13 +399,19 @@ int dasm_encode(Dst_DECL, void *buffer)
- unsigned short action = ins;
- int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
- switch (action) {
-- case DASM_STOP: case DASM_SECTION: goto stop;
-- case DASM_ESC: *cp++ = *p++; break;
-+ case DASM_STOP:
-+ case DASM_SECTION:
-+ goto stop;
-+ case DASM_ESC:
-+ *cp++ = *p++;
-+ break;
- case DASM_REL_EXT:
- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
- goto patchrel;
- case DASM_ALIGN:
-- ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x0707;
-+ ins &= 255;
-+ while ((((char *)cp - base) & ins))
-+ *cp++ = 0x0707;
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-@@ -364,51 +420,59 @@ int dasm_encode(Dst_DECL, void *buffer)
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
- patchrel:
- CK((n & 3) == 0 &&
-- (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
-- ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
-- cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
-+ (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
-+ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
-+ cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
- break;
- case DASM_LABEL_LG:
-- ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
-+ ins &= 2047;
-+ if (ins >= 20)
-+ D->globals[ins - 10] = (void *)(base + n);
-+ break;
-+ case DASM_LABEL_PC:
- break;
-- case DASM_LABEL_PC: break;
-- case DASM_IMM16:
-- fprintf(stderr, "DASM_IMM16 not implemented\n");
-- break;
-- case DASM_IMM32:
-+ case DASM_IMM16:
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
-+ case DASM_IMM32:
- *cp++ = n >> 16;
-- *cp++ = n;
-- break;
-- case DASM_DISP20:
-- cp[-2] |= n&0xfff;
-- cp[-1] |= (n>>4)&0xff00;
-- break;
-- case DASM_DISP12:
-- cp[-1] |= n&0xfff;
-+ *cp++ = n;
-+ break;
-+ case DASM_DISP20:
-+ cp[-2] |= n & 0xfff;
-+ cp[-1] |= (n >> 4) & 0xff00;
-+ break;
-+ case DASM_DISP12:
-+ cp[-1] |= n & 0xfff;
-+ break;
-+ default:
-+ *cp++ = ins;
- break;
-- default: *cp++ = ins; break;
- }
- }
-- stop: (void)0;
-+ stop:(void)0;
- }
- }
-
-- if (base + D->codesize != (char *)cp) /* Check for phase errors. */
-+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
- return DASM_S_PHASE;
- return DASM_S_OK;
- }
-+
- #undef CK
-
- /* Get PC label offset. */
- int dasm_getpclabel(Dst_DECL, unsigned int pc)
- {
- dasm_State *D = Dst_REF;
-- if (pc*sizeof(int) < D->pcsize) {
-+ if (pc * sizeof(int) < D->pcsize) {
- int pos = D->pclabels[pc];
-- if (pos < 0) return *DASM_POS2PTR(D, -pos);
-- if (pos > 0) return -1; /* Undefined. */
-+ if (pos < 0)
-+ return *DASM_POS2PTR(D, -pos);
-+ if (pos > 0)
-+ return -1; /* Undefined. */
- }
-- return -2; /* Unused or out of range. */
-+ return -2; /* Unused or out of range. */
- }
-
- #ifdef DASM_CHECKS
-@@ -419,13 +483,16 @@ int dasm_checkstep(Dst_DECL, int secmatch)
- if (D->status == DASM_S_OK) {
- int i;
- for (i = 1; i <= 9; i++) {
-- if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
-+ if (D->lglabels[i] > 0) {
-+ D->status = DASM_S_UNDEF_LG | i;
-+ break;
-+ }
- D->lglabels[i] = 0;
- }
- }
- if (D->status == DASM_S_OK && secmatch >= 0 &&
- D->section != &D->sections[secmatch])
-- D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
-+ D->status = DASM_S_MATCH_SEC | (D->section - D->sections);
- return D->status;
- }
- #endif
---
-2.20.1
-
-
-From 796be2063984924123190ca8c3c80ce2ff5731d5 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 13:50:09 -0500
-Subject: [PATCH 060/247] Reduce indentation level of big switch statement.
-
-A style thing. I find it easier to read this way.
-
-i.e. do:
-
-while(1) {
- if (blah) {
- ...
- continue;
- }
- ... // big switch statement
-}
-
-instead of:
-
-while(1) {
- if (blah) {
- ...
- } else {
- ... // big switch statement
- }
-}
----
- dynasm/dasm_s390x.h | 163 ++++++++++++++++++++++----------------------
- 1 file changed, 82 insertions(+), 81 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index a5daaa3..d856603 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -196,89 +196,90 @@ void dasm_put(Dst_DECL, int start, ...)
- unsigned short action = ins;
- if (action >= DASM__MAX) {
- ofs += 2;
-- } else {
-- int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
-- switch (action) {
-- case DASM_STOP:
-- goto stop;
-- case DASM_SECTION:
-- n = (ins & 255);
-- CK(n < D->maxsection, RANGE_SEC);
-- D->section = &D->sections[n];
-- goto stop;
-- case DASM_ESC:
-- p++;
-- ofs += 4;
-- break;
-- case DASM_REL_EXT:
-- break;
-- case DASM_ALIGN:
-- ofs += (ins & 255);
-- b[pos++] = ofs;
-- break;
-- case DASM_REL_LG:
-- n = (ins & 2047) - 10;
-- pl = D->lglabels + n;
-- /* Bkwd rel or global. */
-- if (n >= 0) {
-- CK(n >= 10 || *pl < 0, RANGE_LG);
-- CKPL(lg, LG);
-- goto putrel;
-- }
-- pl += 10;
-- n = *pl;
-- if (n < 0)
-- n = 0; /* Start new chain for fwd rel if label exists. */
-- goto linkrel;
-- case DASM_REL_PC:
-- pl = D->pclabels + n;
-- CKPL(pc, PC);
-- putrel:
-- n = *pl;
-- if (n < 0) { /* Label exists. Get label pos and store it. */
-- b[pos] = -n;
-- } else {
-- linkrel:
-- b[pos] = n; /* Else link to rel chain, anchored at label. */
-- *pl = pos;
-- }
-- pos++;
-- break;
-- case DASM_LABEL_LG:
-- pl = D->lglabels + (ins & 2047) - 10;
-+ continue;
-+ }
-+
-+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
-+ switch (action) {
-+ case DASM_STOP:
-+ goto stop;
-+ case DASM_SECTION:
-+ n = (ins & 255);
-+ CK(n < D->maxsection, RANGE_SEC);
-+ D->section = &D->sections[n];
-+ goto stop;
-+ case DASM_ESC:
-+ p++;
-+ ofs += 4;
-+ break;
-+ case DASM_REL_EXT:
-+ break;
-+ case DASM_ALIGN:
-+ ofs += (ins & 255);
-+ b[pos++] = ofs;
-+ break;
-+ case DASM_REL_LG:
-+ n = (ins & 2047) - 10;
-+ pl = D->lglabels + n;
-+ /* Bkwd rel or global. */
-+ if (n >= 0) {
-+ CK(n >= 10 || *pl < 0, RANGE_LG);
- CKPL(lg, LG);
-- goto putlabel;
-- case DASM_LABEL_PC:
-- pl = D->pclabels + n;
-- CKPL(pc, PC);
-- putlabel:
-- n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-- while (n > 0) {
-- int *pb = DASM_POS2PTR(D, n);
-- n = *pb;
-- *pb = pos;
-- }
-- *pl = -pos; /* Label exists now. */
-- b[pos++] = ofs; /* Store pass1 offset estimate. */
-- break;
-- case DASM_IMM16:
-- ofs += 2;
-- fprintf(stderr, "DASM_IMM16 not implemented\n");
-- break;
-- case DASM_IMM32:
-- ofs += 4;
-- CK((n >> 32) == 0, RANGE_I);
-- b[pos++] = n;
-- break;
-- case DASM_DISP20:
-- CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
-- b[pos++] = n;
-- break;
-- case DASM_DISP12:
-- CK((n >> 12) == 0, RANGE_I);
-- b[pos++] = n;
-- break;
-+ goto putrel;
-+ }
-+ pl += 10;
-+ n = *pl;
-+ if (n < 0)
-+ n = 0; /* Start new chain for fwd rel if label exists. */
-+ goto linkrel;
-+ case DASM_REL_PC:
-+ pl = D->pclabels + n;
-+ CKPL(pc, PC);
-+ putrel:
-+ n = *pl;
-+ if (n < 0) { /* Label exists. Get label pos and store it. */
-+ b[pos] = -n;
-+ } else {
-+ linkrel:
-+ b[pos] = n; /* Else link to rel chain, anchored at label. */
-+ *pl = pos;
- }
-+ pos++;
-+ break;
-+ case DASM_LABEL_LG:
-+ pl = D->lglabels + (ins & 2047) - 10;
-+ CKPL(lg, LG);
-+ goto putlabel;
-+ case DASM_LABEL_PC:
-+ pl = D->pclabels + n;
-+ CKPL(pc, PC);
-+ putlabel:
-+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-+ while (n > 0) {
-+ int *pb = DASM_POS2PTR(D, n);
-+ n = *pb;
-+ *pb = pos;
-+ }
-+ *pl = -pos; /* Label exists now. */
-+ b[pos++] = ofs; /* Store pass1 offset estimate. */
-+ break;
-+ case DASM_IMM16:
-+ ofs += 2;
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
-+ case DASM_IMM32:
-+ ofs += 4;
-+ CK((n >> 32) == 0, RANGE_I);
-+ b[pos++] = n;
-+ break;
-+ case DASM_DISP20:
-+ CK(-(1 << 19) <= n && n < (1 << 19), RANGE_I);
-+ b[pos++] = n;
-+ break;
-+ case DASM_DISP12:
-+ CK((n >> 12) == 0, RANGE_I);
-+ b[pos++] = n;
-+ break;
- }
- }
- stop:
---
-2.20.1
-
-
-From a1fab3e5d3518b034b5b57efa248b1b0ea05ed51 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 14:20:59 -0500
-Subject: [PATCH 061/247] Add C code to handle IMM16.
-
----
- dynasm/dasm_s390x.h | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index d856603..12b8b2b 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -264,12 +264,12 @@ void dasm_put(Dst_DECL, int start, ...)
- b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
- case DASM_IMM16:
-+ CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
- ofs += 2;
-- fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ b[pos++] = n;
- break;
- case DASM_IMM32:
- ofs += 4;
-- CK((n >> 32) == 0, RANGE_I);
- b[pos++] = n;
- break;
- case DASM_DISP20:
-@@ -433,7 +433,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_LABEL_PC:
- break;
- case DASM_IMM16:
-- fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ *cp++ = n;
- break;
- case DASM_IMM32:
- *cp++ = n >> 16;
---
-2.20.1
-
-
-From e2e68ede7aabf6258720ea150fc2624a35701cf5 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 14:35:33 -0500
-Subject: [PATCH 062/247] Add support for .align directive.
-
----
- dynasm/dasm_s390x.h | 9 +++++----
- 1 file changed, 5 insertions(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 12b8b2b..d505b38 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -215,7 +215,7 @@ void dasm_put(Dst_DECL, int start, ...)
- case DASM_REL_EXT:
- break;
- case DASM_ALIGN:
-- ofs += (ins & 255);
-+ ofs += *p++;
- b[pos++] = ofs;
- break;
- case DASM_REL_LG:
-@@ -344,7 +344,7 @@ int dasm_link(Dst_DECL, size_t * szp)
- case DASM_REL_EXT:
- break;
- case DASM_ALIGN:
-- ofs -= (b[pos++] + ofs) & (ins & 255);
-+ ofs -= (b[pos++] + ofs) & *p++;
- break;
- case DASM_REL_LG:
- case DASM_REL_PC:
-@@ -410,9 +410,10 @@ int dasm_encode(Dst_DECL, void *buffer)
- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
- goto patchrel;
- case DASM_ALIGN:
-- ins &= 255;
-+ ins = *p++;
-+ /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
- while ((((char *)cp - base) & ins))
-- *cp++ = 0x0707;
-+ *cp++ = 0x0700; /* nop */
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
---
-2.20.1
-
-
-From 56ae50d89a2424ed4c4567ade50415c82a8e339a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 15:14:37 -0500
-Subject: [PATCH 063/247] Auto-format dasm_s390x.h (again).
-
-This time explicitly ban tabs.
-
-indent -i2 -brs -cli0 -br -ce -npcs -nbc -di1 -npsl -ncs -nut dasm_s390x.h
----
- dynasm/dasm_s390x.h | 262 ++++++++++++++++++++++----------------------
- 1 file changed, 131 insertions(+), 131 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index d505b38..8cd2fc2 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -55,28 +55,28 @@ typedef const unsigned short *dasm_ActList;
-
- /* Per-section structure. */
- typedef struct dasm_Section {
-- int *rbuf; /* Biased buffer pointer (negative section bias). */
-- int *buf; /* True buffer pointer. */
-- size_t bsize; /* Buffer size in bytes. */
-- int pos; /* Biased buffer position. */
-- int epos; /* End of biased buffer position - max single put. */
-- int ofs; /* Byte offset into section. */
-+ int *rbuf; /* Biased buffer pointer (negative section bias). */
-+ int *buf; /* True buffer pointer. */
-+ size_t bsize; /* Buffer size in bytes. */
-+ int pos; /* Biased buffer position. */
-+ int epos; /* End of biased buffer position - max single put. */
-+ int ofs; /* Byte offset into section. */
- } dasm_Section;
-
- /* Core structure holding the DynASM encoding state. */
- struct dasm_State {
-- size_t psize; /* Allocated size of this structure. */
-- dasm_ActList actionlist; /* Current actionlist pointer. */
-- int *lglabels; /* Local/global chain/pos ptrs. */
-+ size_t psize; /* Allocated size of this structure. */
-+ dasm_ActList actionlist; /* Current actionlist pointer. */
-+ int *lglabels; /* Local/global chain/pos ptrs. */
- size_t lgsize;
-- int *pclabels; /* PC label chains/pos ptrs. */
-+ int *pclabels; /* PC label chains/pos ptrs. */
- size_t pcsize;
-- void **globals; /* Array of globals (bias -10). */
-- dasm_Section *section; /* Pointer to active section. */
-- size_t codesize; /* Total size of all code sections. */
-- int maxsection; /* 0 <= sectionidx < maxsection. */
-- int status; /* Status code. */
-- dasm_Section sections[1]; /* All sections. Alloc-extended. */
-+ void **globals; /* Array of globals (bias -10). */
-+ dasm_Section *section; /* Pointer to active section. */
-+ size_t codesize; /* Total size of all code sections. */
-+ int maxsection; /* 0 <= sectionidx < maxsection. */
-+ int status; /* Status code. */
-+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
- };
-
- /* The size of the core structure depends on the max. number of sections. */
-@@ -100,10 +100,10 @@ void dasm_init(Dst_DECL, int maxsection)
- D->globals = NULL;
- D->maxsection = maxsection;
- for (i = 0; i < maxsection; i++) {
-- D->sections[i].buf = NULL; /* Need this for pass3. */
-+ D->sections[i].buf = NULL; /* Need this for pass3. */
- D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
- D->sections[i].bsize = 0;
-- D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
-+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
- }
- }
-
-@@ -126,7 +126,7 @@ void dasm_free(Dst_DECL)
- void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
- {
- dasm_State *D = Dst_REF;
-- D->globals = gl - 10; /* Negative bias to compensate for locals. */
-+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
- DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int));
- }
-
-@@ -181,7 +181,7 @@ void dasm_put(Dst_DECL, int start, ...)
-
- if (pos >= sec->epos) {
- DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
-- sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
-+ sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
- sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
- sec->epos =
- (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
-@@ -223,26 +223,26 @@ void dasm_put(Dst_DECL, int start, ...)
- pl = D->lglabels + n;
- /* Bkwd rel or global. */
- if (n >= 0) {
-- CK(n >= 10 || *pl < 0, RANGE_LG);
-- CKPL(lg, LG);
-- goto putrel;
-+ CK(n >= 10 || *pl < 0, RANGE_LG);
-+ CKPL(lg, LG);
-+ goto putrel;
- }
- pl += 10;
- n = *pl;
- if (n < 0)
-- n = 0; /* Start new chain for fwd rel if label exists. */
-+ n = 0; /* Start new chain for fwd rel if label exists. */
- goto linkrel;
- case DASM_REL_PC:
- pl = D->pclabels + n;
- CKPL(pc, PC);
- putrel:
- n = *pl;
-- if (n < 0) { /* Label exists. Get label pos and store it. */
-- b[pos] = -n;
-+ if (n < 0) { /* Label exists. Get label pos and store it. */
-+ b[pos] = -n;
- } else {
- linkrel:
-- b[pos] = n; /* Else link to rel chain, anchored at label. */
-- *pl = pos;
-+ b[pos] = n; /* Else link to rel chain, anchored at label. */
-+ *pl = pos;
- }
- pos++;
- break;
-@@ -254,17 +254,17 @@ void dasm_put(Dst_DECL, int start, ...)
- pl = D->pclabels + n;
- CKPL(pc, PC);
- putlabel:
-- n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
- while (n > 0) {
-- int *pb = DASM_POS2PTR(D, n);
-- n = *pb;
-- *pb = pos;
-+ int *pb = DASM_POS2PTR(D, n);
-+ n = *pb;
-+ *pb = pos;
- }
-- *pl = -pos; /* Label exists now. */
-- b[pos++] = ofs; /* Store pass1 offset estimate. */
-+ *pl = -pos; /* Label exists now. */
-+ b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
- case DASM_IMM16:
-- CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
-+ CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
- ofs += 2;
- b[pos++] = n;
- break;
-@@ -305,19 +305,19 @@ int dasm_link(Dst_DECL, size_t * szp)
- int pc;
- for (pc = 0; pc * sizeof(int) < D->pcsize; pc++)
- if (D->pclabels[pc] > 0)
-- return DASM_S_UNDEF_PC | pc;
-+ return DASM_S_UNDEF_PC | pc;
- }
- #endif
-
-- { /* Handle globals not defined in this translation unit. */
-+ { /* Handle globals not defined in this translation unit. */
- int idx;
- for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) {
- int n = D->lglabels[idx];
- /* Undefined label: Collapse rel chain and replace with marker (< 0). */
- while (n > 0) {
-- int *pb = DASM_POS2PTR(D, n);
-- n = *pb;
-- *pb = -idx;
-+ int *pb = DASM_POS2PTR(D, n);
-+ n = *pb;
-+ *pb = -idx;
- }
- }
- }
-@@ -332,42 +332,42 @@ int dasm_link(Dst_DECL, size_t * szp)
- while (pos != lastpos) {
- dasm_ActList p = D->actionlist + b[pos++];
- while (1) {
-- unsigned short ins = *p++;
-- unsigned short action = ins;
-- switch (action) {
-- case DASM_STOP:
-- case DASM_SECTION:
-- goto stop;
-- case DASM_ESC:
-- p++;
-- break;
-- case DASM_REL_EXT:
-- break;
-- case DASM_ALIGN:
-- ofs -= (b[pos++] + ofs) & *p++;
-- break;
-- case DASM_REL_LG:
-- case DASM_REL_PC:
-- pos++;
-- break;
-- case DASM_LABEL_LG:
-- case DASM_LABEL_PC:
-- b[pos++] += ofs;
-- break;
-- case DASM_IMM16:
-- case DASM_IMM32:
-- case DASM_DISP20:
-- case DASM_DISP12:
-- pos++;
-- break;
-- }
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
-+ switch (action) {
-+ case DASM_STOP:
-+ case DASM_SECTION:
-+ goto stop;
-+ case DASM_ESC:
-+ p++;
-+ break;
-+ case DASM_REL_EXT:
-+ break;
-+ case DASM_ALIGN:
-+ ofs -= (b[pos++] + ofs) & *p++;
-+ break;
-+ case DASM_REL_LG:
-+ case DASM_REL_PC:
-+ pos++;
-+ break;
-+ case DASM_LABEL_LG:
-+ case DASM_LABEL_PC:
-+ b[pos++] += ofs;
-+ break;
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ pos++;
-+ break;
-+ }
- }
- stop:(void)0;
- }
-- ofs += sec->ofs; /* Next section starts right after current section. */
-+ ofs += sec->ofs; /* Next section starts right after current section. */
- }
-
-- D->codesize = ofs; /* Total size of all code sections */
-+ D->codesize = ofs; /* Total size of all code sections */
- *szp = ofs;
- return DASM_S_OK;
- }
-@@ -396,67 +396,67 @@ int dasm_encode(Dst_DECL, void *buffer)
- while (b != endb) {
- dasm_ActList p = D->actionlist + *b++;
- while (1) {
-- unsigned short ins = *p++;
-- unsigned short action = ins;
-- int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
-- switch (action) {
-- case DASM_STOP:
-- case DASM_SECTION:
-- goto stop;
-- case DASM_ESC:
-- *cp++ = *p++;
-- break;
-- case DASM_REL_EXT:
-- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
-- goto patchrel;
-- case DASM_ALIGN:
-- ins = *p++;
-- /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
-- while ((((char *)cp - base) & ins))
-- *cp++ = 0x0700; /* nop */
-- break;
-- case DASM_REL_LG:
-- CK(n >= 0, UNDEF_LG);
-- case DASM_REL_PC:
-- CK(n >= 0, UNDEF_PC);
-- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
-- patchrel:
-- CK((n & 3) == 0 &&
-- (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
-- ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
-- cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
-- break;
-- case DASM_LABEL_LG:
-- ins &= 2047;
-- if (ins >= 20)
-- D->globals[ins - 10] = (void *)(base + n);
-- break;
-- case DASM_LABEL_PC:
-- break;
-- case DASM_IMM16:
-- *cp++ = n;
-- break;
-- case DASM_IMM32:
-- *cp++ = n >> 16;
-- *cp++ = n;
-- break;
-- case DASM_DISP20:
-- cp[-2] |= n & 0xfff;
-- cp[-1] |= (n >> 4) & 0xff00;
-- break;
-- case DASM_DISP12:
-- cp[-1] |= n & 0xfff;
-- break;
-- default:
-- *cp++ = ins;
-- break;
-- }
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
-+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
-+ switch (action) {
-+ case DASM_STOP:
-+ case DASM_SECTION:
-+ goto stop;
-+ case DASM_ESC:
-+ *cp++ = *p++;
-+ break;
-+ case DASM_REL_EXT:
-+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
-+ goto patchrel;
-+ case DASM_ALIGN:
-+ ins = *p++;
-+ /* TODO: emit 4-byte noprs instead of 2-byte nops where possible. */
-+ while ((((char *)cp - base) & ins))
-+ *cp++ = 0x0700; /* nop */
-+ break;
-+ case DASM_REL_LG:
-+ CK(n >= 0, UNDEF_LG);
-+ case DASM_REL_PC:
-+ CK(n >= 0, UNDEF_PC);
-+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
-+ patchrel:
-+ CK((n & 3) == 0 &&
-+ (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
-+ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
-+ cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
-+ break;
-+ case DASM_LABEL_LG:
-+ ins &= 2047;
-+ if (ins >= 20)
-+ D->globals[ins - 10] = (void *)(base + n);
-+ break;
-+ case DASM_LABEL_PC:
-+ break;
-+ case DASM_IMM16:
-+ *cp++ = n;
-+ break;
-+ case DASM_IMM32:
-+ *cp++ = n >> 16;
-+ *cp++ = n;
-+ break;
-+ case DASM_DISP20:
-+ cp[-2] |= n & 0xfff;
-+ cp[-1] |= (n >> 4) & 0xff00;
-+ break;
-+ case DASM_DISP12:
-+ cp[-1] |= n & 0xfff;
-+ break;
-+ default:
-+ *cp++ = ins;
-+ break;
-+ }
- }
- stop:(void)0;
- }
- }
-
-- if (base + D->codesize != (char *)cp) /* Check for phase errors. */
-+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
- return DASM_S_PHASE;
- return DASM_S_OK;
- }
-@@ -472,9 +472,9 @@ int dasm_getpclabel(Dst_DECL, unsigned int pc)
- if (pos < 0)
- return *DASM_POS2PTR(D, -pos);
- if (pos > 0)
-- return -1; /* Undefined. */
-+ return -1; /* Undefined. */
- }
-- return -2; /* Unused or out of range. */
-+ return -2; /* Unused or out of range. */
- }
-
- #ifdef DASM_CHECKS
-@@ -486,8 +486,8 @@ int dasm_checkstep(Dst_DECL, int secmatch)
- int i;
- for (i = 1; i <= 9; i++) {
- if (D->lglabels[i] > 0) {
-- D->status = DASM_S_UNDEF_LG | i;
-- break;
-+ D->status = DASM_S_UNDEF_LG | i;
-+ break;
- }
- D->lglabels[i] = 0;
- }
---
-2.20.1
-
-
-From c31ed809e6f71f7037e7275fe2684ff723b15e6d Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Mon, 5 Dec 2016 13:46:44 +0530
-Subject: [PATCH 064/247] Update test_z_inst.c
-
-changed code to in the form of a test table
-currently handles RR based , addition, subtraction and multiply test
----
- dynasm/Examples/test_z_inst.c | 84 ++++++++++++++++-------------------
- 1 file changed, 38 insertions(+), 46 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 65ca39a..9c1ae26 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -9,15 +9,26 @@
- |.arch s390x
- |.actionlist actions
-
--/* Instructio modes
-- mode 0 : RR Mode
-- mode 1 : I Mode
--*/
-+typedef struct
-+{
-+ int arg1;
-+ int arg2;
-+ void (*fn)(dasm_State *);
-+ int want;
-+ char *testname;
-+}test_table;
-+
-+test_table test[] = {
-+ {1,2,add,3,"add"},
-+ {10,5 ,sub ,5,"subract"} ,
-+ {2,3,mul,6,"Multiply"}
-+ };
-+
-
- void *jitcode(dasm_State **state);
--void add(dasm_State * , int);
--void sub(dasm_State * , int);
--void mul(dasm_State * , int);
-+void add(dasm_State *);
-+void sub(dasm_State *);
-+void mul(dasm_State *);
-
- void *jitcode(dasm_State **state)
- {
-@@ -32,35 +43,15 @@ void *jitcode(dasm_State **state)
- return (int *)ret;
- }
-
--void add(dasm_State *state , int mode)
-+void add(dasm_State *state)
- {
- dasm_State ** Dst = &state;
-
-- switch(mode)
-- {
-- /* Case RR instruction mode */
-- case 0:
-- {
-- | ar r2,r3
-- | br r14
-- break;
-- }
-- /* Case RIL instruction mode */
-- case 1:
-- {
-- | ar r2,0x16
-- | br r14
-- break;
-- }
-- default:
-- {
-- printf( " Mode not recognised \n ");
-- break;
-- }
-- }
-+ | ar r2,r3
-+ | br r14
- }
-
--void sub(dasm_State *state , int mode)
-+void sub(dasm_State *state)
- {
- dasm_State **Dst = &state;
-
-@@ -68,7 +59,7 @@ void sub(dasm_State *state , int mode)
- | br r14
- }
-
--void mul(dasm_State *state, int mode)
-+void mul(dasm_State *state)
- {
- dasm_State **Dst = &state;
-
-@@ -80,21 +71,22 @@ void main(int argc, char *argv[])
- {
- dasm_State *state;
- dasm_State **Dst = &state;
-- int num1 , num2;
-- int *ret;
-+ int i;
- size_t size;
-
-- int* (*fptr)(int , int) = jitcode(&state);
--
-- num1 = atoi(argv[1]);
-- num2 = atoi(argv[2]);
--
-- dasm_init(&state, 1);
-- dasm_setup(&state, actions);
--
-- /* Call respective test function */
-- add(state , 0);
-+ for(i=0;i<sizeof(test)/sizeof(test[0]);i++)
-+ {
-+ dasm_init(&state, 1);
-+ dasm_setup(&state, actions);
-+ test[i].fn(state);
-+ int (*fptr)(int, int) = jitcode(&state);
-+ int got = fptr(test[i].arg1, test[i].arg2);
-
-- ret = fptr(num1 , num2);
-- printf("Result is %d\n" ,ret);
-+ if (got != test[i].want) {
-+ fprintf(stderr, "test %s failed: want %d, got %d\n", test[i].testname, test[i].want, got);
-+ exit(1);
-+ }
-+ free(fptr);
-+ }
-+ printf("All test passed\n");
- }
---
-2.20.1
-
-
-From d4e9bff973aea8d606a9ea85a6b45149880cd11b Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 15:30:09 -0500
-Subject: [PATCH 065/247] Get DASM_SECTION argument from the correct place.
-
----
- dynasm/dasm_s390x.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 8cd2fc2..e2cd519 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -204,7 +204,7 @@ void dasm_put(Dst_DECL, int start, ...)
- case DASM_STOP:
- goto stop;
- case DASM_SECTION:
-- n = (ins & 255);
-+ n = *p++ & 255;
- CK(n < D->maxsection, RANGE_SEC);
- D->section = &D->sections[n];
- goto stop;
---
-2.20.1
-
-
-From 43378ec3f26e725dfb71490b538d2cb16ee5353a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 2 Dec 2016 15:36:19 -0500
-Subject: [PATCH 066/247] Minor indentation fixes.
-
----
- dynasm/dasm_s390x.lua | 12 ++++++------
- 1 file changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 76d770e..8cc37a9 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -290,7 +290,7 @@ local function is_int20(num)
- end
-
- local function is_int32(num)
-- return -2147483648 <= num and num < 2147483648
-+ return -2147483648 <= num and num < 2147483648
- end
-
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
-@@ -371,11 +371,11 @@ local function parse_mem_by(arg)
- end
-
- local function parse_imm(arg)
-- local imm_val = tonumber(arg,16)
-- if not is_int32(imm_val) then
-- werror("Immediate value out of range: ", imm_val)
-- end
-- return imm_val
-+ local imm_val = tonumber(arg,16)
-+ if not is_int32(imm_val) then
-+ werror("Immediate value out of range: ", imm_val)
-+ end
-+ return imm_val
- end
-
- local function parse_label(label, def)
---
-2.20.1
-
-
-From e8e4faa37d42e320e0d0e0f3eb88175114075bdf Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 5 Dec 2016 13:59:44 -0500
-Subject: [PATCH 067/247] Clean up test file and add script to run tests.
-
-./run.sh will now execute the tests. It is a very simple setup
-currently, and is limited to linux on s390x. Enough to get started
-with.
----
- dynasm/Examples/run.sh | 13 ++++
- dynasm/Examples/test_z_inst.c | 117 ++++++++++++++++------------------
- 2 files changed, 67 insertions(+), 63 deletions(-)
- create mode 100755 dynasm/Examples/run.sh
-
-diff --git a/dynasm/Examples/run.sh b/dynasm/Examples/run.sh
-new file mode 100755
-index 0000000..dbe93b0
---- /dev/null
-+++ b/dynasm/Examples/run.sh
-@@ -0,0 +1,13 @@
-+#!/bin/bash
-+# set -x
-+
-+# run test
-+lua ../dynasm.lua test_z_inst.c | gcc -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
-+./test_z_inst
-+ec=$?
-+
-+# cleanup
-+rm -f ./test_z_inst
-+
-+# exit
-+exit $ec
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 9c1ae26..ed20ea6 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -2,91 +2,82 @@
- #include <stdio.h>
- #include <sys/mman.h>
-
--#include "../dynasm/dasm_proto.h"
--#include "../dynasm/dasm_s390x.h"
-+#include "../dasm_proto.h"
-+#include "../dasm_s390x.h"
-
--//DynASM directives.
-- |.arch s390x
-- |.actionlist actions
-+// DynASM directives.
-+|.arch s390x
-+|.actionlist actions
-
--typedef struct
-+static void add(dasm_State *state)
- {
-+ dasm_State ** Dst = &state;
-+
-+ | ar r2,r3
-+ | br r14
-+}
-+
-+static void sub(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | sr r2,r3
-+ | br r14
-+}
-+
-+static void mul(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | msr r2 , r3
-+ | br r14
-+}
-+
-+typedef struct {
- int arg1;
- int arg2;
- void (*fn)(dasm_State *);
- int want;
-- char *testname;
--}test_table;
-+ const char *testname;
-+} test_table;
-
- test_table test[] = {
-- {1,2,add,3,"add"},
-- {10,5 ,sub ,5,"subract"} ,
-- {2,3,mul,6,"Multiply"}
-- };
--
--
--void *jitcode(dasm_State **state);
--void add(dasm_State *);
--void sub(dasm_State *);
--void mul(dasm_State *);
--
--void *jitcode(dasm_State **state)
-+ { 1, 2, add, 3, "add"},
-+ {10, 5, sub, 5, "sub"},
-+ { 2, 3, mul, 6, "mul"}
-+};
-+
-+static void *jitcode(dasm_State **state, size_t *size)
- {
-- size_t size;
-- int dasm_status = dasm_link(state, &size);
-+ int dasm_status = dasm_link(state, size);
- assert(dasm_status == DASM_S_OK);
-
-- void *ret = (int *)calloc(10,sizeof(int));
-+ void *ret = mmap(0, *size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- dasm_encode(state, ret);
- dasm_free(state);
-
-+ mprotect(ret, *size, PROT_READ | PROT_EXEC);
- return (int *)ret;
- }
-
--void add(dasm_State *state)
-+int main(int argc, char *argv[])
- {
-- dasm_State ** Dst = &state;
--
-- | ar r2,r3
-- | br r14
--}
-+ dasm_State *state;
-
--void sub(dasm_State *state)
--{
-- dasm_State **Dst = &state;
--
-- | sr r2,r3
-- | br r14
--}
-+ for(int i=0; i < sizeof(test)/sizeof(test[0]); i++) {
-+ dasm_init(&state, 1);
-+ dasm_setup(&state, actions);
-+ test[i].fn(state);
-+ size_t size;
-+ int (*fptr)(int, int) = jitcode(&state, &size);
-+ int got = fptr(test[i].arg1, test[i].arg2);
-
--void mul(dasm_State *state)
--{
-- dasm_State **Dst = &state;
--
-- | msr r2 , r3
-- | br r14
--}
--
--void main(int argc, char *argv[])
--{
-- dasm_State *state;
-- dasm_State **Dst = &state;
-- int i;
-- size_t size;
--
-- for(i=0;i<sizeof(test)/sizeof(test[0]);i++)
-- {
-- dasm_init(&state, 1);
-- dasm_setup(&state, actions);
-- test[i].fn(state);
-- int (*fptr)(int, int) = jitcode(&state);
-- int got = fptr(test[i].arg1, test[i].arg2);
--
-- if (got != test[i].want) {
-- fprintf(stderr, "test %s failed: want %d, got %d\n", test[i].testname, test[i].want, got);
-+ if (got != test[i].want) {
-+ fprintf(stderr, "FAIL: test %s: want %d, got %d\n", test[i].testname, test[i].want, got);
- exit(1);
- }
-- free(fptr);
-+ munmap(fptr, size);
- }
-- printf("All test passed\n");
-+ printf("all tests passed\n");
-+ return 0;
- }
---
-2.20.1
-
-
-From 37b29f8f06bb3767b3eef4a7cc478c3e65b50941 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 5 Dec 2016 14:23:35 -0500
-Subject: [PATCH 068/247] Shorten templates by four characters.
-
-We only have 6-byte instructions, so we don't really need the ability
-to encode 8-bytes.
----
- dynasm/dasm_s390x.lua | 1134 ++++++++++++++++++++---------------------
- 1 file changed, 567 insertions(+), 567 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 8cc37a9..6416438 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -435,586 +435,586 @@ end
-
- -- Template strings for s390x instructions.
- map_op = {
--a_2 = "000000005a000000j",
--ar_2 = "0000000000001a00g",
--ay_2 = "0000e3000000005al",
--ag_2 = "0000e30000000008l",
--agr_2 = "00000000b9080000h",
--agf_2 = "0000e30000000018l",
--agfr_2 = "00000000b9180000h",
--axbr_2 = "00000000b34a0000h",
--adbr_2 = "00000000b31a0000h",
--aebr_2 = "00000000b30a0000h",
--ah_2 = "000000004a000000j",
--ahy_2 = "0000e3000000007al",
--afi_2 = "0000c20900000000n",
--agfi_2 = "0000c20800000000n",
--aih_2 = "0000cc0800000000n",
--al_2 = "000000005e000000j",
--alr_2 = "0000000000001e00g",
--aly_2 = "0000e3000000005el",
--alg_2 = "0000e3000000000al",
--algr_2 = "00000000b90a0000h",
--algf_2 = "0000e3000000001al",
--algfr_2 = "00000000b91a0000h",
--alfi_2 = "0000c20b00000000n",
--algfi_2 = "0000c20a00000000n",
--alc_2 = "0000e30000000098l",
--alcr_2 = "00000000b9980000h",
--alcg_2 = "0000e30000000088l",
--alcgr_2 = "00000000b9880000h",
--alsih_2 = "0000cc0a00000000n",
--alsihn_2 = "0000cc0b00000000n",
--axr_2 = "0000000000003600g",
--ad_2 = "000000006a000000j",
--adr_2 = "0000000000002a00g",
--ae_2 = "000000007a000000j",
--aer_2 = "0000000000003a00g",
--aw_2 = "000000006e000000j",
--awr_2 = "0000000000002e00g",
--au_2 = "000000007e000000j",
--aur_2 = "0000000000003e00g",
--n_2 = "0000000054000000j",
--nr_2 = "0000000000001400g",
--ny_2 = "0000e30000000054l",
--ng_2 = "0000e30000000080l",
--ngr_2 = "00000000b9800000h",
--nihf_2 = "0000c00a00000000n",
--nilf_2 = "0000c00b00000000n",
--bal_2 = "0000000045000000j",
--balr_2 = "000000000000500g",
--bas_2 = "000000004d000000j",
--basr_2 = "0000000000000d00g",
--bassm_2 = "0000000000000c00g",
--bsa_2 = "00000000b25a0000h",
--bsm_2 = "0000000000000b00g",
--bakr_2 = "00000000b2400000h",
--bsg_2 = "00000000b2580000h",
--bc_2 = "0000000047000000k",
--bcr_2 = "000000000000700g",
--bct_2 = "0000000046000000j",
--bctr_2 = "000000000000600g",
--bctg_2 = "0000e30000000046l",
--bctgr_2 = "00000000b9460000h",
--bxh_3 = "0000000086000000q",
--bxhg_3 = "0000eb0000000044s",
--bxle_3 = "0000000087000000q",
--bxleg_3 = "0000eb0000000045s",
--brasl_2 = "0000c00500000000o",
--brcl_2 = "0000c00400000000p",
--brcth_2 = "0000cc0600000000o",
--cksm_2 = "00000000b2410000h",
--km_2 = "00000000b92e0000h",
--kmf_2 = "00000000b92a0000h",
--kmc_2 = "00000000b92f0000h",
--kmo_2 = "00000000b92b0000h",
--c_2 = "0000000059000000j",
--cr_2 = "0000000000001900g",
--cy_2 = "0000e30000000059l",
--cg_2 = "0000e30000000020l",
--cgr_2 = "00000000b9200000h",
--cgf_2 = "0000e30000000030l",
--cgfr_2 = "00000000b9300000h",
--cxbr_2 = "00000000b3490000h",
--cxtr_2 = "00000000b3ec0000h",
--cxr_2 = "00000000b3690000h",
--cdbr_2 = "00000000b3190000h",
--cdtr_2 = "00000000b3e40000h",
--cd_2 = "0000000069000000j",
--cdr_2 = "0000000000002900g",
--cebr_2 = "00000000b3090000h",
--ce_2 = "0000000079000000j",
--cer_2 = "0000000000003900g",
--kxbr_2 = "00000000b3480000h",
--kxtr_2 = "00000000b3e80000h",
--kdbr_2 = "00000000b3180000h",
--kdtr_2 = "00000000b3e00000h",
--kebr_2 = "00000000b3080000h",
--cs_3 = "00000000ba000000q",
--csy_3 = "0000eb0000000014s",
--csg_3 = "0000eb0000000030s",
--csp_2 = "00000000b2500000h",
--cspg_2 = "00000000b98a0000h",
--cextr_2 = "00000000b3fc0000h",
--cedtr_2 = "00000000b3f40000h",
--cds_3 = "00000000bb000000q",
--cdsy_3 = "0000eb0000000031s",
--cdsg_3 = "0000eb000000003es",
--ch_2 = "0000000049000000j",
--chy_2 = "0000e30000000079l",
--cgh_2 = "0000e30000000034l",
--chrl_2 = "0000c60500000000o",
--cghrl_2 = "0000c60400000000o",
--chf_2 = "0000e300000000cdl",
--chhr_2 = "00000000b9cd0000h",
--chlr_2 = "00000000b9dd0000h",
--cfi_2 = "0000c20d00000000n",
--cgfi_2 = "0000c20c00000000n",
--cih_2 = "0000cc0d00000000n",
--cl_2 = "0000000055000000j",
--clr_2 = "0000000000001500g",
--cly_2 = "0000e30000000055l",
--clg_2 = "0000e30000000021l",
--clgr_2 = "00000000b9210000h",
--clgf_2 = "0000e30000000031l",
--clgfr_2 = "00000000b9310000h",
--clmh_3 = "0000eb0000000020t",
--clm_3 = "00000000bd000000r",
--clmy_3 = "0000eb0000000021t",
--clhf_2 = "0000e300000000cfl",
--clhhr_2 = "00000000b9cf0000h",
--clhlr_2 = "00000000b9df0000h",
--clfi_2 = "0000c20f00000000n",
--clgfi_2 = "0000c20e00000000n",
--clih_2 = "0000cc0f00000000n",
--clcl_2 = "0000000000000f00g",
--clcle_3 = "00000000a9000000q",
--clclu_3 = "0000eb000000008fs",
--clrl_2 = "0000c60f00000000o",
--clhrl_2 = "0000c60700000000o",
--clgrl_2 = "0000c60a00000000o",
--clghrl_2 = "0000c60600000000o",
--clgfrl_2 = "0000c60e00000000o",
--clst_2 = "00000000b25d0000h",
--crl_2 = "0000c60d00000000o",
--cgrl_2 = "0000c60800000000o",
--cgfrl_2 = "0000c60c00000000o",
--cuse_2 = "00000000b2570000h",
--cmpsc_2 = "00000000b2630000h",
--kimd_2 = "00000000b93e0000h",
--klmd_2 = "00000000b93f0000h",
--kmac_2 = "00000000b91e0000h",
--thdr_2 = "00000000b3590000h",
--thder_2 = "00000000b3580000h",
--cxfbr_2 = "00000000b3960000h",
--cxftr_2 = "00000000b9590000h",
--cxfr_2 = "00000000b3b60000h",
--cdfbr_2 = "00000000b3950000h",
--cdftr_2 = "00000000b9510000h",
--cdfr_2 = "00000000b3b50000h",
--cefbr_2 = "00000000b3940000h",
--cefr_2 = "00000000b3b40000h",
--cxgbr_2 = "00000000b3a60000h",
--cxgtr_2 = "00000000b3f90000h",
--cxgr_2 = "00000000b3c60000h",
--cdgbr_2 = "00000000b3a50000h",
--cdgtr_2 = "00000000b3f10000h",
--cdgr_2 = "00000000b3c50000h",
--cegbr_2 = "00000000b3a40000h",
--cegr_2 = "00000000b3c40000h",
--cxstr_2 = "00000000b3fb0000h",
--cdstr_2 = "00000000b3f30000h",
--cxutr_2 = "00000000b3fa0000h",
--cdutr_2 = "00000000b3f20000h",
--cvb_2 = "000000004f000000j",
--cvby_2 = "0000e30000000006l",
--cvbg_2 = "0000e3000000000el",
--cvd_2 = "000000004e000000j",
--cvdy_2 = "0000e30000000026l",
--cvdg_2 = "0000e3000000002el",
--cuxtr_2 = "00000000b3ea0000h",
--cudtr_2 = "00000000b3e20000h",
--cu42_2 = "00000000b9b30000h",
--cu41_2 = "00000000b9b20000h",
--cpya_2 = "00000000b24d0000h",
--d_2 = "000000005d000000j",
--dr_2 = "0000000000001d00g",
--dxbr_2 = "00000000b34d0000h",
--dxr_2 = "00000000b22d0000h",
--ddbr_2 = "00000000b31d0000h",
--dd_2 = "000000006d000000j",
--ddr_2 = "0000000000002d00g",
--debr_2 = "00000000b30d0000h",
--de_2 = "000000007d000000j",
--der_2 = "0000000000003d00g",
--dl_2 = "0000e30000000097l",
--dlr_2 = "00000000b9970000h",
--dlg_2 = "0000e30000000087l",
--dlgr_2 = "00000000b9870000h",
--dsg_2 = "0000e3000000000dl",
--dsgr_2 = "00000000b90d0000h",
--dsgf_2 = "0000e3000000001dl",
--dsgfr_2 = "00000000b91d0000h",
--x_2 = "0000000057000000j",
--xr_2 = "0000000000001700g",
--xy_2 = "0000e30000000057l",
--xg_2 = "0000e30000000082l",
--xgr_2 = "00000000b9820000h",
--xihf_2 = "0000c00600000000n",
--xilf_2 = "0000c00700000000n",
--ex_2 = "0000000044000000j",
--exrl_2 = "0000c60000000000o",
--ear_2 = "00000000b24f0000h",
--esea_2 = "00000000b99d0000h",
--eextr_2 = "00000000b3ed0000h",
--eedtr_2 = "00000000b3e50000h",
--ecag_3 = "0000eb000000004cs",
--efpc_2 = "00000000b38c0000h",
--epar_2 = "00000000b2260000h",
--epair_2 = "00000000b99a0000h",
--epsw_2 = "00000000b98d0000h",
--esar_2 = "00000000b2270000h",
--esair_2 = "00000000b99b0000h",
--esxtr_2 = "00000000b3ef0000h",
--esdtr_2 = "00000000b3e70000h",
--ereg_2 = "00000000b2490000h",
--eregg_2 = "00000000b90e0000h",
--esta_2 = "00000000b24a0000h",
--flogr_2 = "00000000b9830000h",
--hdr_2 = "0000000000002400g",
--her_2 = "0000000000003400g",
--iac_2 = "00000000b2240000h",
--ic_2 = "0000000043000000j",
--icy_2 = "0000e30000000073l",
--icmh_3 = "0000eb0000000080t",
--icm_3 = "00000000bf000000r",
--icmy_3 = "0000eb0000000081t",
--iihf_2 = "0000c00800000000n",
--iilf_2 = "0000c00900000000n",
--ipm_2 = "00000000b2220000h",
--iske_2 = "00000000b2290000h",
--ivsk_2 = "00000000b2230000h",
--l_2 = "0000000058000000j",
--lr_2 = "0000000000001800g",
--ly_2 = "0000e30000000058l",
--lg_2 = "0000e30000000004l",
--lgr_2 = "00000000b9040000h",
--lgf_2 = "0000e30000000014l",
--lgfr_2 = "00000000b9140000h",
--lxr_2 = "00000000b3650000h",
--ld_2 = "0000000068000000j",
--ldr_2 = "0000000000002800g",
--ldy_2 = "0000ed0000000065l",
--le_2 = "0000000078000000j",
--ler_2 = "0000000000003800g",
--ley_2 = "0000ed0000000064l",
--lam_3 = "000000009a000000q",
--lamy_3 = "0000eb000000009as",
--la_2 = "0000000041000000j",
--lay_2 = "0000e30000000071l",
--lae_2 = "0000000051000000j",
--laey_2 = "0000e30000000075l",
--larl_2 = "0000c00000000000o",
--laa_3 = "0000eb00000000f8s",
--laag_3 = "0000eb00000000e8s",
--laal_3 = "0000eb00000000fas",
--laalg_3 = "0000eb00000000eas",
--lan_3 = "0000eb00000000f4s",
--lang_3 = "0000eb00000000e4s",
--lax_3 = "0000eb00000000f7s",
--laxg_3 = "0000eb00000000e7s",
--lao_3 = "0000eb00000000f6s",
--laog_3 = "0000eb00000000e6s",
--lt_2 = "0000e30000000012l",
--ltr_2 = "0000000000001200g",
--ltg_2 = "0000e30000000002l",
--ltgr_2 = "00000000b9020000h",
--ltgf_2 = "0000e30000000032l",
--ltgfr_2 = "00000000b9120000h",
--ltxbr_2 = "00000000b3420000h",
--ltxtr_2 = "00000000b3de0000h",
--ltxr_2 = "00000000b3620000h",
--ltdbr_2 = "00000000b3120000h",
--ltdtr_2 = "00000000b3d60000h",
--ltdr_2 = "0000000000002200g",
--ltebr_2 = "00000000b3020000h",
--lter_2 = "0000000000003200g",
--lb_2 = "0000e30000000076l",
--lbr_2 = "00000000b9260000h",
--lgb_2 = "0000e30000000077l",
--lgbr_2 = "00000000b9060000h",
--lbh_2 = "0000e300000000c0l",
--lcr_2 = "0000000000001300g",
--lcgr_2 = "00000000b9030000h",
--lcgfr_2 = "00000000b9130000h",
--lcxbr_2 = "00000000b3430000h",
--lcxr_2 = "00000000b3630000h",
--lcdbr_2 = "00000000b3130000h",
--lcdr_2 = "0000000000002300g",
--lcdfr_2 = "00000000b3730000h",
--lcebr_2 = "00000000b3030000h",
--lcer_2 = "0000000000003300g",
--lctl_3 = "00000000b7000000q",
--lctlg_3 = "0000eb000000002fs",
--fixr_2 = "00000000b3670000h",
--fidr_2 = "00000000b37f0000h",
--fier_2 = "00000000b3770000h",
--ldgr_2 = "00000000b3c10000h",
--lgdr_2 = "00000000b3cd0000h",
--lh_2 = "0000000048000000j",
--lhr_2 = "00000000b9270000h",
--lhy_2 = "0000e30000000078l",
--lgh_2 = "0000e30000000015l",
--lghr_2 = "00000000b9070000h",
--lhh_2 = "0000e300000000c4l",
--lhrl_2 = "0000c40500000000o",
--lghrl_2 = "0000c40400000000o",
--lfh_2 = "0000e300000000cal",
--lgfi_2 = "0000c00100000000n",
--lxdbr_2 = "00000000b3050000h",
--lxdr_2 = "00000000b3250000h",
--lxebr_2 = "00000000b3060000h",
--lxer_2 = "00000000b3260000h",
--ldebr_2 = "00000000b3040000h",
--lder_2 = "00000000b3240000h",
--llgf_2 = "0000e30000000016l",
--llgfr_2 = "00000000b9160000h",
--llc_2 = "0000e30000000094l",
--llcr_2 = "00000000b9940000h",
--llgc_2 = "0000e30000000090l",
--llgcr_2 = "00000000b9840000h",
--llch_2 = "0000e300000000c2l",
--llh_2 = "0000e30000000095l",
--llhr_2 = "00000000b9950000h",
--llgh_2 = "0000e30000000091l",
--llghr_2 = "00000000b9850000h",
--llhh_2 = "0000e300000000c6l",
--llhrl_2 = "0000c40200000000o",
--llghrl_2 = "0000c40600000000o",
--llihf_2 = "0000c00e00000000n",
--llilf_2 = "0000c00f00000000n",
--llgfrl_2 = "0000c40e00000000o",
--llgt_2 = "0000e30000000017l",
--llgtr_2 = "00000000b9170000h",
--lm_3 = "0000000098000000q",
--lmy_3 = "0000eb0000000098s",
--lmg_3 = "0000eb0000000004s",
--lmh_3 = "0000eb0000000096s",
--lnr_2 = "0000000000001100g",
--lngr_2 = "00000000b9010000h",
--lngfr_2 = "00000000b9110000h",
--lnxbr_2 = "00000000b3410000h",
--lnxr_2 = "00000000b3610000h",
--lndbr_2 = "00000000b3110000h",
--lndr_2 = "0000000000002100g",
--lndfr_2 = "00000000b3710000h",
--lnebr_2 = "00000000b3010000h",
--lner_2 = "0000000000003100g",
--loc_3 = "0000eb00000000f2t",
--locg_3 = "0000eb00000000e2t",
--lpq_2 = "0000e3000000008fl",
--lpr_2 = "0000000000001000g",
--lpgr_2 = "00000000b9000000h",
--lpgfr_2 = "00000000b9100000h",
--lpxbr_2 = "00000000b3400000h",
--lpxr_2 = "00000000b3600000h",
--lpdbr_2 = "00000000b3100000h",
--lpdr_2 = "0000000000002000g",
--lpdfr_2 = "00000000b3700000h",
--lpebr_2 = "00000000b3000000h",
--lper_2 = "0000000000003000g",
--lra_2 = "00000000b1000000j",
--lray_2 = "0000e30000000013l",
--lrag_2 = "0000e30000000003l",
--lrl_2 = "0000c40d00000000o",
--lgrl_2 = "0000c40800000000o",
--lgfrl_2 = "0000c40c00000000o",
--lrvh_2 = "0000e3000000001fl",
--lrv_2 = "0000e3000000001el",
--lrvr_2 = "00000000b91f0000h",
--lrvg_2 = "0000e3000000000fl",
--lrvgr_2 = "00000000b90f0000h",
--ldxbr_2 = "00000000b3450000h",
--ldxr_2 = "0000000000002500g",
--lrdr_2 = "0000000000002500g",
--lexbr_2 = "00000000b3460000h",
--lexr_2 = "00000000b3660000h",
--ledbr_2 = "00000000b3440000h",
--ledr_2 = "0000000000003500g",
--lrer_2 = "0000000000003500g",
--lura_2 = "00000000b24b0000h",
--lurag_2 = "00000000b9050000h",
--lzxr_2 = "00000000b3760000h",
--lzdr_2 = "00000000b3750000h",
--lzer_2 = "00000000b3740000h",
--msta_2 = "00000000b2470000h",
--mvcl_2 = "0000000000000e00g",
--mvcle_3 = "00000000a8000000q",
--mvclu_3 = "0000eb000000008es",
--mvpg_2 = "00000000b2540000h",
--mvst_2 = "00000000b2550000h",
--m_2 = "000000005c000000j",
--mfy_2 = "0000e3000000005cl",
--mr_2 = "0000000000001c00g",
--mxbr_2 = "00000000b34c0000h",
--mxr_2 = "0000000000002600g",
--mdbr_2 = "00000000b31c0000h",
--md_2 = "000000006c000000j",
--mdr_2 = "0000000000002c00g",
--mxdbr_2 = "00000000b3070000h",
--mxd_2 = "0000000067000000j",
--mxdr_2 = "0000000000002700g",
--meebr_2 = "00000000b3170000h",
--meer_2 = "00000000b3370000h",
--mdebr_2 = "00000000b30c0000h",
--mde_2 = "000000007c000000j",
--mder_2 = "0000000000003c00g",
--me_2 = "000000007c000000j",
--mer_2 = "0000000000003c00g",
--mh_2 = "000000004c000000j",
--mhy_2 = "0000e3000000007cl",
--mlg_2 = "0000e30000000086l",
--mlgr_2 = "00000000b9860000h",
--ml_2 = "0000e30000000096l",
--mlr_2 = "00000000b9960000h",
--ms_2 = "0000000071000000j",
--msr_2 = "00000000b2520000h",
--msy_2 = "0000e30000000051l",
--msg_2 = "0000e3000000000cl",
--msgr_2 = "00000000b90c0000h",
--msgf_2 = "0000e3000000001cl",
--msgfr_2 = "00000000b91c0000h",
--msfi_2 = "0000c20100000000n",
--msgfi_2 = "0000c20000000000n",
--o_2 = "0000000056000000j",
--or_2 = "0000000000001600g",
--oy_2 = "0000e30000000056l",
--og_2 = "0000e30000000081l",
--ogr_2 = "00000000b9810000h",
--oihf_2 = "0000c00c00000000n",
--oilf_2 = "0000c00d00000000n",
--pgin_2 = "00000000b22e0000h",
--pgout_2 = "00000000b22f0000h",
--pcc_2 = "00000000b92c0000h",
--pckmo_2 = "00000000b9280000h",
--pfmf_2 = "00000000b9af0000h",
--ptf_2 = "00000000b9a20000h",
--popcnt_2 = "00000000b9e10000h",
--pfd_2 = "0000e30000000036m",
--pfdrl_2 = "0000c60200000000p",
--pt_2 = "00000000b2280000h",
--pti_2 = "00000000b99e0000h",
--palb_2 = "00000000b2480000h",
--rrbe_2 = "00000000b22a0000h",
--rrbm_2 = "00000000b9ae0000h",
--rll_3 = "0000eb000000001ds",
--rllg_3 = "0000eb000000001cs",
--srst_2 = "00000000b25e0000h",
--srstu_2 = "00000000b9be0000h",
--sar_2 = "00000000b24e0000h",
--sfpc_2 = "00000000b3840000h",
--sfasr_2 = "00000000b3850000h",
--spm_2 = "000000000000400g",
--ssar_2 = "00000000b2250000h",
--ssair_2 = "00000000b99f0000h",
--slda_3 = "000000008f000000q",
--sldl_3 = "000000008d000000q",
--sla_3 = "000000008b000000q",
--slak_3 = "0000eb00000000dds",
--slag_3 = "0000eb000000000bs",
--sll_3 = "0000000089000000q",
--sllk_3 = "0000eb00000000dfs",
--sllg_3 = "0000eb000000000ds",
--srda_3 = "000000008e000000q",
--srdl_3 = "000000008c000000q",
--sra_3 = "000000008a000000q",
--srak_3 = "0000eb00000000dcs",
--srag_3 = "0000eb000000000as",
--srl_3 = "0000000088000000q",
--srlk_3 = "0000eb00000000des",
--srlg_3 = "0000eb000000000cs",
--sqxbr_2 = "00000000b3160000h",
--sqxr_2 = "00000000b3360000h",
--sqdbr_2 = "00000000b3150000h",
--sqdr_2 = "00000000b2440000h",
--sqebr_2 = "00000000b3140000h",
--sqer_2 = "00000000b2450000h",
--st_2 = "0000000050000000j",
--sty_2 = "0000e30000000050l",
--stg_2 = "0000e30000000024l",
--std_2 = "0000000060000000j",
--stdy_2 = "0000ed0000000067l",
--ste_2 = "0000000070000000j",
--stey_2 = "0000ed0000000066l",
--stam_3 = "000000009b000000q",
--stamy_3 = "0000eb000000009bs",
--stc_2 = "0000000042000000j",
--stcy_2 = "0000e30000000072l",
--stch_2 = "0000e300000000c3l",
--stcmh_3 = "0000eb000000002ct",
--stcm_3 = "00000000be000000r",
--stcmy_3 = "0000eb000000002dt",
--stctl_3 = "00000000b6000000q",
--stctg_3 = "0000eb0000000025s",
--sth_2 = "0000000040000000j",
--sthy_2 = "0000e30000000070l",
--sthh_2 = "0000e300000000c7l",
--sthrl_2 = "0000c40700000000o",
--stfh_2 = "0000e300000000cbl",
--stm_3 = "0000000090000000q",
--stmy_3 = "0000eb0000000090s",
--stmg_3 = "0000eb0000000024s",
--stmh_3 = "0000eb0000000026s",
--stoc_3 = "0000eb00000000f3t",
--stocg_3 = "0000eb00000000e3t",
--stpq_2 = "0000e3000000008el",
--strl_2 = "0000c40f00000000o",
--stgrl_2 = "0000c40b00000000o",
--strvh_2 = "0000e3000000003fl",
--strv_2 = "0000e3000000003el",
--strvg_2 = "0000e3000000002fl",
--stura_2 = "00000000b2460000h",
--sturg_2 = "00000000b9250000h",
--s_2 = "000000005b000000j",
--sr_2 = "0000000000001b00g",
--sy_2 = "0000e3000000005bl",
--sg_2 = "0000e30000000009l",
--sgr_2 = "00000000b9090000h",
--sgf_2 = "0000e30000000019l",
--sgfr_2 = "00000000b9190000h",
--sxbr_2 = "00000000b34b0000h",
--sdbr_2 = "00000000b31b0000h",
--sebr_2 = "00000000b30b0000h",
--sh_2 = "000000004b000000j",
--shy_2 = "0000e3000000007bl",
--sl_2 = "000000005f000000j",
--slr_2 = "0000000000001f00g",
--sly_2 = "0000e3000000005fl",
--slg_2 = "0000e3000000000bl",
--slgr_2 = "00000000b90b0000h",
--slgf_2 = "0000e3000000001bl",
--slgfr_2 = "00000000b91b0000h",
--slfi_2 = "0000c20500000000n",
--slgfi_2 = "0000c20400000000n",
--slb_2 = "0000e30000000099l",
--slbr_2 = "00000000b9990000h",
--slbg_2 = "0000e30000000089l",
--slbgr_2 = "00000000b9890000h",
--sxr_2 = "0000000000003700g",
--sd_2 = "000000006b000000j",
--sdr_2 = "0000000000002b00g",
--se_2 = "000000007b000000j",
--ser_2 = "0000000000003b00g",
--su_2 = "000000007f000000j",
--sur_2 = "0000000000003f00g",
--sw_2 = "000000006f000000j",
--swr_2 = "0000000000002f00g",
--tar_2 = "00000000b24c0000h",
--tb_2 = "00000000b22c0000h",
--trace_3 = "0000000099000000q",
--tracg_3 = "0000eb000000000fs",
--tre_2 = "00000000b2a50000h",
-+ a_2 = "00005a000000j",
-+ ar_2 = "000000001a00g",
-+ ay_2 = "e3000000005al",
-+ ag_2 = "e30000000008l",
-+ agr_2 = "0000b9080000h",
-+ agf_2 = "e30000000018l",
-+ agfr_2 = "0000b9180000h",
-+ axbr_2 = "0000b34a0000h",
-+ adbr_2 = "0000b31a0000h",
-+ aebr_2 = "0000b30a0000h",
-+ ah_2 = "00004a000000j",
-+ ahy_2 = "e3000000007al",
-+ afi_2 = "c20900000000n",
-+ agfi_2 = "c20800000000n",
-+ aih_2 = "cc0800000000n",
-+ al_2 = "00005e000000j",
-+ alr_2 = "000000001e00g",
-+ aly_2 = "e3000000005el",
-+ alg_2 = "e3000000000al",
-+ algr_2 = "0000b90a0000h",
-+ algf_2 = "e3000000001al",
-+ algfr_2 = "0000b91a0000h",
-+ alfi_2 = "c20b00000000n",
-+ algfi_2 = "c20a00000000n",
-+ alc_2 = "e30000000098l",
-+ alcr_2 = "0000b9980000h",
-+ alcg_2 = "e30000000088l",
-+ alcgr_2 = "0000b9880000h",
-+ alsih_2 = "cc0a00000000n",
-+ alsihn_2 = "cc0b00000000n",
-+ axr_2 = "000000003600g",
-+ ad_2 = "00006a000000j",
-+ adr_2 = "000000002a00g",
-+ ae_2 = "00007a000000j",
-+ aer_2 = "000000003a00g",
-+ aw_2 = "00006e000000j",
-+ awr_2 = "000000002e00g",
-+ au_2 = "00007e000000j",
-+ aur_2 = "000000003e00g",
-+ n_2 = "000054000000j",
-+ nr_2 = "000000001400g",
-+ ny_2 = "e30000000054l",
-+ ng_2 = "e30000000080l",
-+ ngr_2 = "0000b9800000h",
-+ nihf_2 = "c00a00000000n",
-+ nilf_2 = "c00b00000000n",
-+ bal_2 = "000045000000j",
-+ balr_2 = "00000000500g",
-+ bas_2 = "00004d000000j",
-+ basr_2 = "000000000d00g",
-+ bassm_2 = "000000000c00g",
-+ bsa_2 = "0000b25a0000h",
-+ bsm_2 = "000000000b00g",
-+ bakr_2 = "0000b2400000h",
-+ bsg_2 = "0000b2580000h",
-+ bc_2 = "000047000000k",
-+ bcr_2 = "00000000700g",
-+ bct_2 = "000046000000j",
-+ bctr_2 = "00000000600g",
-+ bctg_2 = "e30000000046l",
-+ bctgr_2 = "0000b9460000h",
-+ bxh_3 = "000086000000q",
-+ bxhg_3 = "eb0000000044s",
-+ bxle_3 = "000087000000q",
-+ bxleg_3 = "eb0000000045s",
-+ brasl_2 = "c00500000000o",
-+ brcl_2 = "c00400000000p",
-+ brcth_2 = "cc0600000000o",
-+ cksm_2 = "0000b2410000h",
-+ km_2 = "0000b92e0000h",
-+ kmf_2 = "0000b92a0000h",
-+ kmc_2 = "0000b92f0000h",
-+ kmo_2 = "0000b92b0000h",
-+ c_2 = "000059000000j",
-+ cr_2 = "000000001900g",
-+ cy_2 = "e30000000059l",
-+ cg_2 = "e30000000020l",
-+ cgr_2 = "0000b9200000h",
-+ cgf_2 = "e30000000030l",
-+ cgfr_2 = "0000b9300000h",
-+ cxbr_2 = "0000b3490000h",
-+ cxtr_2 = "0000b3ec0000h",
-+ cxr_2 = "0000b3690000h",
-+ cdbr_2 = "0000b3190000h",
-+ cdtr_2 = "0000b3e40000h",
-+ cd_2 = "000069000000j",
-+ cdr_2 = "000000002900g",
-+ cebr_2 = "0000b3090000h",
-+ ce_2 = "000079000000j",
-+ cer_2 = "000000003900g",
-+ kxbr_2 = "0000b3480000h",
-+ kxtr_2 = "0000b3e80000h",
-+ kdbr_2 = "0000b3180000h",
-+ kdtr_2 = "0000b3e00000h",
-+ kebr_2 = "0000b3080000h",
-+ cs_3 = "0000ba000000q",
-+ csy_3 = "eb0000000014s",
-+ csg_3 = "eb0000000030s",
-+ csp_2 = "0000b2500000h",
-+ cspg_2 = "0000b98a0000h",
-+ cextr_2 = "0000b3fc0000h",
-+ cedtr_2 = "0000b3f40000h",
-+ cds_3 = "0000bb000000q",
-+ cdsy_3 = "eb0000000031s",
-+ cdsg_3 = "eb000000003es",
-+ ch_2 = "000049000000j",
-+ chy_2 = "e30000000079l",
-+ cgh_2 = "e30000000034l",
-+ chrl_2 = "c60500000000o",
-+ cghrl_2 = "c60400000000o",
-+ chf_2 = "e300000000cdl",
-+ chhr_2 = "0000b9cd0000h",
-+ chlr_2 = "0000b9dd0000h",
-+ cfi_2 = "c20d00000000n",
-+ cgfi_2 = "c20c00000000n",
-+ cih_2 = "cc0d00000000n",
-+ cl_2 = "000055000000j",
-+ clr_2 = "000000001500g",
-+ cly_2 = "e30000000055l",
-+ clg_2 = "e30000000021l",
-+ clgr_2 = "0000b9210000h",
-+ clgf_2 = "e30000000031l",
-+ clgfr_2 = "0000b9310000h",
-+ clmh_3 = "eb0000000020t",
-+ clm_3 = "0000bd000000r",
-+ clmy_3 = "eb0000000021t",
-+ clhf_2 = "e300000000cfl",
-+ clhhr_2 = "0000b9cf0000h",
-+ clhlr_2 = "0000b9df0000h",
-+ clfi_2 = "c20f00000000n",
-+ clgfi_2 = "c20e00000000n",
-+ clih_2 = "cc0f00000000n",
-+ clcl_2 = "000000000f00g",
-+ clcle_3 = "0000a9000000q",
-+ clclu_3 = "eb000000008fs",
-+ clrl_2 = "c60f00000000o",
-+ clhrl_2 = "c60700000000o",
-+ clgrl_2 = "c60a00000000o",
-+ clghrl_2 = "c60600000000o",
-+ clgfrl_2 = "c60e00000000o",
-+ clst_2 = "0000b25d0000h",
-+ crl_2 = "c60d00000000o",
-+ cgrl_2 = "c60800000000o",
-+ cgfrl_2 = "c60c00000000o",
-+ cuse_2 = "0000b2570000h",
-+ cmpsc_2 = "0000b2630000h",
-+ kimd_2 = "0000b93e0000h",
-+ klmd_2 = "0000b93f0000h",
-+ kmac_2 = "0000b91e0000h",
-+ thdr_2 = "0000b3590000h",
-+ thder_2 = "0000b3580000h",
-+ cxfbr_2 = "0000b3960000h",
-+ cxftr_2 = "0000b9590000h",
-+ cxfr_2 = "0000b3b60000h",
-+ cdfbr_2 = "0000b3950000h",
-+ cdftr_2 = "0000b9510000h",
-+ cdfr_2 = "0000b3b50000h",
-+ cefbr_2 = "0000b3940000h",
-+ cefr_2 = "0000b3b40000h",
-+ cxgbr_2 = "0000b3a60000h",
-+ cxgtr_2 = "0000b3f90000h",
-+ cxgr_2 = "0000b3c60000h",
-+ cdgbr_2 = "0000b3a50000h",
-+ cdgtr_2 = "0000b3f10000h",
-+ cdgr_2 = "0000b3c50000h",
-+ cegbr_2 = "0000b3a40000h",
-+ cegr_2 = "0000b3c40000h",
-+ cxstr_2 = "0000b3fb0000h",
-+ cdstr_2 = "0000b3f30000h",
-+ cxutr_2 = "0000b3fa0000h",
-+ cdutr_2 = "0000b3f20000h",
-+ cvb_2 = "00004f000000j",
-+ cvby_2 = "e30000000006l",
-+ cvbg_2 = "e3000000000el",
-+ cvd_2 = "00004e000000j",
-+ cvdy_2 = "e30000000026l",
-+ cvdg_2 = "e3000000002el",
-+ cuxtr_2 = "0000b3ea0000h",
-+ cudtr_2 = "0000b3e20000h",
-+ cu42_2 = "0000b9b30000h",
-+ cu41_2 = "0000b9b20000h",
-+ cpya_2 = "0000b24d0000h",
-+ d_2 = "00005d000000j",
-+ dr_2 = "000000001d00g",
-+ dxbr_2 = "0000b34d0000h",
-+ dxr_2 = "0000b22d0000h",
-+ ddbr_2 = "0000b31d0000h",
-+ dd_2 = "00006d000000j",
-+ ddr_2 = "000000002d00g",
-+ debr_2 = "0000b30d0000h",
-+ de_2 = "00007d000000j",
-+ der_2 = "000000003d00g",
-+ dl_2 = "e30000000097l",
-+ dlr_2 = "0000b9970000h",
-+ dlg_2 = "e30000000087l",
-+ dlgr_2 = "0000b9870000h",
-+ dsg_2 = "e3000000000dl",
-+ dsgr_2 = "0000b90d0000h",
-+ dsgf_2 = "e3000000001dl",
-+ dsgfr_2 = "0000b91d0000h",
-+ x_2 = "000057000000j",
-+ xr_2 = "000000001700g",
-+ xy_2 = "e30000000057l",
-+ xg_2 = "e30000000082l",
-+ xgr_2 = "0000b9820000h",
-+ xihf_2 = "c00600000000n",
-+ xilf_2 = "c00700000000n",
-+ ex_2 = "000044000000j",
-+ exrl_2 = "c60000000000o",
-+ ear_2 = "0000b24f0000h",
-+ esea_2 = "0000b99d0000h",
-+ eextr_2 = "0000b3ed0000h",
-+ eedtr_2 = "0000b3e50000h",
-+ ecag_3 = "eb000000004cs",
-+ efpc_2 = "0000b38c0000h",
-+ epar_2 = "0000b2260000h",
-+ epair_2 = "0000b99a0000h",
-+ epsw_2 = "0000b98d0000h",
-+ esar_2 = "0000b2270000h",
-+ esair_2 = "0000b99b0000h",
-+ esxtr_2 = "0000b3ef0000h",
-+ esdtr_2 = "0000b3e70000h",
-+ ereg_2 = "0000b2490000h",
-+ eregg_2 = "0000b90e0000h",
-+ esta_2 = "0000b24a0000h",
-+ flogr_2 = "0000b9830000h",
-+ hdr_2 = "000000002400g",
-+ her_2 = "000000003400g",
-+ iac_2 = "0000b2240000h",
-+ ic_2 = "000043000000j",
-+ icy_2 = "e30000000073l",
-+ icmh_3 = "eb0000000080t",
-+ icm_3 = "0000bf000000r",
-+ icmy_3 = "eb0000000081t",
-+ iihf_2 = "c00800000000n",
-+ iilf_2 = "c00900000000n",
-+ ipm_2 = "0000b2220000h",
-+ iske_2 = "0000b2290000h",
-+ ivsk_2 = "0000b2230000h",
-+ l_2 = "000058000000j",
-+ lr_2 = "000000001800g",
-+ ly_2 = "e30000000058l",
-+ lg_2 = "e30000000004l",
-+ lgr_2 = "0000b9040000h",
-+ lgf_2 = "e30000000014l",
-+ lgfr_2 = "0000b9140000h",
-+ lxr_2 = "0000b3650000h",
-+ ld_2 = "000068000000j",
-+ ldr_2 = "000000002800g",
-+ ldy_2 = "ed0000000065l",
-+ le_2 = "000078000000j",
-+ ler_2 = "000000003800g",
-+ ley_2 = "ed0000000064l",
-+ lam_3 = "00009a000000q",
-+ lamy_3 = "eb000000009as",
-+ la_2 = "000041000000j",
-+ lay_2 = "e30000000071l",
-+ lae_2 = "000051000000j",
-+ laey_2 = "e30000000075l",
-+ larl_2 = "c00000000000o",
-+ laa_3 = "eb00000000f8s",
-+ laag_3 = "eb00000000e8s",
-+ laal_3 = "eb00000000fas",
-+ laalg_3 = "eb00000000eas",
-+ lan_3 = "eb00000000f4s",
-+ lang_3 = "eb00000000e4s",
-+ lax_3 = "eb00000000f7s",
-+ laxg_3 = "eb00000000e7s",
-+ lao_3 = "eb00000000f6s",
-+ laog_3 = "eb00000000e6s",
-+ lt_2 = "e30000000012l",
-+ ltr_2 = "000000001200g",
-+ ltg_2 = "e30000000002l",
-+ ltgr_2 = "0000b9020000h",
-+ ltgf_2 = "e30000000032l",
-+ ltgfr_2 = "0000b9120000h",
-+ ltxbr_2 = "0000b3420000h",
-+ ltxtr_2 = "0000b3de0000h",
-+ ltxr_2 = "0000b3620000h",
-+ ltdbr_2 = "0000b3120000h",
-+ ltdtr_2 = "0000b3d60000h",
-+ ltdr_2 = "000000002200g",
-+ ltebr_2 = "0000b3020000h",
-+ lter_2 = "000000003200g",
-+ lb_2 = "e30000000076l",
-+ lbr_2 = "0000b9260000h",
-+ lgb_2 = "e30000000077l",
-+ lgbr_2 = "0000b9060000h",
-+ lbh_2 = "e300000000c0l",
-+ lcr_2 = "000000001300g",
-+ lcgr_2 = "0000b9030000h",
-+ lcgfr_2 = "0000b9130000h",
-+ lcxbr_2 = "0000b3430000h",
-+ lcxr_2 = "0000b3630000h",
-+ lcdbr_2 = "0000b3130000h",
-+ lcdr_2 = "000000002300g",
-+ lcdfr_2 = "0000b3730000h",
-+ lcebr_2 = "0000b3030000h",
-+ lcer_2 = "000000003300g",
-+ lctl_3 = "0000b7000000q",
-+ lctlg_3 = "eb000000002fs",
-+ fixr_2 = "0000b3670000h",
-+ fidr_2 = "0000b37f0000h",
-+ fier_2 = "0000b3770000h",
-+ ldgr_2 = "0000b3c10000h",
-+ lgdr_2 = "0000b3cd0000h",
-+ lh_2 = "000048000000j",
-+ lhr_2 = "0000b9270000h",
-+ lhy_2 = "e30000000078l",
-+ lgh_2 = "e30000000015l",
-+ lghr_2 = "0000b9070000h",
-+ lhh_2 = "e300000000c4l",
-+ lhrl_2 = "c40500000000o",
-+ lghrl_2 = "c40400000000o",
-+ lfh_2 = "e300000000cal",
-+ lgfi_2 = "c00100000000n",
-+ lxdbr_2 = "0000b3050000h",
-+ lxdr_2 = "0000b3250000h",
-+ lxebr_2 = "0000b3060000h",
-+ lxer_2 = "0000b3260000h",
-+ ldebr_2 = "0000b3040000h",
-+ lder_2 = "0000b3240000h",
-+ llgf_2 = "e30000000016l",
-+ llgfr_2 = "0000b9160000h",
-+ llc_2 = "e30000000094l",
-+ llcr_2 = "0000b9940000h",
-+ llgc_2 = "e30000000090l",
-+ llgcr_2 = "0000b9840000h",
-+ llch_2 = "e300000000c2l",
-+ llh_2 = "e30000000095l",
-+ llhr_2 = "0000b9950000h",
-+ llgh_2 = "e30000000091l",
-+ llghr_2 = "0000b9850000h",
-+ llhh_2 = "e300000000c6l",
-+ llhrl_2 = "c40200000000o",
-+ llghrl_2 = "c40600000000o",
-+ llihf_2 = "c00e00000000n",
-+ llilf_2 = "c00f00000000n",
-+ llgfrl_2 = "c40e00000000o",
-+ llgt_2 = "e30000000017l",
-+ llgtr_2 = "0000b9170000h",
-+ lm_3 = "000098000000q",
-+ lmy_3 = "eb0000000098s",
-+ lmg_3 = "eb0000000004s",
-+ lmh_3 = "eb0000000096s",
-+ lnr_2 = "000000001100g",
-+ lngr_2 = "0000b9010000h",
-+ lngfr_2 = "0000b9110000h",
-+ lnxbr_2 = "0000b3410000h",
-+ lnxr_2 = "0000b3610000h",
-+ lndbr_2 = "0000b3110000h",
-+ lndr_2 = "000000002100g",
-+ lndfr_2 = "0000b3710000h",
-+ lnebr_2 = "0000b3010000h",
-+ lner_2 = "000000003100g",
-+ loc_3 = "eb00000000f2t",
-+ locg_3 = "eb00000000e2t",
-+ lpq_2 = "e3000000008fl",
-+ lpr_2 = "000000001000g",
-+ lpgr_2 = "0000b9000000h",
-+ lpgfr_2 = "0000b9100000h",
-+ lpxbr_2 = "0000b3400000h",
-+ lpxr_2 = "0000b3600000h",
-+ lpdbr_2 = "0000b3100000h",
-+ lpdr_2 = "000000002000g",
-+ lpdfr_2 = "0000b3700000h",
-+ lpebr_2 = "0000b3000000h",
-+ lper_2 = "000000003000g",
-+ lra_2 = "0000b1000000j",
-+ lray_2 = "e30000000013l",
-+ lrag_2 = "e30000000003l",
-+ lrl_2 = "c40d00000000o",
-+ lgrl_2 = "c40800000000o",
-+ lgfrl_2 = "c40c00000000o",
-+ lrvh_2 = "e3000000001fl",
-+ lrv_2 = "e3000000001el",
-+ lrvr_2 = "0000b91f0000h",
-+ lrvg_2 = "e3000000000fl",
-+ lrvgr_2 = "0000b90f0000h",
-+ ldxbr_2 = "0000b3450000h",
-+ ldxr_2 = "000000002500g",
-+ lrdr_2 = "000000002500g",
-+ lexbr_2 = "0000b3460000h",
-+ lexr_2 = "0000b3660000h",
-+ ledbr_2 = "0000b3440000h",
-+ ledr_2 = "000000003500g",
-+ lrer_2 = "000000003500g",
-+ lura_2 = "0000b24b0000h",
-+ lurag_2 = "0000b9050000h",
-+ lzxr_2 = "0000b3760000h",
-+ lzdr_2 = "0000b3750000h",
-+ lzer_2 = "0000b3740000h",
-+ msta_2 = "0000b2470000h",
-+ mvcl_2 = "000000000e00g",
-+ mvcle_3 = "0000a8000000q",
-+ mvclu_3 = "eb000000008es",
-+ mvpg_2 = "0000b2540000h",
-+ mvst_2 = "0000b2550000h",
-+ m_2 = "00005c000000j",
-+ mfy_2 = "e3000000005cl",
-+ mr_2 = "000000001c00g",
-+ mxbr_2 = "0000b34c0000h",
-+ mxr_2 = "000000002600g",
-+ mdbr_2 = "0000b31c0000h",
-+ md_2 = "00006c000000j",
-+ mdr_2 = "000000002c00g",
-+ mxdbr_2 = "0000b3070000h",
-+ mxd_2 = "000067000000j",
-+ mxdr_2 = "000000002700g",
-+ meebr_2 = "0000b3170000h",
-+ meer_2 = "0000b3370000h",
-+ mdebr_2 = "0000b30c0000h",
-+ mde_2 = "00007c000000j",
-+ mder_2 = "000000003c00g",
-+ me_2 = "00007c000000j",
-+ mer_2 = "000000003c00g",
-+ mh_2 = "00004c000000j",
-+ mhy_2 = "e3000000007cl",
-+ mlg_2 = "e30000000086l",
-+ mlgr_2 = "0000b9860000h",
-+ ml_2 = "e30000000096l",
-+ mlr_2 = "0000b9960000h",
-+ ms_2 = "000071000000j",
-+ msr_2 = "0000b2520000h",
-+ msy_2 = "e30000000051l",
-+ msg_2 = "e3000000000cl",
-+ msgr_2 = "0000b90c0000h",
-+ msgf_2 = "e3000000001cl",
-+ msgfr_2 = "0000b91c0000h",
-+ msfi_2 = "c20100000000n",
-+ msgfi_2 = "c20000000000n",
-+ o_2 = "000056000000j",
-+ or_2 = "000000001600g",
-+ oy_2 = "e30000000056l",
-+ og_2 = "e30000000081l",
-+ ogr_2 = "0000b9810000h",
-+ oihf_2 = "c00c00000000n",
-+ oilf_2 = "c00d00000000n",
-+ pgin_2 = "0000b22e0000h",
-+ pgout_2 = "0000b22f0000h",
-+ pcc_2 = "0000b92c0000h",
-+ pckmo_2 = "0000b9280000h",
-+ pfmf_2 = "0000b9af0000h",
-+ ptf_2 = "0000b9a20000h",
-+ popcnt_2 = "0000b9e10000h",
-+ pfd_2 = "e30000000036m",
-+ pfdrl_2 = "c60200000000p",
-+ pt_2 = "0000b2280000h",
-+ pti_2 = "0000b99e0000h",
-+ palb_2 = "0000b2480000h",
-+ rrbe_2 = "0000b22a0000h",
-+ rrbm_2 = "0000b9ae0000h",
-+ rll_3 = "eb000000001ds",
-+ rllg_3 = "eb000000001cs",
-+ srst_2 = "0000b25e0000h",
-+ srstu_2 = "0000b9be0000h",
-+ sar_2 = "0000b24e0000h",
-+ sfpc_2 = "0000b3840000h",
-+ sfasr_2 = "0000b3850000h",
-+ spm_2 = "00000000400g",
-+ ssar_2 = "0000b2250000h",
-+ ssair_2 = "0000b99f0000h",
-+ slda_3 = "00008f000000q",
-+ sldl_3 = "00008d000000q",
-+ sla_3 = "00008b000000q",
-+ slak_3 = "eb00000000dds",
-+ slag_3 = "eb000000000bs",
-+ sll_3 = "000089000000q",
-+ sllk_3 = "eb00000000dfs",
-+ sllg_3 = "eb000000000ds",
-+ srda_3 = "00008e000000q",
-+ srdl_3 = "00008c000000q",
-+ sra_3 = "00008a000000q",
-+ srak_3 = "eb00000000dcs",
-+ srag_3 = "eb000000000as",
-+ srl_3 = "000088000000q",
-+ srlk_3 = "eb00000000des",
-+ srlg_3 = "eb000000000cs",
-+ sqxbr_2 = "0000b3160000h",
-+ sqxr_2 = "0000b3360000h",
-+ sqdbr_2 = "0000b3150000h",
-+ sqdr_2 = "0000b2440000h",
-+ sqebr_2 = "0000b3140000h",
-+ sqer_2 = "0000b2450000h",
-+ st_2 = "000050000000j",
-+ sty_2 = "e30000000050l",
-+ stg_2 = "e30000000024l",
-+ std_2 = "000060000000j",
-+ stdy_2 = "ed0000000067l",
-+ ste_2 = "000070000000j",
-+ stey_2 = "ed0000000066l",
-+ stam_3 = "00009b000000q",
-+ stamy_3 = "eb000000009bs",
-+ stc_2 = "000042000000j",
-+ stcy_2 = "e30000000072l",
-+ stch_2 = "e300000000c3l",
-+ stcmh_3 = "eb000000002ct",
-+ stcm_3 = "0000be000000r",
-+ stcmy_3 = "eb000000002dt",
-+ stctl_3 = "0000b6000000q",
-+ stctg_3 = "eb0000000025s",
-+ sth_2 = "000040000000j",
-+ sthy_2 = "e30000000070l",
-+ sthh_2 = "e300000000c7l",
-+ sthrl_2 = "c40700000000o",
-+ stfh_2 = "e300000000cbl",
-+ stm_3 = "000090000000q",
-+ stmy_3 = "eb0000000090s",
-+ stmg_3 = "eb0000000024s",
-+ stmh_3 = "eb0000000026s",
-+ stoc_3 = "eb00000000f3t",
-+ stocg_3 = "eb00000000e3t",
-+ stpq_2 = "e3000000008el",
-+ strl_2 = "c40f00000000o",
-+ stgrl_2 = "c40b00000000o",
-+ strvh_2 = "e3000000003fl",
-+ strv_2 = "e3000000003el",
-+ strvg_2 = "e3000000002fl",
-+ stura_2 = "0000b2460000h",
-+ sturg_2 = "0000b9250000h",
-+ s_2 = "00005b000000j",
-+ sr_2 = "000000001b00g",
-+ sy_2 = "e3000000005bl",
-+ sg_2 = "e30000000009l",
-+ sgr_2 = "0000b9090000h",
-+ sgf_2 = "e30000000019l",
-+ sgfr_2 = "0000b9190000h",
-+ sxbr_2 = "0000b34b0000h",
-+ sdbr_2 = "0000b31b0000h",
-+ sebr_2 = "0000b30b0000h",
-+ sh_2 = "00004b000000j",
-+ shy_2 = "e3000000007bl",
-+ sl_2 = "00005f000000j",
-+ slr_2 = "000000001f00g",
-+ sly_2 = "e3000000005fl",
-+ slg_2 = "e3000000000bl",
-+ slgr_2 = "0000b90b0000h",
-+ slgf_2 = "e3000000001bl",
-+ slgfr_2 = "0000b91b0000h",
-+ slfi_2 = "c20500000000n",
-+ slgfi_2 = "c20400000000n",
-+ slb_2 = "e30000000099l",
-+ slbr_2 = "0000b9990000h",
-+ slbg_2 = "e30000000089l",
-+ slbgr_2 = "0000b9890000h",
-+ sxr_2 = "000000003700g",
-+ sd_2 = "00006b000000j",
-+ sdr_2 = "000000002b00g",
-+ se_2 = "00007b000000j",
-+ ser_2 = "000000003b00g",
-+ su_2 = "00007f000000j",
-+ sur_2 = "000000003f00g",
-+ sw_2 = "00006f000000j",
-+ swr_2 = "000000002f00g",
-+ tar_2 = "0000b24c0000h",
-+ tb_2 = "0000b22c0000h",
-+ trace_3 = "000099000000q",
-+ tracg_3 = "eb000000000fs",
-+ tre_2 = "0000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
- -- TODO: replace 'B' with correct encoding.
- -- brc
-- map_op["j"..cond.."_1"] =
"00000000"..tohex(0xa7040000+shl(c, 20)).."w"
-+ map_op["j"..cond.."_1"] =
"0000"..tohex(0xa7040000+shl(c, 20)).."w"
- -- brcl
-- map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c,
4)).."00000000".."x"
-+ map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c,
20)).."0000".."x"
- -- bc
-- map_op["b"..cond.."_1"] =
"00000000"..tohex(0x47000000+shl(c, 20)).."y"
-+ map_op["b"..cond.."_1"] =
"0000"..tohex(0x47000000+shl(c, 20)).."y"
- -- bcr
-- map_op["b"..cond.."r_1"] =
"00000000"..tohex(0x0700+shl(c, 4)).."z"
-+ map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c,
4)).."z"
- end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
- local function parse_template(params, template, nparams, pos)
- -- Read the template in 16-bit chunks.
- -- Leading halfword zeroes should not be written out.
-- local op0 = tonumber(sub(template, 5, 8), 16)
-- local op1 = tonumber(sub(template, 9, 12), 16)
-- local op2 = tonumber(sub(template, 13, 16), 16)
-+ local op0 = tonumber(sub(template, 1, 4), 16)
-+ local op1 = tonumber(sub(template, 5, 8), 16)
-+ local op2 = tonumber(sub(template, 9, 12), 16)
-
- local n,rs = 1,26
-
-@@ -1031,7 +1031,7 @@ local function parse_template(params, template, nparams, pos)
- -- oorr iiii 00oo
- -- This should be emitted as oorr, followed by the immediate action, followed by
- -- 00oo.
-- for p in gmatch(sub(template, 17), ".") do
-+ for p in gmatch(sub(template, 13), ".") do
- local pr1,pr2,pr3
- if p == "g" then
- pr1,pr2=params[n],params[n+1]
---
-2.20.1
-
-
-From 8fbf547e3729a4e323b8c0b703066b396b23cdef Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 5 Dec 2016 14:51:48 -0500
-Subject: [PATCH 069/247] Add simple tests for RX and RXY style instructions.
-
-Also changed the test function signature to int64_t fn(int64_t, int64_t)
-to make it easier to test 64-bit operations.
----
- dynasm/Examples/test_z_inst.c | 48 ++++++++++++++++++++++++++++-------
- 1 file changed, 39 insertions(+), 9 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index ed20ea6..7259638 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -33,18 +33,48 @@ static void mul(dasm_State *state)
- | br r14
- }
-
-+static void rx(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ int x = 1;
-+ int y = 4095;
-+
-+ | la r4, 4095(r2, r3)
-+ | la r5, 4095(r4)
-+ | la r1, x(r5)
-+ | la r2, y(r1, r0)
-+ | br r14
-+}
-+
-+static void rxy(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ int x = -524287;
-+ int y = 524286;
-+
-+ | lay r4, -524288(r2, r3)
-+ | lay r5, 524287(r4)
-+ | lay r1, x(r5)
-+ | lay r2, y(r1, r0)
-+ | br r14
-+}
-+
- typedef struct {
-- int arg1;
-- int arg2;
-+ int64_t arg1;
-+ int64_t arg2;
- void (*fn)(dasm_State *);
-- int want;
-+ int64_t want;
- const char *testname;
- } test_table;
-
- test_table test[] = {
-- { 1, 2, add, 3, "add"},
-- {10, 5, sub, 5, "sub"},
-- { 2, 3, mul, 6, "mul"}
-+ { 1, 2, add, 3, "add"},
-+ {10, 5, sub, 5, "sub"},
-+ { 2, 3, mul, 6, "mul"},
-+ { 5, 7, rx, 12298, "rx"},
-+ { 5, 7, rxy, 10, "rxy"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-@@ -69,11 +99,11 @@ int main(int argc, char *argv[])
- dasm_setup(&state, actions);
- test[i].fn(state);
- size_t size;
-- int (*fptr)(int, int) = jitcode(&state, &size);
-- int got = fptr(test[i].arg1, test[i].arg2);
-+ int64_t (*fptr)(int64_t, int64_t) = jitcode(&state, &size);
-+ int64_t got = fptr(test[i].arg1, test[i].arg2);
-
- if (got != test[i].want) {
-- fprintf(stderr, "FAIL: test %s: want %d, got %d\n", test[i].testname,
test[i].want, got);
-+ fprintf(stderr, "FAIL: test %s: want %ld, got %ld\n", test[i].testname,
test[i].want, got);
- exit(1);
- }
- munmap(fptr, size);
---
-2.20.1
-
-
-From 34a260a0e88dd2bbaf606f0bfed18ba4d18792a6 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 5 Dec 2016 14:57:53 -0500
-Subject: [PATCH 070/247] Minor cleanup of dasm_s390x.lua.
-
----
- dynasm/dasm_s390x.lua | 30 ++++++------------------------
- 1 file changed, 6 insertions(+), 24 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 6416438..5abfe3b 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -239,8 +239,6 @@ local map_cond = {
-
- ------------------------------------------------------------------------------
-
--local parse_reg_type
--
- local function parse_gpr(expr)
- local r = match(expr, "^r(1?[0-9])$")
- if r then
-@@ -1016,30 +1014,14 @@ local function parse_template(params, template, nparams, pos)
- local op1 = tonumber(sub(template, 5, 8), 16)
- local op2 = tonumber(sub(template, 9, 12), 16)
-
-- local n,rs = 1,26
--
-- parse_reg_type = false
- -- Process each character.
-- -- TODO
-- -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
-- -- one of two locations relative to the end of the instruction.
-- -- To make decoding easier we should insert the actions for these immediately
-- -- after the halfword they modify.
-- -- For example, take the instruction ahik, which is laid out as follows (each
-- -- char is 4 bits):
-- -- o = op code, r = register, i = immediate
-- -- oorr iiii 00oo
-- -- This should be emitted as oorr, followed by the immediate action, followed by
-- -- 00oo.
- for p in gmatch(sub(template, 13), ".") do
- local pr1,pr2,pr3
- if p == "g" then
-- pr1,pr2=params[n],params[n+1]
-- op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
-+ op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
- wputhw(op2)
- elseif p == "h" then
-- pr1,pr2=params[n],params[n+1]
-- op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
-+ op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
- wputhw(op1); wputhw(op2)
- elseif p == "j" then
- local d, x, b, a = parse_mem_bx(params[2])
-@@ -1067,20 +1049,20 @@ local function parse_template(params, template, nparams, pos)
- op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2)
-- if a then a() end
-+ if a then a() end -- a() emits action.
- elseif p == "s" then
- local d, b, a = parse_mem_by(params[3])
- op0 = op0 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
-- if a then a() end
-+ if a then a() end -- a() emits action.
- elseif p == "y" then
- local d, x, b, a = parse_mem_bx(params[1])
- op1 = op1 + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
-- if a then a() end
-+ if a then a() end -- a() emits action.
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
- wputhw(op2)
-@@ -1166,7 +1148,7 @@ map_op[".align_1"] = function(params)
- for i=1,8 do
- x = x / 2
- if x == 1 then
-- waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
-+ waction("ALIGN", align-1, nil, 1) -- Action halfword is 2**n-1.
- return
- end
- end
---
-2.20.1
-
-
-From 6e6b310b5586f38c4d7bbedf3b1d3a5f41c2fa5e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 5 Dec 2016 17:21:49 -0500
-Subject: [PATCH 071/247] Add support for jumps to local labels.
-
-Currently limited to 16-bits ONLY.
-
-Allows code like:
-
-|1:
-| ...
-| j <1
----
- dynasm/Examples/test_z_inst.c | 23 ++++++++++++++++++++---
- dynasm/dasm_s390x.h | 15 ++++++++-------
- dynasm/dasm_s390x.lua | 10 ++++++++++
- 3 files changed, 38 insertions(+), 10 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 7259638..ad8e6a0 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -8,6 +8,7 @@
- // DynASM directives.
- |.arch s390x
- |.actionlist actions
-+|.globals lab_
-
- static void add(dasm_State *state)
- {
-@@ -61,6 +62,20 @@ static void rxy(dasm_State *state)
- | br r14
- }
-
-+static void lab(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
-+ | la r1, 0(r0)
-+ |1:
-+ | agr r2, r2
-+ | la r1, 1(r1)
-+ | cgr r1, r3
-+ | jl <1
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -74,7 +89,8 @@ test_table test[] = {
- {10, 5, sub, 5, "sub"},
- { 2, 3, mul, 6, "mul"},
- { 5, 7, rx, 12298, "rx"},
-- { 5, 7, rxy, 10, "rxy"}
-+ { 5, 7, rxy, 10, "rxy"},
-+ { 2, 4, lab, 32, "lab"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-@@ -93,9 +109,10 @@ static void *jitcode(dasm_State **state, size_t *size)
- int main(int argc, char *argv[])
- {
- dasm_State *state;
--
-- for(int i=0; i < sizeof(test)/sizeof(test[0]); i++) {
-+ for(int i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
- dasm_init(&state, 1);
-+ void* labels[lab__MAX];
-+ dasm_setupglobal(&state, labels, lab__MAX);
- dasm_setup(&state, actions);
- test[i].fn(state);
- size_t size;
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index e2cd519..8fc4cd6 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -219,7 +219,7 @@ void dasm_put(Dst_DECL, int start, ...)
- b[pos++] = ofs;
- break;
- case DASM_REL_LG:
-- n = (ins & 2047) - 10;
-+ n = *p++ - 10;
- pl = D->lglabels + n;
- /* Bkwd rel or global. */
- if (n >= 0) {
-@@ -247,7 +247,7 @@ void dasm_put(Dst_DECL, int start, ...)
- pos++;
- break;
- case DASM_LABEL_LG:
-- pl = D->lglabels + (ins & 2047) - 10;
-+ pl = D->lglabels + *p++ - 10;
- CKPL(lg, LG);
- goto putlabel;
- case DASM_LABEL_PC:
-@@ -262,6 +262,7 @@ void dasm_put(Dst_DECL, int start, ...)
- }
- *pl = -pos; /* Label exists now. */
- b[pos++] = ofs; /* Store pass1 offset estimate. */
-+ ofs += 2;
- break;
- case DASM_IMM16:
- CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
-@@ -348,10 +349,12 @@ int dasm_link(Dst_DECL, size_t * szp)
- break;
- case DASM_REL_LG:
- case DASM_REL_PC:
-+ p++;
- pos++;
- break;
- case DASM_LABEL_LG:
- case DASM_LABEL_PC:
-+ p++;
- b[pos++] += ofs;
- break;
- case DASM_IMM16:
-@@ -421,13 +424,11 @@ int dasm_encode(Dst_DECL, void *buffer)
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
- patchrel:
-- CK((n & 3) == 0 &&
-- (((n + 4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
-- ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
-- cp[-1] |= ((n + 4) & ((ins & 2048) ? 0x0000fffc : 0x03fffffc));
-+ *cp++ = n/2; /* TODO: only 16-bit relative jump currently works. */
-+ p++; /* skip argument */
- break;
- case DASM_LABEL_LG:
-- ins &= 2047;
-+ ins = *p++;
- if (ins >= 20)
- D->globals[ins - 10] = (void *)(base + n);
- break;
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 5abfe3b..0625d5f 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1057,6 +1057,14 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
- if a then a() end -- a() emits action.
-+ elseif p == "w" then
-+ local mode, n, s = parse_label(params[1])
-+ wputhw(op1)
-+ waction("REL_"..mode, n, s)
-+ elseif p == "x" then
-+ local mode, n, s = parse_label(params[1])
-+ wputhw(op0)
-+ waction("REL_"..mode, n, s)
- elseif p == "y" then
- local d, x, b, a = parse_mem_bx(params[1])
- op1 = op1 + x
-@@ -1066,6 +1074,8 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
- wputhw(op2)
-+ else
-+ werror("unrecognized encoding")
- end
- end
-
---
-2.20.1
-
-
-From 1e52f5c764cbba60b5a25434ec0d78ad245d2657 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 6 Dec 2016 12:23:51 +0530
-Subject: [PATCH 072/247] Updated the addressing mode working
-
-The case where immediate was passed as label was not covered initially, so updated it
----
- dynasm/dasm_s390x.lua | 15 ++++++++++-----
- 1 file changed, 10 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 0625d5f..7ed35f3 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -370,10 +370,15 @@ end
-
- local function parse_imm(arg)
- local imm_val = tonumber(arg,16)
-- if not is_int32(imm_val) then
-- werror("Immediate value out of range: ", imm_val)
-+ if imm_val then
-+ if not is_int32(imm_val) then
-+ werror("Immediate value out of range: ", imm_val)
-+ end
-+ wputhw(band(shr(imm_val, 16), 0xffff));
-+ wputhw(band(imm_val, 0xffff));
-+ else
-+ waction("IMM32", nil, arg) -- if we get label
- end
-- return imm_val
- end
-
- local function parse_label(label, def)
-@@ -1042,8 +1047,8 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "n" then
- op0 = op0 + shl(parse_gpr(params[1]), 4)
-- local imm = parse_imm(params[2])
-- wputhw(op0); waction("IMM32", nil, imm)
-+ wputhw(op0);
-+ parse_imm(params[2])
- elseif p == "q" then
- local d, b, a = parse_mem_b(params[3])
- op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
---
-2.20.1
-
-
-From 1f9d7b61de55d14e5b1eecb587b5ca61e9937a9f Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Tue, 6 Dec 2016 14:11:30 +0530
-Subject: [PATCH 073/247] Update test_z_inst.c
-
-Added test case for add immediate 16 bits RI-a
-Added test case for add immediate 32 bits RIL-a
----
- dynasm/Examples/test_z_inst.c | 31 +++++++++++++++++++++++++------
- 1 file changed, 25 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index ad8e6a0..dbb50eb 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -76,6 +76,23 @@ static void lab(dasm_State *state)
- | br r14
- }
-
-+static void add_imm16(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | ahi r2 , 0xf
-+ | br r14
-+}
-+
-+
-+static void add_imm32(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | afi r2 , 0xe
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -85,12 +102,14 @@ typedef struct {
- } test_table;
-
- test_table test[] = {
-- { 1, 2, add, 3, "add"},
-- {10, 5, sub, 5, "sub"},
-- { 2, 3, mul, 6, "mul"},
-- { 5, 7, rx, 12298, "rx"},
-- { 5, 7, rxy, 10, "rxy"},
-- { 2, 4, lab, 32, "lab"}
-+ { 1, 2, add, 3, "add"},
-+ {10, 5, sub, 5, "sub"},
-+ { 2, 3, mul, 6, "mul"},
-+ { 5, 7, rx, 12298, "rx"},
-+ { 5, 7, rxy, 10, "rxy"},
-+ { 2, 4, lab, 32, "lab"},
-+ { 2, 0, add_imm16,17, "imm16"}
-+ { 2, 0, add_imm32,16, "imm32"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 10b3dc1838a6ab7d8e3b2209f39d16a16a018525 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 6 Dec 2016 19:17:54 +0530
-Subject: [PATCH 074/247] Added support for Immediate mode 16 bit
-
-Currently only afi instruction is encoded, will be adding other instructions too. For encoding we are running out of characters so was planning to append the complete modes (RXa or rxa) which one do you think is better, and also thinking of just adding remaining instruction modes as well, which we don't support as of now. Let me know if you want me to add those, or we will wait for sometime before we add those.
----
- dynasm/dasm_s390x.lua | 20 ++++++++++++++++++++
- 1 file changed, 20 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 7ed35f3..9853aac 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -291,6 +291,9 @@ local function is_int32(num)
- return -2147483648 <= num and num < 2147483648
- end
-
-+local function_is_int16(num)
-+ return -32768 <= num and num < 32768
-+
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
-@@ -381,6 +384,18 @@ local function parse_imm(arg)
- end
- end
-
-+local function parse_imm16(arg)
-+ local imm_val = tonumber(arg,16)
-+ if imm_val then
-+ if not is_int16(imm_val) then
-+ werror("Immediate value out of range: ", imm_val)
-+ end
-+ wputhw(imm_val)
-+ else
-+ waction("IMM16", nil, arg)
-+ end
-+end
-+
- local function parse_label(label, def)
- local prefix = sub(label, 1, 2)
- -- =>label (pc label reference)
-@@ -449,6 +464,7 @@ map_op = {
- adbr_2 = "0000b31a0000h",
- aebr_2 = "0000b30a0000h",
- ah_2 = "00004a000000j",
-+ ahi_2 = "0000a70a0000i",
- ahy_2 = "e3000000007al",
- afi_2 = "c20900000000n",
- agfi_2 = "c20800000000n",
-@@ -1028,6 +1044,10 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "h" then
- op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
- wputhw(op1); wputhw(op2)
-+ else if p == "i" then
-+ op1 = op1 + shl(parse_gpr(params[1]),4)
-+ wputhw(op1);
-+ parse_imm16(params[2])
- elseif p == "j" then
- local d, x, b, a = parse_mem_bx(params[2])
- op1 = op1 + shl(parse_gpr(params[1]), 4) + x
---
-2.20.1
-
-
-From 934896b1e85a64337826cb7b206583e5cad9b922 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 6 Dec 2016 19:34:47 +0530
-Subject: [PATCH 075/247] Minor Cleanup
-
----
- dynasm/dasm_s390x.lua | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 9853aac..9a77d4e 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -291,8 +291,9 @@ local function is_int32(num)
- return -2147483648 <= num and num < 2147483648
- end
-
--local function_is_int16(num)
-+local function is_int16(num)
- return -32768 <= num and num < 32768
-+end
-
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
-@@ -1044,7 +1045,7 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "h" then
- op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
- wputhw(op1); wputhw(op2)
-- else if p == "i" then
-+ elseif p == "i" then
- op1 = op1 + shl(parse_gpr(params[1]),4)
- wputhw(op1);
- parse_imm16(params[2])
---
-2.20.1
-
-
-From ca2a4e5fae802ddd8d0bba3707556059620ea4a9 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 09:16:30 -0500
-Subject: [PATCH 076/247] Fix test table (needed comma at end of line to
- compile).
-
-Also, re-align table columns.
----
- dynasm/Examples/test_z_inst.c | 16 ++++++++--------
- 1 file changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index dbb50eb..547a1c4 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -102,14 +102,14 @@ typedef struct {
- } test_table;
-
- test_table test[] = {
-- { 1, 2, add, 3, "add"},
-- {10, 5, sub, 5, "sub"},
-- { 2, 3, mul, 6, "mul"},
-- { 5, 7, rx, 12298, "rx"},
-- { 5, 7, rxy, 10, "rxy"},
-- { 2, 4, lab, 32, "lab"},
-- { 2, 0, add_imm16,17, "imm16"}
-- { 2, 0, add_imm32,16, "imm32"}
-+ { 1, 2, add, 3, "add"},
-+ {10, 5, sub, 5, "sub"},
-+ { 2, 3, mul, 6, "mul"},
-+ { 5, 7, rx, 12298, "rx"},
-+ { 5, 7, rxy, 10, "rxy"},
-+ { 2, 4, lab, 32, "lab"},
-+ { 2, 0, add_imm16, 17, "imm16"},
-+ { 2, 0, add_imm32, 16, "imm32"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 20396acd2d6d22f4975260e3097e6e6cabf0ed41 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 11:56:37 -0500
-Subject: [PATCH 077/247] Support forward local branches.
-
----
- dynasm/Examples/test_z_inst.c | 18 ++++++++++++++++++
- dynasm/dasm_s390x.h | 18 ++++++++++++++----
- dynasm/dasm_s390x.lua | 9 ++++++++-
- 3 files changed, 40 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 547a1c4..3938a74 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -76,6 +76,23 @@ static void lab(dasm_State *state)
- | br r14
- }
-
-+static void labg(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ // r1 = 0; do { r2 += r2; r1 += 1; } while(r1 < r3);
-+ | la r1, 0(r0)
-+ |1:
-+ | agr r2, r2
-+ | la r1, 1(r1)
-+ | cgr r1, r3
-+ | jgl <1
-+ | jgnl >1
-+ | stg r0, 0(r0)
-+ |1:
-+ | br r14
-+}
-+
- static void add_imm16(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -108,6 +125,7 @@ test_table test[] = {
- { 5, 7, rx, 12298, "rx"},
- { 5, 7, rxy, 10, "rxy"},
- { 2, 4, lab, 32, "lab"},
-+ { 2, 4, labg, 32, "labg"},
- { 2, 0, add_imm16, 17, "imm16"},
- { 2, 0, add_imm32, 16, "imm32"}
- };
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 8fc4cd6..18a7338 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -210,7 +210,7 @@ void dasm_put(Dst_DECL, int start, ...)
- goto stop;
- case DASM_ESC:
- p++;
-- ofs += 4;
-+ ofs += 2;
- break;
- case DASM_REL_EXT:
- break;
-@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
- b[pos] = n; /* Else link to rel chain, anchored at label. */
- *pl = pos;
- }
-+ ofs += 2;
-+ if (p[-3] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
-+ ofs += 2;
-+ }
- pos++;
- break;
- case DASM_LABEL_LG:
-@@ -262,7 +266,6 @@ void dasm_put(Dst_DECL, int start, ...)
- }
- *pl = -pos; /* Label exists now. */
- b[pos++] = ofs; /* Store pass1 offset estimate. */
-- ofs += 2;
- break;
- case DASM_IMM16:
- CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
-@@ -424,8 +427,15 @@ int dasm_encode(Dst_DECL, void *buffer)
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
- patchrel:
-- *cp++ = n/2; /* TODO: only 16-bit relative jump currently works. */
-- p++; /* skip argument */
-+ /* Offsets are halfword aligned (so need to be halved). */
-+ n += 2; /* Offset is relative to start of instruction. */
-+ if (cp[-1] >> 12 == 0xc) {
-+ *cp++ = n >> 17;
-+ } else {
-+ CK(-(1 << 16) <= n && n < (1 << 16) && n & 1 == 0, RANGE_LG);
-+ }
-+ *cp++ = n >> 1;
-+ p++; /* skip argument */
- break;
- case DASM_LABEL_LG:
- ins = *p++;
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 9a77d4e..847a02e 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -75,6 +75,13 @@ local function dumpactions(out)
- out:write("\n")
- end
-
-+local function havearg(a)
-+ return a == "ESC" or
-+ a == "SECTION" or
-+ a == "REL_LG" or
-+ a == "LABEL_LG"
-+end
-+
- -- Write action list buffer as a huge static C array.
- local function writeactions(out, name)
- local nn = #actlist
-@@ -87,7 +94,7 @@ local function writeactions(out, name)
- local name = action_names[actlist[i]+1]
- if not esc and name then
- assert(out:write(" /* ", name, " */"))
-- esc = name == "ESC" or name == "SECTION"
-+ esc = havearg(name)
- else
- esc = false
- end
---
-2.20.1
-
-
-From 6bcad07b197178e4ce3fd84c94d74b4088eef21f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 12:47:23 -0500
-Subject: [PATCH 078/247] s/SP/sp/ in vm_s390x.dasc.
-
-We support the pseudo-register sp now.
----
- src/vm_s390x.dasc | 41 ++++++++++++++++++++---------------------
- 1 file changed, 20 insertions(+), 21 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 72fe5d2..e639159 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -55,41 +55,40 @@
- |
- |.define CRET1, r2
- |
--|.define SP, r15
- |.define OP, r2
- |.define TMP1, r3
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
--|.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
-+|.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
- |
- |// Register save area.
--|.define SAVE_GPRS, 288(SP) // Save area for r6-r15 (10*8 bytes).
-+|.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
- |
- |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
--|.define SAVE_ERRF, 280(SP) // Argument 4, in r5.
--|.define SAVE_NRES, 272(SP) // Argument 3, in r4.
--|.define SAVE_CFRAME, 264(SP) // Argument 2, in r3.
--|.define SAVE_L, 256(SP) // Argument 1, in r2.
--|.define RESERVED, 248(SP) // Reserved for compiler use.
--|.define BACKCHAIN, 240(SP) // <- SP entering interpreter.
-+|.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
-+|.define SAVE_NRES, 272(sp) // Argument 3, in r4.
-+|.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
-+|.define SAVE_L, 256(sp) // Argument 1, in r2.
-+|.define RESERVED, 248(sp) // Reserved for compiler use.
-+|.define BACKCHAIN, 240(sp) // <- sp entering interpreter.
- |
- |// Interpreter stack frame.
--|.define SAVE_FPR15, 232(SP)
--|.define SAVE_FPR14, 224(SP)
--|.define SAVE_FPR13, 216(SP)
--|.define SAVE_FPR12, 208(SP)
--|.define SAVE_FPR11, 200(SP)
--|.define SAVE_FPR10, 192(SP)
--|.define SAVE_FPR9, 184(SP)
--|.define SAVE_FPR8, 176(SP)
--|.define SAVE_PC, 168(SP)
--|.define SAVE_MULTRES, 160(SP)
-+|.define SAVE_FPR15, 232(sp)
-+|.define SAVE_FPR14, 224(sp)
-+|.define SAVE_FPR13, 216(sp)
-+|.define SAVE_FPR12, 208(sp)
-+|.define SAVE_FPR11, 200(sp)
-+|.define SAVE_FPR10, 192(sp)
-+|.define SAVE_FPR9, 184(sp)
-+|.define SAVE_FPR8, 176(sp)
-+|.define SAVE_PC, 168(sp)
-+|.define SAVE_MULTRES, 160(sp)
- |
- |// Callee save area (allocated by interpreter).
--|.define CALLEESAVE 000(SP) // <- SP in interpreter.
-+|.define CALLEESAVE, 000(sp) // <- sp in interpreter.
- |
- |.macro saveregs
--| lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
-+| lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
- | stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
- | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
- | std f9, SAVE_FPR9
---
-2.20.1
-
-
-From 7cc069ef367169c3af0d1dde54c7e89eeefe0733 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 13:15:29 -0500
-Subject: [PATCH 079/247] Support floating point register arguments.
-
-It would be nice to verify that floating-point/general-purpose
-registers are indeed expected by the instruction, but for now treat
-them both the same so we can use floating-point instructions.
----
- dynasm/Examples/test_z_inst.c | 65 +++++++++++++++++++++++++++++++++--
- dynasm/dasm_s390x.lua | 35 +++++++------------
- 2 files changed, 76 insertions(+), 24 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 3938a74..b54c383 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -101,7 +101,6 @@ static void add_imm16(dasm_State *state)
- | br r14
- }
-
--
- static void add_imm32(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -110,6 +109,67 @@ static void add_imm32(dasm_State *state)
- | br r14
- }
-
-+static void save(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ |.define CFRAME_SPACE, 224 // Delta for sp, 8 byte aligned.
-+ |
-+ |// Register save area.
-+ |.define SAVE_GPRS, 264(sp) // Save area for r6-r15 (10*8 bytes).
-+ |
-+ |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
-+ |.define RESERVED, 232(sp) // Reserved for compiler use.
-+ |.define BACKCHAIN, 224(sp)
-+ |
-+ |// Current stack frame.
-+ |.define SAVE_FPR15, 216(sp)
-+ |.define SAVE_FPR14, 208(sp)
-+ |.define SAVE_FPR13, 200(sp)
-+ |.define SAVE_FPR12, 192(sp)
-+ |.define SAVE_FPR11, 184(sp)
-+ |.define SAVE_FPR10, 176(sp)
-+ |.define SAVE_FPR9, 168(sp)
-+ |.define SAVE_FPR8, 160(sp)
-+ |
-+ |// Callee save area.
-+ |.define CALLEESAVE, 000(sp)
-+ |
-+ |.macro saveregs
-+ | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
-+ | stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
-+ | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+ | std f9, SAVE_FPR9
-+ | std f10, SAVE_FPR10
-+ | std f11, SAVE_FPR11
-+ | std f12, SAVE_FPR12
-+ | std f13, SAVE_FPR13
-+ | std f14, SAVE_FPR14
-+ | std f15, SAVE_FPR15
-+ |.endmacro
-+ |
-+ |.macro restoreregs
-+ | ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+ | ld f9, SAVE_FPR9
-+ | ld f10, SAVE_FPR10
-+ | ld f11, SAVE_FPR11
-+ | ld f12, SAVE_FPR12
-+ | ld f13, SAVE_FPR13
-+ | ld f14, SAVE_FPR14
-+ | ld f15, SAVE_FPR15
-+ | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
-+ |.endmacro
-+ |
-+ | saveregs
-+ | lgfi r7, 10 // 16
-+ | lgfi r8, 20 // 32
-+ | agr r2, r3
-+ | agr r7, r8
-+ | msgr r2, r7
-+ | restoreregs
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -127,7 +187,8 @@ test_table test[] = {
- { 2, 4, lab, 32, "lab"},
- { 2, 4, labg, 32, "labg"},
- { 2, 0, add_imm16, 17, "imm16"},
-- { 2, 0, add_imm32, 16, "imm32"}
-+ { 2, 0, add_imm32, 16, "imm32"},
-+ { 7, 3, save, 480, "save"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 847a02e..c799bb6 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -246,8 +246,8 @@ local map_cond = {
-
- ------------------------------------------------------------------------------
-
--local function parse_gpr(expr)
-- local r = match(expr, "^r(1?[0-9])$")
-+local function parse_reg(expr)
-+ local r = match(expr, "^[r|f](1?[0-9])$")
- if r then
- r = tonumber(r)
- if r <= 15 then return r, tp end
-@@ -255,15 +255,6 @@ local function parse_gpr(expr)
- werror("bad register name `"..expr.."'")
- end
-
--local function parse_fpr(expr)
-- local r = match(expr, "^f(1?[0-9])$")
-- if r then
-- r = tonumber(r)
-- if r <= 15 then return r end
-- end
-- werror("bad register name `"..expr.."'")
--end
--
- local parse_ctx = {}
-
- local loadenv = setfenv and function(s)
-@@ -308,11 +299,11 @@ local function split_memop(arg)
- local reg = "r1?[0-9]"
- local d, x, b = match(arg, "^(.*)%(("..reg.."), ("..reg..")%)$")
- if d then
-- return d, parse_gpr(x), parse_gpr(b)
-+ return d, parse_reg(x), parse_reg(b)
- end
- local d, b = match(arg, "^(.*)%(("..reg..")%)$")
- if d then
-- return d, 0, parse_gpr(b)
-+ return d, 0, parse_reg(b)
- end
- -- TODO: handle values without registers?
- -- TODO: handle registers without a displacement?
-@@ -1047,18 +1038,18 @@ local function parse_template(params, template, nparams, pos)
- for p in gmatch(sub(template, 13), ".") do
- local pr1,pr2,pr3
- if p == "g" then
-- op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
-+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
- wputhw(op2)
- elseif p == "h" then
-- op2 = op2 + shl(parse_gpr(params[1]),4) + parse_gpr(params[2])
-+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
- wputhw(op1); wputhw(op2)
- elseif p == "i" then
-- op1 = op1 + shl(parse_gpr(params[1]),4)
-+ op1 = op1 + shl(parse_reg(params[1]),4)
- wputhw(op1);
- parse_imm16(params[2])
- elseif p == "j" then
- local d, x, b, a = parse_mem_bx(params[2])
-- op1 = op1 + shl(parse_gpr(params[1]), 4) + x
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
- if a then a() end
-@@ -1066,7 +1057,7 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "l" then
- local d, x, b, a = parse_mem_bxy(params[2])
-- op0 = op0 + shl(parse_gpr(params[1]), 4) + x
-+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
-@@ -1074,18 +1065,18 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "m" then
-
- elseif p == "n" then
-- op0 = op0 + shl(parse_gpr(params[1]), 4)
-+ op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
- parse_imm(params[2])
- elseif p == "q" then
- local d, b, a = parse_mem_b(params[3])
-- op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2)
- if a then a() end -- a() emits action.
- elseif p == "s" then
- local d, b, a = parse_mem_by(params[3])
-- op0 = op0 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
-+ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
-@@ -1105,7 +1096,7 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1); wputhw(op2);
- if a then a() end -- a() emits action.
- elseif p == "z" then
-- op2 = op2 + parse_gpr(params[1])
-+ op2 = op2 + parse_reg(params[1])
- wputhw(op2)
- else
- werror("unrecognized encoding")
---
-2.20.1
-
-
-From 266fe118a8e68b76dbe4f3927c89a45f2d3bff10 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 14:38:11 -0500
-Subject: [PATCH 080/247] Add support for brasl instruction.
-
-Important for calling subroutines.
----
- dynasm/Examples/test_z_inst.c | 54 ++++++++++++++++++++++++++++-------
- dynasm/dasm_s390x.lua | 5 ++++
- 2 files changed, 49 insertions(+), 10 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index b54c383..ee005b5 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -93,6 +93,38 @@ static void labg(dasm_State *state)
- | br r14
- }
-
-+static void labmul(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ // Multiply using an add function.
-+ // Only correct if input is positive.
-+ |->mul_func:
-+ | stmg r6, r14, 48(sp)
-+ | lgr r6, r2
-+ | lgr r7, r3
-+ | cgfi r7, 0
-+ | je >3
-+ | cgfi r7, 1
-+ | je >2
-+ |1:
-+ | lgr r3, r6
-+ | brasl r14, ->add_func
-+ | lay r7, -1(r7)
-+ | cgfi r7, 1
-+ | jh <1
-+ |2:
-+ | lmg r6, r14, 48(sp)
-+ | br r14
-+ |3:
-+ | la r2, 0(r0)
-+ | j <2
-+
-+ |->add_func:
-+ | agr r2, r3
-+ | br r14
-+}
-+
- static void add_imm16(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -179,16 +211,18 @@ typedef struct {
- } test_table;
-
- test_table test[] = {
-- { 1, 2, add, 3, "add"},
-- {10, 5, sub, 5, "sub"},
-- { 2, 3, mul, 6, "mul"},
-- { 5, 7, rx, 12298, "rx"},
-- { 5, 7, rxy, 10, "rxy"},
-- { 2, 4, lab, 32, "lab"},
-- { 2, 4, labg, 32, "labg"},
-- { 2, 0, add_imm16, 17, "imm16"},
-- { 2, 0, add_imm32, 16, "imm32"},
-- { 7, 3, save, 480, "save"}
-+ { 1, 2, add, 3, "add"},
-+ {10, 5, sub, 5, "sub"},
-+ { 2, 3, mul, 6, "mul"},
-+ { 5, 7, rx, 12298, "rx"},
-+ { 5, 7, rxy, 10, "rxy"},
-+ { 2, 4, lab, 32, "lab"},
-+ { 2, 4, labg, 32, "labg"},
-+ { 2, 0, add_imm16, 17, "imm16"},
-+ { 2, 0, add_imm32, 16, "imm32"},
-+ { 7, 3, save, 480, "save"},
-+ { 7, 3, labmul, 21, "labmul0"},
-+ { 7, 0, labmul, 0, "labmul1"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c799bb6..602428b 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1068,6 +1068,11 @@ local function parse_template(params, template, nparams, pos)
- op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
- parse_imm(params[2])
-+ elseif p == "o" then
-+ op0 = op0 + shl(parse_reg(params[1]), 4)
-+ wputhw(op0);
-+ local mode, n, s = parse_label(params[2])
-+ waction("REL_"..mode, n, s)
- elseif p == "q" then
- local d, b, a = parse_mem_b(params[3])
- op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
---
-2.20.1
-
-
-From 0fb519b4c4a91885be68991fc1e6630447bd7089 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 15:37:45 -0500
-Subject: [PATCH 081/247] Add ADD HALFWORD IMMEDIATE (64-bit) instruction
- (aghi).
-
----
- dynasm/dasm_s390x.lua | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 602428b..cfe861c 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -462,6 +462,7 @@ map_op = {
- axbr_2 = "0000b34a0000h",
- adbr_2 = "0000b31a0000h",
- aebr_2 = "0000b30a0000h",
-+ aghi_2 = "0000a70b0000i",
- ah_2 = "00004a000000j",
- ahi_2 = "0000a70a0000i",
- ahy_2 = "e3000000007al",
---
-2.20.1
-
-
-From cd46a409be3459eb4049463e06b5b0a602831796 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 15:31:07 -0500
-Subject: [PATCH 082/247] Add test for PC-style labels.
-
-These are labels which are given a numeric value. For example, the
-following code would generate PC labels 0 to 4:
-
-for (int i = 0; i < 5; i++) {
- |=>i:
-}
----
- dynasm/Examples/test_z_inst.c | 20 +++++++++++++++++++-
- 1 file changed, 19 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index ee005b5..4633b6d 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -202,6 +202,23 @@ static void save(dasm_State *state)
- | br r14
- }
-
-+static void pc(dasm_State *state) {
-+ dasm_State **Dst = &state;
-+ int MAX = 10;
-+ dasm_growpc(Dst, MAX+1);
-+
-+ | j =>MAX
-+ for (int i = 0; i <= MAX; i++) {
-+ |=>i:
-+ if (i == 0) {
-+ | br r14
-+ } else {
-+ | aghi r2, i
-+ | j =>i-1
-+ }
-+ }
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -222,7 +239,8 @@ test_table test[] = {
- { 2, 0, add_imm32, 16, "imm32"},
- { 7, 3, save, 480, "save"},
- { 7, 3, labmul, 21, "labmul0"},
-- { 7, 0, labmul, 0, "labmul1"}
-+ { 7, 0, labmul, 0, "labmul1"},
-+ { 0, 0, pc, 55, "pc"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 51ddbef52fbd21c1f0adbac7631f39fb9bd4a320 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 6 Dec 2016 15:45:06 -0500
-Subject: [PATCH 083/247] Re-order test function.
-
----
- dynasm/Examples/test_z_inst.c | 64 +++++++++++++++++------------------
- 1 file changed, 32 insertions(+), 32 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 4633b6d..5208d4b 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -93,38 +93,6 @@ static void labg(dasm_State *state)
- | br r14
- }
-
--static void labmul(dasm_State *state)
--{
-- dasm_State **Dst = &state;
--
-- // Multiply using an add function.
-- // Only correct if input is positive.
-- |->mul_func:
-- | stmg r6, r14, 48(sp)
-- | lgr r6, r2
-- | lgr r7, r3
-- | cgfi r7, 0
-- | je >3
-- | cgfi r7, 1
-- | je >2
-- |1:
-- | lgr r3, r6
-- | brasl r14, ->add_func
-- | lay r7, -1(r7)
-- | cgfi r7, 1
-- | jh <1
-- |2:
-- | lmg r6, r14, 48(sp)
-- | br r14
-- |3:
-- | la r2, 0(r0)
-- | j <2
--
-- |->add_func:
-- | agr r2, r3
-- | br r14
--}
--
- static void add_imm16(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -202,6 +170,38 @@ static void save(dasm_State *state)
- | br r14
- }
-
-+static void labmul(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ // Multiply using an add function.
-+ // Only correct if input is positive.
-+ |->mul_func:
-+ | stmg r6, r14, 48(sp)
-+ | lgr r6, r2
-+ | lgr r7, r3
-+ | cgfi r7, 0
-+ | je >3
-+ | cgfi r7, 1
-+ | je >2
-+ |1:
-+ | lgr r3, r6
-+ | brasl r14, ->add_func
-+ | lay r7, -1(r7)
-+ | cgfi r7, 1
-+ | jh <1
-+ |2:
-+ | lmg r6, r14, 48(sp)
-+ | br r14
-+ |3:
-+ | la r2, 0(r0)
-+ | j <2
-+
-+ |->add_func:
-+ | agr r2, r3
-+ | br r14
-+}
-+
- static void pc(dasm_State *state) {
- dasm_State **Dst = &state;
- int MAX = 10;
---
-2.20.1
-
-
-From e417d34a4db909dea9340b51355223b28490eeb7 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 7 Dec 2016 13:13:55 +0530
-Subject: [PATCH 084/247] Update test_z_inst.c
-
-Added another test for forward jump
----
- dynasm/Examples/test_z_inst.c | 19 ++++++++++++++++++-
- 1 file changed, 18 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 5208d4b..0458ce1 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -93,6 +93,22 @@ static void labg(dasm_State *state)
- | br r14
- }
-
-+static void jmp_fwd(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+ | j >1
-+ |1:
-+ | cgr r2 , r3
-+ | jne >2
-+ | je >3
-+ |2:
-+ | afi r2, 0x2
-+ | j <1
-+ |3:
-+ | br r14
-+
-+}
-+
- static void add_imm16(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -240,7 +256,8 @@ test_table test[] = {
- { 7, 3, save, 480, "save"},
- { 7, 3, labmul, 21, "labmul0"},
- { 7, 0, labmul, 0, "labmul1"},
-- { 0, 0, pc, 55, "pc"}
-+ { 0, 0, pc, 55, "pc"},
-+ { 2,12, jmp_fwd, 12, "jmp_fwd"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 8579096d6517d7c480c13a66f0deeaede9acf7aa Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 7 Dec 2016 14:11:20 +0530
-Subject: [PATCH 085/247] Update test_z_inst.c
-
-Added comments to fwd_jmp
-added test for RRD based test case add_rrd()---> functionality to be tested
-added test for RR based test case load_test()--> test fails
----
- dynasm/Examples/test_z_inst.c | 23 ++++++++++++++++++++++-
- 1 file changed, 22 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 0458ce1..dd36c1a 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -18,6 +18,15 @@ static void add(dasm_State *state)
- | br r14
- }
-
-+static void add_rrd(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | lgfi r4 , 0x02
-+ | maer r2 , r3 , r4
-+ | br r14
-+}
-+
- static void sub(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -96,6 +105,8 @@ static void labg(dasm_State *state)
- static void jmp_fwd(dasm_State *state)
- {
- dasm_State **Dst = &state;
-+
-+ // compare r2 == r3; do { r2 += r2; } while(r2 != r3);
- | j >1
- |1:
- | cgr r2 , r3
-@@ -235,6 +246,14 @@ static void pc(dasm_State *state) {
- }
- }
-
-+static void load_test(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | ltdr r2 , r3
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -257,7 +276,9 @@ test_table test[] = {
- { 7, 3, labmul, 21, "labmul0"},
- { 7, 0, labmul, 0, "labmul1"},
- { 0, 0, pc, 55, "pc"},
-- { 2,12, jmp_fwd, 12, "jmp_fwd"}
-+ { 2,12, jmp_fwd, 12, "jmp_fwd"},
-+ { 9,8, add_rrd, 25, "add_rrd"},
-+ { 2,4, load_test, 4,"load_test"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From ae3e0b41603df2f8d32c624cdb7400f40f6cd6eb Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 7 Dec 2016 20:21:06 +0530
-Subject: [PATCH 086/247] Adding RRD support
-
-Currently only "maer" is implemented. I am not able to get this working, don't know if I am missing out something, or we need to add some more functionality for RRD.
----
- dynasm/dasm_s390x.lua | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index cfe861c..ff6984c 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -885,6 +885,7 @@ map_op = {
- msgfr_2 = "0000b91c0000h",
- msfi_2 = "c20100000000n",
- msgfi_2 = "c20000000000n",
-+ maer_3 = "0000b32e0000r",
- o_2 = "000056000000j",
- or_2 = "000000001600g",
- oy_2 = "e30000000056l",
-@@ -1080,6 +1081,9 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2)
- if a then a() end -- a() emits action.
-+ elseif p == "r" then
-+ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
-+ wputhw(op1); wputhw(op2)
- elseif p == "s" then
- local d, b, a = parse_mem_by(params[3])
- op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
---
-2.20.1
-
-
-From 9eb10a30fd714d55973744e16c1506fba49b1c0c Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 7 Dec 2016 14:06:37 -0500
-Subject: [PATCH 087/247] Comment out failing tests for now.
-
----
- dynasm/Examples/test_z_inst.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index dd36c1a..078428b 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -18,6 +18,7 @@ static void add(dasm_State *state)
- | br r14
- }
-
-+/*
- static void add_rrd(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -26,6 +27,7 @@ static void add_rrd(dasm_State *state)
- | maer r2 , r3 , r4
- | br r14
- }
-+*/
-
- static void sub(dasm_State *state)
- {
-@@ -246,6 +248,7 @@ static void pc(dasm_State *state) {
- }
- }
-
-+/*
- static void load_test(dasm_State *state)
- {
- dasm_State **Dst = &state;
-@@ -253,6 +256,7 @@ static void load_test(dasm_State *state)
- | ltdr r2 , r3
- | br r14
- }
-+*/
-
- typedef struct {
- int64_t arg1;
-@@ -276,9 +280,9 @@ test_table test[] = {
- { 7, 3, labmul, 21, "labmul0"},
- { 7, 0, labmul, 0, "labmul1"},
- { 0, 0, pc, 55, "pc"},
-- { 2,12, jmp_fwd, 12, "jmp_fwd"},
-- { 9,8, add_rrd, 25, "add_rrd"},
-- { 2,4, load_test, 4,"load_test"}
-+ { 2,12, jmp_fwd, 12, "jmp_fwd"}
-+// { 9,8, add_rrd, 25, "add_rrd"},
-+// { 2,4, load_test, 4,"load_test"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 2046baebdefc0a759d66126a031159511a980ce3 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 7 Dec 2016 16:56:00 -0500
-Subject: [PATCH 088/247] Add support for SS-a instructions.
-
-I've also changed the template parser so that it can handle suffixes
-which are longer than 1 character. The suffix for SS-a instructions
-is "SS-a". We could change this again later.
----
- dynasm/Examples/test_z_inst.c | 67 ++++++++---
- dynasm/dasm_s390x.h | 9 ++
- dynasm/dasm_s390x.lua | 213 ++++++++++++++++++++++------------
- 3 files changed, 197 insertions(+), 92 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 078428b..7b3c0f2 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -258,6 +258,39 @@ static void load_test(dasm_State *state)
- }
- */
-
-+static void ssa(dasm_State *state) {
-+ dasm_State **Dst = &state;
-+
-+ | lay sp, -16(sp)
-+ | lay r0, -1(r0)
-+ | stg r0, 8(sp)
-+ | xc 8(8, sp), 8(sp)
-+ | stg r2, 0(sp)
-+ | mvc 13(2, sp), 6(sp)
-+ | lg r2, 8(sp)
-+ | la sp, 16(sp)
-+ | br r14
-+}
-+
-+static void ssa_act(dasm_State *state) {
-+ dasm_State **Dst = &state;
-+
-+ int xl = 8;
-+ int d1 = 13;
-+ int l1 = 2;
-+ int d2 = 6;
-+
-+ | lay sp, -16(sp)
-+ | lay r0, -1(r0)
-+ | stg r0, 8(sp)
-+ | xc 8(xl, sp), 8(sp)
-+ | stg r2, 0(sp)
-+ | mvc d1(l1, sp), d2(sp)
-+ | lg r2, 8(sp)
-+ | la sp, 16(sp)
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -267,22 +300,24 @@ typedef struct {
- } test_table;
-
- test_table test[] = {
-- { 1, 2, add, 3, "add"},
-- {10, 5, sub, 5, "sub"},
-- { 2, 3, mul, 6, "mul"},
-- { 5, 7, rx, 12298, "rx"},
-- { 5, 7, rxy, 10, "rxy"},
-- { 2, 4, lab, 32, "lab"},
-- { 2, 4, labg, 32, "labg"},
-- { 2, 0, add_imm16, 17, "imm16"},
-- { 2, 0, add_imm32, 16, "imm32"},
-- { 7, 3, save, 480, "save"},
-- { 7, 3, labmul, 21, "labmul0"},
-- { 7, 0, labmul, 0, "labmul1"},
-- { 0, 0, pc, 55, "pc"},
-- { 2,12, jmp_fwd, 12, "jmp_fwd"}
--// { 9,8, add_rrd, 25, "add_rrd"},
--// { 2,4, load_test, 4,"load_test"}
-+ { 1, 2, add, 3, "add"},
-+ {10, 5, sub, 5, "sub"},
-+ { 2, 3, mul, 6, "mul"},
-+ { 5, 7, rx, 12298, "rx"},
-+ { 5, 7, rxy, 10, "rxy"},
-+ { 2, 4, lab, 32, "lab"},
-+ { 2, 4, labg, 32, "labg"},
-+ { 2, 0, add_imm16, 17, "imm16"},
-+ { 2, 0, add_imm32, 16, "imm32"},
-+ { 7, 3, save, 480, "save"},
-+ { 7, 3, labmul, 21, "labmul0"},
-+ { 7, 0, labmul, 0, "labmul1"},
-+ { 0, 0, pc, 55, "pc"},
-+ { 2,12, jmp_fwd, 12, "jmp_fwd"},
-+// { 9,8, add_rrd, 25, "add_rrd"},
-+// { 2,4, load_test, 4,"load_test"},
-+ {-1, 0, ssa, 65535<<8, "ssa"},
-+ {-1, 0, ssa_act, 65535<<8, "ssa_act"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 18a7338..69e4fc1 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -24,6 +24,7 @@ enum {
- DASM_REL_PC, DASM_LABEL_PC,
- DASM_DISP12, DASM_DISP20,
- DASM_IMM16, DASM_IMM32,
-+ DASM_LEN8R,
- DASM__MAX
- };
-
-@@ -284,6 +285,10 @@ void dasm_put(Dst_DECL, int start, ...)
- CK((n >> 12) == 0, RANGE_I);
- b[pos++] = n;
- break;
-+ case DASM_LEN8R:
-+ CK(n >= 1 && n <= 256, RANGE_I);
-+ b[pos++] = n;
-+ break;
- }
- }
- stop:
-@@ -364,6 +369,7 @@ int dasm_link(Dst_DECL, size_t * szp)
- case DASM_IMM32:
- case DASM_DISP20:
- case DASM_DISP12:
-+ case DASM_LEN8R:
- pos++;
- break;
- }
-@@ -458,6 +464,9 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_DISP12:
- cp[-1] |= n & 0xfff;
- break;
-+ case DASM_LEN8R:
-+ cp[-1] |= (n - 1) & 0xff;
-+ break;
- default:
- *cp++ = ins;
- break;
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index ff6984c..6c2a904 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
- local action_names = {
- "STOP", "SECTION", "ESC", "REL_EXT",
- "ALIGN", "REL_LG", "LABEL_LG",
-- "REL_PC", "LABEL_PC", "DISP12", "DISP20",
"IMM16", "IMM32",
-+ "REL_PC", "LABEL_PC", "DISP12", "DISP20",
"IMM16", "IMM32", "LEN8R",
- }
-
- -- Maximum number of section buffer positions for dasm_put().
-@@ -370,6 +370,41 @@ local function parse_mem_by(arg)
- return d, b, a
- end
-
-+-- Parse memory operand of the form d(l, b) where 0 <= d < 4096, 1 <= l <=
256,
-+-- and b is a GPR.
-+local function parse_mem_lb(arg)
-+ local reg = "r1?[0-9]"
-+ local d, l, b = match(arg,
"^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
-+ if not d then
-+ -- TODO: handle values without registers?
-+ -- TODO: handle registers without a displacement?
-+ werror("bad memory operand: "..arg)
-+ return nil
-+ end
-+ local dval = tonumber(d)
-+ local dact = nil
-+ if dval then
-+ if not is_uint12(dval) then
-+ werror("displacement out of range: ", dval)
-+ end
-+ else
-+ dval = 0
-+ dact = function() waction("DISP12", nil, d) end
-+ end
-+ local lval = tonumber(l)
-+ local lact = nil
-+ if lval then
-+ if lval < 1 or lval > 256 then
-+ werror("length out of range: ", dval)
-+ end
-+ lval = lval - 1
-+ else
-+ lval = 0
-+ lact = function() waction("LEN8R", nil, l) end
-+ end
-+ return dval, lval, parse_reg(b), dact, lact
-+end
-+
- local function parse_imm(arg)
- local imm_val = tonumber(arg,16)
- if imm_val then
-@@ -1014,6 +1049,23 @@ map_op = {
- trace_3 = "000099000000q",
- tracg_3 = "eb000000000fs",
- tre_2 = "0000b2a50000h",
-+
-+ -- SS-a instructions
-+ clc_2 = "d50000000000SS-a",
-+ ed_2 = "de0000000000SS-a",
-+ edmk_2 = "df0000000000SS-a",
-+ mvc_2 = "d20000000000SS-a",
-+ mvcin_2 = "e80000000000SS-a",
-+ mvn_2 = "d10000000000SS-a",
-+ mvz_2 = "d30000000000SS-a",
-+ nc_2 = "d40000000000SS-a",
-+ oc_2 = "d60000000000SS-a",
-+ tr_2 = "dc0000000000SS-a",
-+ trt_2 = "dd0000000000SS-a",
-+ trtr_2 = "d00000000000SS-a",
-+ unpka_2 = "ea0000000000SS-a",
-+ unpku_2 = "e20000000000SS-a",
-+ xc_2 = "d70000000000SS-a",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1037,85 +1089,94 @@ local function parse_template(params, template, nparams, pos)
- local op2 = tonumber(sub(template, 9, 12), 16)
-
- -- Process each character.
-- for p in gmatch(sub(template, 13), ".") do
-- local pr1,pr2,pr3
-- if p == "g" then
-- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-- wputhw(op2)
-- elseif p == "h" then
-- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-- wputhw(op1); wputhw(op2)
-- elseif p == "i" then
-- op1 = op1 + shl(parse_reg(params[1]),4)
-- wputhw(op1);
-- parse_imm16(params[2])
-- elseif p == "j" then
-- local d, x, b, a = parse_mem_bx(params[2])
-- op1 = op1 + shl(parse_reg(params[1]), 4) + x
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2);
-- if a then a() end
-- elseif p == "k" then
-+ local p = sub(template, 13)
-+ if p == "g" then
-+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ wputhw(op2)
-+ elseif p == "h" then
-+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ wputhw(op1); wputhw(op2)
-+ elseif p == "i" then
-+ op1 = op1 + shl(parse_reg(params[1]),4)
-+ wputhw(op1);
-+ parse_imm16(params[2])
-+ elseif p == "j" then
-+ local d, x, b, a = parse_mem_bx(params[2])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then a() end
-+ elseif p == "k" then
-+ elseif p == "l" then
-+ local d, x, b, a = parse_mem_bxy(params[2])
-+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op0); wputhw(op1); wputhw(op2)
-+ if a then a() end
-+ elseif p == "m" then
-
-- elseif p == "l" then
-- local d, x, b, a = parse_mem_bxy(params[2])
-- op0 = op0 + shl(parse_reg(params[1]), 4) + x
-- op1 = op1 + shl(b, 12) + band(d, 0xfff)
-- op2 = op2 + band(shr(d, 4), 0xff00)
-- wputhw(op0); wputhw(op1); wputhw(op2)
-- if a then a() end
-- elseif p == "m" then
--
-- elseif p == "n" then
-- op0 = op0 + shl(parse_reg(params[1]), 4)
-- wputhw(op0);
-- parse_imm(params[2])
-- elseif p == "o" then
-- op0 = op0 + shl(parse_reg(params[1]), 4)
-- wputhw(op0);
-- local mode, n, s = parse_label(params[2])
-- waction("REL_"..mode, n, s)
-- elseif p == "q" then
-- local d, b, a = parse_mem_b(params[3])
-- op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2)
-- if a then a() end -- a() emits action.
-- elseif p == "r" then
-- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) +
parse_reg(params[3])
-- wputhw(op1); wputhw(op2)
-- elseif p == "s" then
-- local d, b, a = parse_mem_by(params[3])
-- op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-- op1 = op1 + shl(b, 12) + band(d, 0xfff)
-- op2 = op2 + band(shr(d, 4), 0xff00)
-- wputhw(op0); wputhw(op1); wputhw(op2)
-- if a then a() end -- a() emits action.
-- elseif p == "w" then
-- local mode, n, s = parse_label(params[1])
-- wputhw(op1)
-- waction("REL_"..mode, n, s)
-- elseif p == "x" then
-- local mode, n, s = parse_label(params[1])
-- wputhw(op0)
-- waction("REL_"..mode, n, s)
-- elseif p == "y" then
-- local d, x, b, a = parse_mem_bx(params[1])
-- op1 = op1 + x
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2);
-- if a then a() end -- a() emits action.
-- elseif p == "z" then
-- op2 = op2 + parse_reg(params[1])
-- wputhw(op2)
-- else
-- werror("unrecognized encoding")
-- end
-+ elseif p == "n" then
-+ op0 = op0 + shl(parse_reg(params[1]), 4)
-+ wputhw(op0);
-+ parse_imm(params[2])
-+ elseif p == "o" then
-+ op0 = op0 + shl(parse_reg(params[1]), 4)
-+ wputhw(op0);
-+ local mode, n, s = parse_label(params[2])
-+ waction("REL_"..mode, n, s)
-+ elseif p == "q" then
-+ local d, b, a = parse_mem_b(params[3])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2)
-+ if a then a() end -- a() emits action.
-+ elseif p == "r" then
-+ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) +
parse_reg(params[3])
-+ wputhw(op1); wputhw(op2)
-+ elseif p == "s" then
-+ local d, b, a = parse_mem_by(params[3])
-+ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op0); wputhw(op1); wputhw(op2)
-+ if a then a() end -- a() emits action.
-+ elseif p == "SS-a" then
-+ local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
-+ local d2, b2, d2a = parse_mem_b(params[2])
-+ op0 = op0 + l1
-+ op1 = op1 + shl(b1, 12) + d1
-+ op2 = op2 + shl(b2, 12) + d2
-+ wputhw(op0)
-+ if l1a then l1a() end
-+ wputhw(op1)
-+ if d1a then d1a() end
-+ wputhw(op2)
-+ if d2a then d2a() end
-+ elseif p == "w" then
-+ local mode, n, s = parse_label(params[1])
-+ wputhw(op1)
-+ waction("REL_"..mode, n, s)
-+ elseif p == "x" then
-+ local mode, n, s = parse_label(params[1])
-+ wputhw(op0)
-+ waction("REL_"..mode, n, s)
-+ elseif p == "y" then
-+ local d, x, b, a = parse_mem_bx(params[1])
-+ op1 = op1 + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then a() end -- a() emits action.
-+ elseif p == "z" then
-+ op2 = op2 + parse_reg(params[1])
-+ wputhw(op2)
-+ else
-+ werror("unrecognized encoding")
- end
--
- end
-+
- function op_template(params, template, nparams)
-- if not params then return template:gsub("%x%x%x%x%x%x%x%x", "")
end
-+ if not params then return template:gsub("%x%x%x%x%x%x%x%x%x%x%x%x",
"") end
- -- Limit number of section buffer positions used by a single dasm_put().
- -- A single opcode needs a maximum of 5 positions.
- if secpos+5 > maxsecpos then wflush() end
---
-2.20.1
-
-
-From 69dc085fdbfd16829446cbe327afe66381f699b2 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 8 Dec 2016 10:22:28 +0530
-Subject: [PATCH 089/247] Updated comment for test jmp_fwd()
-
----
- dynasm/Examples/test_z_inst.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 7b3c0f2..c17aebc 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -108,7 +108,7 @@ static void jmp_fwd(dasm_State *state)
- {
- dasm_State **Dst = &state;
-
-- // compare r2 == r3; do { r2 += r2; } while(r2 != r3);
-+ // while(r2!=r3){r2 += 2};
- | j >1
- |1:
- | cgr r2 , r3
---
-2.20.1
-
-
-From e468a08ac3014d093dd70a593238fef1c114dbd9 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 8 Dec 2016 13:59:05 -0500
-Subject: [PATCH 090/247] Delete unused branch_type function.
-
----
- dynasm/dasm_s390x.lua | 13 -------------
- 1 file changed, 13 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 6c2a904..3fa4c13 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -460,19 +460,6 @@ local function parse_label(label, def)
- werror("bad label `"..label.."'")
- end
-
--local function branch_type(op)
-- if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
-- elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
-- band(op, 0x3b000000) == 0x18000000 then
-- return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
-- elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
-- elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
-- elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
-- else
-- assert(false, "unknown branch type")
-- end
--end
--
- ------------------------------------------------------------------------------
-
- local map_op, op_template
---
-2.20.1
-
-
-From 0147a63979a6306328cf0f5cbaccfbb6ccd3045f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 8 Dec 2016 15:29:26 -0500
-Subject: [PATCH 091/247] Add support for .type directives.
-
----
- dynasm/Examples/test_z_inst.c | 20 +++++++++++++++++++-
- dynasm/dasm_s390x.lua | 25 +++++++++++++++++++++----
- 2 files changed, 40 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index c17aebc..a8895c0 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -291,6 +291,23 @@ static void ssa_act(dasm_State *state) {
- | br r14
- }
-
-+typedef struct {
-+ int a;
-+ int b;
-+} SimpleStruct;
-+
-+static void type(dasm_State *state) {
-+ dasm_State **Dst = &state;
-+
-+ | .type SIMPLE, SimpleStruct
-+ | lay sp, -8(sp)
-+ | stg r2, 0(sp)
-+ | xgr r2, r2
-+ | l r2, SIMPLE:sp->b
-+ | la sp, 8(sp)
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -317,7 +334,8 @@ test_table test[] = {
- // { 9,8, add_rrd, 25, "add_rrd"},
- // { 2,4, load_test, 4,"load_test"},
- {-1, 0, ssa, 65535<<8, "ssa"},
-- {-1, 0, ssa_act, 65535<<8, "ssa_act"}
-+ {-1, 0, ssa_act, 65535<<8, "ssa_act"},
-+ {27, 0, type, 27, "type"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 3fa4c13..3a5c500 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -247,7 +247,17 @@ local map_cond = {
- ------------------------------------------------------------------------------
-
- local function parse_reg(expr)
-- local r = match(expr, "^[r|f](1?[0-9])$")
-+ if not expr then werror("expected register name") end
-+ local tname, ovreg = match(expr, "^([%w_]+):(r1?%d)$")
-+ local tp = map_type[tname or expr]
-+ if tp then
-+ local reg = ovreg or tp.reg
-+ if not reg then
-+ werror("type `"..(tname or expr).."' needs a register
override")
-+ end
-+ expr = reg
-+ end
-+ local r = match(expr, "^[rf](1?%d)$")
- if r then
- r = tonumber(r)
- if r <= 15 then return r, tp end
-@@ -296,15 +306,22 @@ end
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
-- local reg = "r1?[0-9]"
-- local d, x, b = match(arg, "^(.*)%(("..reg.."),
("..reg..")%)$")
-+ local reg = "[%w_:]+"
-+ local d, x, b = match(arg,
"^(.*)%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
- if d then
- return d, parse_reg(x), parse_reg(b)
- end
-- local d, b = match(arg, "^(.*)%(("..reg..")%)$")
-+ local d, b = match(arg, "^(.*)%(%s*("..reg..")%s*%)$")
- if d then
- return d, 0, parse_reg(b)
- end
-+ local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
-+ if reg then
-+ local r, tp = parse_reg(reg)
-+ if tp then
-+ return format(tp.ctypefmt, tailr), 0, r
-+ end
-+ end
- -- TODO: handle values without registers?
- -- TODO: handle registers without a displacement?
- werror("bad memory operand: "..arg)
---
-2.20.1
-
-
-From ad6b76a67ee3c3bb0b5d113930fa78924b4033c6 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 8 Dec 2016 16:00:59 -0500
-Subject: [PATCH 092/247] Fix a couple of templates that were too short.
-
----
- dynasm/dasm_s390x.lua | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 3a5c500..4c716ee 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -540,7 +540,7 @@ map_op = {
- nihf_2 = "c00a00000000n",
- nilf_2 = "c00b00000000n",
- bal_2 = "000045000000j",
-- balr_2 = "00000000500g",
-+ balr_2 = "000000000500g",
- bas_2 = "00004d000000j",
- basr_2 = "000000000d00g",
- bassm_2 = "000000000c00g",
-@@ -549,9 +549,9 @@ map_op = {
- bakr_2 = "0000b2400000h",
- bsg_2 = "0000b2580000h",
- bc_2 = "000047000000k",
-- bcr_2 = "00000000700g",
-+ bcr_2 = "000000000700g",
- bct_2 = "000046000000j",
-- bctr_2 = "00000000600g",
-+ bctr_2 = "000000000600g",
- bctg_2 = "e30000000046l",
- bctgr_2 = "0000b9460000h",
- bxh_3 = "000086000000q",
-@@ -953,7 +953,7 @@ map_op = {
- sar_2 = "0000b24e0000h",
- sfpc_2 = "0000b3840000h",
- sfasr_2 = "0000b3850000h",
-- spm_2 = "00000000400g",
-+ spm_2 = "000000000400g",
- ssar_2 = "0000b2250000h",
- ssair_2 = "0000b99f0000h",
- slda_3 = "00008f000000q",
---
-2.20.1
-
-
-From 7f9e7ae7fdba0cb1032639d924e29d32afbb27ea Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 9 Dec 2016 19:18:02 +0530
-Subject: [PATCH 093/247] Added SS-b mode
-
-Currently I am not able to test the functionality of this mode and need some help with that.
-For the time being I have created a separate function for parsing; we can merge that later.
-To make sure SS-a doesn't break, I have not merged it yet, since I was not able to test it.
-Let me know your comments on this.
----
- dynasm/dasm_s390x.lua | 55 ++++++++++++++++++++++++++++++++++++++++++-
- 1 file changed, 54 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 4c716ee..08d44a3 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
- local action_names = {
- "STOP", "SECTION", "ESC", "REL_EXT",
- "ALIGN", "REL_LG", "LABEL_LG",
-- "REL_PC", "LABEL_PC", "DISP12", "DISP20",
"IMM16", "IMM32", "LEN8R",
-+ "REL_PC", "LABEL_PC", "DISP12", "DISP20",
"IMM16", "IMM32",
"LEN8R","LEN4HR","LEN4LR",
- }
-
- -- Maximum number of section buffer positions for dasm_put().
-@@ -422,6 +422,43 @@ local function parse_mem_lb(arg)
- return dval, lval, parse_reg(b), dact, lact
- end
-
-+local function parse_mem_l2b(arg,high_l)
-+ local reg = "r1?[0-9]"
-+ local d, l, b = match(arg,
"^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
-+ if not d then
-+ -- TODO: handle values without registers?
-+ -- TODO: handle registers without a displacement?
-+ werror("bad memory operand: "..arg)
-+ return nil
-+ end
-+ local dval = tonumber(d)
-+ local dact = nil
-+ if dval then
-+ if not is_uint12(dval) then
-+ werror("displacement out of range: ", dval)
-+ end
-+ else
-+ dval = 0
-+ dact = function() waction("DISP12", nil, d) end
-+ end
-+ local lval = tonumber(l)
-+ local lact = nil
-+ if lval then
-+ if lval < 1 or lval > 128 then
-+ werror("length out of range: ", dval)
-+ end
-+ lval = lval - 1
-+ else
-+ lval = 0
-+ if high_l then
-+ lact = function() waction("LEN4HR", nil, l) end
-+ else
-+ lact = function() waction("LEN4LR",nil,l) end
-+ end
-+ end
-+ return dval, lval, parse_reg(b), dact, lact
-+end
-+
- local function parse_imm(arg)
- local imm_val = tonumber(arg,16)
- if imm_val then
-@@ -1070,6 +1107,7 @@ map_op = {
- unpka_2 = "ea0000000000SS-a",
- unpku_2 = "e20000000000SS-a",
- xc_2 = "d70000000000SS-a",
-+ ap_2 = "fa0000000000SS-b",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1157,6 +1195,21 @@ local function parse_template(params, template, nparams, pos)
- if d1a then d1a() end
- wputhw(op2)
- if d2a then d2a() end
-+ elseif p == "SS-b" then
-+ local high_l=true;
-+ local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
-+ high_l=false;
-+ local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
-+ op0 = op0 + shl(l1,4) + l2
-+ op1 = op1 + shl(b1, 12) + d1
-+ op2 = op2 + shl(b2, 12) + d2
-+ wputhw(op0)
-+ if l1a then l1a() end
-+ if l2a then l2a() end
-+ wputhw(op1)
-+ if d1a then d1a() end
-+ wputhw(op2)
-+ if d2a then d2a() end
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From e3b8e06246d26862a5514488c6cfb8a50a95b873 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 9 Dec 2016 19:24:55 +0530
-Subject: [PATCH 094/247] Added SS-b support in C
-
----
- dynasm/dasm_s390x.h | 18 +++++++++++++++++-
- 1 file changed, 17 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 69e4fc1..e99dc39 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -24,7 +24,7 @@ enum {
- DASM_REL_PC, DASM_LABEL_PC,
- DASM_DISP12, DASM_DISP20,
- DASM_IMM16, DASM_IMM32,
-- DASM_LEN8R,
-+ DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR,
- DASM__MAX
- };
-
-@@ -289,6 +289,14 @@ void dasm_put(Dst_DECL, int start, ...)
- CK(n >= 1 && n <= 256, RANGE_I);
- b[pos++] = n;
- break;
-+ case DASM_LEN4HR:
-+ CK(n >= 1 && n <= 128, RANGE_I);
-+ b[pos++] = n;
-+ break;
-+ case DASM_LEN4LR:
-+ CK(n >= 1 && n <= 128, RANGE_I);
-+ b[pos++] = n;
-+ break;
- }
- }
- stop:
-@@ -370,6 +378,8 @@ int dasm_link(Dst_DECL, size_t * szp)
- case DASM_DISP20:
- case DASM_DISP12:
- case DASM_LEN8R:
-+ case DASM_LEN4HR:
-+ case DASM_LEN4LR:
- pos++;
- break;
- }
-@@ -467,6 +477,12 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_LEN8R:
- cp[-1] |= (n - 1) & 0xff;
- break;
-+ case DASM_LEN4HR:
-+ cp[-1] |= (n - 1) & 0xf0;
-+ break;
-+ case DASM_LEN4LR:
-+ cp[-1] |= (n - 1) & 0xof;
-+ break;
- default:
- *cp++ = ins;
- break;
---
-2.20.1
-
-
-From a177dd281170d2339daa8533cc693df588950a7e Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Fri, 9 Dec 2016 19:46:12 +0530
-Subject: [PATCH 095/247] Minor fix
-
----
- dynasm/dasm_s390x.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index e99dc39..b5a22ce 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -481,7 +481,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- cp[-1] |= (n - 1) & 0xf0;
- break;
- case DASM_LEN4LR:
-- cp[-1] |= (n - 1) & 0xof;
-+ cp[-1] |= (n - 1) & 0x0f;
- break;
- default:
- *cp++ = ins;
---
-2.20.1
-
-
-From ebdc8b3d69d27dd49bb7ecc7adfcfe0da178df9e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 9 Dec 2016 14:32:02 -0500
-Subject: [PATCH 096/247] Make host/buildvm_asm.c compile.
-
-I've added a rough implementation of this code. It is untested but
-does compile.
----
- src/host/buildvm_asm.c | 53 ++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 53 insertions(+)
-
-diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
-index 43595b3..5817091 100644
---- a/src/host/buildvm_asm.c
-+++ b/src/host/buildvm_asm.c
-@@ -87,6 +87,54 @@ err:
- }
- fprintf(ctx->fp, "\t%s %s\n", opname, sym);
- }
-+#elif LJ_TARGET_S390X
-+/* Emit halfwords piecewise as assembler text. */
-+static void emit_asm_halfwords(BuildCtx *ctx, uint8_t *p, int n)
-+{
-+ uint16_t *cp = (uint16_t*)p;
-+ n /= 2;
-+ int i;
-+ for (i = 0; i < n; i++) {
-+ if ((i & 7) == 0)
-+ fprintf(ctx->fp, "\t.hword 0x%hx", cp[i]);
-+ else
-+ fprintf(ctx->fp, ",0x%hx", cp[i]);
-+ if ((i & 7) == 7) putc('\n', ctx->fp);
-+ }
-+ if ((n & 7) != 0) putc('\n', ctx->fp);
-+}
-+
-+/* Emit s390x text relocations. */
-+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
-+ const char *sym)
-+{
-+ if (n & 1 || n < 2) {
-+ fprintf(stderr, "Error: instruction stream length invalid: %d.\n", n);
-+ exit(1);
-+ }
-+ n -= 2;
-+ const char *opname = NULL;
-+ const char *argt = ""; /* Inserted before argument. */
-+ int opcode = *(uint16_t*)(&cp[n]);
-+ int arg = (opcode>>4) & 0xf;
-+ switch (opcode & 0xff0f) {
-+ case 0xa705: opname = "bras"; argt = "r"; break;
-+ case 0xc005: opname = "brasl"; argt = "r"; break;
-+ case 0xa704: opname = "brc"; break;
-+ case 0xc004: opname = "brcl"; break;
-+ default:
-+ fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
-+ sym);
-+ exit(1);
-+ }
-+ emit_asm_halfwords(ctx, cp, n);
-+ if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
-+ /* Various fixups for external symbols outside of our binary. */
-+ fprintf(ctx->fp, "\t%s %s%d, %s@PLT\n", opname, argt, arg, sym);
-+ return;
-+ }
-+ fprintf(ctx->fp, "\t%s %s%d, %s\n", opname, argt, arg, sym);
-+}
- #else
- /* Emit words piecewise as assembler text. */
- static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
-@@ -310,6 +358,9 @@ void emit_asm(BuildCtx *ctx)
- emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
- }
- ofs += n+4;
-+#elif LJ_TARGET_S390X
-+ emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
-+ ofs += n;
- #else
- emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
- ofs += n;
-@@ -318,6 +369,8 @@ void emit_asm(BuildCtx *ctx)
- }
- #if LJ_TARGET_X86ORX64
- emit_asm_bytes(ctx, ctx->code+ofs, next-ofs);
-+#elif LJ_TARGET_S390X
-+ emit_asm_halfwords(ctx, ctx->code+ofs, next-ofs);
- #else
- emit_asm_words(ctx, ctx->code+ofs, next-ofs);
- #endif
---
-2.20.1
-
-
-From 5afbea95fe90c99bd03eed5f1cbbfe90fa03b15e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 9 Dec 2016 14:35:21 -0500
-Subject: [PATCH 097/247] Fix warning in dasm_s390x.h.
-
----
- dynasm/dasm_s390x.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index b5a22ce..5be8e8a 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -448,7 +448,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- if (cp[-1] >> 12 == 0xc) {
- *cp++ = n >> 17;
- } else {
-- CK(-(1 << 16) <= n && n < (1 << 16) && n
& 1 == 0, RANGE_LG);
-+ CK(-(1 << 16) <= n && n < (1 << 16) && (n
& 1) == 0, RANGE_LG);
- }
- *cp++ = n >> 1;
- p++; /* skip argument */
---
-2.20.1
-
-
-From cc7b26036fea5cd8cab91b2b6998d464003bb1ea Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 9 Dec 2016 14:38:37 -0500
-Subject: [PATCH 098/247] Define DASM_CHECKS when running tests.
-
----
- dynasm/Examples/run.sh | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/run.sh b/dynasm/Examples/run.sh
-index dbe93b0..a4542e8 100755
---- a/dynasm/Examples/run.sh
-+++ b/dynasm/Examples/run.sh
-@@ -2,7 +2,7 @@
- # set -x
-
- # run test
--lua ../dynasm.lua test_z_inst.c | gcc -std=gnu99 -Wall -Werror -g -x c -o test_z_inst -
-+lua ../dynasm.lua test_z_inst.c | gcc -DDASM_CHECKS -std=gnu99 -Wall -Werror -g -x c -o
test_z_inst -
- ./test_z_inst
- ec=$?
-
---
-2.20.1
-
-
-From e2da7f3c648116edb237dca095987c1f47dcd92d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 9 Dec 2016 16:28:39 -0500
-Subject: [PATCH 099/247] Make LuaJIT compile on s390x.
-
-I've disabled both the JIT and FFI for now. I've also stripped almost
-all of the assembly out of vm_s390x.dasc, leaving only labels for the
-most part. This is enough to get LuaJIT to compile but of course if
-you try and run it it will explode.
-
-The idea now is to re-add enough functionality to get a very basic
-Lua program to run.
----
- src/lib_jit.c | 2 +
- src/lj_arch.h | 1 +
- src/vm_s390x.dasc | 4895 +--------------------------------------------
- 3 files changed, 26 insertions(+), 4872 deletions(-)
-
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index 22ca0a1..6e265fd 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -732,6 +732,8 @@ static uint32_t jit_cpudetect(lua_State *L)
- }
- #endif
- #endif
-+#elif LJ_TARGET_S390X
-+ /* No optional CPU features to detect (for now). */
- #else
- #error "Missing CPU detection for this architecture"
- #endif
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 930d4c3..b613fab 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -401,6 +401,7 @@
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
- #define LJ_TARGET_GC64 1
- #define LJ_ARCH_NOJIT 1 /* NYI */
-+#define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
-
- #else
- #error "No target architecture defined"
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index e639159..bdd063d 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -135,10 +135,10 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; mvcl RB, RCH; mvcl RC, RCL; .endmacro
--|.macro ins_AB_; mvcl RB, RCH; .endmacro
--|.macro ins_A_C; mvcl RC, RCL; .endmacro
--|.macro ins_AND; ??? RD; .endmacro
-+|.macro ins_ABC; .endmacro
-+|.macro ins_AB_; .endmacro
-+|.macro ins_A_C; .endmacro
-+|.macro ins_AND; .endmacro
- |
- |// Instruction decode+dispatch.
- | // TODO: tune this, right now we always decode RA-D even if they aren't used.
-@@ -175,89 +175,6 @@
- | ins_NEXT
- | .endmacro
- |.endif
--|
--|// Call decode and dispatch.
--|.macro ins_callt
--| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
--| mvcle PC, LFUNC:RB->pc
--| mvcle RA, [PC]
--| movcl OP, RAL
--| movcl RA, RAH
--| add PC, 4
--|.endmacro
--|
--|.macro ins_call
--| // BASE = new base, RB = LFUNC, RD = nargs+1
--| mvcle [BASE-4], PC
--| ins_callt
--|.endmacro
--|
--|//-----------------------------------------------------------------------
--|
--|// Macros to test operand types.
--|.macro checktp, reg, tp; CG dword [BASE+reg*8+4], tp; .endmacro
--|.macro checknum, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro //
condition to chk is result is above or equal
--|.macro checkint, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro //
condition to chk is result is not equal
--|.macro checkstr, reg, target; checktp reg, LJ_TSTR; brc target; .endmacro //
condition to chk is result is nto equal
--|.macro checktab, reg, target; checktp reg, LJ_TTAB; brc target; .endmacro //
condition to chk is result is nto equal
--|
--|// These operands must be used with movzx.
--|.define PC_OP, byte [PC-4]
--|.define PC_RA, byte [PC-3]
--|.define PC_RB, byte [PC-1]
--|.define PC_RC, byte [PC-2]
--|.define PC_RD, word [PC-2]
--|
--|.macro branchPC, reg
--
--|.endmacro
--|
--|// Assumes DISPATCH is relative to GL.
--#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
--#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
--|
--#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
--|
--|// Decrement hashed hotcount and trigger trace recorder if zero.
--|.macro hotloop, reg
--
--|.endmacro
--|
--|.macro hotcall, reg
--
--|.endmacro
--|
--|// Set current VM state.
--|.macro set_vmstate, st
--
--|.endmacro
--|
--|
--|.macro fpop1; fstp st1; .endmacro
--|
--|
--|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
--|
--|.endmacro
--|.macro sseconst_1, reg, tmp // Synthesize 1.0.
--|
--|.endmacro
--|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
--|
--|.endmacro
--|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
--|
--|.endmacro
--|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
--|
--|.endmacro
--|
--|// Move table write barrier back. Overwrites reg.
--|.macro barrierback, tab, reg
--
--|.endmacro
--|
--|//-----------------------------------------------------------------------
-
- /* Generate subroutines used by opcodes and other parts of the VM. */
- /* The .code_sub section should be last to help static branch prediction. */
-@@ -270,359 +187,49 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_returnp:
-- | test PC, FRAME_P
-- | jz ->cont_dispatch
-- |
-- | // Return from pcall or xpcall fast func.
-- | and PC, -8
-- | sub BASE, PC // Restore caller base.
-- | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
-- | mov PC, [BASE-4] // Fetch PC of previous frame.
-- | // Prepending may overwrite the pcall frame, so do it at the end.
-- | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
- |
- |->vm_returnc:
-- | add RD, 1 // RD = nresults+1
-- | jz ->vm_unwind_yield
-- | mov MULTRES, RD
-- | test PC, FRAME_TYPE
-- | jz ->BC_RET_Z // Handle regular return to Lua.
- |
- |->vm_return:
-- | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
-- | xor PC, FRAME_C
-- | test PC, FRAME_TYPE
-- | jnz ->vm_returnp
-- |
-- | // Return to C.
-- | set_vmstate C
-- | and PC, -8
-- | sub PC, BASE
-- | neg PC // Previous base = BASE - delta.
-- |
-- | sub RD, 1
-- | jz >2
-- |1: // Move results down.
-- |.if X64
-- | mov RBa, [BASE+RA]
-- | mov [BASE-8], RBa
-- |.else
-- | mov RB, [BASE+RA]
-- | mov [BASE-8], RB
-- | mov RB, [BASE+RA+4]
-- | mov [BASE-4], RB
-- |.endif
-- | add BASE, 8
-- | sub RD, 1
-- | jnz <1
-- |2:
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, PC
-- |3:
-- | mov RD, MULTRES
-- | mov RA, SAVE_NRES // RA = wanted nresults+1
-- |4:
-- | cmp RA, RD
-- | jne >6 // More/less results wanted?
-- |5:
-- | sub BASE, 8
-- | mov L:RB->top, BASE
- |
- |->vm_leave_cp:
-- | mov RAa, SAVE_CFRAME // Restore previous C frame.
-- | mov L:RB->cframe, RAa
-- | xor eax, eax // Ok return status for vm_pcall.
- |
- |->vm_leave_unw:
-- | restoreregs
-- | ret
-- |
-- |6:
-- | jb >7 // Less results wanted?
-- | // More results wanted. Check stack size and fill up results with nil.
-- | cmp BASE, L:RB->maxstack
-- | ja >8
-- | mov dword [BASE-4], LJ_TNIL
-- | add BASE, 8
-- | add RD, 1
-- | jmp <4
-- |
-- |7: // Less results wanted.
-- | test RA, RA
-- | jz <5 // But check for LUA_MULTRET+1.
-- | sub RA, RD // Negative result!
-- | lea BASE, [BASE+RA*8] // Correct top.
-- | jmp <5
-- |
-- |8: // Corner case: need to grow stack for filling up results.
-- | // This can happen if:
-- | // - A C function grows the stack (a lot).
-- | // - The GC shrinks the stack in between.
-- | // - A return back from a lua_call() with (high) nresults adjustment.
-- | mov L:RB->top, BASE // Save current top held in BASE (yes).
-- | mov MULTRES, RD // Need to fill only remainder with nil.
-- | mov FCARG2, RA
-- | mov FCARG1, L:RB
-- | call extern lj_state_growstack@8 // (lua_State *L, int n)
-- | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
-- | jmp <3
- |
- |->vm_unwind_yield:
-- | mov al, LUA_YIELD
-- | jmp ->vm_unwind_c_eh
-- |
-- |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
-- | // (void *cframe, int errcode)
-- |.if X64
-- | mov eax, CARG2d // Error return status for vm_pcall.
-- | mov rsp, CARG1
-- |.else
-- | mov eax, FCARG2 // Error return status for vm_pcall.
-- | mov esp, FCARG1
-- |.if WIN
-- | lea FCARG1, SEH_NEXT
-- | fs; mov [0], FCARG1
-- |.endif
-- |.endif
-- |->vm_unwind_c_eh: // Landing pad for external unwinder.
-- | mov L:RB, SAVE_L
-- | mov GL:RB, L:RB->glref
-- | mov dword GL:RB->vmstate, ~LJ_VMST_C
-- | jmp ->vm_leave_unw
- |
-+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
-+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
- |->vm_unwind_rethrow:
-- |.if X64 and not X64WIN
-- | mov FCARG1, SAVE_L
-- | mov FCARG2, eax
-- | restoreregs
-- | jmp extern lj_err_throw@8 // (lua_State *L, int errcode)
-- |.endif
-- |
-- |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
-- | // (void *cframe)
-- |.if X64
-- | and CARG1, CFRAME_RAWMASK
-- | mov rsp, CARG1
-- |.else
-- | and FCARG1, CFRAME_RAWMASK
-- | mov esp, FCARG1
-- |.if WIN
-- | lea FCARG1, SEH_NEXT
-- | fs; mov [0], FCARG1
-- |.endif
-- |.endif
-+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
- |->vm_unwind_ff_eh: // Landing pad for external unwinder.
-- | mov L:RB, SAVE_L
-- | mov RAa, -8 // Results start at BASE+RA = BASE-8.
-- | mov RD, 1+1 // Really 1+2 results, incr. later.
-- | mov BASE, L:RB->base
-- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-- | add DISPATCH, GG_G2DISP
-- | mov PC, [BASE-4] // Fetch PC of previous frame.
-- | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
-- | set_vmstate INTERP
-- | jmp ->vm_returnc // Increments RD/MULTRES and returns.
-- |
-- |.if WIN and not X64
-- |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
-- | // (void *cframe, void *excptrec, void *unwinder, int errcode)
-- | mov [esp], FCARG1 // Return value for RtlUnwind.
-- | push FCARG2 // Exception record for RtlUnwind.
-- | push 0 // Ignored by RtlUnwind.
-- | push dword [FCARG1+CFRAME_OFS_SEH]
-- | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
-- | mov FCARG1, eax
-- | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
-- | ret // Jump to unwinder.
-- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Grow stack for calls -----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_growstack_c: // Grow stack for C function.
-- | mov FCARG2, LUA_MINSTACK
-- | jmp >2
- |
- |->vm_growstack_v: // Grow stack for vararg Lua function.
-- | sub RD, 8
-- | jmp >1
- |
- |->vm_growstack_f: // Grow stack for fixarg Lua function.
- | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
-- | lea RD, [BASE+NARGS:RD*8-8]
-- |1:
-- | movzx RA, byte [PC-4+PC2PROTO(framesize)]
-- | add PC, 4 // Must point after first instruction.
-- | mov L:RB->base, BASE
-- | mov L:RB->top, RD
-- | mov SAVE_PC, PC
-- | mov FCARG2, RA
-- |2:
-- | // RB = L, L->base = new base, L->top = top
-- | mov FCARG1, L:RB
-- | call extern lj_state_growstack@8 // (lua_State *L, int n)
-- | mov BASE, L:RB->base
-- | mov RD, L:RB->top
-- | mov LFUNC:RB, [BASE-8]
-- | sub RD, BASE
-- | shr RD, 3
-- | add NARGS:RD, 1
-- | // BASE = new base, RB = LFUNC, RD = nargs+1
-- | ins_callt // Just retry the call.
- |
- |//-----------------------------------------------------------------------
- |//-- Entry points into the assembler VM ---------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_resume: // Setup C frame and resume thread.
-- | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
-- | saveregs
-- |.if X64
-- | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-- | mov SAVE_L, CARG1d
-- | mov RA, CARG2d
-- |.else
-- | mov L:RB, SAVE_L
-- | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
-- |.endif
-- | mov PC, FRAME_CP
-- | xor RD, RD
-- | lea KBASEa, [esp+CFRAME_RESUME]
-- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-- | add DISPATCH, GG_G2DISP
-- | mov SAVE_PC, RD // Any value outside of bytecode is ok.
-- | mov SAVE_CFRAME, RDa
-- |.if X64
-- | mov SAVE_NRES, RD
-- | mov SAVE_ERRF, RD
-- |.endif
-- | mov L:RB->cframe, KBASEa
-- | cmp byte L:RB->status, RDL
-- | je >2 // Initial resume (like a call).
-- |
-- | // Resume after yield (like a return).
-- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-- | set_vmstate INTERP
-- | mov byte L:RB->status, RDL
-- | mov BASE, L:RB->base
-- | mov RD, L:RB->top
-- | sub RD, RA
-- | shr RD, 3
-- | add RD, 1 // RD = nresults+1
-- | sub RA, BASE // RA = resultofs
-- | mov PC, [BASE-4]
-- | mov MULTRES, RD
-- | test PC, FRAME_TYPE
-- | jz ->BC_RET_Z
-- | jmp ->vm_return
- |
- |->vm_pcall: // Setup protected C frame and enter VM.
-- | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
-- | saveregs
-- | mov PC, FRAME_CP
-- |.if X64
-- | mov SAVE_ERRF, CARG4d
-- |.endif
-- | jmp >1
- |
- |->vm_call: // Setup C frame and enter VM.
-- | // (lua_State *L, TValue *base, int nres1)
-- | saveregs
-- | mov PC, FRAME_C
-- |
-- |1: // Entry point for vm_pcall above (PC = ftype).
-- |.if X64
-- | mov SAVE_NRES, CARG3d
-- | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-- | mov SAVE_L, CARG1d
-- | mov RA, CARG2d
-- |.else
-- | mov L:RB, SAVE_L
-- | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
-- |.endif
-- |
-- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-- | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
-- | mov SAVE_CFRAME, KBASEa
-- | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
-- | add DISPATCH, GG_G2DISP
-- |.if X64
-- | mov L:RB->cframe, rsp
-- |.else
-- | mov L:RB->cframe, esp
-- |.endif
-- |
-- |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
-- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-- | set_vmstate INTERP
-- | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
-- | add PC, RA
-- | sub PC, BASE // PC = frame delta + frame type
-- |
-- | mov RD, L:RB->top
-- | sub RD, RA
-- | shr NARGS:RD, 3
-- | add NARGS:RD, 1 // RD = nargs+1
- |
- |->vm_call_dispatch:
-- | mov LFUNC:RB, [RA-8]
-- | cmp dword [RA-4], LJ_TFUNC
-- | jne ->vmeta_call // Ensure KBASE defined and != BASE.
- |
- |->vm_call_dispatch_f:
-- | mov BASE, RA
-- | ins_call
-- | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
- |
- |->vm_cpcall: // Setup protected C frame, call C.
-- | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
-- | saveregs
-- |.if X64
-- | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-- | mov SAVE_L, CARG1d
-- |.else
-- | mov L:RB, SAVE_L
-- | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
-- | mov RC, INARG_CP_UD // Get args before they are overwritten.
-- | mov RA, INARG_CP_FUNC
-- | mov BASE, INARG_CP_CALL
-- |.endif
-- | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
-- |
-- | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
-- | sub KBASE, L:RB->top
-- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-- | mov SAVE_ERRF, 0 // No error function.
-- | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
-- | add DISPATCH, GG_G2DISP
-- | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
-- |
-- |.if X64
-- | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
-- | mov SAVE_CFRAME, KBASEa
-- | mov L:RB->cframe, rsp
-- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-- |
-- | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
-- |.else
-- | mov ARG3, RC // Have to copy args downwards.
-- | mov ARG2, RA
-- | mov ARG1, L:RB
-- |
-- | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
-- | mov SAVE_CFRAME, KBASE
-- | mov L:RB->cframe, esp
-- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-- |
-- | call BASE // (lua_State *L, lua_CFunction func, void *ud)
-- |.endif
-- | // TValue * (new base) or NULL returned in eax (RC).
-- | test RC, RC
-- | jz ->vm_leave_cp // No base? Just remove C frame.
-- | mov RA, RC
-- | mov PC, FRAME_CP
-- | jmp <2 // Else continue with the call.
- |
- |//-----------------------------------------------------------------------
- |//-- Metamethod handling ------------------------------------------------
-@@ -631,546 +238,69 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Continuation dispatch ----------------------------------------------
- |
- |->cont_dispatch:
-- | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
-- | add RA, BASE
-- | and PC, -8
-- | mov RB, BASE
-- | sub BASE, PC // Restore caller BASE.
-- | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
-- | mov RC, RA // ... in [RC]
-- | mov PC, [RB-12] // Restore PC from [cont|PC].
-- |.if X64
-- | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
-- |.if FFI
-- | cmp RA, 1
-- | jbe >1
-- |.endif
-- | lea KBASEa, qword [=>0]
-- | add RAa, KBASEa
-- |.else
-- | mov RA, dword [RB-16]
-- |.if FFI
-- | cmp RA, 1
-- | jbe >1
-- |.endif
-- |.endif
-- | mov LFUNC:KBASE, [BASE-8]
-- | mov KBASE, LFUNC:KBASE->pc
-- | mov KBASE, [KBASE+PC2PROTO(k)]
-- | // BASE = base, RC = result, RB = meta base
-- | jmp RAa // Jump to continuation.
-- |
-- |.if FFI
-- |1:
-- | je ->cont_ffi_callback // cont = 1: return from FFI callback.
-- | // cont = 0: Tail call from C function.
-- | sub RB, BASE
-- | shr RB, 3
-- | lea RD, [RB-1]
-- | jmp ->vm_call_tail
-- |.endif
- |
- |->cont_cat: // BASE = base, RC = result, RB = mbase
-- | movzx RA, PC_RB
-- | sub RB, 16
-- | lea RA, [BASE+RA*8]
-- | sub RA, RB
-- | je ->cont_ra
-- | neg RA
-- | shr RA, 3
-- |.if X64WIN
-- | mov CARG3d, RA
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE
-- | mov RCa, [RC]
-- | mov [RB], RCa
-- | mov CARG2d, RB
-- |.elif X64
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE
-- | mov CARG3d, RA
-- | mov RAa, [RC]
-- | mov [RB], RAa
-- | mov CARG2d, RB
-- |.else
-- | mov ARG3, RA
-- | mov RA, [RC+4]
-- | mov RC, [RC]
-- | mov [RB+4], RA
-- | mov [RB], RC
-- | mov ARG2, RB
-- |.endif
-- | jmp ->BC_CAT_Z
- |
- |//-- Table indexing metamethods -----------------------------------------
- |
- |->vmeta_tgets:
-- | mov TMP1, RC // RC = GCstr *
-- | mov TMP2, LJ_TSTR
-- | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
-- | cmp PC_OP, BC_GGET
-- | jne >1
-- | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
-- | mov [RA], TAB:RB // RB = GCtab *
-- | mov dword [RA+4], LJ_TTAB
-- | mov RB, RA
-- | jmp >2
- |
- |->vmeta_tgetb:
-- | movzx RC, PC_RC
-- |.if DUALNUM
-- | mov TMP2, LJ_TISNUM
-- | mov TMP1, RC
-- |.else
-- | cvtsi2sd xmm0, RC
-- | movsd TMPQ, xmm0
-- |.endif
-- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
-- | jmp >1
- |
- |->vmeta_tgetv:
-- | movzx RC, PC_RC // Reload TValue *k from RC.
-- | lea RC, [BASE+RC*8]
-- |1:
-- | movzx RB, PC_RB // Reload TValue *t from RB.
-- | lea RB, [BASE+RB*8]
-- |2:
-- |.if X64
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-- | mov CARG2d, RB
-- | mov CARG3, RCa // May be 64 bit ptr to stack.
-- | mov L:RB, L:CARG1d
-- |.else
-- | mov ARG2, RB
-- | mov L:RB, SAVE_L
-- | mov ARG3, RC
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
-- | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
-- | mov BASE, L:RB->base
-- | test RC, RC
-- | jz >3
- |->cont_ra: // BASE = base, RC = result
-- | movzx RA, PC_RA
-- |.if X64
-- | mov RBa, [RC]
-- | mov [BASE+RA*8], RBa
-- |.else
-- | mov RB, [RC+4]
-- | mov RC, [RC]
-- | mov [BASE+RA*8+4], RB
-- | mov [BASE+RA*8], RC
-- |.endif
-- | ins_next
-- |
-- |3: // Call __index metamethod.
-- | // BASE = base, L->top = new base, stack = cont/func/t/k
-- | mov RA, L:RB->top
-- | mov [RA-12], PC // [cont|PC]
-- | lea PC, [RA+FRAME_CONT]
-- | sub PC, BASE
-- | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
-- | mov NARGS:RD, 2+1 // 2 args for func(t, k).
-- | jmp ->vm_call_dispatch_f
- |
- |->vmeta_tgetr:
-- | mov FCARG1, TAB:RB
-- | mov RB, BASE // Save BASE.
-- | mov FCARG2, RC // Caveat: FCARG2 == BASE
-- | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
-- | // cTValue * or NULL returned in eax (RC).
-- | movzx RA, PC_RA
-- | mov BASE, RB // Restore BASE.
-- | test RC, RC
-- | jnz ->BC_TGETR_Z
-- | mov dword [BASE+RA*8+4], LJ_TNIL
-- | jmp ->BC_TGETR2_Z
- |
- |//-----------------------------------------------------------------------
- |
- |->vmeta_tsets:
-- | mov TMP1, RC // RC = GCstr *
-- | mov TMP2, LJ_TSTR
-- | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
-- | cmp PC_OP, BC_GSET
-- | jne >1
-- | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
-- | mov [RA], TAB:RB // RB = GCtab *
-- | mov dword [RA+4], LJ_TTAB
-- | mov RB, RA
-- | jmp >2
- |
- |->vmeta_tsetb:
-- | movzx RC, PC_RC
-- |.if DUALNUM
-- | mov TMP2, LJ_TISNUM
-- | mov TMP1, RC
-- |.else
-- | cvtsi2sd xmm0, RC
-- | movsd TMPQ, xmm0
-- |.endif
-- | lea RCa, TMPQ // Store temp. TValue in TMPQ.
-- | jmp >1
- |
- |->vmeta_tsetv:
-- | movzx RC, PC_RC // Reload TValue *k from RC.
-- | lea RC, [BASE+RC*8]
-- |1:
-- | movzx RB, PC_RB // Reload TValue *t from RB.
-- | lea RB, [BASE+RB*8]
-- |2:
-- |.if X64
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-- | mov CARG2d, RB
-- | mov CARG3, RCa // May be 64 bit ptr to stack.
-- | mov L:RB, L:CARG1d
-- |.else
-- | mov ARG2, RB
-- | mov L:RB, SAVE_L
-- | mov ARG3, RC
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
-- | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
-- | mov BASE, L:RB->base
-- | test RC, RC
-- | jz >3
-- | // NOBARRIER: lj_meta_tset ensures the table is not black.
-- | movzx RA, PC_RA
-- |.if X64
-- | mov RBa, [BASE+RA*8]
-- | mov [RC], RBa
-- |.else
-- | mov RB, [BASE+RA*8+4]
-- | mov RA, [BASE+RA*8]
-- | mov [RC+4], RB
-- | mov [RC], RA
-- |.endif
- |->cont_nop: // BASE = base, (RC = result)
-- | ins_next
-- |
-- |3: // Call __newindex metamethod.
-- | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
-- | mov RA, L:RB->top
-- | mov [RA-12], PC // [cont|PC]
-- | movzx RC, PC_RA
-- | // Copy value to third argument.
-- |.if X64
-- | mov RBa, [BASE+RC*8]
-- | mov [RA+16], RBa
-- |.else
-- | mov RB, [BASE+RC*8+4]
-- | mov RC, [BASE+RC*8]
-- | mov [RA+20], RB
-- | mov [RA+16], RC
-- |.endif
-- | lea PC, [RA+FRAME_CONT]
-- | sub PC, BASE
-- | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
-- | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
-- | jmp ->vm_call_dispatch_f
- |
- |->vmeta_tsetr:
-- |.if X64WIN
-- | mov L:CARG1d, SAVE_L
-- | mov CARG3d, RC
-- | mov L:CARG1d->base, BASE
-- | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
-- |.elif X64
-- | mov L:CARG1d, SAVE_L
-- | mov CARG2d, TAB:RB
-- | mov L:CARG1d->base, BASE
-- | mov RB, BASE // Save BASE.
-- | mov CARG3d, RC // Caveat: CARG3d == BASE.
-- |.else
-- | mov L:RA, SAVE_L
-- | mov ARG2, TAB:RB
-- | mov RB, BASE // Save BASE.
-- | mov ARG3, RC
-- | mov ARG1, L:RA
-- | mov L:RA->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
-- | // TValue * returned in eax (RC).
-- | movzx RA, PC_RA
-- | mov BASE, RB // Restore BASE.
-- | jmp ->BC_TSETR_Z
- |
- |//-- Comparison metamethods ---------------------------------------------
- |
-- |->vmeta_comp:
-- |.if X64
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
-- |.if X64WIN
-- | lea CARG3d, [BASE+RD*8]
-- | lea CARG2d, [BASE+RA*8]
-- |.else
-- | lea CARG2d, [BASE+RA*8]
-- | lea CARG3d, [BASE+RD*8]
-- |.endif
-- | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
-- | movzx CARG4d, PC_OP
-- |.else
-- | movzx RB, PC_OP
-- | lea RD, [BASE+RD*8]
-- | lea RA, [BASE+RA*8]
-- | mov ARG4, RB
-- | mov L:RB, SAVE_L
-- | mov ARG3, RD
-- | mov ARG2, RA
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
-- | // 0/1 or TValue * (metamethod) returned in eax (RC).
-- |3:
-- | mov BASE, L:RB->base
-- | cmp RC, 1
-- | ja ->vmeta_binop
-- |4:
-- | lea PC, [PC+4]
-- | jb >6
-- |5:
-- | movzx RD, PC_RD
-- | branchPC RD
-- |6:
-- | ins_next
-- |
- |->cont_condt: // BASE = base, RC = result
-- | add PC, 4
-- | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
-- | jb <5
-- | jmp <6
- |
- |->cont_condf: // BASE = base, RC = result
-- | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
-- | jmp <4
- |
- |->vmeta_equal:
-- | sub PC, 4
-- |.if X64WIN
-- | mov CARG3d, RD
-- | mov CARG4d, RB
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
-- | mov CARG2d, RA
-- | mov CARG1d, L:RB // Caveat: CARG1d == RA.
-- |.elif X64
-- | mov CARG2d, RA
-- | mov CARG4d, RB // Caveat: CARG4d == RA.
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
-- | mov CARG3d, RD
-- | mov CARG1d, L:RB
-- |.else
-- | mov ARG4, RB
-- | mov L:RB, SAVE_L
-- | mov ARG3, RD
-- | mov ARG2, RA
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
-- | // 0/1 or TValue * (metamethod) returned in eax (RC).
-- | jmp <3
- |
- |->vmeta_equal_cd:
-- |.if FFI
-- | sub PC, 4
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov FCARG1, L:RB
-- | mov FCARG2, dword [PC-4]
-- | mov SAVE_PC, PC
-- | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
-- | // 0/1 or TValue * (metamethod) returned in eax (RC).
-- | jmp <3
-- |.endif
- |
- |->vmeta_istype:
-- |.if X64
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-- | mov CARG2d, RA
-- | movzx CARG3d, PC_RD
-- | mov L:CARG1d, L:RB
-- |.else
-- | movzx RD, PC_RD
-- | mov ARG2, RA
-- | mov L:RB, SAVE_L
-- | mov ARG3, RD
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
-- | mov BASE, L:RB->base
-- | jmp <6
- |
- |//-- Arithmetic metamethods ---------------------------------------------
- |
- |->vmeta_arith_vno:
-- |.if DUALNUM
-- | movzx RB, PC_RB
-- |.endif
- |->vmeta_arith_vn:
-- | lea RC, [KBASE+RC*8]
-- | jmp >1
- |
- |->vmeta_arith_nvo:
-- |.if DUALNUM
-- | movzx RC, PC_RC
-- |.endif
- |->vmeta_arith_nv:
-- | lea RC, [KBASE+RC*8]
-- | lea RB, [BASE+RB*8]
-- | xchg RB, RC
-- | jmp >2
- |
- |->vmeta_unm:
-- | lea RC, [BASE+RD*8]
-- | mov RB, RC
-- | jmp >2
- |
- |->vmeta_arith_vvo:
-- |.if DUALNUM
-- | movzx RB, PC_RB
-- |.endif
- |->vmeta_arith_vv:
-- | lea RC, [BASE+RC*8]
-- |1:
-- | lea RB, [BASE+RB*8]
-- |2:
-- | lea RA, [BASE+RA*8]
-- |.if X64WIN
-- | mov CARG3d, RB
-- | mov CARG4d, RC
-- | movzx RC, PC_OP
-- | mov ARG5d, RC
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
-- | mov CARG2d, RA
-- | mov CARG1d, L:RB // Caveat: CARG1d == RA.
-- |.elif X64
-- | movzx CARG5d, PC_OP
-- | mov CARG2d, RA
-- | mov CARG4d, RC // Caveat: CARG4d == RA.
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
-- | mov CARG3d, RB
-- | mov L:RB, L:CARG1d
-- |.else
-- | mov ARG3, RB
-- | mov L:RB, SAVE_L
-- | mov ARG4, RC
-- | movzx RC, PC_OP
-- | mov ARG2, RA
-- | mov ARG5, RC
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
-- | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
-- | mov BASE, L:RB->base
-- | test RC, RC
-- | jz ->cont_nop
- |
- | // Call metamethod for binary op.
- |->vmeta_binop:
-- | // BASE = base, RC = new base, stack = cont/func/o1/o2
-- | mov RA, RC
-- | sub RC, BASE
-- | mov [RA-12], PC // [cont|PC]
-- | lea PC, [RC+FRAME_CONT]
-- | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
-- | jmp ->vm_call_dispatch
- |
- |->vmeta_len:
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
-- | mov L:FCARG1, L:RB
-- | mov SAVE_PC, PC
-- | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
-- | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
-- | mov BASE, L:RB->base
--#if LJ_52
-- | test RC, RC
-- | jne ->vmeta_binop // Binop call for compatibility.
-- | movzx RD, PC_RD
-- | mov TAB:FCARG1, [BASE+RD*8]
-- | jmp ->BC_LEN_Z
--#else
-- | jmp ->vmeta_binop // Binop call for compatibility.
--#endif
- |
- |//-- Call metamethod ----------------------------------------------------
- |
- |->vmeta_call_ra:
-- | lea RA, [BASE+RA*8+8]
- |->vmeta_call: // Resolve and call __call metamethod.
-- | // BASE = old base, RA = new base, RC = nargs+1, PC = return
-- | mov TMP2, RA // Save RA, RC for us.
-- | mov TMP1, NARGS:RD
-- | sub RA, 8
-- |.if X64
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-- | mov CARG2d, RA
-- | lea CARG3d, [RA+NARGS:RD*8]
-- | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-- |.else
-- | lea RC, [RA+NARGS:RD*8]
-- | mov L:RB, SAVE_L
-- | mov ARG2, RA
-- | mov ARG3, RC
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE // This is the callers base!
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
-- | mov BASE, L:RB->base
-- | mov RA, TMP2
-- | mov NARGS:RD, TMP1
-- | mov LFUNC:RB, [RA-8]
-- | add NARGS:RD, 1
-- | // This is fragile. L->base must not move, KBASE must always be defined.
-- | cmp KBASE, BASE // Continue with CALLT if flag set.
-- | je ->BC_CALLT_Z
-- | mov BASE, RA
-- | ins_call // Otherwise call resolved metamethod.
- |
- |//-- Argument coercion for 'for' statement ------------------------------
- |
- |->vmeta_for:
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov FCARG2, RA // Caveat: FCARG2 == BASE
-- | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-- | mov SAVE_PC, PC
-- | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
-- | mov BASE, L:RB->base
-- | mov RC, [PC-4]
-- | movzx RA, RCH
-- | movzx OP, RCL
-- | shr RC, 16
-- |.if X64
-- | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
-- |.else
-- | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
-- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Fast functions -----------------------------------------------------
-@@ -1182,761 +312,109 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_1, name
- |->ff_ .. name:
-- | cmp NARGS:RD, 1+1; jb ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_2, name
- |->ff_ .. name:
-- | cmp NARGS:RD, 2+1; jb ->fff_fallback
- |.endmacro
- |
-- |.macro .ffunc_nsse, name, op
-+ |.macro .ffunc_n, name, op
- | .ffunc_1 name
-- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-- | op xmm0, qword [BASE]
- |.endmacro
- |
-- |.macro .ffunc_nsse, name
-- | .ffunc_nsse name, movsd
-+ |.macro .ffunc_n, name
-+ | .ffunc_n name, mvc
- |.endmacro
- |
-- |.macro .ffunc_nnsse, name
-+ |.macro .ffunc_nn, name
- | .ffunc_2 name
-- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-- | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
-- | movsd xmm0, qword [BASE]
-- | movsd xmm1, qword [BASE+8]
-- |.endmacro
-- |
-- |.macro .ffunc_nnr, name
-- | .ffunc_2 name
-- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-- | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
-- | fld qword [BASE+8]
-- | fld qword [BASE]
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses label 1.
- |.macro ffgccheck
-- | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
-- | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
-- | jb >1
-- | call ->fff_gcstep
-- |1:
- |.endmacro
- |
- |//-- Base library: checks -----------------------------------------------
- |
- |.ffunc_1 assert
-- | mov RB, [BASE+4]
-- | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
-- | mov PC, [BASE-4]
-- | mov MULTRES, RD
-- | mov [BASE-4], RB
-- | mov RB, [BASE]
-- | mov [BASE-8], RB
-- | sub RD, 2
-- | jz >2
-- | mov RA, BASE
-- |1:
-- | add RA, 8
-- |.if X64
-- | mov RBa, [RA]
-- | mov [RA-8], RBa
-- |.else
-- | mov RB, [RA+4]
-- | mov [RA-4], RB
-- | mov RB, [RA]
-- | mov [RA-8], RB
-- |.endif
-- | sub RD, 1
-- | jnz <1
-- |2:
-- | mov RD, MULTRES
-- | jmp ->fff_res_
- |
- |.ffunc_1 type
-- | mov RB, [BASE+4]
-- |.if X64
-- | mov RA, RB
-- | sar RA, 15
-- | cmp RA, -2
-- | je >3
-- |.endif
-- | mov RC, ~LJ_TNUMX
-- | not RB
-- | cmp RC, RB
-- | cmova RC, RB
-- |2:
-- | mov CFUNC:RB, [BASE-8]
-- | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
-- | mov PC, [BASE-4]
-- | mov dword [BASE-4], LJ_TSTR
-- | mov [BASE-8], STR:RC
-- | jmp ->fff_res1
-- |.if X64
-- |3:
-- | mov RC, ~LJ_TLIGHTUD
-- | jmp <2
-- |.endif
- |
- |//-- Base library: getters and setters ---------------------------------
- |
- |.ffunc_1 getmetatable
-- | mov RB, [BASE+4]
-- | mov PC, [BASE-4]
-- | cmp RB, LJ_TTAB; jne >6
-- |1: // Field metatable must be at same offset for GCtab and GCudata!
-- | mov TAB:RB, [BASE]
-- | mov TAB:RB, TAB:RB->metatable
-- |2:
-- | test TAB:RB, TAB:RB
-- | mov dword [BASE-4], LJ_TNIL
-- | jz ->fff_res1
-- | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
-- | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
-- | mov [BASE-8], TAB:RB
-- | mov RA, TAB:RB->hmask
-- | and RA, STR:RC->hash
-- | imul RA, #NODE
-- | add NODE:RA, TAB:RB->node
-- |3: // Rearranged logic, because we expect _not_ to find the key.
-- | cmp dword NODE:RA->key.it, LJ_TSTR
-- | jne >4
-- | cmp dword NODE:RA->key.gcr, STR:RC
-- | je >5
-- |4:
-- | mov NODE:RA, NODE:RA->next
-- | test NODE:RA, NODE:RA
-- | jnz <3
-- | jmp ->fff_res1 // Not found, keep default result.
-- |5:
-- | mov RB, [RA+4]
-- | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
-- | mov RC, [RA]
-- | mov [BASE-4], RB // Return value of mt.__metatable.
-- | mov [BASE-8], RC
-- | jmp ->fff_res1
-- |
-- |6:
-- | cmp RB, LJ_TUDATA; je <1
-- |.if X64
-- | cmp RB, LJ_TNUMX; ja >8
-- | cmp RB, LJ_TISNUM; jbe >7
-- | mov RB, LJ_TLIGHTUD
-- | jmp >8
-- |7:
-- |.else
-- | cmp RB, LJ_TISNUM; ja >8
-- |.endif
-- | mov RB, LJ_TNUMX
-- |8:
-- | not RB
-- | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
-- | jmp <2
- |
- |.ffunc_2 setmetatable
-- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-- | // Fast path: no mt for table yet and not clearing the mt.
-- | mov TAB:RB, [BASE]
-- | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
-- | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
-- | mov TAB:RC, [BASE+8]
-- | mov TAB:RB->metatable, TAB:RC
-- | mov PC, [BASE-4]
-- | mov dword [BASE-4], LJ_TTAB // Return original table.
-- | mov [BASE-8], TAB:RB
-- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-- | jz >1
-- | // Possible write barrier. Table is black, but skip iswhite(mt) check.
-- | barrierback TAB:RB, RC
-- |1:
-- | jmp ->fff_res1
- |
- |.ffunc_2 rawget
-- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-- |.if X64WIN
-- | mov RB, BASE // Save BASE.
-- | lea CARG3d, [BASE+8]
-- | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
-- | mov CARG1d, SAVE_L
-- |.elif X64
-- | mov RB, BASE // Save BASE.
-- | mov CARG2d, [BASE]
-- | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
-- | mov CARG1d, SAVE_L
-- |.else
-- | mov TAB:RD, [BASE]
-- | mov L:RB, SAVE_L
-- | mov ARG2, TAB:RD
-- | mov ARG1, L:RB
-- | mov RB, BASE // Save BASE.
-- | add BASE, 8
-- | mov ARG3, BASE
-- |.endif
-- | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
-- | // cTValue * returned in eax (RD).
-- | mov BASE, RB // Restore BASE.
-- | // Copy table slot.
-- |.if X64
-- | mov RBa, [RD]
-- | mov PC, [BASE-4]
-- | mov [BASE-8], RBa
-- |.else
-- | mov RB, [RD]
-- | mov RD, [RD+4]
-- | mov PC, [BASE-4]
-- | mov [BASE-8], RB
-- | mov [BASE-4], RD
-- |.endif
-- | jmp ->fff_res1
- |
- |//-- Base library: conversions ------------------------------------------
- |
- |.ffunc tonumber
-- | // Only handles the number case inline (without a base argument).
-- | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-- | cmp dword [BASE+4], LJ_TISNUM
-- |.if DUALNUM
-- | jne >1
-- | mov RB, dword [BASE]; jmp ->fff_resi
-- |1:
-- | ja ->fff_fallback
-- |.else
-- | jae ->fff_fallback
-- |.endif
-- | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
- |
- |.ffunc_1 tostring
-- | // Only handles the string or number case inline.
-- | mov PC, [BASE-4]
-- | cmp dword [BASE+4], LJ_TSTR; jne >3
-- | // A __tostring method in the string base metatable is ignored.
-- | mov STR:RD, [BASE]
-- |2:
-- | mov dword [BASE-4], LJ_TSTR
-- | mov [BASE-8], STR:RD
-- | jmp ->fff_res1
-- |3: // Handle numbers inline, unless a number base metatable is present.
-- | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
-- | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
-- | jne ->fff_fallback
-- | ffgccheck // Caveat: uses label 1.
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Add frame since C call can throw.
-- | mov SAVE_PC, PC // Redundant (but a defined value).
-- |.if X64 and not X64WIN
-- | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
-- |.endif
-- | mov L:FCARG1, L:RB
-- |.if DUALNUM
-- | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
-- |.else
-- | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
-- |.endif
-- | // GCstr returned in eax (RD).
-- | mov BASE, L:RB->base
-- | jmp <2
- |
- |//-- Base library: iterators -------------------------------------------
- |
- |.ffunc_1 next
-- | je >2 // Missing 2nd arg?
-- |1:
-- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Add frame since C call can throw.
-- | mov L:RB->top, BASE // Dummy frame length is ok.
-- | mov PC, [BASE-4]
-- |.if X64WIN
-- | lea CARG3d, [BASE+8]
-- | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
-- | mov CARG1d, L:RB
-- |.elif X64
-- | mov CARG2d, [BASE]
-- | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
-- | mov CARG1d, L:RB
-- |.else
-- | mov TAB:RD, [BASE]
-- | mov ARG2, TAB:RD
-- | mov ARG1, L:RB
-- | add BASE, 8
-- | mov ARG3, BASE
-- |.endif
-- | mov SAVE_PC, PC // Needed for ITERN fallback.
-- | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
-- | // Flag returned in eax (RD).
-- | mov BASE, L:RB->base
-- | test RD, RD; jz >3 // End of traversal?
-- | // Copy key and value to results.
-- |.if X64
-- | mov RBa, [BASE+8]
-- | mov RDa, [BASE+16]
-- | mov [BASE-8], RBa
-- | mov [BASE], RDa
-- |.else
-- | mov RB, [BASE+8]
-- | mov RD, [BASE+12]
-- | mov [BASE-8], RB
-- | mov [BASE-4], RD
-- | mov RB, [BASE+16]
-- | mov RD, [BASE+20]
-- | mov [BASE], RB
-- | mov [BASE+4], RD
-- |.endif
-- |->fff_res2:
-- | mov RD, 1+2
-- | jmp ->fff_res
-- |2: // Set missing 2nd arg to nil.
-- | mov dword [BASE+12], LJ_TNIL
-- | jmp <1
-- |3: // End of traversal: return nil.
-- | mov dword [BASE-4], LJ_TNIL
-- | jmp ->fff_res1
- |
- |.ffunc_1 pairs
-- | mov TAB:RB, [BASE]
-- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
--#if LJ_52
-- | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
--#endif
-- | mov CFUNC:RB, [BASE-8]
-- | mov CFUNC:RD, CFUNC:RB->upvalue[0]
-- | mov PC, [BASE-4]
-- | mov dword [BASE-4], LJ_TFUNC
-- | mov [BASE-8], CFUNC:RD
-- | mov dword [BASE+12], LJ_TNIL
-- | mov RD, 1+3
-- | jmp ->fff_res
- |
- |.ffunc_2 ipairs_aux
-- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-- | cmp dword [BASE+12], LJ_TISNUM
-- |.if DUALNUM
-- | jne ->fff_fallback
-- |.else
-- | jae ->fff_fallback
-- |.endif
-- | mov PC, [BASE-4]
-- |.if DUALNUM
-- | mov RD, dword [BASE+8]
-- | add RD, 1
-- | mov dword [BASE-4], LJ_TISNUM
-- | mov dword [BASE-8], RD
-- |.else
-- | movsd xmm0, qword [BASE+8]
-- | sseconst_1 xmm1, RBa
-- | addsd xmm0, xmm1
-- | cvttsd2si RD, xmm0
-- | movsd qword [BASE-8], xmm0
-- |.endif
-- | mov TAB:RB, [BASE]
-- | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
-- | shl RD, 3
-- | add RD, TAB:RB->array
-- |1:
-- | cmp dword [RD+4], LJ_TNIL; je ->fff_res0
-- | // Copy array slot.
-- |.if X64
-- | mov RBa, [RD]
-- | mov [BASE], RBa
-- |.else
-- | mov RB, [RD]
-- | mov RD, [RD+4]
-- | mov [BASE], RB
-- | mov [BASE+4], RD
-- |.endif
-- | jmp ->fff_res2
-- |2: // Check for empty hash part first. Otherwise call C function.
-- | cmp dword TAB:RB->hmask, 0; je ->fff_res0
-- | mov FCARG1, TAB:RB
-- | mov RB, BASE // Save BASE.
-- | mov FCARG2, RD // Caveat: FCARG2 == BASE
-- | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
-- | // cTValue * or NULL returned in eax (RD).
-- | mov BASE, RB
-- | test RD, RD
-- | jnz <1
- |->fff_res0:
-- | mov RD, 1+0
-- | jmp ->fff_res
- |
- |.ffunc_1 ipairs
-- | mov TAB:RB, [BASE]
-- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
--#if LJ_52
-- | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
--#endif
-- | mov CFUNC:RB, [BASE-8]
-- | mov CFUNC:RD, CFUNC:RB->upvalue[0]
-- | mov PC, [BASE-4]
-- | mov dword [BASE-4], LJ_TFUNC
-- | mov [BASE-8], CFUNC:RD
-- |.if DUALNUM
-- | mov dword [BASE+12], LJ_TISNUM
-- | mov dword [BASE+8], 0
-- |.else
-- | xorps xmm0, xmm0
-- | movsd qword [BASE+8], xmm0
-- |.endif
-- | mov RD, 1+3
-- | jmp ->fff_res
- |
- |//-- Base library: catch errors ----------------------------------------
- |
- |.ffunc_1 pcall
-- | lea RA, [BASE+8]
-- | sub NARGS:RD, 1
-- | mov PC, 8+FRAME_PCALL
-- |1:
-- | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
-- | shr RB, HOOK_ACTIVE_SHIFT
-- | and RB, 1
-- | add PC, RB // Remember active hook before pcall.
-- | jmp ->vm_call_dispatch
- |
- |.ffunc_2 xpcall
-- | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
-- | mov RB, [BASE+4] // Swap function and traceback.
-- | mov [BASE+12], RB
-- | mov dword [BASE+4], LJ_TFUNC
-- | mov LFUNC:RB, [BASE]
-- | mov PC, [BASE+8]
-- | mov [BASE+8], LFUNC:RB
-- | mov [BASE], PC
-- | lea RA, [BASE+16]
-- | sub NARGS:RD, 2
-- | mov PC, 16+FRAME_PCALL
-- | jmp <1
- |
- |//-- Coroutine library --------------------------------------------------
- |
- |.macro coroutine_resume_wrap, resume
- |.if resume
- |.ffunc_1 coroutine_resume
-- | mov L:RB, [BASE]
- |.else
- |.ffunc coroutine_wrap_aux
-- | mov CFUNC:RB, [BASE-8]
-- | mov L:RB, CFUNC:RB->upvalue[0].gcr
-- |.endif
-- | mov PC, [BASE-4]
-- | mov SAVE_PC, PC
-- |.if X64
-- | mov TMP1, L:RB
-- |.else
-- | mov ARG1, L:RB
-- |.endif
-- |.if resume
-- | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
-- |.endif
-- | cmp aword L:RB->cframe, 0; jne ->fff_fallback
-- | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
-- | mov RA, L:RB->top
-- | je >1 // Status != LUA_YIELD (i.e. 0)?
-- | cmp RA, L:RB->base // Check for presence of initial func.
-- | je ->fff_fallback
-- |1:
-- |.if resume
-- | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
-- |.else
-- | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
-- |.endif
-- | cmp PC, L:RB->maxstack; ja ->fff_fallback
-- | mov L:RB->top, PC
-- |
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- |.if resume
-- | add BASE, 8 // Keep resumed thread in stack for GC.
-- |.endif
-- | mov L:RB->top, BASE
-- |.if resume
-- | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
-- |.else
-- | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
-- |.endif
-- | sub RBa, PCa // Relative to PC.
-- |
-- | cmp PC, RA
-- | je >3
-- |2: // Move args to coroutine.
-- |.if X64
-- | mov RCa, [PC+RB]
-- | mov [PC-8], RCa
-- |.else
-- | mov RC, [PC+RB+4]
-- | mov [PC-4], RC
-- | mov RC, [PC+RB]
-- | mov [PC-8], RC
-- |.endif
-- | sub PC, 8
-- | cmp PC, RA
-- | jne <2
-- |3:
-- |.if X64
-- | mov CARG2d, RA
-- | mov CARG1d, TMP1
-- |.else
-- | mov ARG2, RA
-- | xor RA, RA
-- | mov ARG4, RA
-- | mov ARG3, RA
-- |.endif
-- | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
-- |
-- | mov L:RB, SAVE_L
-- |.if X64
-- | mov L:PC, TMP1
-- |.else
-- | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
-- |.endif
-- | mov BASE, L:RB->base
-- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-- | set_vmstate INTERP
-- |
-- | cmp eax, LUA_YIELD
-- | ja >8
-- |4:
-- | mov RA, L:PC->base
-- | mov KBASE, L:PC->top
-- | mov L:PC->top, RA // Clear coroutine stack.
-- | mov PC, KBASE
-- | sub PC, RA
-- | je >6 // No results?
-- | lea RD, [BASE+PC]
-- | shr PC, 3
-- | cmp RD, L:RB->maxstack
-- | ja >9 // Need to grow stack?
-- |
-- | mov RB, BASE
-- | sub RBa, RAa
-- |5: // Move results from coroutine.
-- |.if X64
-- | mov RDa, [RA]
-- | mov [RA+RB], RDa
-- |.else
-- | mov RD, [RA]
-- | mov [RA+RB], RD
-- | mov RD, [RA+4]
-- | mov [RA+RB+4], RD
-- |.endif
-- | add RA, 8
-- | cmp RA, KBASE
-- | jne <5
-- |6:
-- |.if resume
-- | lea RD, [PC+2] // nresults+1 = 1 + true + results.
-- | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
-- |.else
-- | lea RD, [PC+1] // nresults+1 = 1 + results.
-- |.endif
-- |7:
-- | mov PC, SAVE_PC
-- | mov MULTRES, RD
-- |.if resume
-- | mov RAa, -8
-- |.else
-- | xor RA, RA
-- |.endif
-- | test PC, FRAME_TYPE
-- | jz ->BC_RET_Z
-- | jmp ->vm_return
-- |
-- |8: // Coroutine returned with error (at co->top-1).
-- |.if resume
-- | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
-- | mov RA, L:PC->top
-- | sub RA, 8
-- | mov L:PC->top, RA // Clear error from coroutine stack.
-- | // Copy error message.
-- |.if X64
-- | mov RDa, [RA]
-- | mov [BASE], RDa
-- |.else
-- | mov RD, [RA]
-- | mov [BASE], RD
-- | mov RD, [RA+4]
-- | mov [BASE+4], RD
-- |.endif
-- | mov RD, 1+2 // nresults+1 = 1 + false + error.
-- | jmp <7
-- |.else
-- | mov FCARG2, L:PC
-- | mov FCARG1, L:RB
-- | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
-- | // Error function does not return.
-- |.endif
-- |
-- |9: // Handle stack expansion on return from yield.
-- |.if X64
-- | mov L:RA, TMP1
-- |.else
-- | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
- |.endif
-- | mov L:RA->top, KBASE // Undo coroutine stack clearing.
-- | mov FCARG2, PC
-- | mov FCARG1, L:RB
-- | call extern lj_state_growstack@8 // (lua_State *L, int n)
-- |.if X64
-- | mov L:PC, TMP1
-- |.else
-- | mov L:PC, ARG1
-- |.endif
-- | mov BASE, L:RB->base
-- | jmp <4 // Retry the stack move.
- |.endmacro
- |
- | coroutine_resume_wrap 1 // coroutine.resume
- | coroutine_resume_wrap 0 // coroutine.wrap
- |
- |.ffunc coroutine_yield
-- | mov L:RB, SAVE_L
-- | test aword L:RB->cframe, CFRAME_RESUME
-- | jz ->fff_fallback
-- | mov L:RB->base, BASE
-- | lea RD, [BASE+NARGS:RD*8-8]
-- | mov L:RB->top, RD
-- | xor RD, RD
-- | mov aword L:RB->cframe, RDa
-- | mov al, LUA_YIELD
-- | mov byte L:RB->status, al
-- | jmp ->vm_leave_unw
- |
- |//-- Math library -------------------------------------------------------
- |
-- |.if not DUALNUM
-- |->fff_resi: // Dummy.
-- |.endif
-- |
-- |->fff_resn:
-- | mov PC, [BASE-4]
-- | fstp qword [BASE-8]
-- | jmp ->fff_res1
-- |
- | .ffunc_1 math_abs
-- |.if DUALNUM
-- | cmp dword [BASE+4], LJ_TISNUM; jne >2
-- | mov RB, dword [BASE]
-- | cmp RB, 0; jns ->fff_resi
-- | neg RB; js >1
- |->fff_resbit:
- |->fff_resi:
-- | mov PC, [BASE-4]
-- | mov dword [BASE-4], LJ_TISNUM
-- | mov dword [BASE-8], RB
-- | jmp ->fff_res1
-- |1:
-- | mov PC, [BASE-4]
-- | mov dword [BASE-4], 0x41e00000 // 2^31.
-- | mov dword [BASE-8], 0
-- | jmp ->fff_res1
-- |2:
-- | ja ->fff_fallback
-- |.else
-- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-- |.endif
-- | movsd xmm0, qword [BASE]
-- | sseconst_abs xmm1, RDa
-- | andps xmm0, xmm1
-+ |->fff_resRB:
-+ |
-+ |.ffunc_n math_sqrt, sqrtsd
- |->fff_resxmm0:
-- | mov PC, [BASE-4]
-- | movsd qword [BASE-8], xmm0
-- | // fallthrough
- |
- |->fff_res1:
-- | mov RD, 1+1
- |->fff_res:
-- | mov MULTRES, RD
- |->fff_res_:
-- | test PC, FRAME_TYPE
-- | jnz >7
-- |5:
-- | cmp PC_RB, RDL // More results expected?
-- | ja >6
-- | // Adjust BASE. KBASE is assumed to be set for the calling frame.
-- | movzx RA, PC_RA
-- | not RAa // Note: ~RA = -(RA+1)
-- | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
-- | ins_next
-- |
-- |6: // Fill up results with nil.
-- | mov dword [BASE+RD*8-12], LJ_TNIL
-- | add RD, 1
-- | jmp <5
-- |
-- |7: // Non-standard return case.
-- | mov RAa, -8 // Results start at BASE+RA = BASE-8.
-- | jmp ->vm_return
-- |
-- |.if X64
-- |.define fff_resfp, fff_resxmm0
-- |.else
-- |.define fff_resfp, fff_resn
-- |.endif
- |
- |.macro math_round, func
- | .ffunc math_ .. func
-- |.if DUALNUM
-- | cmp dword [BASE+4], LJ_TISNUM; jne >1
-- | mov RB, dword [BASE]; jmp ->fff_resi
-- |1:
-- | ja ->fff_fallback
-- |.else
-- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-- |.endif
-- | movsd xmm0, qword [BASE]
-- | call ->vm_ .. func .. _sse
-- |.if DUALNUM
-- | cvttsd2si RB, xmm0
-- | cmp RB, 0x80000000
-- | jne ->fff_resi
-- | cvtsi2sd xmm1, RB
-- | ucomisd xmm0, xmm1
-- | jp ->fff_resxmm0
-- | je ->fff_resi
-- |.endif
-- | jmp ->fff_resxmm0
- |.endmacro
- |
- | math_round floor
- | math_round ceil
- |
-- |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
-- |
- |.ffunc math_log
-- | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-- | movsd xmm0, qword [BASE]
-- |.if not X64
-- | movsd FPARG1, xmm0
-- |.endif
-- | mov RB, BASE
-- | call extern log
-- | mov BASE, RB
-- | jmp ->fff_resfp
- |
- |.macro math_extern, func
-- | .ffunc_nsse math_ .. func
-- |.if not X64
-- | movsd FPARG1, xmm0
-- |.endif
-- | mov RB, BASE
-- | call extern func
-- | mov BASE, RB
-- | jmp ->fff_resfp
-+ | .ffunc_n math_ .. func
- |.endmacro
- |
- |.macro math_extern2, func
-- | .ffunc_nnsse math_ .. func
-- |.if not X64
-- | movsd FPARG1, xmm0
-- | movsd FPARG3, xmm1
-- |.endif
-- | mov RB, BASE
-- | call extern func
-- | mov BASE, RB
-- | jmp ->fff_resfp
-+ | .ffunc_nn math_ .. func
- |.endmacro
- |
- | math_extern log10
-@@ -1954,102 +432,13 @@ static void build_subroutines(BuildCtx *ctx)
- | math_extern2 atan2
- | math_extern2 fmod
- |
-- |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
-- |
-- |.ffunc_1 math_frexp
-- | mov RB, [BASE+4]
-- | cmp RB, LJ_TISNUM; jae ->fff_fallback
-- | mov PC, [BASE-4]
-- | mov RC, [BASE]
-- | mov [BASE-4], RB; mov [BASE-8], RC
-- | shl RB, 1; cmp RB, 0xffe00000; jae >3
-- | or RC, RB; jz >3
-- | mov RC, 1022
-- | cmp RB, 0x00200000; jb >4
-- |1:
-- | shr RB, 21; sub RB, RC // Extract and unbias exponent.
-- | cvtsi2sd xmm0, RB
-- | mov RB, [BASE-4]
-- | and RB, 0x800fffff // Mask off exponent.
-- | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
-- | mov [BASE-4], RB
-- |2:
-- | movsd qword [BASE], xmm0
-- | mov RD, 1+2
-- | jmp ->fff_res
-- |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
-- | xorps xmm0, xmm0; jmp <2
-- |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
-- | movsd xmm0, qword [BASE]
-- | sseconst_hi xmm1, RBa, 43500000 // 2^54.
-- | mulsd xmm0, xmm1
-- | movsd qword [BASE-8], xmm0
-- | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
-- |
-- |.ffunc_nsse math_modf
-- | mov RB, [BASE+4]
-- | mov PC, [BASE-4]
-- | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
-- | movaps xmm4, xmm0
-- | call ->vm_trunc_sse
-- | subsd xmm4, xmm0
-- |1:
-- | movsd qword [BASE-8], xmm0
-- | movsd qword [BASE], xmm4
-- | mov RC, [BASE-4]; mov RB, [BASE+4]
-- | xor RC, RB; js >3 // Need to adjust sign?
-- |2:
-- | mov RD, 1+2
-- | jmp ->fff_res
-- |3:
-- | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
-- | jmp <2
-- |4:
-- | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
-+ |.ffunc_2 math_ldexp
-+ |
-+ |.ffunc_n math_frexp
- |
-+ |.ffunc_n math_modf
- |.macro math_minmax, name, cmovop, sseop
- | .ffunc name
-- | mov RA, 2
-- | cmp dword [BASE+4], LJ_TISNUM
-- |.if DUALNUM
-- | jne >4
-- | mov RB, dword [BASE]
-- |1: // Handle integers.
-- | cmp RA, RD; jae ->fff_resi
-- | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
-- | cmp RB, dword [BASE+RA*8-8]
-- | cmovop RB, dword [BASE+RA*8-8]
-- | add RA, 1
-- | jmp <1
-- |3:
-- | ja ->fff_fallback
-- | // Convert intermediate result to number and continue below.
-- | cvtsi2sd xmm0, RB
-- | jmp >6
-- |4:
-- | ja ->fff_fallback
-- |.else
-- | jae ->fff_fallback
-- |.endif
-- |
-- | movsd xmm0, qword [BASE]
-- |5: // Handle numbers or integers.
-- | cmp RA, RD; jae ->fff_resxmm0
-- | cmp dword [BASE+RA*8-4], LJ_TISNUM
-- |.if DUALNUM
-- | jb >6
-- | ja ->fff_fallback
-- | cvtsi2sd xmm1, dword [BASE+RA*8-8]
-- | jmp >7
-- |.else
-- | jae ->fff_fallback
-- |.endif
-- |6:
-- | movsd xmm1, qword [BASE+RA*8-8]
-- |7:
-- | sseop xmm0, xmm1
-- | add RA, 1
-- | jmp <5
- |.endmacro
- |
- | math_minmax math_min, cmovg, minsd
-@@ -2058,150 +447,17 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- String library -----------------------------------------------------
- |
- |.ffunc string_byte // Only handle the 1-arg case here.
-- | cmp NARGS:RD, 1+1; jne ->fff_fallback
-- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-- | mov STR:RB, [BASE]
-- | mov PC, [BASE-4]
-- | cmp dword STR:RB->len, 1
-- | jb ->fff_res0 // Return no results for empty string.
-- | movzx RB, byte STR:RB[1]
-- |.if DUALNUM
-- | jmp ->fff_resi
-- |.else
-- | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
-- |.endif
- |
- |.ffunc string_char // Only handle the 1-arg case here.
-- | ffgccheck
-- | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
-- | cmp dword [BASE+4], LJ_TISNUM
-- |.if DUALNUM
-- | jne ->fff_fallback
-- | mov RB, dword [BASE]
-- | cmp RB, 255; ja ->fff_fallback
-- | mov TMP2, RB
-- |.else
-- | jae ->fff_fallback
-- | cvttsd2si RB, qword [BASE]
-- | cmp RB, 255; ja ->fff_fallback
-- | mov TMP2, RB
-- |.endif
-- |.if X64
-- | mov TMP3, 1
-- |.else
-- | mov ARG3, 1
-- |.endif
-- | lea RDa, TMP2 // Points to stack. Little-endian.
- |->fff_newstr:
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- |.if X64
-- | mov CARG3d, TMP3 // Zero-extended to size_t.
-- | mov CARG2, RDa // May be 64 bit ptr to stack.
-- | mov CARG1d, L:RB
-- |.else
-- | mov ARG2, RD
-- | mov ARG1, L:RB
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_str_new // (lua_State *L, char *str, size_t l)
- |->fff_resstr:
-- | // GCstr * returned in eax (RD).
-- | mov BASE, L:RB->base
-- | mov PC, [BASE-4]
-- | mov dword [BASE-4], LJ_TSTR
-- | mov [BASE-8], STR:RD
-- | jmp ->fff_res1
- |
- |.ffunc string_sub
-- | ffgccheck
-- | mov TMP2, -1
-- | cmp NARGS:RD, 1+2; jb ->fff_fallback
-- | jna >1
-- | cmp dword [BASE+20], LJ_TISNUM
-- |.if DUALNUM
-- | jne ->fff_fallback
-- | mov RB, dword [BASE+16]
-- | mov TMP2, RB
-- |.else
-- | jae ->fff_fallback
-- | cvttsd2si RB, qword [BASE+16]
-- | mov TMP2, RB
-- |.endif
-- |1:
-- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-- | cmp dword [BASE+12], LJ_TISNUM
-- |.if DUALNUM
-- | jne ->fff_fallback
-- |.else
-- | jae ->fff_fallback
-- |.endif
-- | mov STR:RB, [BASE]
-- | mov TMP3, STR:RB
-- | mov RB, STR:RB->len
-- |.if DUALNUM
-- | mov RA, dword [BASE+8]
-- |.else
-- | cvttsd2si RA, qword [BASE+8]
-- |.endif
-- | mov RC, TMP2
-- | cmp RB, RC // len < end? (unsigned compare)
-- | jb >5
-- |2:
-- | test RA, RA // start <= 0?
-- | jle >7
-- |3:
-- | mov STR:RB, TMP3
-- | sub RC, RA // start > end?
-- | jl ->fff_emptystr
-- | lea RB, [STR:RB+RA+#STR-1]
-- | add RC, 1
-- |4:
-- |.if X64
-- | mov TMP3, RC
-- |.else
-- | mov ARG3, RC
-- |.endif
-- | mov RD, RB
-- | jmp ->fff_newstr
-- |
-- |5: // Negative end or overflow.
-- | jl >6
-- | lea RC, [RC+RB+1] // end = end+(len+1)
-- | jmp <2
-- |6: // Overflow.
-- | mov RC, RB // end = len
-- | jmp <2
-- |
-- |7: // Negative start or underflow.
-- | je >8
-- | add RA, RB // start = start+(len+1)
-- | add RA, 1
-- | jg <3 // start > 0?
-- |8: // Underflow.
-- | mov RA, 1 // start = 1
-- | jmp <3
- |
- |->fff_emptystr: // Range underflow.
-- | xor RC, RC // Zero length. Any ptr in RB is ok.
-- | jmp <4
- |
- |.macro ffstring_op, name
- | .ffunc_1 string_ .. name
-- | ffgccheck
-- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
-- | mov L:RB, SAVE_L
-- | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
-- | mov L:RB->base, BASE
-- | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
-- | mov RC, SBUF:FCARG1->b
-- | mov SBUF:FCARG1->L, L:RB
-- | mov SBUF:FCARG1->p, RC
-- | mov SAVE_PC, PC
-- | call extern lj_buf_putstr_ .. name .. @8
-- | mov FCARG1, eax
-- | call extern lj_buf_tostr@4
-- | jmp ->fff_resstr
- |.endmacro
- |
- |ffstring_op reverse
-@@ -2212,30 +468,6 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_bit, name, kind, fdef
- | fdef name
-- |.if kind == 2
-- | sseconst_tobit xmm1, RBa
-- |.endif
-- | cmp dword [BASE+4], LJ_TISNUM
-- |.if DUALNUM
-- | jne >1
-- | mov RB, dword [BASE]
-- |.if kind > 0
-- | jmp >2
-- |.else
-- | jmp ->fff_resbit
-- |.endif
-- |1:
-- | ja ->fff_fallback
-- |.else
-- | jae ->fff_fallback
-- |.endif
-- | movsd xmm0, qword [BASE]
-- |.if kind < 2
-- | sseconst_tobit xmm1, RBa
-- |.endif
-- | addsd xmm0, xmm1
-- | movd RB, xmm0
-- |2:
- |.endmacro
- |
- |.macro .ffunc_bit, name, kind
-@@ -2243,32 +475,9 @@ static void build_subroutines(BuildCtx *ctx)
- |.endmacro
- |
- |.ffunc_bit bit_tobit, 0
-- | jmp ->fff_resbit
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name, 2
-- | mov TMP2, NARGS:RD // Save for fallback.
-- | lea RD, [BASE+NARGS:RD*8-16]
-- |1:
-- | cmp RD, BASE
-- | jbe ->fff_resbit
-- | cmp dword [RD+4], LJ_TISNUM
-- |.if DUALNUM
-- | jne >2
-- | ins RB, dword [RD]
-- | sub RD, 8
-- | jmp <1
-- |2:
-- | ja ->fff_fallback_bit_op
-- |.else
-- | jae ->fff_fallback_bit_op
-- |.endif
-- | movsd xmm0, qword [RD]
-- | addsd xmm0, xmm1
-- | movd RA, xmm0
-- | ins RB, RA
-- | sub RD, 8
-- | jmp <1
- |.endmacro
- |
- |.ffunc_bit_op bit_band, and
-@@ -2276,39 +485,14 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc_bit_op bit_bxor, xor
- |
- |.ffunc_bit bit_bswap, 1
-- | bswap RB
-- | jmp ->fff_resbit
- |
- |.ffunc_bit bit_bnot, 1
-- | not RB
-- |.if DUALNUM
-- | jmp ->fff_resbit
-- |.else
- |->fff_resbit:
-- | cvtsi2sd xmm0, RB
-- | jmp ->fff_resxmm0
-- |.endif
- |
- |->fff_fallback_bit_op:
-- | mov NARGS:RD, TMP2 // Restore for fallback
-- | jmp ->fff_fallback
- |
- |.macro .ffunc_bit_sh, name, ins
-- |.if DUALNUM
- | .ffunc_bit name, 1, .ffunc_2
-- | // Note: no inline conversion from number for 2nd argument!
-- | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
-- | mov RA, dword [BASE+8]
-- |.else
-- | .ffunc_nnsse name
-- | sseconst_tobit xmm2, RBa
-- | addsd xmm0, xmm2
-- | addsd xmm1, xmm2
-- | movd RB, xmm0
-- | movd RA, xmm1
-- |.endif
-- | ins RB, cl // Assumes RA is ecx.
-- | jmp ->fff_resbit
- |.endmacro
- |
- |.ffunc_bit_sh bit_lshift, shl
-@@ -2320,268 +504,36 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->fff_fallback_2:
-- | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
-- | jmp ->fff_fallback
- |->fff_fallback_1:
-- | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
- |->fff_fallback: // Call fast function fallback handler.
-- | // BASE = new base, RD = nargs+1
-- | mov L:RB, SAVE_L
-- | mov PC, [BASE-4] // Fallback may overwrite PC.
-- | mov SAVE_PC, PC // Redundant (but a defined value).
-- | mov L:RB->base, BASE
-- | lea RD, [BASE+NARGS:RD*8-8]
-- | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
-- | mov L:RB->top, RD
-- | mov CFUNC:RD, [BASE-8]
-- | cmp RA, L:RB->maxstack
-- | ja >5 // Need to grow stack.
-- |.if X64
-- | mov CARG1d, L:RB
-- |.else
-- | mov ARG1, L:RB
-- |.endif
-- | call aword CFUNC:RD->f // (lua_State *L)
-- | mov BASE, L:RB->base
-- | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
-- | test RD, RD; jg ->fff_res // Returned nresults+1?
-- |1:
-- | mov RA, L:RB->top
-- | sub RA, BASE
-- | shr RA, 3
-- | test RD, RD
-- | lea NARGS:RD, [RA+1]
-- | mov LFUNC:RB, [BASE-8]
-- | jne ->vm_call_tail // Returned -1?
-- | ins_callt // Returned 0: retry fast path.
- |
- |// Reconstruct previous base for vmeta_call during tailcall.
- |->vm_call_tail:
-- | mov RA, BASE
-- | test PC, FRAME_TYPE
-- | jnz >3
-- | movzx RB, PC_RA
-- | not RBa // Note: ~RB = -(RB+1)
-- | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
-- | jmp ->vm_call_dispatch // Resolve again for tailcall.
-- |3:
-- | mov RB, PC
-- | and RB, -8
-- | sub BASE, RB
-- | jmp ->vm_call_dispatch // Resolve again for tailcall.
-- |
-- |5: // Grow stack for fallback handler.
-- | mov FCARG2, LUA_MINSTACK
-- | mov FCARG1, L:RB
-- | call extern lj_state_growstack@8 // (lua_State *L, int n)
-- | mov BASE, L:RB->base
-- | xor RD, RD // Simulate a return 0.
-- | jmp <1 // Dumb retry (goes through ff first).
- |
- |->fff_gcstep: // Call GC step function.
- | // BASE = new base, RD = nargs+1
-- | pop RBa // Must keep stack at same level.
-- | mov TMPa, RBa // Save return address
-- | mov L:RB, SAVE_L
-- | mov SAVE_PC, PC // Redundant (but a defined value).
-- | mov L:RB->base, BASE
-- | lea RD, [BASE+NARGS:RD*8-8]
-- | mov FCARG1, L:RB
-- | mov L:RB->top, RD
-- | call extern lj_gc_step@4 // (lua_State *L)
-- | mov BASE, L:RB->base
-- | mov RD, L:RB->top
-- | sub RD, BASE
-- | shr RD, 3
-- | add NARGS:RD, 1
-- | mov RBa, TMPa
-- | push RBa // Restore return address.
-- | ret
- |
- |//-----------------------------------------------------------------------
- |//-- Special dispatch targets -------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_record: // Dispatch target for recording phase.
-- |.if JIT
-- | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-- | test RDL, HOOK_VMEVENT // No recording while in vmevent.
-- | jnz >5
-- | // Decrement the hookcount for consistency, but always do the call.
-- | test RDL, HOOK_ACTIVE
-- | jnz >1
-- | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
-- | jz >1
-- | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
-- | jmp >1
-- |.endif
- |
- |->vm_rethook: // Dispatch target for return hooks.
-- | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-- | test RDL, HOOK_ACTIVE // Hook already active?
-- | jnz >5
-- | jmp >1
- |
- |->vm_inshook: // Dispatch target for instr/line hooks.
-- | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-- | test RDL, HOOK_ACTIVE // Hook already active?
-- | jnz >5
-- |
-- | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
-- | jz >5
-- | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
-- | jz >1
-- | test RDL, LUA_MASKLINE
-- | jz >5
-- |1:
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov FCARG2, PC // Caveat: FCARG2 == BASE
-- | mov FCARG1, L:RB
-- | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
-- | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
-- |3:
-- | mov BASE, L:RB->base
-- |4:
-- | movzx RA, PC_RA
-- |5:
-- | movzx OP, PC_OP
-- | movzx RD, PC_RD
-- |.if X64
-- | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
-- |.else
-- | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
-- |.endif
- |
- |->cont_hook: // Continue from hook yield.
-- | add PC, 4
-- | mov RA, [RB-24]
-- | mov MULTRES, RA // Restore MULTRES for *M ins.
-- | jmp <4
- |
- |->vm_hotloop: // Hot loop counter underflow.
-- |.if JIT
-- | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
-- | mov RB, LFUNC:RB->pc
-- | movzx RD, byte [RB+PC2PROTO(framesize)]
-- | lea RD, [BASE+RD*8]
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov L:RB->top, RD
-- | mov FCARG2, PC
-- | lea FCARG1, [DISPATCH+GG_DISP2J]
-- | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-- | mov SAVE_PC, PC
-- | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
-- | jmp <3
-- |.endif
- |
- |->vm_callhook: // Dispatch target for call hooks.
-- | mov SAVE_PC, PC
-- |.if JIT
-- | jmp >1
-- |.endif
- |
- |->vm_hotcall: // Hot call counter underflow.
-- |.if JIT
-- | mov SAVE_PC, PC
-- | or PC, 1 // Marker for hot call.
-- |1:
-- |.endif
-- | lea RD, [BASE+NARGS:RD*8-8]
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov L:RB->top, RD
-- | mov FCARG2, PC
-- | mov FCARG1, L:RB
-- | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
-- | // ASMFunction returned in eax/rax (RDa).
-- | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
-- |.if JIT
-- | and PC, -2
-- |.endif
-- | mov BASE, L:RB->base
-- | mov RAa, RDa
-- | mov RD, L:RB->top
-- | sub RD, BASE
-- | mov RBa, RAa
-- | movzx RA, PC_RA
-- | shr RD, 3
-- | add NARGS:RD, 1
-- | jmp RBa
- |
- |->cont_stitch: // Trace stitching.
-- |.if JIT
-- | // BASE = base, RC = result, RB = mbase
-- | mov TRACE:RA, [RB-24] // Save previous trace.
-- | mov TMP1, TRACE:RA
-- | mov TMP3, DISPATCH // Need one more register.
-- | mov DISPATCH, MULTRES
-- | movzx RA, PC_RA
-- | lea RA, [BASE+RA*8] // Call base.
-- | sub DISPATCH, 1
-- | jz >2
-- |1: // Move results down.
-- |.if X64
-- | mov RBa, [RC]
-- | mov [RA], RBa
-- |.else
-- | mov RB, [RC]
-- | mov [RA], RB
-- | mov RB, [RC+4]
-- | mov [RA+4], RB
-- |.endif
-- | add RC, 8
-- | add RA, 8
-- | sub DISPATCH, 1
-- | jnz <1
-- |2:
-- | movzx RC, PC_RA
-- | movzx RB, PC_RB
-- | add RC, RB
-- | lea RC, [BASE+RC*8-8]
-- |3:
-- | cmp RC, RA
-- | ja >9 // More results wanted?
-- |
-- | mov DISPATCH, TMP3
-- | mov TRACE:RD, TMP1 // Get previous trace.
-- | movzx RB, word TRACE:RD->traceno
-- | movzx RD, word TRACE:RD->link
-- | cmp RD, RB
-- | je ->cont_nop // Blacklisted.
-- | test RD, RD
-- | jne =>BC_JLOOP // Jump to stitched trace.
-- |
-- | // Stitch a new trace to the previous trace.
-- | mov [DISPATCH+DISPATCH_J(exitno)], RB
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov FCARG2, PC
-- | lea FCARG1, [DISPATCH+GG_DISP2J]
-- | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-- | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
-- | mov BASE, L:RB->base
-- | jmp ->cont_nop
-- |
-- |9: // Fill up results with nil.
-- | mov dword [RA+4], LJ_TNIL
-- | add RA, 8
-- | jmp <3
-- |.endif
- |
- |->vm_profhook: // Dispatch target for profiler hook.
--#if LJ_HASPROFILE
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov FCARG2, PC // Caveat: FCARG2 == BASE
-- | mov FCARG1, L:RB
-- | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
-- | mov BASE, L:RB->base
-- | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
-- | sub PC, 4
-- | jmp ->cont_nop
--#endif
- |
- |//-----------------------------------------------------------------------
- |//-- Trace exit handler -------------------------------------------------
-@@ -2590,207 +542,14 @@ static void build_subroutines(BuildCtx *ctx)
- |// Called from an exit stub with the exit number on the stack.
- |// The 16 bit exit number is stored with two (sign-extended) push imm8.
- |->vm_exit_handler:
-- |.if JIT
-- |.if X64
-- | push r13; push r12
-- | push r11; push r10; push r9; push r8
-- | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
-- | push rbx; push rdx; push rcx; push rax
-- | movzx RC, byte [rbp-8] // Reconstruct exit number.
-- | mov RCH, byte [rbp-16]
-- | mov [rbp-8], r15; mov [rbp-16], r14
-- |.else
-- | push ebp; lea ebp, [esp+12]; push ebp
-- | push ebx; push edx; push ecx; push eax
-- | movzx RC, byte [ebp-4] // Reconstruct exit number.
-- | mov RCH, byte [ebp-8]
-- | mov [ebp-4], edi; mov [ebp-8], esi
-- |.endif
-- | // Caveat: DISPATCH is ebx.
-- | mov DISPATCH, [ebp]
-- | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
-- | set_vmstate EXIT
-- | mov [DISPATCH+DISPATCH_J(exitno)], RC
-- | mov [DISPATCH+DISPATCH_J(parent)], RA
-- |.if X64
-- |.if X64WIN
-- | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
-- |.else
-- | sub rsp, 16*8 // Room for SSE regs.
-- |.endif
-- | add rbp, -128
-- | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
-- | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
-- | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
-- | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
-- | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
-- | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
-- | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
-- | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
-- |.else
-- | sub esp, 8*8+16 // Room for SSE regs + args.
-- | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
-- | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
-- | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
-- | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
-- |.endif
-- | // Caveat: RB is ebp.
-- | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
-- | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
-- | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-- | mov L:RB->base, BASE
-- |.if X64WIN
-- | lea CARG2, [rsp+4*8]
-- |.elif X64
-- | mov CARG2, rsp
-- |.else
-- | lea FCARG2, [esp+16]
-- |.endif
-- | lea FCARG1, [DISPATCH+GG_DISP2J]
-- | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-- | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
-- | // MULTRES or negated error code returned in eax (RD).
-- | mov RAa, L:RB->cframe
-- | and RAa, CFRAME_RAWMASK
-- |.if X64WIN
-- | // Reposition stack later.
-- |.elif X64
-- | mov rsp, RAa // Reposition stack to C frame.
-- |.else
-- | mov esp, RAa // Reposition stack to C frame.
-- |.endif
-- | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
-- | mov BASE, L:RB->base
-- | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC.
-- |.if X64
-- | jmp >1
-- |.endif
-- |.endif
- |->vm_exit_interp:
-- | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
-- |.if JIT
-- |.if X64
-- | // Restore additional callee-save registers only used in compiled code.
-- |.if X64WIN
-- | lea RAa, [rsp+9*16+4*8]
-- |1:
-- | movdqa xmm15, [RAa-9*16]
-- | movdqa xmm14, [RAa-8*16]
-- | movdqa xmm13, [RAa-7*16]
-- | movdqa xmm12, [RAa-6*16]
-- | movdqa xmm11, [RAa-5*16]
-- | movdqa xmm10, [RAa-4*16]
-- | movdqa xmm9, [RAa-3*16]
-- | movdqa xmm8, [RAa-2*16]
-- | movdqa xmm7, [RAa-1*16]
-- | mov rsp, RAa // Reposition stack to C frame.
-- | movdqa xmm6, [RAa]
-- | mov r15, CSAVE_3
-- | mov r14, CSAVE_4
-- |.else
-- | add rsp, 16 // Reposition stack to C frame.
-- |1:
-- |.endif
-- | mov r13, TMPa
-- | mov r12, TMPQ
-- |.endif
-- | test RD, RD; js >9 // Check for error from exit.
-- | mov L:RB, SAVE_L
-- | mov MULTRES, RD
-- | mov LFUNC:KBASE, [BASE-8]
-- | mov KBASE, LFUNC:KBASE->pc
-- | mov KBASE, [KBASE+PC2PROTO(k)]
-- | mov L:RB->base, BASE
-- | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-- | set_vmstate INTERP
-- | // Modified copy of ins_next which handles function header dispatch, too.
-- | mov RC, [PC]
-- | movzx RA, RCH
-- | movzx OP, RCL
-- | add PC, 4
-- | shr RC, 16
-- | cmp OP, BC_FUNCF // Function header?
-- | jb >3
-- | cmp OP, BC_FUNCC+2 // Fast function?
-- | jae >4
-- |2:
-- | mov RC, MULTRES // RC/RD holds nres+1.
-- |3:
-- |.if X64
-- | jmp aword [DISPATCH+OP*8]
-- |.else
-- | jmp aword [DISPATCH+OP*4]
-- |.endif
-- |
-- |4: // Check frame below fast function.
-- | mov RC, [BASE-4]
-- | test RC, FRAME_TYPE
-- | jnz <2 // Trace stitching continuation?
-- | // Otherwise set KBASE for Lua function below fast function.
-- | movzx RC, byte [RC-3]
-- | not RCa
-- | mov LFUNC:KBASE, [BASE+RC*8-8]
-- | mov KBASE, LFUNC:KBASE->pc
-- | mov KBASE, [KBASE+PC2PROTO(k)]
-- | jmp <2
-- |
-- |9: // Rethrow error from the right C frame.
-- | neg RD
-- | mov FCARG1, L:RB
-- | mov FCARG2, RD
-- | call extern lj_err_throw@8 // (lua_State *L, int errcode)
-- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Math helper functions ----------------------------------------------
- |//-----------------------------------------------------------------------
- |
-- |// FP value rounding. Called by math.floor/math.ceil fast functions
-- |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
- |.macro vm_round, name, mode, cond
- |->name:
-- |.if not X64 and cond
-- | movsd xmm0, qword [esp+4]
-- | call ->name .. _sse
-- | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
-- | fld qword [esp+4]
-- | ret
-- |.endif
-- |
-- |->name .. _sse:
-- | sseconst_abs xmm2, RDa
-- | sseconst_2p52 xmm3, RDa
-- | movaps xmm1, xmm0
-- | andpd xmm1, xmm2 // |x|
-- | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
-- | jbe >1
-- | andnpd xmm2, xmm0 // Isolate sign bit.
-- |.if mode == 2 // trunc(x)?
-- | movaps xmm0, xmm1
-- | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
-- | subsd xmm1, xmm3
-- | sseconst_1 xmm3, RDa
-- | cmpsd xmm0, xmm1, 1 // |x| < result?
-- | andpd xmm0, xmm3
-- | subsd xmm1, xmm0 // If yes, subtract -1.
-- | orpd xmm1, xmm2 // Merge sign bit back in.
-- |.else
-- | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
-- | subsd xmm1, xmm3
-- | orpd xmm1, xmm2 // Merge sign bit back in.
-- | .if mode == 1 // ceil(x)?
-- | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
-- | cmpsd xmm0, xmm1, 6 // x > result?
-- | .else // floor(x)?
-- | sseconst_1 xmm2, RDa
-- | cmpsd xmm0, xmm1, 1 // x < result?
-- | .endif
-- | andpd xmm0, xmm2
-- | subsd xmm1, xmm0 // If yes, subtract +-1.
-- |.endif
-- | movaps xmm0, xmm1
-- |1:
-- | ret
- |.endmacro
- |
- | vm_round vm_floor, 0, 1
-@@ -2799,68 +558,9 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// FP modulo x%y. Called by BC_MOD* and vm_arith.
- |->vm_mod:
-- |// Args in xmm0/xmm1, return value in xmm0.
-- |// Caveat: xmm0-xmm5 and RC (eax) modified!
-- | movaps xmm5, xmm0
-- | divsd xmm0, xmm1
-- | sseconst_abs xmm2, RDa
-- | sseconst_2p52 xmm3, RDa
-- | movaps xmm4, xmm0
-- | andpd xmm4, xmm2 // |x/y|
-- | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
-- | jbe >1
-- | andnpd xmm2, xmm0 // Isolate sign bit.
-- | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
-- | subsd xmm4, xmm3
-- | orpd xmm4, xmm2 // Merge sign bit back in.
-- | sseconst_1 xmm2, RDa
-- | cmpsd xmm0, xmm4, 1 // x/y < result?
-- | andpd xmm0, xmm2
-- | subsd xmm4, xmm0 // If yes, subtract 1.0.
-- | movaps xmm0, xmm5
-- | mulsd xmm1, xmm4
-- | subsd xmm0, xmm1
-- | ret
-- |1:
-- | mulsd xmm1, xmm0
-- | movaps xmm0, xmm5
-- | subsd xmm0, xmm1
-- | ret
- |
- |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
- |->vm_powi_sse:
-- | cmp eax, 1; jle >6 // i<=1?
-- | // Now 1 < (unsigned)i <= 0x80000000.
-- |1: // Handle leading zeros.
-- | test eax, 1; jnz >2
-- | mulsd xmm0, xmm0
-- | shr eax, 1
-- | jmp <1
-- |2:
-- | shr eax, 1; jz >5
-- | movaps xmm1, xmm0
-- |3: // Handle trailing bits.
-- | mulsd xmm0, xmm0
-- | shr eax, 1; jz >4
-- | jnc <3
-- | mulsd xmm1, xmm0
-- | jmp <3
-- |4:
-- | mulsd xmm0, xmm1
-- |5:
-- | ret
-- |6:
-- | je <5 // x^1 ==> x
-- | jb >7 // x^0 ==> 1
-- | neg eax
-- | call <1
-- | sseconst_1 xmm1, RDa
-- | divsd xmm1, xmm0
-- | movaps xmm0, xmm1
-- | ret
-- |7:
-- | sseconst_1 xmm0, RDa
-- | ret
- |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
-@@ -2868,46 +568,6 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
- |->vm_cpuid:
-- |.if X64
-- | mov eax, CARG1d
-- | .if X64WIN; push rsi; mov rsi, CARG2; .endif
-- | push rbx
-- | xor ecx, ecx
-- | cpuid
-- | mov [rsi], eax
-- | mov [rsi+4], ebx
-- | mov [rsi+8], ecx
-- | mov [rsi+12], edx
-- | pop rbx
-- | .if X64WIN; pop rsi; .endif
-- | ret
-- |.else
-- | pushfd
-- | pop edx
-- | mov ecx, edx
-- | xor edx, 0x00200000 // Toggle ID bit in flags.
-- | push edx
-- | popfd
-- | pushfd
-- | pop edx
-- | xor eax, eax // Zero means no features supported.
-- | cmp ecx, edx
-- | jz >1 // No ID toggle means no CPUID support.
-- | mov eax, [esp+4] // Argument 1 is function number.
-- | push edi
-- | push ebx
-- | xor ecx, ecx
-- | cpuid
-- | mov edi, [esp+16] // Argument 2 is result area.
-- | mov [edi], eax
-- | mov [edi+4], ebx
-- | mov [edi+8], ecx
-- | mov [edi+12], edx
-- | pop ebx
-- | pop edi
-- |1:
-- | ret
-- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Assertions ---------------------------------------------------------
-@@ -2915,9 +575,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->assert_bad_for_arg_type:
- #ifdef LUA_USE_ASSERT
-- | int3
- #endif
-- | int3
- |
- |//-----------------------------------------------------------------------
- |//-- FFI helper functions -----------------------------------------------
-@@ -2925,198 +583,10 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// Handler for callback functions. Callback slot number in ah/al.
- |->vm_ffi_callback:
-- |.if FFI
-- |.type CTSTATE, CTState, PC
-- |.if not X64
-- | sub esp, 16 // Leave room for SAVE_ERRF etc.
-- |.endif
-- | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
-- | lea DISPATCH, [ebp+GG_G2DISP]
-- | mov CTSTATE, GL:ebp->ctype_state
-- | movzx eax, ax
-- | mov CTSTATE->cb.slot, eax
-- |.if X64
-- | mov CTSTATE->cb.gpr[0], CARG1
-- | mov CTSTATE->cb.gpr[1], CARG2
-- | mov CTSTATE->cb.gpr[2], CARG3
-- | mov CTSTATE->cb.gpr[3], CARG4
-- | movsd qword CTSTATE->cb.fpr[0], xmm0
-- | movsd qword CTSTATE->cb.fpr[1], xmm1
-- | movsd qword CTSTATE->cb.fpr[2], xmm2
-- | movsd qword CTSTATE->cb.fpr[3], xmm3
-- |.if X64WIN
-- | lea rax, [rsp+CFRAME_SIZE+4*8]
-- |.else
-- | lea rax, [rsp+CFRAME_SIZE]
-- | mov CTSTATE->cb.gpr[4], CARG5
-- | mov CTSTATE->cb.gpr[5], CARG6
-- | movsd qword CTSTATE->cb.fpr[4], xmm4
-- | movsd qword CTSTATE->cb.fpr[5], xmm5
-- | movsd qword CTSTATE->cb.fpr[6], xmm6
-- | movsd qword CTSTATE->cb.fpr[7], xmm7
-- |.endif
-- | mov CTSTATE->cb.stack, rax
-- | mov CARG2, rsp
-- |.else
-- | lea eax, [esp+CFRAME_SIZE+16]
-- | mov CTSTATE->cb.gpr[0], FCARG1
-- | mov CTSTATE->cb.gpr[1], FCARG2
-- | mov CTSTATE->cb.stack, eax
-- | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp.
-- | mov FCARG2, [esp+CFRAME_SIZE+8]
-- | mov SAVE_RET, FCARG1
-- | mov SAVE_R4, FCARG2
-- | mov FCARG2, esp
-- |.endif
-- | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
-- | mov FCARG1, CTSTATE
-- | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
-- | // lua_State * returned in eax (RD).
-- | set_vmstate INTERP
-- | mov BASE, L:RD->base
-- | mov RD, L:RD->top
-- | sub RD, BASE
-- | mov LFUNC:RB, [BASE-8]
-- | shr RD, 3
-- | add RD, 1
-- | ins_callt
-- |.endif
- |
- |->cont_ffi_callback: // Return from FFI callback.
-- |.if FFI
-- | mov L:RA, SAVE_L
-- | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
-- | mov aword CTSTATE->L, L:RAa
-- | mov L:RA->base, BASE
-- | mov L:RA->top, RB
-- | mov FCARG1, CTSTATE
-- | mov FCARG2, RC
-- | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o)
-- |.if X64
-- | mov rax, CTSTATE->cb.gpr[0]
-- | movsd xmm0, qword CTSTATE->cb.fpr[0]
-- | jmp ->vm_leave_unw
-- |.else
-- | mov L:RB, SAVE_L
-- | mov eax, CTSTATE->cb.gpr[0]
-- | mov edx, CTSTATE->cb.gpr[1]
-- | cmp dword CTSTATE->cb.gpr[2], 1
-- | jb >7
-- | je >6
-- | fld qword CTSTATE->cb.fpr[0].d
-- | jmp >7
-- |6:
-- | fld dword CTSTATE->cb.fpr[0].f
-- |7:
-- | mov ecx, L:RB->top
-- | movzx ecx, word [ecx+6] // Get stack adjustment and copy up.
-- | mov SAVE_L, ecx // Must be one slot above SAVE_RET
-- | restoreregs
-- | pop ecx // Move return addr from SAVE_RET.
-- | add esp, [esp] // Adjust stack.
-- | add esp, 16
-- | push ecx
-- | ret
-- |.endif
-- |.endif
-- |
-- |->vm_ffi_call@4: // Call C function via FFI.
-- | // Caveat: needs special frame unwinding, see below.
-- |.if FFI
-- |.if X64
-- | .type CCSTATE, CCallState, rbx
-- | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
-- |.else
-- | .type CCSTATE, CCallState, ebx
-- | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1
-- |.endif
-- |
-- | // Readjust stack.
-- |.if X64
-- | mov eax, CCSTATE->spadj
-- | sub rsp, rax
-- |.else
-- | sub esp, CCSTATE->spadj
-- |.if WIN
-- | mov CCSTATE->spadj, esp
-- |.endif
-- |.endif
- |
-- | // Copy stack slots.
-- | movzx ecx, byte CCSTATE->nsp
-- | sub ecx, 1
-- | js >2
-- |1:
-- |.if X64
-- | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
-- | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
-- |.else
-- | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
-- | mov [esp+ecx*4], eax
-- |.endif
-- | sub ecx, 1
-- | jns <1
-- |2:
-- |
-- |.if X64
-- | movzx eax, byte CCSTATE->nfpr
-- | mov CARG1, CCSTATE->gpr[0]
-- | mov CARG2, CCSTATE->gpr[1]
-- | mov CARG3, CCSTATE->gpr[2]
-- | mov CARG4, CCSTATE->gpr[3]
-- |.if not X64WIN
-- | mov CARG5, CCSTATE->gpr[4]
-- | mov CARG6, CCSTATE->gpr[5]
-- |.endif
-- | test eax, eax; jz >5
-- | movaps xmm0, CCSTATE->fpr[0]
-- | movaps xmm1, CCSTATE->fpr[1]
-- | movaps xmm2, CCSTATE->fpr[2]
-- | movaps xmm3, CCSTATE->fpr[3]
-- |.if not X64WIN
-- | cmp eax, 4; jbe >5
-- | movaps xmm4, CCSTATE->fpr[4]
-- | movaps xmm5, CCSTATE->fpr[5]
-- | movaps xmm6, CCSTATE->fpr[6]
-- | movaps xmm7, CCSTATE->fpr[7]
-- |.endif
-- |5:
-- |.else
-- | mov FCARG1, CCSTATE->gpr[0]
-- | mov FCARG2, CCSTATE->gpr[1]
-- |.endif
-- |
-- | call aword CCSTATE->func
-- |
-- |.if X64
-- | mov CCSTATE->gpr[0], rax
-- | movaps CCSTATE->fpr[0], xmm0
-- |.if not X64WIN
-- | mov CCSTATE->gpr[1], rdx
-- | movaps CCSTATE->fpr[1], xmm1
-- |.endif
-- |.else
-- | mov CCSTATE->gpr[0], eax
-- | mov CCSTATE->gpr[1], edx
-- | cmp byte CCSTATE->resx87, 1
-- | jb >7
-- | je >6
-- | fstp qword CCSTATE->fpr[0].d[0]
-- | jmp >7
-- |6:
-- | fstp dword CCSTATE->fpr[0].f[0]
-- |7:
-- |.if WIN
-- | sub CCSTATE->spadj, esp
-- |.endif
-- |.endif
-- |
-- |.if X64
-- | mov rbx, [rbp-8]; leave; ret
-- |.else
-- | mov ebx, [ebp-4]; leave; ret
-- |.endif
-- |.endif
-+ |->vm_ffi_call: // Call C function via FFI.
- |// Note: vm_ffi_call must be the last function in this object file!
- |
- |//-----------------------------------------------------------------------
-@@ -3126,2096 +596,87 @@ static void build_subroutines(BuildCtx *ctx)
- static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- {
- int vk = 0;
-+ (void)vk;
- |// Note: aligning all instructions does not pay off.
- |=>defop:
-
- switch (op) {
--
-- /* -- Comparison ops ---------------------------------------------------- */
--
-- /* Remember: all ops branch for a true comparison, fall through otherwise. */
--
-- |.macro jmp_comp, lt, ge, le, gt, target
-- ||switch (op) {
-- ||case BC_ISLT:
-- | lt target
-- ||break;
-- ||case BC_ISGE:
-- | ge target
-- ||break;
-- ||case BC_ISLE:
-- | le target
-- ||break;
-- ||case BC_ISGT:
-- | gt target
-- ||break;
-- ||default: break; /* Shut up GCC. */
-- ||}
-- |.endmacro
--
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
-- | // RA = src1, RD = src2, JMP with RD = target
-- | ins_AD
-- |.if DUALNUM
-- | checkint RA, >7
-- | checkint RD, >8
-- | mov RB, dword [BASE+RA*8]
-- | add PC, 4
-- | cmp RB, dword [BASE+RD*8]
-- | jmp_comp jge, jl, jg, jle, >9
-- |6:
-- | movzx RD, PC_RD
-- | branchPC RD
-- |9:
-- | ins_next
-- |
-- |7: // RA is not an integer.
-- | ja ->vmeta_comp
-- | // RA is a number.
-- | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
-- | // RA is a number, RD is an integer.
-- | cvtsi2sd xmm0, dword [BASE+RD*8]
-- | jmp >2
-- |
-- |8: // RA is an integer, RD is not an integer.
-- | ja ->vmeta_comp
-- | // RA is an integer, RD is a number.
-- | cvtsi2sd xmm1, dword [BASE+RA*8]
-- | movsd xmm0, qword [BASE+RD*8]
-- | add PC, 4
-- | ucomisd xmm0, xmm1
-- | jmp_comp jbe, ja, jb, jae, <9
-- | jmp <6
-- |.else
-- | checknum RA, ->vmeta_comp
-- | checknum RD, ->vmeta_comp
-- |.endif
-- |1:
-- | movsd xmm0, qword [BASE+RD*8]
-- |2:
-- | add PC, 4
-- | ucomisd xmm0, qword [BASE+RA*8]
-- |3:
-- | // Unordered: all of ZF CF PF set, ordered: PF clear.
-- | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
-- |.if DUALNUM
-- | jmp_comp jbe, ja, jb, jae, <9
-- | jmp <6
-- |.else
-- | jmp_comp jbe, ja, jb, jae, >1
-- | movzx RD, PC_RD
-- | branchPC RD
-- |1:
-- | ins_next
-- |.endif
-- break;
--
- case BC_ISEQV: case BC_ISNEV:
-- vk = op == BC_ISEQV;
-- | ins_AD // RA = src1, RD = src2, JMP with RD = target
-- | mov RB, [BASE+RD*8+4]
-- | add PC, 4
-- |.if DUALNUM
-- | cmp RB, LJ_TISNUM; jne >7
-- | checkint RA, >8
-- | mov RB, dword [BASE+RD*8]
-- | cmp RB, dword [BASE+RA*8]
-- if (vk) {
-- | jne >9
-- } else {
-- | je >9
-- }
-- | movzx RD, PC_RD
-- | branchPC RD
-- |9:
-- | ins_next
-- |
-- |7: // RD is not an integer.
-- | ja >5
-- | // RD is a number.
-- | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
-- | // RD is a number, RA is an integer.
-- | cvtsi2sd xmm0, dword [BASE+RA*8]
-- | jmp >2
-- |
-- |8: // RD is an integer, RA is not an integer.
-- | ja >5
-- | // RD is an integer, RA is a number.
-- | cvtsi2sd xmm0, dword [BASE+RD*8]
-- | ucomisd xmm0, qword [BASE+RA*8]
-- | jmp >4
-- |
-- |.else
-- | cmp RB, LJ_TISNUM; jae >5
-- | checknum RA, >5
-- |.endif
-- |1:
-- | movsd xmm0, qword [BASE+RA*8]
-- |2:
-- | ucomisd xmm0, qword [BASE+RD*8]
-- |4:
-- iseqne_fp:
-- if (vk) {
-- | jp >2 // Unordered means not equal.
-- | jne >2
-- } else {
-- | jp >2 // Unordered means not equal.
-- | je >1
-- }
-- iseqne_end:
-- if (vk) {
-- |1: // EQ: Branch to the target.
-- | movzx RD, PC_RD
-- | branchPC RD
-- |2: // NE: Fallthrough to next instruction.
-- |.if not FFI
-- |3:
-- |.endif
-- } else {
-- |.if not FFI
-- |3:
-- |.endif
-- |2: // NE: Branch to the target.
-- | movzx RD, PC_RD
-- | branchPC RD
-- |1: // EQ: Fallthrough to next instruction.
-- }
-- if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
-- op == BC_ISEQN || op == BC_ISNEN)) {
-- | jmp <9
-- } else {
-- | ins_next
-- }
-- |
-- if (op == BC_ISEQV || op == BC_ISNEV) {
-- |5: // Either or both types are not numbers.
-- |.if FFI
-- | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
-- | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
-- |.endif
-- | checktp RA, RB // Compare types.
-- | jne <2 // Not the same type?
-- | cmp RB, LJ_TISPRI
-- | jae <1 // Same type and primitive type?
-- |
-- | // Same types and not a primitive type. Compare GCobj or pvalue.
-- | mov RA, [BASE+RA*8]
-- | mov RD, [BASE+RD*8]
-- | cmp RA, RD
-- | je <1 // Same GCobjs or pvalues?
-- | cmp RB, LJ_TISTABUD
-- | ja <2 // Different objects and not table/ud?
-- |.if X64
-- | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
-- | jb <2
-- |.endif
-- |
-- | // Different tables or userdatas. Need to check __eq metamethod.
-- | // Field metatable must be at same offset for GCtab and GCudata!
-- | mov TAB:RB, TAB:RA->metatable
-- | test TAB:RB, TAB:RB
-- | jz <2 // No metatable?
-- | test byte TAB:RB->nomm, 1<<MM_eq
-- | jnz <2 // Or 'no __eq' flag set?
-- if (vk) {
-- | xor RB, RB // ne = 0
-- } else {
-- | mov RB, 1 // ne = 1
-- }
-- | jmp ->vmeta_equal // Handle __eq metamethod.
-- } else {
-- |.if FFI
-- |3:
-- | cmp RB, LJ_TCDATA
-- if (LJ_DUALNUM && vk) {
-- | jne <9
-- } else {
-- | jne <2
-- }
-- | jmp ->vmeta_equal_cd
-- |.endif
-- }
-- break;
- case BC_ISEQS: case BC_ISNES:
-- vk = op == BC_ISEQS;
-- | ins_AND // RA = src, RD = str const, JMP with RD = target
-- | mov RB, [BASE+RA*8+4]
-- | add PC, 4
-- | cmp RB, LJ_TSTR; jne >3
-- | mov RA, [BASE+RA*8]
-- | cmp RA, [KBASE+RD*4]
-- iseqne_test:
-- if (vk) {
-- | jne >2
-- } else {
-- | je >1
-- }
-- goto iseqne_end;
- case BC_ISEQN: case BC_ISNEN:
-- vk = op == BC_ISEQN;
-- | ins_AD // RA = src, RD = num const, JMP with RD = target
-- | mov RB, [BASE+RA*8+4]
-- | add PC, 4
-- |.if DUALNUM
-- | cmp RB, LJ_TISNUM; jne >7
-- | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
-- | mov RB, dword [KBASE+RD*8]
-- | cmp RB, dword [BASE+RA*8]
-- if (vk) {
-- | jne >9
-- } else {
-- | je >9
-- }
-- | movzx RD, PC_RD
-- | branchPC RD
-- |9:
-- | ins_next
-- |
-- |7: // RA is not an integer.
-- | ja >3
-- | // RA is a number.
-- | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
-- | // RA is a number, RD is an integer.
-- | cvtsi2sd xmm0, dword [KBASE+RD*8]
-- | jmp >2
-- |
-- |8: // RA is an integer, RD is a number.
-- | cvtsi2sd xmm0, dword [BASE+RA*8]
-- | ucomisd xmm0, qword [KBASE+RD*8]
-- | jmp >4
-- |.else
-- | cmp RB, LJ_TISNUM; jae >3
-- |.endif
-- |1:
-- | movsd xmm0, qword [KBASE+RD*8]
-- |2:
-- | ucomisd xmm0, qword [BASE+RA*8]
-- |4:
-- goto iseqne_fp;
- case BC_ISEQP: case BC_ISNEP:
-- vk = op == BC_ISEQP;
-- | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
-- | mov RB, [BASE+RA*8+4]
-- | add PC, 4
-- | cmp RB, RD
-- if (!LJ_HASFFI) goto iseqne_test;
-- if (vk) {
-- | jne >3
-- | movzx RD, PC_RD
-- | branchPC RD
-- |2:
-- | ins_next
-- |3:
-- | cmp RB, LJ_TCDATA; jne <2
-- | jmp ->vmeta_equal_cd
-- } else {
-- | je >2
-- | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
-- | movzx RD, PC_RD
-- | branchPC RD
-- |2:
-- | ins_next
-- }
-- break;
--
-- /* -- Unary test and copy ops ------------------------------------------- */
--
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
-- | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
-- | mov RB, [BASE+RD*8+4]
-- | add PC, 4
-- | cmp RB, LJ_TISTRUECOND
-- if (op == BC_IST || op == BC_ISTC) {
-- | jae >1
-- } else {
-- | jb >1
-- }
-- if (op == BC_ISTC || op == BC_ISFC) {
-- | mov [BASE+RA*8+4], RB
-- | mov RB, [BASE+RD*8]
-- | mov [BASE+RA*8], RB
-- }
-- | movzx RD, PC_RD
-- | branchPC RD
-- |1: // Fallthrough to the next instruction.
-- | ins_next
-- break;
--
- case BC_ISTYPE:
-- | ins_AD // RA = src, RD = -type
-- | add RD, [BASE+RA*8+4]
-- | jne ->vmeta_istype
-- | ins_next
-- break;
- case BC_ISNUM:
-- | ins_AD // RA = src, RD = -(TISNUM-1)
-- | checknum RA, ->vmeta_istype
-- | ins_next
-- break;
--
-- /* -- Unary ops --------------------------------------------------------- */
--
- case BC_MOV:
-- | ins_AD // RA = dst, RD = src
-- |.if X64
-- | mov RBa, [BASE+RD*8]
-- | mov [BASE+RA*8], RBa
-- |.else
-- | mov RB, [BASE+RD*8+4]
-- | mov RD, [BASE+RD*8]
-- | mov [BASE+RA*8+4], RB
-- | mov [BASE+RA*8], RD
-- |.endif
-- | ins_next_
-- break;
- case BC_NOT:
-- | ins_AD // RA = dst, RD = src
-- | xor RB, RB
-- | checktp RD, LJ_TISTRUECOND
-- | adc RB, LJ_TTRUE
-- | mov [BASE+RA*8+4], RB
-- | ins_next
-- break;
- case BC_UNM:
-- | ins_AD // RA = dst, RD = src
-- |.if DUALNUM
-- | checkint RD, >5
-- | mov RB, [BASE+RD*8]
-- | neg RB
-- | jo >4
-- | mov dword [BASE+RA*8+4], LJ_TISNUM
-- | mov dword [BASE+RA*8], RB
-- |9:
-- | ins_next
-- |4:
-- | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
-- | mov dword [BASE+RA*8], 0
-- | jmp <9
-- |5:
-- | ja ->vmeta_unm
-- |.else
-- | checknum RD, ->vmeta_unm
-- |.endif
-- | movsd xmm0, qword [BASE+RD*8]
-- | sseconst_sign xmm1, RDa
-- | xorps xmm0, xmm1
-- | movsd qword [BASE+RA*8], xmm0
-- |.if DUALNUM
-- | jmp <9
-- |.else
-- | ins_next
-- |.endif
-- break;
- case BC_LEN:
-- | ins_AD // RA = dst, RD = src
-- | checkstr RD, >2
-- | mov STR:RD, [BASE+RD*8]
-- |.if DUALNUM
-- | mov RD, dword STR:RD->len
-- |1:
-- | mov dword [BASE+RA*8+4], LJ_TISNUM
-- | mov dword [BASE+RA*8], RD
-- |.else
-- | xorps xmm0, xmm0
-- | cvtsi2sd xmm0, dword STR:RD->len
-- |1:
-- | movsd qword [BASE+RA*8], xmm0
-- |.endif
-- | ins_next
-- |2:
-- | checktab RD, ->vmeta_len
-- | mov TAB:FCARG1, [BASE+RD*8]
--#if LJ_52
-- | mov TAB:RB, TAB:FCARG1->metatable
-- | cmp TAB:RB, 0
-- | jnz >9
-- |3:
--#endif
-- |->BC_LEN_Z:
-- | mov RB, BASE // Save BASE.
-- | call extern lj_tab_len@4 // (GCtab *t)
-- | // Length of table returned in eax (RD).
-- |.if DUALNUM
-- | // Nothing to do.
-- |.else
-- | cvtsi2sd xmm0, RD
-- |.endif
-- | mov BASE, RB // Restore BASE.
-- | movzx RA, PC_RA
-- | jmp <1
--#if LJ_52
-- |9: // Check for __len.
-- | test byte TAB:RB->nomm, 1<<MM_len
-- | jnz <3
-- | jmp ->vmeta_len // 'no __len' flag NOT set: check.
--#endif
-- break;
--
-- /* -- Binary ops -------------------------------------------------------- */
--
-- |.macro ins_arithpre, sseins, ssereg
-- | ins_ABC
-- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-- ||switch (vk) {
-- ||case 0:
-- | checknum RB, ->vmeta_arith_vn
-- | .if DUALNUM
-- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
-- | .endif
-- | movsd xmm0, qword [BASE+RB*8]
-- | sseins ssereg, qword [KBASE+RC*8]
-- || break;
-- ||case 1:
-- | checknum RB, ->vmeta_arith_nv
-- | .if DUALNUM
-- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
-- | .endif
-- | movsd xmm0, qword [KBASE+RC*8]
-- | sseins ssereg, qword [BASE+RB*8]
-- || break;
-- ||default:
-- | checknum RB, ->vmeta_arith_vv
-- | checknum RC, ->vmeta_arith_vv
-- | movsd xmm0, qword [BASE+RB*8]
-- | sseins ssereg, qword [BASE+RC*8]
-- || break;
-- ||}
-- |.endmacro
-- |
-- |.macro ins_arithdn, intins
-- | ins_ABC
-- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-- ||switch (vk) {
-- ||case 0:
-- | checkint RB, ->vmeta_arith_vn
-- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
-- | mov RB, [BASE+RB*8]
-- | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
-- || break;
-- ||case 1:
-- | checkint RB, ->vmeta_arith_nv
-- | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
-- | mov RC, [KBASE+RC*8]
-- | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
-- || break;
-- ||default:
-- | checkint RB, ->vmeta_arith_vv
-- | checkint RC, ->vmeta_arith_vv
-- | mov RB, [BASE+RB*8]
-- | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
-- || break;
-- ||}
-- | mov dword [BASE+RA*8+4], LJ_TISNUM
-- ||if (vk == 1) {
-- | mov dword [BASE+RA*8], RC
-- ||} else {
-- | mov dword [BASE+RA*8], RB
-- ||}
-- | ins_next
-- |.endmacro
-- |
-- |.macro ins_arithpost
-- | movsd qword [BASE+RA*8], xmm0
-- |.endmacro
-- |
-- |.macro ins_arith, sseins
-- | ins_arithpre sseins, xmm0
-- | ins_arithpost
-- | ins_next
-- |.endmacro
-- |
-- |.macro ins_arith, intins, sseins
-- |.if DUALNUM
-- | ins_arithdn intins
-- |.else
-- | ins_arith, sseins
-- |.endif
-- |.endmacro
--
-- | // RA = dst, RB = src1 or num const, RC = src2 or num const
- case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-- | ins_arith add, addsd
-- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-- | ins_arith sub, subsd
-- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
-- | ins_arith imul, mulsd
-- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-- | ins_arith divsd
-- break;
- case BC_MODVN:
-- | ins_arithpre movsd, xmm1
-- |->BC_MODVN_Z:
-- | call ->vm_mod
-- | ins_arithpost
-- | ins_next
-- break;
- case BC_MODNV: case BC_MODVV:
-- | ins_arithpre movsd, xmm1
-- | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
-- break;
- case BC_POW:
-- | ins_arithpre movsd, xmm1
-- | mov RB, BASE
-- |.if not X64
-- | movsd FPARG1, xmm0
-- | movsd FPARG3, xmm1
-- |.endif
-- | call extern pow
-- | movzx RA, PC_RA
-- | mov BASE, RB
-- |.if X64
-- | ins_arithpost
-- |.else
-- | fstp qword [BASE+RA*8]
-- |.endif
-- | ins_next
-- break;
--
- case BC_CAT:
-- | ins_ABC // RA = dst, RB = src_start, RC = src_end
-- |.if X64
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE
-- | lea CARG2d, [BASE+RC*8]
-- | mov CARG3d, RC
-- | sub CARG3d, RB
-- |->BC_CAT_Z:
-- | mov L:RB, L:CARG1d
-- |.else
-- | lea RA, [BASE+RC*8]
-- | sub RC, RB
-- | mov ARG2, RA
-- | mov ARG3, RC
-- |->BC_CAT_Z:
-- | mov L:RB, SAVE_L
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
-- | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
-- | mov BASE, L:RB->base
-- | test RC, RC
-- | jnz ->vmeta_binop
-- | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
-- | movzx RA, PC_RA
-- |.if X64
-- | mov RCa, [BASE+RB*8]
-- | mov [BASE+RA*8], RCa
-- |.else
-- | mov RC, [BASE+RB*8+4]
-- | mov RB, [BASE+RB*8]
-- | mov [BASE+RA*8+4], RC
-- | mov [BASE+RA*8], RB
-- |.endif
-- | ins_next
-- break;
--
-- /* -- Constant ops ------------------------------------------------------ */
--
- case BC_KSTR:
-- | ins_AND // RA = dst, RD = str const (~)
-- | mov RD, [KBASE+RD*4]
-- | mov dword [BASE+RA*8+4], LJ_TSTR
-- | mov [BASE+RA*8], RD
-- | ins_next
-- break;
- case BC_KCDATA:
-- |.if FFI
-- | ins_AND // RA = dst, RD = cdata const (~)
-- | mov RD, [KBASE+RD*4]
-- | mov dword [BASE+RA*8+4], LJ_TCDATA
-- | mov [BASE+RA*8], RD
-- | ins_next
-- |.endif
-- break;
- case BC_KSHORT:
-- | ins_AD // RA = dst, RD = signed int16 literal
-- |.if DUALNUM
-- | movsx RD, RDW
-- | mov dword [BASE+RA*8+4], LJ_TISNUM
-- | mov dword [BASE+RA*8], RD
-- |.else
-- | movsx RD, RDW // Sign-extend literal.
-- | cvtsi2sd xmm0, RD
-- | movsd qword [BASE+RA*8], xmm0
-- |.endif
-- | ins_next
-- break;
- case BC_KNUM:
-- | ins_AD // RA = dst, RD = num const
-- | movsd xmm0, qword [KBASE+RD*8]
-- | movsd qword [BASE+RA*8], xmm0
-- | ins_next
-- break;
- case BC_KPRI:
-- | ins_AND // RA = dst, RD = primitive type (~)
-- | mov [BASE+RA*8+4], RD
-- | ins_next
-- break;
- case BC_KNIL:
-- | ins_AD // RA = dst_start, RD = dst_end
-- | lea RA, [BASE+RA*8+12]
-- | lea RD, [BASE+RD*8+4]
-- | mov RB, LJ_TNIL
-- | mov [RA-8], RB // Sets minimum 2 slots.
-- |1:
-- | mov [RA], RB
-- | add RA, 8
-- | cmp RA, RD
-- | jbe <1
-- | ins_next
-- break;
--
-- /* -- Upvalue and function ops ------------------------------------------ */
--
- case BC_UGET:
-- | ins_AD // RA = dst, RD = upvalue #
-- | mov LFUNC:RB, [BASE-8]
-- | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
-- | mov RB, UPVAL:RB->v
-- |.if X64
-- | mov RDa, [RB]
-- | mov [BASE+RA*8], RDa
-- |.else
-- | mov RD, [RB+4]
-- | mov RB, [RB]
-- | mov [BASE+RA*8+4], RD
-- | mov [BASE+RA*8], RB
-- |.endif
-- | ins_next
-- break;
- case BC_USETV:
--#define TV2MARKOFS \
-- ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
-- | ins_AD // RA = upvalue #, RD = src
-- | mov LFUNC:RB, [BASE-8]
-- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-- | cmp byte UPVAL:RB->closed, 0
-- | mov RB, UPVAL:RB->v
-- | mov RA, [BASE+RD*8]
-- | mov RD, [BASE+RD*8+4]
-- | mov [RB], RA
-- | mov [RB+4], RD
-- | jz >1
-- | // Check barrier for closed upvalue.
-- | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
-- | jnz >2
-- |1:
-- | ins_next
-- |
-- |2: // Upvalue is black. Check if new value is collectable and white.
-- | sub RD, LJ_TISGCV
-- | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
-- | jbe <1
-- | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
-- | jz <1
-- | // Crossed a write barrier. Move the barrier forward.
-- |.if X64 and not X64WIN
-- | mov FCARG2, RB
-- | mov RB, BASE // Save BASE.
-- |.else
-- | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
-- |.endif
-- | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
-- | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
-- | mov BASE, RB // Restore BASE.
-- | jmp <1
-- break;
--#undef TV2MARKOFS
- case BC_USETS:
-- | ins_AND // RA = upvalue #, RD = str const (~)
-- | mov LFUNC:RB, [BASE-8]
-- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-- | mov GCOBJ:RA, [KBASE+RD*4]
-- | mov RD, UPVAL:RB->v
-- | mov [RD], GCOBJ:RA
-- | mov dword [RD+4], LJ_TSTR
-- | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
-- | jnz >2
-- |1:
-- | ins_next
-- |
-- |2: // Check if string is white and ensure upvalue is closed.
-- | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
-- | jz <1
-- | cmp byte UPVAL:RB->closed, 0
-- | jz <1
-- | // Crossed a write barrier. Move the barrier forward.
-- | mov RB, BASE // Save BASE (FCARG2 == BASE).
-- | mov FCARG2, RD
-- | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
-- | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
-- | mov BASE, RB // Restore BASE.
-- | jmp <1
-- break;
- case BC_USETN:
-- | ins_AD // RA = upvalue #, RD = num const
-- | mov LFUNC:RB, [BASE-8]
-- | movsd xmm0, qword [KBASE+RD*8]
-- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-- | mov RA, UPVAL:RB->v
-- | movsd qword [RA], xmm0
-- | ins_next
-- break;
- case BC_USETP:
-- | ins_AND // RA = upvalue #, RD = primitive type (~)
-- | mov LFUNC:RB, [BASE-8]
-- | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-- | mov RA, UPVAL:RB->v
-- | mov [RA+4], RD
-- | ins_next
-- break;
- case BC_UCLO:
-- | ins_AD // RA = level, RD = target
-- | branchPC RD // Do this first to free RD.
-- | mov L:RB, SAVE_L
-- | cmp dword L:RB->openupval, 0
-- | je >1
-- | mov L:RB->base, BASE
-- | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
-- | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-- | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
-- | mov BASE, L:RB->base
-- |1:
-- | ins_next
-- break;
--
- case BC_FNEW:
-- | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
-- |.if X64
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-- | mov CARG3d, [BASE-8]
-- | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
-- | mov CARG1d, L:RB
-- |.else
-- | mov LFUNC:RA, [BASE-8]
-- | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
-- | mov L:RB, SAVE_L
-- | mov ARG3, LFUNC:RA
-- | mov ARG2, PROTO:RD
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | // (lua_State *L, GCproto *pt, GCfuncL *parent)
-- | call extern lj_func_newL_gc
-- | // GCfuncL * returned in eax (RC).
-- | mov BASE, L:RB->base
-- | movzx RA, PC_RA
-- | mov [BASE+RA*8], LFUNC:RC
-- | mov dword [BASE+RA*8+4], LJ_TFUNC
-- | ins_next
-- break;
--
-- /* -- Table ops --------------------------------------------------------- */
--
- case BC_TNEW:
-- | ins_AD // RA = dst, RD = hbits|asize
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
-- | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
-- | mov SAVE_PC, PC
-- | jae >5
-- |1:
-- |.if X64
-- | mov CARG3d, RD
-- | and RD, 0x7ff
-- | shr CARG3d, 11
-- |.else
-- | mov RA, RD
-- | and RD, 0x7ff
-- | shr RA, 11
-- | mov ARG3, RA
-- |.endif
-- | cmp RD, 0x7ff
-- | je >3
-- |2:
-- |.if X64
-- | mov L:CARG1d, L:RB
-- | mov CARG2d, RD
-- |.else
-- | mov ARG1, L:RB
-- | mov ARG2, RD
-- |.endif
-- | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
-- | // Table * returned in eax (RC).
-- | mov BASE, L:RB->base
-- | movzx RA, PC_RA
-- | mov [BASE+RA*8], TAB:RC
-- | mov dword [BASE+RA*8+4], LJ_TTAB
-- | ins_next
-- |3: // Turn 0x7ff into 0x801.
-- | mov RD, 0x801
-- | jmp <2
-- |5:
-- | mov L:FCARG1, L:RB
-- | call extern lj_gc_step_fixtop@4 // (lua_State *L)
-- | movzx RD, PC_RD
-- | jmp <1
-- break;
- case BC_TDUP:
-- | ins_AND // RA = dst, RD = table const (~) (holding template table)
-- | mov L:RB, SAVE_L
-- | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
-- | mov SAVE_PC, PC
-- | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
-- | mov L:RB->base, BASE
-- | jae >3
-- |2:
-- | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
-- | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-- | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
-- | // Table * returned in eax (RC).
-- | mov BASE, L:RB->base
-- | movzx RA, PC_RA
-- | mov [BASE+RA*8], TAB:RC
-- | mov dword [BASE+RA*8+4], LJ_TTAB
-- | ins_next
-- |3:
-- | mov L:FCARG1, L:RB
-- | call extern lj_gc_step_fixtop@4 // (lua_State *L)
-- | movzx RD, PC_RD // Need to reload RD.
-- | not RDa
-- | jmp <2
-- break;
--
- case BC_GGET:
-- | ins_AND // RA = dst, RD = str const (~)
-- | mov LFUNC:RB, [BASE-8]
-- | mov TAB:RB, LFUNC:RB->env
-- | mov STR:RC, [KBASE+RD*4]
-- | jmp ->BC_TGETS_Z
-- break;
- case BC_GSET:
-- | ins_AND // RA = src, RD = str const (~)
-- | mov LFUNC:RB, [BASE-8]
-- | mov TAB:RB, LFUNC:RB->env
-- | mov STR:RC, [KBASE+RD*4]
-- | jmp ->BC_TSETS_Z
-- break;
--
- case BC_TGETV:
-- | ins_ABC // RA = dst, RB = table, RC = key
-- | checktab RB, ->vmeta_tgetv
-- | mov TAB:RB, [BASE+RB*8]
-- |
-- | // Integer key?
-- |.if DUALNUM
-- | checkint RC, >5
-- | mov RC, dword [BASE+RC*8]
-- |.else
-- | // Convert number to int and back and compare.
-- | checknum RC, >5
-- | movsd xmm0, qword [BASE+RC*8]
-- | cvttsd2si RC, xmm0
-- | cvtsi2sd xmm1, RC
-- | ucomisd xmm0, xmm1
-- | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
-- |.endif
-- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-- | jae ->vmeta_tgetv // Not in array part? Use fallback.
-- | shl RC, 3
-- | add RC, TAB:RB->array
-- | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-- | je >2
-- | // Get array slot.
-- |.if X64
-- | mov RBa, [RC]
-- | mov [BASE+RA*8], RBa
-- |.else
-- | mov RB, [RC]
-- | mov RC, [RC+4]
-- | mov [BASE+RA*8], RB
-- | mov [BASE+RA*8+4], RC
-- |.endif
-- |1:
-- | ins_next
-- |
-- |2: // Check for __index if table value is nil.
-- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-- | jz >3
-- | mov TAB:RA, TAB:RB->metatable
-- | test byte TAB:RA->nomm, 1<<MM_index
-- | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
-- | movzx RA, PC_RA // Restore RA.
-- |3:
-- | mov dword [BASE+RA*8+4], LJ_TNIL
-- | jmp <1
-- |
-- |5: // String key?
-- | checkstr RC, ->vmeta_tgetv
-- | mov STR:RC, [BASE+RC*8]
-- | jmp ->BC_TGETS_Z
-- break;
- case BC_TGETS:
-- | ins_ABC // RA = dst, RB = table, RC = str const (~)
-- | not RCa
-- | mov STR:RC, [KBASE+RC*4]
-- | checktab RB, ->vmeta_tgets
-- | mov TAB:RB, [BASE+RB*8]
-- |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
-- | mov RA, TAB:RB->hmask
-- | and RA, STR:RC->hash
-- | imul RA, #NODE
-- | add NODE:RA, TAB:RB->node
-- |1:
-- | cmp dword NODE:RA->key.it, LJ_TSTR
-- | jne >4
-- | cmp dword NODE:RA->key.gcr, STR:RC
-- | jne >4
-- | // Ok, key found. Assumes: offsetof(Node, val) == 0
-- | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-- | je >5 // Key found, but nil value?
-- | movzx RC, PC_RA
-- | // Get node value.
-- |.if X64
-- | mov RBa, [RA]
-- | mov [BASE+RC*8], RBa
-- |.else
-- | mov RB, [RA]
-- | mov RA, [RA+4]
-- | mov [BASE+RC*8], RB
-- | mov [BASE+RC*8+4], RA
-- |.endif
-- |2:
-- | ins_next
-- |
-- |3:
-- | movzx RC, PC_RA
-- | mov dword [BASE+RC*8+4], LJ_TNIL
-- | jmp <2
-- |
-- |4: // Follow hash chain.
-- | mov NODE:RA, NODE:RA->next
-- | test NODE:RA, NODE:RA
-- | jnz <1
-- | // End of hash chain: key not found, nil result.
-- |
-- |5: // Check for __index if table value is nil.
-- | mov TAB:RA, TAB:RB->metatable
-- | test TAB:RA, TAB:RA
-- | jz <3 // No metatable: done.
-- | test byte TAB:RA->nomm, 1<<MM_index
-- | jnz <3 // 'no __index' flag set: done.
-- | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
-- break;
- case BC_TGETB:
-- | ins_ABC // RA = dst, RB = table, RC = byte literal
-- | checktab RB, ->vmeta_tgetb
-- | mov TAB:RB, [BASE+RB*8]
-- | cmp RC, TAB:RB->asize
-- | jae ->vmeta_tgetb
-- | shl RC, 3
-- | add RC, TAB:RB->array
-- | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-- | je >2
-- | // Get array slot.
-- |.if X64
-- | mov RBa, [RC]
-- | mov [BASE+RA*8], RBa
-- |.else
-- | mov RB, [RC]
-- | mov RC, [RC+4]
-- | mov [BASE+RA*8], RB
-- | mov [BASE+RA*8+4], RC
-- |.endif
-- |1:
-- | ins_next
-- |
-- |2: // Check for __index if table value is nil.
-- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-- | jz >3
-- | mov TAB:RA, TAB:RB->metatable
-- | test byte TAB:RA->nomm, 1<<MM_index
-- | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
-- | movzx RA, PC_RA // Restore RA.
-- |3:
-- | mov dword [BASE+RA*8+4], LJ_TNIL
-- | jmp <1
-- break;
- case BC_TGETR:
-- | ins_ABC // RA = dst, RB = table, RC = key
-- | mov TAB:RB, [BASE+RB*8]
-- |.if DUALNUM
-- | mov RC, dword [BASE+RC*8]
-- |.else
-- | cvttsd2si RC, qword [BASE+RC*8]
-- |.endif
-- | cmp RC, TAB:RB->asize
-- | jae ->vmeta_tgetr // Not in array part? Use fallback.
-- | shl RC, 3
-- | add RC, TAB:RB->array
-- | // Get array slot.
-- |->BC_TGETR_Z:
-- |.if X64
-- | mov RBa, [RC]
-- | mov [BASE+RA*8], RBa
-- |.else
-- | mov RB, [RC]
-- | mov RC, [RC+4]
-- | mov [BASE+RA*8], RB
-- | mov [BASE+RA*8+4], RC
-- |.endif
-- |->BC_TGETR2_Z:
-- | ins_next
-- break;
--
- case BC_TSETV:
-- | ins_ABC // RA = src, RB = table, RC = key
-- | checktab RB, ->vmeta_tsetv
-- | mov TAB:RB, [BASE+RB*8]
-- |
-- | // Integer key?
-- |.if DUALNUM
-- | checkint RC, >5
-- | mov RC, dword [BASE+RC*8]
-- |.else
-- | // Convert number to int and back and compare.
-- | checknum RC, >5
-- | movsd xmm0, qword [BASE+RC*8]
-- | cvttsd2si RC, xmm0
-- | cvtsi2sd xmm1, RC
-- | ucomisd xmm0, xmm1
-- | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
-- |.endif
-- | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-- | jae ->vmeta_tsetv
-- | shl RC, 3
-- | add RC, TAB:RB->array
-- | cmp dword [RC+4], LJ_TNIL
-- | je >3 // Previous value is nil?
-- |1:
-- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-- | jnz >7
-- |2: // Set array slot.
-- |.if X64
-- | mov RBa, [BASE+RA*8]
-- | mov [RC], RBa
-- |.else
-- | mov RB, [BASE+RA*8+4]
-- | mov RA, [BASE+RA*8]
-- | mov [RC+4], RB
-- | mov [RC], RA
-- |.endif
-- | ins_next
-- |
-- |3: // Check for __newindex if previous value is nil.
-- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-- | jz <1
-- | mov TAB:RA, TAB:RB->metatable
-- | test byte TAB:RA->nomm, 1<<MM_newindex
-- | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
-- | movzx RA, PC_RA // Restore RA.
-- | jmp <1
-- |
-- |5: // String key?
-- | checkstr RC, ->vmeta_tsetv
-- | mov STR:RC, [BASE+RC*8]
-- | jmp ->BC_TSETS_Z
-- |
-- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:RB, RA
-- | movzx RA, PC_RA // Restore RA.
-- | jmp <2
-- break;
- case BC_TSETS:
-- | ins_ABC // RA = src, RB = table, RC = str const (~)
-- | not RCa
-- | mov STR:RC, [KBASE+RC*4]
-- | checktab RB, ->vmeta_tsets
-- | mov TAB:RB, [BASE+RB*8]
-- |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
-- | mov RA, TAB:RB->hmask
-- | and RA, STR:RC->hash
-- | imul RA, #NODE
-- | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
-- | add NODE:RA, TAB:RB->node
-- |1:
-- | cmp dword NODE:RA->key.it, LJ_TSTR
-- | jne >5
-- | cmp dword NODE:RA->key.gcr, STR:RC
-- | jne >5
-- | // Ok, key found. Assumes: offsetof(Node, val) == 0
-- | cmp dword [RA+4], LJ_TNIL
-- | je >4 // Previous value is nil?
-- |2:
-- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-- | jnz >7
-- |3: // Set node value.
-- | movzx RC, PC_RA
-- |.if X64
-- | mov RBa, [BASE+RC*8]
-- | mov [RA], RBa
-- |.else
-- | mov RB, [BASE+RC*8+4]
-- | mov RC, [BASE+RC*8]
-- | mov [RA+4], RB
-- | mov [RA], RC
-- |.endif
-- | ins_next
-- |
-- |4: // Check for __newindex if previous value is nil.
-- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-- | jz <2
-- | mov TMP1, RA // Save RA.
-- | mov TAB:RA, TAB:RB->metatable
-- | test byte TAB:RA->nomm, 1<<MM_newindex
-- | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-- | mov RA, TMP1 // Restore RA.
-- | jmp <2
-- |
-- |5: // Follow hash chain.
-- | mov NODE:RA, NODE:RA->next
-- | test NODE:RA, NODE:RA
-- | jnz <1
-- | // End of hash chain: key not found, add a new one.
-- |
-- | // But check for __newindex first.
-- | mov TAB:RA, TAB:RB->metatable
-- | test TAB:RA, TAB:RA
-- | jz >6 // No metatable: continue.
-- | test byte TAB:RA->nomm, 1<<MM_newindex
-- | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-- |6:
-- | mov TMP1, STR:RC
-- | mov TMP2, LJ_TSTR
-- | mov TMP3, TAB:RB // Save TAB:RB for us.
-- |.if X64
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE
-- | lea CARG3, TMP1
-- | mov CARG2d, TAB:RB
-- | mov L:RB, L:CARG1d
-- |.else
-- | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
-- | mov ARG2, TAB:RB
-- | mov L:RB, SAVE_L
-- | mov ARG3, RC
-- | mov ARG1, L:RB
-- | mov L:RB->base, BASE
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
-- | // Handles write barrier for the new key. TValue * returned in eax (RC).
-- | mov BASE, L:RB->base
-- | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
-- | mov RA, eax
-- | jmp <2 // Must check write barrier for value.
-- |
-- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:RB, RC // Destroys STR:RC.
-- | jmp <3
-- break;
- case BC_TSETB:
-- | ins_ABC // RA = src, RB = table, RC = byte literal
-- | checktab RB, ->vmeta_tsetb
-- | mov TAB:RB, [BASE+RB*8]
-- | cmp RC, TAB:RB->asize
-- | jae ->vmeta_tsetb
-- | shl RC, 3
-- | add RC, TAB:RB->array
-- | cmp dword [RC+4], LJ_TNIL
-- | je >3 // Previous value is nil?
-- |1:
-- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-- | jnz >7
-- |2: // Set array slot.
-- |.if X64
-- | mov RAa, [BASE+RA*8]
-- | mov [RC], RAa
-- |.else
-- | mov RB, [BASE+RA*8+4]
-- | mov RA, [BASE+RA*8]
-- | mov [RC+4], RB
-- | mov [RC], RA
-- |.endif
-- | ins_next
-- |
-- |3: // Check for __newindex if previous value is nil.
-- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-- | jz <1
-- | mov TAB:RA, TAB:RB->metatable
-- | test byte TAB:RA->nomm, 1<<MM_newindex
-- | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
-- | movzx RA, PC_RA // Restore RA.
-- | jmp <1
-- |
-- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:RB, RA
-- | movzx RA, PC_RA // Restore RA.
-- | jmp <2
-- break;
- case BC_TSETR:
-- | ins_ABC // RA = src, RB = table, RC = key
-- | mov TAB:RB, [BASE+RB*8]
-- |.if DUALNUM
-- | mov RC, dword [BASE+RC*8]
-- |.else
-- | cvttsd2si RC, qword [BASE+RC*8]
-- |.endif
-- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-- | jnz >7
-- |2:
-- | cmp RC, TAB:RB->asize
-- | jae ->vmeta_tsetr
-- | shl RC, 3
-- | add RC, TAB:RB->array
-- | // Set array slot.
-- |->BC_TSETR_Z:
-- |.if X64
-- | mov RBa, [BASE+RA*8]
-- | mov [RC], RBa
-- |.else
-- | mov RB, [BASE+RA*8+4]
-- | mov RA, [BASE+RA*8]
-- | mov [RC+4], RB
-- | mov [RC], RA
-- |.endif
-- | ins_next
-- |
-- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:RB, RA
-- | movzx RA, PC_RA // Restore RA.
-- | jmp <2
-- break;
--
- case BC_TSETM:
-- | ins_AD // RA = base (table at base-1), RD = num const (start index)
-- | mov TMP1, KBASE // Need one more free register.
-- | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word.
-- |1:
-- | lea RA, [BASE+RA*8]
-- | mov TAB:RB, [RA-8] // Guaranteed to be a table.
-- | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-- | jnz >7
-- |2:
-- | mov RD, MULTRES
-- | sub RD, 1
-- | jz >4 // Nothing to copy?
-- | add RD, KBASE // Compute needed size.
-- | cmp RD, TAB:RB->asize
-- | ja >5 // Doesn't fit into array part?
-- | sub RD, KBASE
-- | shl KBASE, 3
-- | add KBASE, TAB:RB->array
-- |3: // Copy result slots to table.
-- |.if X64
-- | mov RBa, [RA]
-- | add RA, 8
-- | mov [KBASE], RBa
-- |.else
-- | mov RB, [RA]
-- | mov [KBASE], RB
-- | mov RB, [RA+4]
-- | add RA, 8
-- | mov [KBASE+4], RB
-- |.endif
-- | add KBASE, 8
-- | sub RD, 1
-- | jnz <3
-- |4:
-- | mov KBASE, TMP1
-- | ins_next
-- |
-- |5: // Need to resize array part.
-- |.if X64
-- | mov L:CARG1d, SAVE_L
-- | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-- | mov CARG2d, TAB:RB
-- | mov CARG3d, RD
-- | mov L:RB, L:CARG1d
-- |.else
-- | mov ARG2, TAB:RB
-- | mov L:RB, SAVE_L
-- | mov L:RB->base, BASE
-- | mov ARG3, RD
-- | mov ARG1, L:RB
-- |.endif
-- | mov SAVE_PC, PC
-- | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
-- | mov BASE, L:RB->base
-- | movzx RA, PC_RA // Restore RA.
-- | jmp <1 // Retry.
-- |
-- |7: // Possible table write barrier for any value. Skip valiswhite check.
-- | barrierback TAB:RB, RD
-- | jmp <2
-- break;
--
-- /* -- Calls and vararg handling ----------------------------------------- */
--
- case BC_CALL: case BC_CALLM:
-- | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
-- if (op == BC_CALLM) {
-- | add NARGS:RD, MULTRES
-- }
-- | cmp dword [BASE+RA*8+4], LJ_TFUNC
-- | mov LFUNC:RB, [BASE+RA*8]
-- | jne ->vmeta_call_ra
-- | lea BASE, [BASE+RA*8+8]
-- | ins_call
-- break;
--
- case BC_CALLMT:
-- | ins_AD // RA = base, RD = extra_nargs
-- | add NARGS:RD, MULTRES
-- | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
-- break;
- case BC_CALLT:
-- | ins_AD // RA = base, RD = nargs+1
-- | lea RA, [BASE+RA*8+8]
-- | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
-- | mov LFUNC:RB, [RA-8]
-- | cmp dword [RA-4], LJ_TFUNC
-- | jne ->vmeta_call
-- |->BC_CALLT_Z:
-- | mov PC, [BASE-4]
-- | test PC, FRAME_TYPE
-- | jnz >7
-- |1:
-- | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
-- | mov MULTRES, NARGS:RD
-- | sub NARGS:RD, 1
-- | jz >3
-- |2: // Move args down.
-- |.if X64
-- | mov RBa, [RA]
-- | add RA, 8
-- | mov [KBASE], RBa
-- |.else
-- | mov RB, [RA]
-- | mov [KBASE], RB
-- | mov RB, [RA+4]
-- | add RA, 8
-- | mov [KBASE+4], RB
-- |.endif
-- | add KBASE, 8
-- | sub NARGS:RD, 1
-- | jnz <2
-- |
-- | mov LFUNC:RB, [BASE-8]
-- |3:
-- | mov NARGS:RD, MULTRES
-- | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
-- | ja >5
-- |4:
-- | ins_callt
-- |
-- |5: // Tailcall to a fast function.
-- | test PC, FRAME_TYPE // Lua frame below?
-- | jnz <4
-- | movzx RA, PC_RA
-- | not RAa
-- | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE.
-- | mov KBASE, LFUNC:KBASE->pc
-- | mov KBASE, [KBASE+PC2PROTO(k)]
-- | jmp <4
-- |
-- |7: // Tailcall from a vararg function.
-- | sub PC, FRAME_VARG
-- | test PC, FRAME_TYPEP
-- | jnz >8 // Vararg frame below?
-- | sub BASE, PC // Need to relocate BASE/KBASE down.
-- | mov KBASE, BASE
-- | mov PC, [BASE-4]
-- | jmp <1
-- |8:
-- | add PC, FRAME_VARG
-- | jmp <1
-- break;
--
- case BC_ITERC:
-- | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
-- | lea RA, [BASE+RA*8+8] // fb = base+1
-- |.if X64
-- | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3].
-- | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2].
-- | mov [RA], RBa
-- | mov [RA+8], RCa
-- |.else
-- | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
-- | mov RC, [RA-20]
-- | mov [RA], RB
-- | mov [RA+4], RC
-- | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
-- | mov RC, [RA-12]
-- | mov [RA+8], RB
-- | mov [RA+12], RC
-- |.endif
-- | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
-- | mov RC, [RA-28]
-- | mov [RA-8], LFUNC:RB
-- | mov [RA-4], RC
-- | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
-- | mov NARGS:RD, 2+1
-- | jne ->vmeta_call
-- | mov BASE, RA
-- | ins_call
-- break;
--
- case BC_ITERN:
-- | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
-- |.if JIT
-- | // NYI: add hotloop, record BC_ITERN.
-- |.endif
-- | mov TMP1, KBASE // Need two more free registers.
-- | mov TMP2, DISPATCH
-- | mov TAB:RB, [BASE+RA*8-16]
-- | mov RC, [BASE+RA*8-8] // Get index from control var.
-- | mov DISPATCH, TAB:RB->asize
-- | add PC, 4
-- | mov KBASE, TAB:RB->array
-- |1: // Traverse array part.
-- | cmp RC, DISPATCH; jae >5 // Index points after array part?
-- | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
-- |.if DUALNUM
-- | mov dword [BASE+RA*8+4], LJ_TISNUM
-- | mov dword [BASE+RA*8], RC
-- |.else
-- | cvtsi2sd xmm0, RC
-- |.endif
-- | // Copy array slot to returned value.
-- |.if X64
-- | mov RBa, [KBASE+RC*8]
-- | mov [BASE+RA*8+8], RBa
-- |.else
-- | mov RB, [KBASE+RC*8+4]
-- | mov [BASE+RA*8+12], RB
-- | mov RB, [KBASE+RC*8]
-- | mov [BASE+RA*8+8], RB
-- |.endif
-- | add RC, 1
-- | // Return array index as a numeric key.
-- |.if DUALNUM
-- | // See above.
-- |.else
-- | movsd qword [BASE+RA*8], xmm0
-- |.endif
-- | mov [BASE+RA*8-8], RC // Update control var.
-- |2:
-- | movzx RD, PC_RD // Get target from ITERL.
-- | branchPC RD
-- |3:
-- | mov DISPATCH, TMP2
-- | mov KBASE, TMP1
-- | ins_next
-- |
-- |4: // Skip holes in array part.
-- | add RC, 1
-- | jmp <1
-- |
-- |5: // Traverse hash part.
-- | sub RC, DISPATCH
-- |6:
-- | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
-- | imul KBASE, RC, #NODE
-- | add NODE:KBASE, TAB:RB->node
-- | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
-- | lea DISPATCH, [RC+DISPATCH+1]
-- | // Copy key and value from hash slot.
-- |.if X64
-- | mov RBa, NODE:KBASE->key
-- | mov RCa, NODE:KBASE->val
-- | mov [BASE+RA*8], RBa
-- | mov [BASE+RA*8+8], RCa
-- |.else
-- | mov RB, NODE:KBASE->key.gcr
-- | mov RC, NODE:KBASE->key.it
-- | mov [BASE+RA*8], RB
-- | mov [BASE+RA*8+4], RC
-- | mov RB, NODE:KBASE->val.gcr
-- | mov RC, NODE:KBASE->val.it
-- | mov [BASE+RA*8+8], RB
-- | mov [BASE+RA*8+12], RC
-- |.endif
-- | mov [BASE+RA*8-8], DISPATCH
-- | jmp <2
-- |
-- |7: // Skip holes in hash part.
-- | add RC, 1
-- | jmp <6
-- break;
--
- case BC_ISNEXT:
-- | ins_AD // RA = base, RD = target (points to ITERN)
-- | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5
-- | mov CFUNC:RB, [BASE+RA*8-24]
-- | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5
-- | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5
-- | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
-- | branchPC RD
-- | mov dword [BASE+RA*8-8], 0 // Initialize control var.
-- | mov dword [BASE+RA*8-4], 0xfffe7fff
-- |1:
-- | ins_next
-- |5: // Despecialize bytecode if any of the checks fail.
-- | mov PC_OP, BC_JMP
-- | branchPC RD
-- | mov byte [PC], BC_ITERC
-- | jmp <1
-- break;
--
- case BC_VARG:
-- | ins_ABC // RA = base, RB = nresults+1, RC = numparams
-- | mov TMP1, KBASE // Need one more free register.
-- | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
-- | lea RA, [BASE+RA*8]
-- | sub KBASE, [BASE-4]
-- | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
-- | test RB, RB
-- | jz >5 // Copy all varargs?
-- | lea RB, [RA+RB*8-8]
-- | cmp KBASE, BASE // No vararg slots?
-- | jnb >2
-- |1: // Copy vararg slots to destination slots.
-- |.if X64
-- | mov RCa, [KBASE-8]
-- | add KBASE, 8
-- | mov [RA], RCa
-- |.else
-- | mov RC, [KBASE-8]
-- | mov [RA], RC
-- | mov RC, [KBASE-4]
-- | add KBASE, 8
-- | mov [RA+4], RC
-- |.endif
-- | add RA, 8
-- | cmp RA, RB // All destination slots filled?
-- | jnb >3
-- | cmp KBASE, BASE // No more vararg slots?
-- | jb <1
-- |2: // Fill up remainder with nil.
-- | mov dword [RA+4], LJ_TNIL
-- | add RA, 8
-- | cmp RA, RB
-- | jb <2
-- |3:
-- | mov KBASE, TMP1
-- | ins_next
-- |
-- |5: // Copy all varargs.
-- | mov MULTRES, 1 // MULTRES = 0+1
-- | mov RC, BASE
-- | sub RC, KBASE
-- | jbe <3 // No vararg slots?
-- | mov RB, RC
-- | shr RB, 3
-- | add RB, 1
-- | mov MULTRES, RB // MULTRES = #varargs+1
-- | mov L:RB, SAVE_L
-- | add RC, RA
-- | cmp RC, L:RB->maxstack
-- | ja >7 // Need to grow stack?
-- |6: // Copy all vararg slots.
-- |.if X64
-- | mov RCa, [KBASE-8]
-- | add KBASE, 8
-- | mov [RA], RCa
-- |.else
-- | mov RC, [KBASE-8]
-- | mov [RA], RC
-- | mov RC, [KBASE-4]
-- | add KBASE, 8
-- | mov [RA+4], RC
-- |.endif
-- | add RA, 8
-- | cmp KBASE, BASE // No more vararg slots?
-- | jb <6
-- | jmp <3
-- |
-- |7: // Grow stack for varargs.
-- | mov L:RB->base, BASE
-- | mov L:RB->top, RA
-- | mov SAVE_PC, PC
-- | sub KBASE, BASE // Need delta, because BASE may change.
-- | mov FCARG2, MULTRES
-- | sub FCARG2, 1
-- | mov FCARG1, L:RB
-- | call extern lj_state_growstack@8 // (lua_State *L, int n)
-- | mov BASE, L:RB->base
-- | mov RA, L:RB->top
-- | add KBASE, BASE
-- | jmp <6
-- break;
--
-- /* -- Returns ----------------------------------------------------------- */
--
- case BC_RETM:
-- | ins_AD // RA = results, RD = extra_nresults
-- | add RD, MULTRES // MULTRES >=1, so RD >=1.
-- | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
-- break;
--
- case BC_RET: case BC_RET0: case BC_RET1:
-- | ins_AD // RA = results, RD = nresults+1
-- if (op != BC_RET0) {
-- | shl RA, 3
-- }
-- |1:
-- | mov PC, [BASE-4]
-- | mov MULTRES, RD // Save nresults+1.
-- | test PC, FRAME_TYPE // Check frame type marker.
-- | jnz >7 // Not returning to a fixarg Lua func?
-- switch (op) {
-- case BC_RET:
-- |->BC_RET_Z:
-- | mov KBASE, BASE // Use KBASE for result move.
-- | sub RD, 1
-- | jz >3
-- |2: // Move results down.
-- |.if X64
-- | mov RBa, [KBASE+RA]
-- | mov [KBASE-8], RBa
-- |.else
-- | mov RB, [KBASE+RA]
-- | mov [KBASE-8], RB
-- | mov RB, [KBASE+RA+4]
-- | mov [KBASE-4], RB
-- |.endif
-- | add KBASE, 8
-- | sub RD, 1
-- | jnz <2
-- |3:
-- | mov RD, MULTRES // Note: MULTRES may be >255.
-- | movzx RB, PC_RB // So cannot compare with RDL!
-- |5:
-- | cmp RB, RD // More results expected?
-- | ja >6
-- break;
-- case BC_RET1:
-- |.if X64
-- | mov RBa, [BASE+RA]
-- | mov [BASE-8], RBa
-- |.else
-- | mov RB, [BASE+RA+4]
-- | mov [BASE-4], RB
-- | mov RB, [BASE+RA]
-- | mov [BASE-8], RB
-- |.endif
-- /* fallthrough */
-- case BC_RET0:
-- |5:
-- | cmp PC_RB, RDL // More results expected?
-- | ja >6
-- default:
-- break;
-- }
-- | movzx RA, PC_RA
-- | not RAa // Note: ~RA = -(RA+1)
-- | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
-- | mov LFUNC:KBASE, [BASE-8]
-- | mov KBASE, LFUNC:KBASE->pc
-- | mov KBASE, [KBASE+PC2PROTO(k)]
-- | ins_next
-- |
-- |6: // Fill up results with nil.
-- if (op == BC_RET) {
-- | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
-- | add KBASE, 8
-- } else {
-- | mov dword [BASE+RD*8-12], LJ_TNIL
-- }
-- | add RD, 1
-- | jmp <5
-- |
-- |7: // Non-standard return case.
-- | lea RB, [PC-FRAME_VARG]
-- | test RB, FRAME_TYPEP
-- | jnz ->vm_return
-- | // Return from vararg function: relocate BASE down and RA up.
-- | sub BASE, RB
-- if (op != BC_RET0) {
-- | add RA, RB
-- }
-- | jmp <1
-- break;
--
-- /* -- Loops and branches ------------------------------------------------ */
--
-- |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4]
-- |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12]
-- |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20]
-- |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28]
--
- case BC_FORL:
-- |.if JIT
-- | hotloop RB
-- |.endif
-- | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
-- break;
--
- case BC_JFORI:
- case BC_JFORL:
--#if !LJ_HASJIT
-- break;
--#endif
- case BC_FORI:
- case BC_IFORL:
-- vk = (op == BC_IFORL || op == BC_JFORL);
-- | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
-- | lea RA, [BASE+RA*8]
-- if (LJ_DUALNUM) {
-- | cmp FOR_TIDX, LJ_TISNUM; jne >9
-- if (!vk) {
-- | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
-- | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
-- | mov RB, dword FOR_IDX
-- | cmp dword FOR_STEP, 0; jl >5
-- } else {
--#ifdef LUA_USE_ASSERT
-- | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
-- | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
--#endif
-- | mov RB, dword FOR_STEP
-- | test RB, RB; js >5
-- | add RB, dword FOR_IDX; jo >1
-- | mov dword FOR_IDX, RB
-- }
-- | cmp RB, dword FOR_STOP
-- | mov FOR_TEXT, LJ_TISNUM
-- | mov dword FOR_EXT, RB
-- if (op == BC_FORI) {
-- | jle >7
-- |1:
-- |6:
-- | branchPC RD
-- } else if (op == BC_JFORI) {
-- | branchPC RD
-- | movzx RD, PC_RD
-- | jle =>BC_JLOOP
-- |1:
-- |6:
-- } else if (op == BC_IFORL) {
-- | jg >7
-- |6:
-- | branchPC RD
-- |1:
-- } else {
-- | jle =>BC_JLOOP
-- |1:
-- |6:
-- }
-- |7:
-- | ins_next
-- |
-- |5: // Invert check for negative step.
-- if (vk) {
-- | add RB, dword FOR_IDX; jo <1
-- | mov dword FOR_IDX, RB
-- }
-- | cmp RB, dword FOR_STOP
-- | mov FOR_TEXT, LJ_TISNUM
-- | mov dword FOR_EXT, RB
-- if (op == BC_FORI) {
-- | jge <7
-- } else if (op == BC_JFORI) {
-- | branchPC RD
-- | movzx RD, PC_RD
-- | jge =>BC_JLOOP
-- } else if (op == BC_IFORL) {
-- | jl <7
-- } else {
-- | jge =>BC_JLOOP
-- }
-- | jmp <6
-- |9: // Fallback to FP variant.
-- } else if (!vk) {
-- | cmp FOR_TIDX, LJ_TISNUM
-- }
-- if (!vk) {
-- | jae ->vmeta_for
-- | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
-- } else {
--#ifdef LUA_USE_ASSERT
-- | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
-- | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
--#endif
-- }
-- | mov RB, FOR_TSTEP // Load type/hiword of for step.
-- if (!vk) {
-- | cmp RB, LJ_TISNUM; jae ->vmeta_for
-- }
-- | movsd xmm0, qword FOR_IDX
-- | movsd xmm1, qword FOR_STOP
-- if (vk) {
-- | addsd xmm0, qword FOR_STEP
-- | movsd qword FOR_IDX, xmm0
-- | test RB, RB; js >3
-- } else {
-- | jl >3
-- }
-- | ucomisd xmm1, xmm0
-- |1:
-- | movsd qword FOR_EXT, xmm0
-- if (op == BC_FORI) {
-- |.if DUALNUM
-- | jnb <7
-- |.else
-- | jnb >2
-- | branchPC RD
-- |.endif
-- } else if (op == BC_JFORI) {
-- | branchPC RD
-- | movzx RD, PC_RD
-- | jnb =>BC_JLOOP
-- } else if (op == BC_IFORL) {
-- |.if DUALNUM
-- | jb <7
-- |.else
-- | jb >2
-- | branchPC RD
-- |.endif
-- } else {
-- | jnb =>BC_JLOOP
-- }
-- |.if DUALNUM
-- | jmp <6
-- |.else
-- |2:
-- | ins_next
-- |.endif
-- |
-- |3: // Invert comparison if step is negative.
-- | ucomisd xmm0, xmm1
-- | jmp <1
-- break;
--
- case BC_ITERL:
-- |.if JIT
-- | hotloop RB
-- |.endif
-- | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
-- break;
--
- case BC_JITERL:
--#if !LJ_HASJIT
-- break;
--#endif
- case BC_IITERL:
-- | ins_AJ // RA = base, RD = target
-- | lea RA, [BASE+RA*8]
-- | mov RB, [RA+4]
-- | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
-- if (op == BC_JITERL) {
-- | mov [RA-4], RB
-- | mov RB, [RA]
-- | mov [RA-8], RB
-- | jmp =>BC_JLOOP
-- } else {
-- | branchPC RD // Otherwise save control var + branch.
-- | mov RD, [RA]
-- | mov [RA-4], RB
-- | mov [RA-8], RD
-- }
-- |1:
-- | ins_next
-- break;
--
- case BC_LOOP:
-- | ins_A // RA = base, RD = target (loop extent)
-- | // Note: RA/RD is only used by trace recorder to determine scope/extent
-- | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
-- |.if JIT
-- | hotloop RB
-- |.endif
-- | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
-- break;
--
- case BC_ILOOP:
-- | ins_A // RA = base, RD = target (loop extent)
-- | ins_next
-- break;
--
- case BC_JLOOP:
-- |.if JIT
-- | ins_AD // RA = base (ignored), RD = traceno
-- | mov RA, [DISPATCH+DISPATCH_J(trace)]
-- | mov TRACE:RD, [RA+RD*4]
-- | mov RDa, TRACE:RD->mcode
-- | mov L:RB, SAVE_L
-- | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
-- | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
-- | // Save additional callee-save registers only used in compiled code.
-- |.if X64WIN
-- | mov TMPQ, r12
-- | mov TMPa, r13
-- | mov CSAVE_4, r14
-- | mov CSAVE_3, r15
-- | mov RAa, rsp
-- | sub rsp, 9*16+4*8
-- | movdqa [RAa], xmm6
-- | movdqa [RAa-1*16], xmm7
-- | movdqa [RAa-2*16], xmm8
-- | movdqa [RAa-3*16], xmm9
-- | movdqa [RAa-4*16], xmm10
-- | movdqa [RAa-5*16], xmm11
-- | movdqa [RAa-6*16], xmm12
-- | movdqa [RAa-7*16], xmm13
-- | movdqa [RAa-8*16], xmm14
-- | movdqa [RAa-9*16], xmm15
-- |.elif X64
-- | mov TMPQ, r12
-- | mov TMPa, r13
-- | sub rsp, 16
-- |.endif
-- | jmp RDa
-- |.endif
-- break;
--
- case BC_JMP:
-- | ins_AJ // RA = unused, RD = target
-- | branchPC RD
-- | ins_next
-- break;
--
-- /* -- Function headers -------------------------------------------------- */
--
-- /*
-- ** Reminder: A function may be called with func/args above L->maxstack,
-- ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
-- ** too. This means all FUNC* ops (including fast functions) must check
-- ** for stack overflow _before_ adding more slots!
-- */
--
- case BC_FUNCF:
-- |.if JIT
-- | hotcall RB
-- |.endif
- case BC_FUNCV: /* NYI: compiled vararg functions. */
-- | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
-- break;
--
- case BC_JFUNCF:
--#if !LJ_HASJIT
-- break;
--#endif
- case BC_IFUNCF:
-- | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-- | mov KBASE, [PC-4+PC2PROTO(k)]
-- | mov L:RB, SAVE_L
-- | lea RA, [BASE+RA*8] // Top of frame.
-- | cmp RA, L:RB->maxstack
-- | ja ->vm_growstack_f
-- | movzx RA, byte [PC-4+PC2PROTO(numparams)]
-- | cmp NARGS:RD, RA // Check for missing parameters.
-- | jbe >3
-- |2:
-- if (op == BC_JFUNCF) {
-- | movzx RD, PC_RD
-- | jmp =>BC_JLOOP
-- } else {
-- | ins_next
-- }
-- |
-- |3: // Clear missing parameters.
-- | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
-- | add NARGS:RD, 1
-- | cmp NARGS:RD, RA
-- | jbe <3
-- | jmp <2
-- break;
--
- case BC_JFUNCV:
--#if !LJ_HASJIT
-- break;
--#endif
-- | int3 // NYI: compiled vararg functions
-- break; /* NYI: compiled vararg functions. */
--
- case BC_IFUNCV:
-- | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-- | lea RB, [NARGS:RD*8+FRAME_VARG]
-- | lea RD, [BASE+NARGS:RD*8]
-- | mov LFUNC:KBASE, [BASE-8]
-- | mov [RD-4], RB // Store delta + FRAME_VARG.
-- | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
-- | mov L:RB, SAVE_L
-- | lea RA, [RD+RA*8]
-- | cmp RA, L:RB->maxstack
-- | ja ->vm_growstack_v // Need to grow stack.
-- | mov RA, BASE
-- | mov BASE, RD
-- | movzx RB, byte [PC-4+PC2PROTO(numparams)]
-- | test RB, RB
-- | jz >2
-- |1: // Copy fixarg slots up to new frame.
-- | add RA, 8
-- | cmp RA, BASE
-- | jnb >3 // Less args than parameters?
-- | mov KBASE, [RA-8]
-- | mov [RD], KBASE
-- | mov KBASE, [RA-4]
-- | mov [RD+4], KBASE
-- | add RD, 8
-- | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
-- | sub RB, 1
-- | jnz <1
-- |2:
-- if (op == BC_JFUNCV) {
-- | movzx RD, PC_RD
-- | jmp =>BC_JLOOP
-- } else {
-- | mov KBASE, [PC-4+PC2PROTO(k)]
-- | ins_next
-- }
-- |
-- |3: // Clear missing parameters.
-- | mov dword [RD+4], LJ_TNIL
-- | add RD, 8
-- | sub RB, 1
-- | jnz <3
-- | jmp <2
-- break;
--
- case BC_FUNCC:
- case BC_FUNCCW:
-- | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
-- | mov CFUNC:RB, [BASE-8]
-- | mov KBASEa, CFUNC:RB->f
-- | mov L:RB, SAVE_L
-- | lea RD, [BASE+NARGS:RD*8-8]
-- | mov L:RB->base, BASE
-- | lea RA, [RD+8*LUA_MINSTACK]
-- | cmp RA, L:RB->maxstack
-- | mov L:RB->top, RD
-- if (op == BC_FUNCC) {
-- |.if X64
-- | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-- |.else
-- | mov ARG1, L:RB
-- |.endif
-- } else {
-- |.if X64
-- | mov CARG2, KBASEa
-- | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-- |.else
-- | mov ARG2, KBASEa
-- | mov ARG1, L:RB
-- |.endif
-- }
-- | ja ->vm_growstack_c // Need to grow stack.
-- | set_vmstate C
-- if (op == BC_FUNCC) {
-- | call KBASEa // (lua_State *L)
-- } else {
-- | // (lua_State *L, lua_CFunction f)
-- | call aword [DISPATCH+DISPATCH_GL(wrapf)]
-- }
-- | // nresults returned in eax (RD).
-- | mov BASE, L:RB->base
-- | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-- | set_vmstate INTERP
-- | lea RA, [BASE+RD*8]
-- | neg RA
-- | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
-- | mov PC, [BASE-4] // Fetch PC of caller.
-- | jmp ->vm_returnc
-+ | lg r0, 0(r0) // Not implemented, seg fault.
- break;
-
- /* ---------------------------------------------------------------------- */
-@@ -5241,314 +702,4 @@ static int build_backend(BuildCtx *ctx)
- /* Emit pseudo frame-info for all assembler functions. */
- static void emit_asm_debug(BuildCtx *ctx)
- {
-- int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
--#if LJ_64
--#define SZPTR "8"
--#define BSZPTR "3"
--#define REG_SP "0x7"
--#define REG_RA "0x10"
--#else
--#define SZPTR "4"
--#define BSZPTR "2"
--#define REG_SP "0x4"
--#define REG_RA "0x8"
--#endif
-- switch (ctx->mode) {
-- case BUILD_elfasm:
-- fprintf(ctx->fp, "\t.section
.debug_frame,\"\",@progbits\n");
-- fprintf(ctx->fp,
-- ".Lframe0:\n"
-- "\t.long .LECIE0-.LSCIE0\n"
-- ".LSCIE0:\n"
-- "\t.long 0xffffffff\n"
-- "\t.byte 0x1\n"
-- "\t.string \"\"\n"
-- "\t.uleb128 0x1\n"
-- "\t.sleb128 -" SZPTR "\n"
-- "\t.byte " REG_RA "\n"
-- "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR
"\n"
-- "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-- "\t.align " SZPTR "\n"
-- ".LECIE0:\n\n");
-- fprintf(ctx->fp,
-- ".LSFDE0:\n"
-- "\t.long .LEFDE0-.LASFDE0\n"
-- ".LASFDE0:\n"
-- "\t.long .Lframe0\n"
--#if LJ_64
-- "\t.quad .Lbegin\n"
-- "\t.quad %d\n"
-- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-- "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
-- "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
--#if LJ_NO_UNWIND
-- "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
-- "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
--#endif
--#else
-- "\t.long .Lbegin\n"
-- "\t.long %d\n"
-- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-- "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
-- "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
-- "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
--#endif
-- "\t.align " SZPTR "\n"
-- ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
--#if LJ_HASFFI
-- fprintf(ctx->fp,
-- ".LSFDE1:\n"
-- "\t.long .LEFDE1-.LASFDE1\n"
-- ".LASFDE1:\n"
-- "\t.long .Lframe0\n"
--#if LJ_64
-- "\t.quad lj_vm_ffi_call\n"
-- "\t.quad %d\n"
-- "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
-- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-- "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
-- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
--#else
-- "\t.long lj_vm_ffi_call\n"
-- "\t.long %d\n"
-- "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
-- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-- "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
-- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
--#endif
-- "\t.align " SZPTR "\n"
-- ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
--#endif
--#if !LJ_NO_UNWIND
--#if (defined(__sun__) && defined(__svr4__))
--#if LJ_64
-- fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
--#else
-- fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
--#endif
--#else
-- fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
--#endif
-- fprintf(ctx->fp,
-- ".Lframe1:\n"
-- "\t.long .LECIE1-.LSCIE1\n"
-- ".LSCIE1:\n"
-- "\t.long 0\n"
-- "\t.byte 0x1\n"
-- "\t.string \"zPR\"\n"
-- "\t.uleb128 0x1\n"
-- "\t.sleb128 -" SZPTR "\n"
-- "\t.byte " REG_RA "\n"
-- "\t.uleb128 6\n" /* augmentation length */
-- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-- "\t.long lj_err_unwind_dwarf-.\n"
-- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-- "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-- "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-- "\t.align " SZPTR "\n"
-- ".LECIE1:\n\n");
-- fprintf(ctx->fp,
-- ".LSFDE2:\n"
-- "\t.long .LEFDE2-.LASFDE2\n"
-- ".LASFDE2:\n"
-- "\t.long .LASFDE2-.Lframe1\n"
-- "\t.long .Lbegin-.\n"
-- "\t.long %d\n"
-- "\t.uleb128 0\n" /* augmentation length */
-- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
--#if LJ_64
-- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-- "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
-- "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
--#else
-- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-- "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
-- "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
-- "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
--#endif
-- "\t.align " SZPTR "\n"
-- ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
--#if LJ_HASFFI
-- fprintf(ctx->fp,
-- ".Lframe2:\n"
-- "\t.long .LECIE2-.LSCIE2\n"
-- ".LSCIE2:\n"
-- "\t.long 0\n"
-- "\t.byte 0x1\n"
-- "\t.string \"zR\"\n"
-- "\t.uleb128 0x1\n"
-- "\t.sleb128 -" SZPTR "\n"
-- "\t.byte " REG_RA "\n"
-- "\t.uleb128 1\n" /* augmentation length */
-- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-- "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-- "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-- "\t.align " SZPTR "\n"
-- ".LECIE2:\n\n");
-- fprintf(ctx->fp,
-- ".LSFDE3:\n"
-- "\t.long .LEFDE3-.LASFDE3\n"
-- ".LASFDE3:\n"
-- "\t.long .LASFDE3-.Lframe2\n"
-- "\t.long lj_vm_ffi_call-.\n"
-- "\t.long %d\n"
-- "\t.uleb128 0\n" /* augmentation length */
--#if LJ_64
-- "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
-- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-- "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
-- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
--#else
-- "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
-- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-- "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
-- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
--#endif
-- "\t.align " SZPTR "\n"
-- ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
--#endif
--#endif
-- break;
--#if !LJ_NO_UNWIND
-- /* Mental note: never let Apple design an assembler.
-- ** Or a linker. Or a plastic case. But I digress.
-- */
-- case BUILD_machasm: {
--#if LJ_HASFFI
-- int fcsize = 0;
--#endif
-- int i;
-- fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
-- fprintf(ctx->fp,
-- "EH_frame1:\n"
-- "\t.set L$set$x,LECIEX-LSCIEX\n"
-- "\t.long L$set$x\n"
-- "LSCIEX:\n"
-- "\t.long 0\n"
-- "\t.byte 0x1\n"
-- "\t.ascii \"zPR\\0\"\n"
-- "\t.byte 0x1\n"
-- "\t.byte 128-" SZPTR "\n"
-- "\t.byte " REG_RA "\n"
-- "\t.byte 6\n" /* augmentation length */
-- "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
--#if LJ_64
-- "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
-- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-- "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
--#else
-- "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
-- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-- "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
--#endif
-- "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-- "\t.align " BSZPTR "\n"
-- "LECIEX:\n\n");
-- for (i = 0; i < ctx->nsym; i++) {
-- const char *name = ctx->sym[i].name;
-- int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
-- if (size == 0) continue;
--#if LJ_HASFFI
-- if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
--#endif
-- fprintf(ctx->fp,
-- "%s.eh:\n"
-- "LSFDE%d:\n"
-- "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
-- "\t.long L$set$%d\n"
-- "LASFDE%d:\n"
-- "\t.long LASFDE%d-EH_frame1\n"
-- "\t.long %s-.\n"
-- "\t.long %d\n"
-- "\t.byte 0\n" /* augmentation length */
-- "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
--#if LJ_64
-- "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
-- "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
-- "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
-- "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
--#else
-- "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
-- "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
-- "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
-- "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
--#endif
-- "\t.align " BSZPTR "\n"
-- "LEFDE%d:\n\n",
-- name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
-- }
--#if LJ_HASFFI
-- if (fcsize) {
-- fprintf(ctx->fp,
-- "EH_frame2:\n"
-- "\t.set L$set$y,LECIEY-LSCIEY\n"
-- "\t.long L$set$y\n"
-- "LSCIEY:\n"
-- "\t.long 0\n"
-- "\t.byte 0x1\n"
-- "\t.ascii \"zR\\0\"\n"
-- "\t.byte 0x1\n"
-- "\t.byte 128-" SZPTR "\n"
-- "\t.byte " REG_RA "\n"
-- "\t.byte 1\n" /* augmentation length */
--#if LJ_64
-- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-- "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
--#else
-- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-- "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
--#endif
-- "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-- "\t.align " BSZPTR "\n"
-- "LECIEY:\n\n");
-- fprintf(ctx->fp,
-- "_lj_vm_ffi_call.eh:\n"
-- "LSFDEY:\n"
-- "\t.set L$set$yy,LEFDEY-LASFDEY\n"
-- "\t.long L$set$yy\n"
-- "LASFDEY:\n"
-- "\t.long LASFDEY-EH_frame2\n"
-- "\t.long _lj_vm_ffi_call-.\n"
-- "\t.long %d\n"
-- "\t.byte 0\n" /* augmentation length */
--#if LJ_64
-- "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
-- "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
-- "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
-- "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
--#else
-- "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
-- "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
-- "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */
-- "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
--#endif
-- "\t.align " BSZPTR "\n"
-- "LEFDEY:\n\n", fcsize);
-- }
--#endif
--#if !LJ_64
-- fprintf(ctx->fp,
-- "\t.non_lazy_symbol_pointer\n"
-- "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
-- ".indirect_symbol _lj_err_unwind_dwarf\n"
-- ".long 0\n\n");
-- fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
-- {
-- const char *const *xn;
-- for (xn = ctx->extnames; *xn; xn++)
-- if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
-- fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
-- }
--#endif
-- fprintf(ctx->fp, ".subsections_via_symbols\n");
-- }
-- break;
--#endif
-- default: /* Difficult for other modes. */
-- break;
-- }
- }
---
-2.20.1
-
-
-From aca15414847a675462c32d3c7cccce6cad53c855 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Mon, 12 Dec 2016 11:21:42 +0530
-Subject: [PATCH 100/247] Correct the range of parameter, and merge the two
- case
-
----
- dynasm/dasm_s390x.h | 5 +----
- 1 file changed, 1 insertion(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 5be8e8a..c1de357 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -290,9 +290,6 @@ void dasm_put(Dst_DECL, int start, ...)
- b[pos++] = n;
- break;
- case DASM_LEN4HR:
-- CK(n >= 1 && n <= 128, RANGE_I);
-- b[pos++] = n;
-- break;
- case DASM_LEN4LR:
- CK(n >= 1 && n <= 128, RANGE_I);
- b[pos++] = n;
-@@ -478,7 +475,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- cp[-1] |= (n - 1) & 0xff;
- break;
- case DASM_LEN4HR:
-- cp[-1] |= (n - 1) & 0xf0;
-+ cp[-1] |= ((n - 1) << 4) & 0xf0;
- break;
- case DASM_LEN4LR:
- cp[-1] |= (n - 1) & 0x0f;
---
-2.20.1
-
-
-From 251dde650620eb21c9700794a2db534685b29d40 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 12 Dec 2016 14:38:55 -0500
-Subject: [PATCH 101/247] Add lhi instruction and fix immediate parsing.
-
-We were reading immediate values as hexadecimal values, really we
-want the default to be decimal unless the immediate has a '0x' prefix.
----
- dynasm/Examples/test_z_inst.c | 4 ++--
- dynasm/dasm_s390x.lua | 31 +++++++++++++++++++------------
- 2 files changed, 21 insertions(+), 14 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index a8895c0..d093906 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -190,8 +190,8 @@ static void save(dasm_State *state)
- |.endmacro
- |
- | saveregs
-- | lgfi r7, 10 // 16
-- | lgfi r8, 20 // 32
-+ | lgfi r7, 0x10 // 16
-+ | lgfi r8, 0x20 // 32
- | agr r2, r3
- | agr r7, r8
- | msgr r2, r7
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 08d44a3..d3ed723 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -459,28 +459,34 @@ local function parse_mem_l2b(arg,high_l)
- return dval, lval, parse_reg(b), dact, lact
- end
-
--local function parse_imm(arg)
-- local imm_val = tonumber(arg,16)
-+local function parse_imm32(imm)
-+ local imm_val = tonumber(imm)
- if imm_val then
- if not is_int32(imm_val) then
-- werror("Immediate value out of range: ", imm_val)
-+ werror("immediate value out of range: ", imm_val)
- end
-- wputhw(band(shr(imm_val, 16), 0xffff));
-- wputhw(band(imm_val, 0xffff));
-+ wputhw(band(shr(imm_val, 16), 0xffff))
-+ wputhw(band(imm_val, 0xffff))
-+ elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
-+ match(imm, "^([%w_]+):(r1?[0-9])$") then
-+ werror("expected immediate operand, got register")
- else
-- waction("IMM32", nil, arg) -- if we get label
-+ waction("IMM32", nil, imm) -- if we get label
- end
- end
-
--local function parse_imm16(arg)
-- local imm_val = tonumber(arg,16)
-+local function parse_imm16(imm)
-+ local imm_val = tonumber(imm)
- if imm_val then
- if not is_int16(imm_val) then
-- werror("Immediate value out of range: ", imm_val)
-+ werror("immediate value out of range: ", imm_val)
- end
-- wputhw(imm_val)
-+ wputhw(band(imm_val, 0xffff))
-+ elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
-+ match(imm, "^([%w_]+):(r1?[0-9])$") then
-+ werror("expected immediate operand, got register")
- else
-- waction("IMM16", nil, arg)
-+ waction("IMM16", nil, imm)
- end
- end
-
-@@ -842,6 +848,7 @@ map_op = {
- lgh_2 = "e30000000015l",
- lghr_2 = "0000b9070000h",
- lhh_2 = "e300000000c4l",
-+ lhi_2 = "0000a7080000i",
- lhrl_2 = "c40500000000o",
- lghrl_2 = "c40400000000o",
- lfh_2 = "e300000000cal",
-@@ -1161,7 +1168,7 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "n" then
- op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
-- parse_imm(params[2])
-+ parse_imm32(params[2])
- elseif p == "o" then
- op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
---
-2.20.1
-
-
-From 014a44471afb3beeba7dca6f34091d70e9a9df02 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 12 Dec 2016 17:17:34 -0500
-Subject: [PATCH 102/247] Add partial implementation of vm_cpcall.
-
-Currently works if the call returns 0. Haven't yet written the code
-needed to handle the non-zero case.
----
- dynasm/dasm_s390x.lua | 2 ++
- src/vm_s390x.dasc | 39 +++++++++++++++++++++++++++++++++++++--
- 2 files changed, 39 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index d3ed723..60d61bd 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -651,6 +651,7 @@ map_op = {
- chlr_2 = "0000b9dd0000h",
- cfi_2 = "c20d00000000n",
- cgfi_2 = "c20c00000000n",
-+ cghi_2 = "0000a70f0000i",
- cih_2 = "cc0d00000000n",
- cl_2 = "000055000000j",
- clr_2 = "000000001500g",
-@@ -782,6 +783,7 @@ map_op = {
- lgr_2 = "0000b9040000h",
- lgf_2 = "e30000000014l",
- lgfr_2 = "0000b9140000h",
-+ lghi_2 = "0000a7090000i",
- lxr_2 = "0000b3650000h",
- ld_2 = "000068000000j",
- ldr_2 = "000000002800g",
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index bdd063d..88fef7d 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -63,6 +63,7 @@
- |
- |// Register save area.
- |.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
-+|.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before stack frame is allocated).
- |
- |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
- |.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
-@@ -88,8 +89,9 @@
- |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
- |
- |.macro saveregs
-+| stmg r6, r15, SAVE_GPRS_P
- | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
--| stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
-+| // TODO: save backchain?
- | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
- | std f9, SAVE_FPR9
- | std f10, SAVE_FPR10
-@@ -110,7 +112,6 @@
- | ld f14, SAVE_FPR14
- | ld f15, SAVE_FPR15
- | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
--|// br r14 to return?
- |.endmacro
- |
- |// Type definitions. Some of these are only used for documentation.
-@@ -175,6 +176,10 @@
- | ins_NEXT
- | .endmacro
- |.endif
-+|
-+|// Assumes DISPATCH is relative to GL.
-+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
-+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
-
- /* Generate subroutines used by opcodes and other parts of the VM. */
- /* The .code_sub section should be last to help static branch prediction. */
-@@ -193,8 +198,13 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_return:
- |
- |->vm_leave_cp:
-+ | lg RA, SAVE_CFRAME // Restore previous C frame.
-+ | stg RA, L:LREG->cframe
-+ | lghi CRET1, 0 // Ok return status for vm_pcall.
- |
- |->vm_leave_unw:
-+ | restoreregs
-+ | br r14
- |
- |->vm_unwind_yield:
- |
-@@ -230,6 +240,31 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_call_dispatch_f:
- |
- |->vm_cpcall: // Setup protected C frame, call C.
-+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
-+ | saveregs
-+ | lgr LREG, CARG1
-+ | stg LREG, SAVE_L
-+ | stg LREG, SAVE_PC // Any value outside of bytecode is ok.
-+ |
-+ | lg KBASE, L:LREG->stack // Compute -savestack(L, L->top).
-+ | sg KBASE, L:LREG->top
-+ | lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
-+ | lghi RA, 0
-+ | stg RA, SAVE_ERRF // No error function.
-+ | stg KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
-+ | aghi DISPATCH, GG_G2DISP
-+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
-+ |
-+ | lg KBASE, L:LREG->cframe // Add our C frame to cframe chain.
-+ | stg KBASE, SAVE_CFRAME
-+ | stg sp, L:LREG->cframe
-+ | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
-+ |
-+ | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
-+ | // TValue * (new base) or NULL returned in r2 (CRET1/).
-+ | cghi CRET1, 0
-+ | je ->vm_leave_cp // No base? Just remove C frame.
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Metamethod handling ------------------------------------------------
---
-2.20.1
-
-
-From dedc278e982aa0ddebea3fe182855c7b7efce2cc Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 13 Dec 2016 11:26:53 -0500
-Subject: [PATCH 103/247] Add support for SIL instructions in DynASM.
-
----
- dynasm/Examples/test_z_inst.c | 19 ++++++++++++++++++-
- dynasm/dasm_s390x.lua | 10 ++++++++++
- 2 files changed, 28 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index d093906..c09ae08 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -308,6 +308,22 @@ static void type(dasm_State *state) {
- | br r14
- }
-
-+static void sil(dasm_State *state) {
-+ dasm_State **Dst = &state;
-+
-+ | lay sp, -16(sp)
-+ | xc 0(16, sp), 0(sp)
-+ | mvghi 0(sp), 5
-+ | mvhi 8(sp), 7
-+ | mvhhi 12(sp), 11
-+ | lghi r2, 0
-+ | ag r2, 0(sp) // r2 += 5
-+ | a r2, 8(sp) // r2 += 7
-+ | ah r2, 12(sp) // r2 += 11
-+ | la sp, 16(sp)
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -335,7 +351,8 @@ test_table test[] = {
- // { 2,4, load_test, 4,"load_test"},
- {-1, 0, ssa, 65535<<8, "ssa"},
- {-1, 0, ssa_act, 65535<<8, "ssa_act"},
-- {27, 0, type, 27, "type"}
-+ {27, 0, type, 27, "type"},
-+ { 0, 0, sil, 23, "sil"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 60d61bd..e3c8f26 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -971,6 +971,9 @@ map_op = {
- msfi_2 = "c20100000000n",
- msgfi_2 = "c20000000000n",
- maer_3 = "0000b32e0000r",
-+ mvhhi_2 = "e54400000000SIL",
-+ mvhi_2 = "e54c00000000SIL",
-+ mvghi_2 = "e54800000000SIL",
- o_2 = "000056000000j",
- or_2 = "000000001600g",
- oy_2 = "e30000000056l",
-@@ -1219,6 +1222,13 @@ local function parse_template(params, template, nparams, pos)
- if d1a then d1a() end
- wputhw(op2)
- if d2a then d2a() end
-+ elseif p == "SIL" then
-+ wputhw(op0)
-+ local d, b, a = parse_mem_b(params[1])
-+ op1 = op1 + shl(b, 12) + d
-+ wputhw(op1)
-+ if a then a() end
-+ parse_imm16(params[2])
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 4faea21ed6c618601255685a0c2ed068c280546c Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 13 Dec 2016 17:01:20 -0500
-Subject: [PATCH 104/247] Add more RI-a (register-immediate) instructions.
-
----
- dynasm/dasm_s390x.h | 2 +-
- dynasm/dasm_s390x.lua | 26 +++++++++++++++++++++++++-
- 2 files changed, 26 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index c1de357..cebce22 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -269,7 +269,7 @@ void dasm_put(Dst_DECL, int start, ...)
- b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
- case DASM_IMM16:
-- CK(((short)n) == n, RANGE_I); /* TODO: unsigned immediates? */
-+ CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the right way to handle unsigned immediates? */
- ofs += 2;
- b[pos++] = n;
- break;
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index e3c8f26..e0fb916 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -299,6 +299,10 @@ local function is_int32(num)
- return -2147483648 <= num and num < 2147483648
- end
-
-+local function is_uint16(num)
-+ return 0 <= num and num < 0xffff
-+end
-+
- local function is_int16(num)
- return -32768 <= num and num < 32768
- end
-@@ -478,7 +482,7 @@ end
- local function parse_imm16(imm)
- local imm_val = tonumber(imm)
- if imm_val then
-- if not is_int16(imm_val) then
-+ if not is_int16(imm_val) and not is_uint16(imm_val) then
- werror("immediate value out of range: ", imm_val)
- end
- wputhw(band(imm_val, 0xffff))
-@@ -581,7 +585,11 @@ map_op = {
- ng_2 = "e30000000080l",
- ngr_2 = "0000b9800000h",
- nihf_2 = "c00a00000000n",
-+ nihh_2 = "0000a5040000i",
-+ nihl_2 = "0000a5050000i",
- nilf_2 = "c00b00000000n",
-+ nilh_2 = "0000a5060000i",
-+ nill_2 = "0000a5070000i",
- bal_2 = "000045000000j",
- balr_2 = "000000000500g",
- bas_2 = "00004d000000j",
-@@ -772,7 +780,11 @@ map_op = {
- icm_3 = "0000bf000000r",
- icmy_3 = "eb0000000081t",
- iihf_2 = "c00800000000n",
-+ iihh_2 = "0000a5000000i",
-+ iihl_2 = "0000a5010000i",
- iilf_2 = "c00900000000n",
-+ iilh_2 = "0000a5020000i",
-+ iill_2 = "0000a5030000i",
- ipm_2 = "0000b2220000h",
- iske_2 = "0000b2290000h",
- ivsk_2 = "0000b2230000h",
-@@ -876,7 +888,11 @@ map_op = {
- llhrl_2 = "c40200000000o",
- llghrl_2 = "c40600000000o",
- llihf_2 = "c00e00000000n",
-+ llihh_2 = "0000a50c0000i",
-+ llihl_2 = "0000a50d0000i",
- llilf_2 = "c00f00000000n",
-+ llilh_2 = "0000a50e0000i",
-+ llill_2 = "0000a50f0000i",
- llgfrl_2 = "c40e00000000o",
- llgt_2 = "e30000000017l",
- llgtr_2 = "0000b9170000h",
-@@ -980,7 +996,11 @@ map_op = {
- og_2 = "e30000000081l",
- ogr_2 = "0000b9810000h",
- oihf_2 = "c00c00000000n",
-+ oihh_2 = "0000a5080000i",
-+ oihl_2 = "0000a5090000i",
- oilf_2 = "c00d00000000n",
-+ oilh_2 = "0000a50a0000i",
-+ oill_2 = "0000a50b0000i",
- pgin_2 = "0000b22e0000h",
- pgout_2 = "0000b22f0000h",
- pcc_2 = "0000b92c0000h",
-@@ -1099,6 +1119,10 @@ map_op = {
- swr_2 = "000000002f00g",
- tar_2 = "0000b24c0000h",
- tb_2 = "0000b22c0000h",
-+ tmhh_2 = "0000a7020000i",
-+ tmhl_2 = "0000a7030000i",
-+ tmlh_2 = "0000a7000000i",
-+ tmll_2 = "0000a7010000i",
- trace_3 = "000099000000q",
- tracg_3 = "eb000000000fs",
- tre_2 = "0000b2a50000h",
---
-2.20.1
-
-
-From 4fcabd2fe3dd38134a94be3a3adc1208226a4435 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 13 Dec 2016 18:31:43 -0500
-Subject: [PATCH 105/247] Add more interpreter code.
-
-Compilation is currently broken, a label is missing.
----
- src/vm_s390x.dasc | 323 ++++++++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 309 insertions(+), 14 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 88fef7d..a1a4d76 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -34,10 +34,11 @@
- |.define PC, r9 // Next PC.
- |.define DISPATCH, r10 // Opcode dispatch table.
- |.define LREG, r11 // Register holding lua_State (also in SAVE_L).
-+|.define ITYPE, r13 //
- |
- |// The following temporaries are not saved across C calls, except for RD.
--|.define RA, r0 // Cannot be dereferenced.
--|.define RB, r1
-+|.define RA, r1 // Cannot be dereferenced.
-+|.define RB, r12
- |.define RC, r5 // Overlaps CARG4.
- |.define RD, r6 // Overlaps CARG5. Callee-saved.
- |
-@@ -56,7 +57,7 @@
- |.define CRET1, r2
- |
- |.define OP, r2
--|.define TMP1, r3
-+|.define TMP1, r14
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
-@@ -144,20 +145,20 @@
- |// Instruction decode+dispatch.
- | // TODO: tune this, right now we always decode RA-D even if they aren't used.
- |.macro ins_NEXT
--| l RD, (PC)
-+| llgf RD, 0(PC)
- | // 32 63
- | // [ B | C | A | OP ]
- | // [ D | A | OP ]
--| llhr RA, RD
--| srl RA, #8
--| llcr OP, RD
--| srl RD, #16
--| lr RB, RD
--| srl RB, #8
--| llcr RC, RD
-+| llghr RA, RD
-+| srlg RA, RA, 8(r0)
-+| llgcr OP, RD
-+| srlg RD, RD, 16(r0)
-+| lgr RB, RD
-+| srlg RB, RB, 8(r0)
-+| llgcr RC, RD
- | la PC, 4(PC)
- | llgfr TMP1, OP
--| sll TMP1, #3 // TMP1=OP*8
-+| sllg TMP1, TMP1, 3(r0) // TMP1=OP*8
- | b 0(TMP1, DISPATCH)
- |.endmacro
- |
-@@ -177,9 +178,89 @@
- | .endmacro
- |.endif
- |
-+|// Call decode and dispatch.
-+|.macro ins_callt
-+| // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
-+| lg PC, LFUNC:RB->pc
-+| llgf RA, 0(PC) // TODO: combine loads?
-+| llgcr OP, RA
-+| sllg TMP1, OP, 3(r0)
-+| la PC, 4(PC)
-+| lg TMP1, 0(TMP1, DISPATCH)
-+| br TMP1
-+|.endmacro
-+|
-+|.macro ins_call
-+| // BASE = new base, RB = LFUNC, RD = nargs+1
-+| stg PC, -8(BASE)
-+| ins_callt
-+|.endmacro
-+|
- |// Assumes DISPATCH is relative to GL.
- #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
- #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
-+|
-+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
-+|
-+|//-----------------------------------------------------------------------
-+|
-+|// Macros to clear or set tags.
-+|.macro cleartp, reg; sllg reg, reg, 17(r0); srlg reg, reg, 17(r0); .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
-+|.macro settp, reg, tp
-+| oihh reg, ((tp>>1) &0xffff)
-+| oihl reg, ((tp<<15)&0x8000)
-+|.endmacro
-+|.macro setint, reg
-+| settp reg, LJ_TISNUM
-+|.endmacro
-+|
-+|// Macros to test operand types.
-+|.macro checktp_nc, reg, tp, target
-+| srag ITYPE, reg, 47(r0)
-+| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
-+| jne target
-+|.endmacro
-+|.macro checktp, reg, tp, target
-+| srag ITYPE, reg, 47(r0)
-+| cleartp reg
-+| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
-+| jne target
-+|.endmacro
-+|.macro checktptp, src, tp, target
-+| srag ITYPE, src, 47(r0)
-+| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
-+| jne target
-+|.endmacro
-+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
-+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
-+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
-+|
-+|.macro checknumx, reg, target, jump
-+| srag ITYPE, reg, 47(r0)
-+| cghi ITYPE, LJ_TISNUM // Sign extend LJ_TISNUM tp from 16- to 64-bits.
-+| jump target
-+|.endmacro
-+|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
-+|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
-+|.macro checknum, reg, target; checknumx reg, target, jhe; .endmacro
-+|.macro checknumtp, src, target; checknumx src, target, jhe; .endmacro
-+|.macro checknumber, src, target; checknumx src, target, jh; .endmacro
-+|
-+|.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE == ~(1<<47)
-+|.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE == ~(2<<47)
-+|
-+|.define PC_OP, -4(PC)
-+|.define PC_RA, -3(PC)
-+|.define PC_RB, -1(PC)
-+|.define PC_RC, -2(PC)
-+|.define PC_RD, -2(PC)
-+|
-+|// Set current VM state.
-+|.macro set_vmstate, st
-+| lghi TMP1, ~LJ_VMST_..st
-+| stg TMP1, DISPATCH_GL(vmstate)(DISPATCH)
-+|.endmacro
-+|
-
- /* Generate subroutines used by opcodes and other parts of the VM. */
- /* The .code_sub section should be last to help static branch prediction. */
-@@ -192,10 +273,58 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_returnp:
-+ | cghi PC, 0
-+ | je ->cont_dispatch
-+ |
-+ | // Return from pcall or xpcall fast func.
-+ | nill PC, -7
-+ | sgr BASE, PC // Restore caller base.
-+ | lay RA, -8(RA, PC) // Rebase RA and prepend one result.
-+ | lg PC, -8(BASE) // Fetch PC of previous frame.
-+ | // Prepending may overwrite the pcall frame, so do it at the end.
-+ | load_true ITYPE
-+ | stg ITYPE, 0(RA, BASE) // Prepend true to results.
- |
- |->vm_returnc:
-+ | ahi RD, 1 // RD = nresults+1
-+ | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
-+ | stg RD, SAVE_MULTRES
-+ | tmll PC, FRAME_TYPE
-+ | je ->BC_RET_Z // Handle regular return to Lua.
- |
- |->vm_return:
-+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
-+ | lghi TMP1, FRAME_C
-+ | xgr PC, TMP1
-+ | tmll PC, FRAME_TYPE
-+ | jne ->vm_returnp
-+ |
-+ | // Return to C.
-+ | set_vmstate C
-+ | nill PC, -8
-+ | sgr PC, BASE
-+ | lcgr PC, PC // Previous base = BASE - delta.
-+ |
-+ | ahi RD, -1
-+ | je >2
-+ |1: // Move results down.
-+ | lg RB, 0(BASE, RA)
-+ | stg RB, -16(BASE)
-+ | la BASE, 8(BASE)
-+ | ahi RD, -1
-+ | jne <1
-+ |2:
-+ | lg L:RB, SAVE_L
-+ | stg PC, L:RB->base
-+ |3:
-+ | lg RD, SAVE_MULTRES
-+ | lg RA, SAVE_NRES // RA = wanted nresults+1
-+ |4:
-+ | cgr RA, RD
-+ | jne >6 // More/less results wanted?
-+ |5:
-+ | lay BASE, -16(BASE)
-+ | stg BASE, L:RB->top
- |
- |->vm_leave_cp:
- | lg RA, SAVE_CFRAME // Restore previous C frame.
-@@ -206,7 +335,40 @@ static void build_subroutines(BuildCtx *ctx)
- | restoreregs
- | br r14
- |
-+ |6:
-+ | jl >7 // Less results wanted?
-+ | // More results wanted. Check stack size and fill up results with nil.
-+ | cg BASE, L:RB->maxstack
-+ | jh >8
-+ | lghi TMP1, LJ_TNIL
-+ | stg TMP1, -16(BASE)
-+ | la BASE, 8(BASE)
-+ | aghi RD, 1
-+ | j <4
-+ |
-+ |7: // Fewer results wanted.
-+ | cghi RA, 0
-+ | je <5 // But check for LUA_MULTRET+1.
-+ | sgr RA, RD // Negative result!
-+ | sllg TMP1, RA, 3(r0)
-+ | lay BASE, 0(TMP1, BASE) // Correct top.
-+ | j <5
-+ |
-+ |8: // Corner case: need to grow stack for filling up results.
-+ | // This can happen if:
-+ | // - A C function grows the stack (a lot).
-+ | // - The GC shrinks the stack in between.
-+ | // - A return back from a lua_call() with (high) nresults adjustment.
-+ | stg BASE, L:RB->top // Save current top held in BASE (yes).
-+ | stg RD, SAVE_MULTRES // Need to fill only remainder with nil.
-+ | lgr CARG2, RA
-+ | lgr CARG1, L:RB
-+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
-+ | lg BASE, L:RB->top // Need the (realloced) L->top in BASE.
-+ | j <3
-+ |
- |->vm_unwind_yield:
-+ | stg r0, 0(r0)
- |
- |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
- |->vm_unwind_c_eh: // Landing pad for external unwinder.
-@@ -219,6 +381,7 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_growstack_c: // Grow stack for C function.
-+ | stg r0, 0(r0)
- |
- |->vm_growstack_v: // Grow stack for vararg Lua function.
- |
-@@ -235,9 +398,26 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_call: // Setup C frame and enter VM.
- |
-+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
-+ | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
-+ | set_vmstate INTERP
-+ | lg BASE, L:LREG->base // BASE = old base (used in vmeta_call).
-+ | agr PC, RA
-+ | sgr PC, BASE // PC = frame delta + frame type
-+ |
-+ | lg RD, L:LREG->top
-+ | sgr RD, RA
-+ | srlg NARGS:RD, NARGS:RD, 3(r0) // TODO: support '3' on its own in dynasm.
-+ | aghi NARGS:RD, 1 // RD = nargs+1
-+ |
- |->vm_call_dispatch:
-+ | lg LFUNC:RB, -16(RA)
-+ | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
- |
- |->vm_call_dispatch_f:
-+ | lgr BASE, RA
-+ | ins_call
-+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
- |
- |->vm_cpcall: // Setup protected C frame, call C.
- | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
-@@ -264,7 +444,9 @@ static void build_subroutines(BuildCtx *ctx)
- | // TValue * (new base) or NULL returned in r2 (CRET1/).
- | cghi CRET1, 0
- | je ->vm_leave_cp // No base? Just remove C frame.
-- | stg r0, 0(r0)
-+ | lgr RA, CRET1
-+ | lghi PC, FRAME_CP
-+ | j <2 // Else continue with the call.
- |
- |//-----------------------------------------------------------------------
- |//-- Metamethod handling ------------------------------------------------
-@@ -690,7 +872,84 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISNEXT:
- case BC_VARG:
- case BC_RETM:
-+ | stg r0, 0(r0) // not implemented
-+ break;
-+
- case BC_RET: case BC_RET0: case BC_RET1:
-+ | ins_AD // RA = results, RD = nresults+1
-+ if (op != BC_RET0) {
-+ | sllg RA, RA, 3(r0)
-+ }
-+ |1:
-+ | lg PC, -8(BASE)
-+ | stg RD, SAVE_MULTRES // Save nresults+1.
-+ | tmll PC, FRAME_TYPE // Check frame type marker.
-+ | jne >7 // Not returning to a fixarg Lua func?
-+ switch (op) {
-+ case BC_RET:
-+ |->BC_RET_Z:
-+ | lgr KBASE, BASE // Use KBASE for result move.
-+ | aghi RD, -1
-+ | je >3
-+ |2: // Move results down.
-+ | lg RB, 0(KBASE, RA)
-+ | stg RB, -16(KBASE)
-+ | la KBASE, 8(KBASE)
-+ | // TODO: replace with brctg RD, <2 once supported.
-+ | aghi RD, -1
-+ | jne <2
-+ |3:
-+ | lg RD, SAVE_MULTRES // Note: MULTRES may be >255.
-+ | llgc RB, PC_RB
-+ |5:
-+ | cgr RB, RD // More results expected?
-+ | jh >6
-+ break;
-+ case BC_RET1:
-+ | lg RB, 0(BASE, RA)
-+ | stg RB, -16(BASE)
-+ /* fallthrough */
-+ case BC_RET0:
-+ |5:
-+ | llgc TMP1, PC_RB
-+ | cgr TMP1, RD
-+ | jh >6
-+ default:
-+ break;
-+ }
-+ | llgc RA, PC_RA
-+ | lcgr RA, RA
-+ | sllg RA, RA, 3(r0)
-+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
-+ | lg LFUNC:KBASE, -16(BASE)
-+ | cleartp LFUNC:KBASE
-+ | lg KBASE, LFUNC:KBASE->pc
-+ | lg KBASE, PC2PROTO(k)(KBASE)
-+ | ins_next
-+ |
-+ |6: // Fill up results with nil.
-+ | lghi TMP1, LJ_TNIL
-+ if (op == BC_RET) {
-+ | stg TMP1, -16(KBASE) // Note: relies on shifted base.
-+ | la KBASE, 8(KBASE)
-+ } else {
-+ | sllg RC, RD, 3(r0) // RC used as temp.
-+ | stg TMP1, -24(RC, BASE)
-+ }
-+ | la RD, 1(RD)
-+ | j <5
-+ |
-+ |7: // Non-standard return case.
-+ | lay RB, -FRAME_VARG(PC)
-+ | tmll RB, FRAME_TYPEP
-+ | jne ->vm_return
-+ | // Return from vararg function: relocate BASE down and RA up.
-+ | sgr BASE, RB
-+ if (op != BC_RET0) {
-+ | agr RA, RB
-+ }
-+ | j <1
-+ break;
- case BC_FORL:
- case BC_JFORI:
- case BC_JFORL:
-@@ -709,9 +968,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_IFUNCF:
- case BC_JFUNCV:
- case BC_IFUNCV:
-+ | lg r0, 0(r0) // Not implemented, seg fault.
-+ break;
-+
- case BC_FUNCC:
- case BC_FUNCCW:
-- | lg r0, 0(r0) // Not implemented, seg fault.
-+ | ins_AD // BASE = new base, RD = nargs+1
-+ | lg CFUNC:RB, -16(BASE)
-+ | cleartp CFUNC:RB
-+ | lg KBASE, CFUNC:RB->f
-+ | lg L:RB, SAVE_L
-+ | sllg RD, NARGS:RD, 3(r0)
-+ | lay RD, -8(RD,BASE)
-+ | stg BASE, L:RB->base
-+ | lay RA, (8*LUA_MINSTACK)(RD)
-+ | cg RA, L:RB->maxstack
-+ | stg RD, L:RB->top
-+ | lgr CARG1, L:RB // Caveat: CARG1 may be RA.
-+ if (op != BC_FUNCC) {
-+ | lgr CARG2, KBASE
-+ }
-+ | jh ->vm_growstack_c // Need to grow stack.
-+ | set_vmstate C
-+ if (op == BC_FUNCC) {
-+ | basr r14, KBASE // (lua_State *L)
-+ } else {
-+ | // (lua_State *L, lua_CFunction f)
-+ | lg TMP1, (DISPATCH_GL(wrapf))(DISPATCH)
-+ | basr r14, TMP1 // TODO: TMP1==r14, is this ok?
-+ }
-+ | // nresults returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
-+ | set_vmstate INTERP
-+ | sllg TMP1, RD, 3(r0)
-+ | la RA, 0(TMP1, BASE)
-+ | lcgr RA, RA
-+ | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
-+ | lg PC, -8(BASE) // Fetch PC of caller.
-+ | j ->vm_returnc
- break;
-
- /* ---------------------------------------------------------------------- */
---
-2.20.1
-
-
-From d9891e194d7b03f443673b9a67d0062fbe0c4719 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 12:22:08 +0530
-Subject: [PATCH 106/247] Added RRF-e support
-
----
- dynasm/dasm_s390x.lua | 27 +++++++++++++++++++++++++++
- 1 file changed, 27 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index e0fb916..da59ff9 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -494,6 +494,24 @@ local function parse_imm16(imm)
- end
- end
-
-+local function parse_mask(arg)
-+ local m3 = parse_number(arg)
-+ if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
-+ return m3
-+ else
-+ werror("Mask value should be 0,1 or 3-7: ", m3)
-+ end
-+end
-+
-+local function parse_mask2(arg)
-+ local m4 = parse_number(arg)
-+ if ( m4 >=0 and m4 <=1) then
-+ return m4
-+ else
-+ werror("Mask value should be 0 or 1: ", m4)
-+ end
-+end
-+
- local function parse_label(label, def)
- local prefix = sub(label, 1, 2)
- -- =>label (pc label reference)
-@@ -1144,6 +1162,8 @@ map_op = {
- unpku_2 = "e20000000000SS-a",
- xc_2 = "d70000000000SS-a",
- ap_2 = "fa0000000000SS-b",
-+ cfebr_3 = "0000b3980000RRF-e",
-+ cfebra_4 = "0000b3980000RRF-e",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1253,6 +1273,13 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1)
- if a then a() end
- parse_imm16(params[2])
-+ elseif p == "RRF-e" then
-+ wputhw(op1)
-+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[1]),12) +
parse_reg(params[3])
-+ if params[4] then
-+ op2 = op2 + shl(parse_mask2(params[4]),8)
-+ end
-+ wputhw(op2)
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 900353a4a5a9a997e48513ec2abb1bec0f04be95 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 12:29:46 +0530
-Subject: [PATCH 107/247] Adding support for RXE mode instructions
-
----
- dynasm/dasm_s390x.lua | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index da59ff9..8d30c93 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1162,8 +1162,11 @@ map_op = {
- unpku_2 = "e20000000000SS-a",
- xc_2 = "d70000000000SS-a",
- ap_2 = "fa0000000000SS-b",
-+ -- RRF-e instructions
- cfebr_3 = "0000b3980000RRF-e",
- cfebra_4 = "0000b3980000RRF-e",
-+ -- RXE instructions
-+ sqdb_2 = "ed0000000015RXE",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1280,6 +1283,15 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(parse_mask2(params[4]),8)
- end
- wputhw(op2)
-+ elseif p == "RXE" then
-+ local d, x, b, a = parse_mem_bx(params[2])
-+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
-+ op1 = op1 + shl(b, 12) + d
-+ -- m3 is not present, so assumed its not part of the instruction since its not
passed as a prameter
-+ wputhw(op0);
-+ wputhw(op1);
-+ if a then a() end
-+ wputhw(op2);
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From f0ea0638c47b9c0d5c17732feb485b9ce41fea4f Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 12:34:11 +0530
-Subject: [PATCH 108/247] Added RRF-b mode support
-
----
- dynasm/dasm_s390x.lua | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 8d30c93..e0c4733 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1167,6 +1167,8 @@ map_op = {
- cfebra_4 = "0000b3980000RRF-e",
- -- RXE instructions
- sqdb_2 = "ed0000000015RXE",
-+ -- RRF-b instructions
-+ didbr_4 = "0000b3580000RRF-b",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1292,6 +1294,10 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1);
- if a then a() end
- wputhw(op2);
-+ elseif p == "RRF-b" then
-+ wputhw(op1);
-+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) +
parse_reg(params[3]) + parse_mask(params[4])
-+ wputhw(op2)
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 29b3e8e9bf513702ba205d2f52723cee71d4728f Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 13:08:32 +0530
-Subject: [PATCH 109/247] Adding S mode instructions support
-
----
- dynasm/dasm_s390x.lua | 12 ++++++++++--
- 1 file changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index e0c4733..9ef15ad 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1166,9 +1166,11 @@ map_op = {
- cfebr_3 = "0000b3980000RRF-e",
- cfebra_4 = "0000b3980000RRF-e",
- -- RXE instructions
-- sqdb_2 = "ed0000000015RXE",
-+ sqdb_2 = "ed0000000015RXE",
- -- RRF-b instructions
-- didbr_4 = "0000b3580000RRF-b",
-+ didbr_4 = "0000b3580000RRF-b",
-+ -- S mode instructions
-+ stfl_1 = "0000b2b10000sS",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1298,6 +1300,12 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1);
- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) +
parse_reg(params[3]) + parse_mask(params[4])
- wputhw(op2)
-+ elseif p =="sS" then
-+ wputhw(op1);
-+ local d, b, a = parse_mem_b(params[1])
-+ op2 = op2 + shl(b,12) + d;
-+ wputhw(op2)
-+ if a then a() end
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 2e80aca24dd8b8c7186a63a1f7ba688e80ff7318 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 13:16:44 +0530
-Subject: [PATCH 110/247] Added support for I mode instructions
-
----
- dynasm/dasm_s390x.lua | 27 +++++++++++++++++++++++----
- 1 file changed, 23 insertions(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 9ef15ad..c2deaaa 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -494,8 +494,20 @@ local function parse_imm16(imm)
- end
- end
-
--local function parse_mask(arg)
-- local m3 = parse_number(arg)
-+local function parse_imm8(imm)
-+ local imm_val = tonumber(imm)
-+ if imm_val then
-+ if not is_int8(imm_val) then
-+ werror("Immediate value out of range: ", imm_val)
-+ end
-+ else
-+ iact = function() waction("IMM8",nil,imm) end
-+ end
-+ return imm_val, iact
-+end
-+
-+local function parse_mask(mask)
-+ local m3 = parse_number(mask)
- if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
- return m3
- else
-@@ -503,8 +515,8 @@ local function parse_mask(arg)
- end
- end
-
--local function parse_mask2(arg)
-- local m4 = parse_number(arg)
-+local function parse_mask2(mask)
-+ local m4 = parse_number(mask)
- if ( m4 >=0 and m4 <=1) then
- return m4
- else
-@@ -1171,6 +1183,8 @@ map_op = {
- didbr_4 = "0000b3580000RRF-b",
- -- S mode instructions
- stfl_1 = "0000b2b10000sS",
-+ -- I- mdoe instructions
-+ svc_1 = "000000000a00iI",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1306,6 +1320,11 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b,12) + d;
- wputhw(op2)
- if a then a() end
-+ elseif p =="iI" then
-+ local imm_val, a = parse_imm8(params[1])
-+ op2 = op2 + imm_val;
-+ wputhw(op2);
-+ if a then a() end
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 426dfa67873219659973d3a4589d699c6d68d445 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 13:24:44 +0530
-Subject: [PATCH 111/247] Added the action part for I mode
-
----
- dynasm/dasm_s390x.lua | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c2deaaa..a25cc96 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
- local action_names = {
- "STOP", "SECTION", "ESC", "REL_EXT",
- "ALIGN", "REL_LG", "LABEL_LG",
-- "REL_PC", "LABEL_PC", "DISP12", "DISP20",
"IMM16", "IMM32",
"LEN8R","LEN4HR","LEN4LR",
-+ "REL_PC", "LABEL_PC", "DISP12", "DISP20",
"IMM8", "IMM16", "IMM32",
"LEN8R","LEN4HR","LEN4LR",
- }
-
- -- Maximum number of section buffer positions for dasm_put().
-@@ -307,6 +307,10 @@ local function is_int16(num)
- return -32768 <= num and num < 32768
- end
-
-+local function is_int8(num)
-+ return -128 <= num and num < 128
-+end
-+
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
---
-2.20.1
-
-
-From 1fe52eb9842e530b4e668dd3785173f1ca1f4860 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 13:30:49 +0530
-Subject: [PATCH 112/247] Added C support for I mode instructions
-
----
- dynasm/dasm_s390x.h | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index cebce22..b98df8f 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -23,7 +23,7 @@ enum {
- /* The following actions also have an argument. */
- DASM_REL_PC, DASM_LABEL_PC,
- DASM_DISP12, DASM_DISP20,
-- DASM_IMM16, DASM_IMM32,
-+ DASM_IMM8, DASM_IMM16, DASM_IMM32,
- DASM_LEN8R,DASM_LEN4HR,DASM_LEN4LR,
- DASM__MAX
- };
-@@ -268,6 +268,9 @@ void dasm_put(Dst_DECL, int start, ...)
- *pl = -pos; /* Label exists now. */
- b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
-+ case DASM_IMM8:
-+ b[pos++] = n;
-+ break;
- case DASM_IMM16:
- CK(((short)n) == n || ((unsigned short)n) == n, RANGE_I); /* TODO: is this the
right way to handle unsigned immediates? */
- ofs += 2;
-@@ -370,6 +373,7 @@ int dasm_link(Dst_DECL, size_t * szp)
- p++;
- b[pos++] += ofs;
- break;
-+ case DASM_IMM8:
- case DASM_IMM16:
- case DASM_IMM32:
- case DASM_DISP20:
-@@ -457,6 +461,9 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_LABEL_PC:
- break;
-+ case DASM_IMM8:
-+ cp[-1] |= n & 0xff;
-+ break;
- case DASM_IMM16:
- *cp++ = n;
- break;
---
-2.20.1
-
-
-From 2604366049b70ae5e090696aaf076f7e4d37a6aa Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 13:41:55 +0530
-Subject: [PATCH 113/247] Added support for RI-b and RI-c mode instructions
-
----
- dynasm/dasm_s390x.lua | 16 +++++++++++++++-
- 1 file changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index a25cc96..fe6d6a3 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1187,8 +1187,12 @@ map_op = {
- didbr_4 = "0000b3580000RRF-b",
- -- S mode instructions
- stfl_1 = "0000b2b10000sS",
-- -- I- mdoe instructions
-+ -- I- mode instructions
- svc_1 = "000000000a00iI",
-+ -- RI-b mode instructions
-+ bras_2 = "0000a7050000RI-b",
-+ -- RI-c mode instructions
-+ brc_2 = "0000a7040000RI-c",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1329,6 +1333,16 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + imm_val;
- wputhw(op2);
- if a then a() end
-+ elseif p == "RI-b" then
-+ op1 = op1 + shl(parse_reg(params[1]),4)
-+ wputhw(op1)
-+ local mode, n, s = parse_label(params[2])
-+ waction("REL_"..mode, n, s)
-+ elseif p == "RI-c" then
-+ op1 = op1 + shl(parse_num(params[1]),4)
-+ wputhw(op1)
-+ local mode, n, s = parse_label(params[2])
-+ waction("REL_"..mode, n, s)
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 55b3a66284fa36dcc35b8d135e33f9b8b58efb16 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 13:51:58 +0530
-Subject: [PATCH 114/247] Added support for RIL-c and RX-b instructions
-
----
- dynasm/dasm_s390x.lua | 17 ++++++++++++++++-
- 1 file changed, 16 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index fe6d6a3..e0deef1 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1192,7 +1192,11 @@ map_op = {
- -- RI-b mode instructions
- bras_2 = "0000a7050000RI-b",
- -- RI-c mode instructions
-- brc_2 = "0000a7040000RI-c",
-+ brc_2 = "0000a7040000RI-c",
-+ -- RIL-c
-+ brcl_2 = "c00400000000RIL-c"
-+ -- RX-b mode instructions
-+ bc_2 = "000047000000RX-b",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1343,6 +1347,17 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1)
- local mode, n, s = parse_label(params[2])
- waction("REL_"..mode, n, s)
-+ elseif p == "RIL-c" then
-+ op0 = op0 + shl(parse_num(params[1]),4)
-+ wputhhw(op0)
-+ local mode, n, s = parse_label(params[2])
-+ waction("REL_"..mode, n, s)
-+ elseif p == "RX-b" then
-+ local d, x, b, a = parse_mem_bx(params[2])
-+ op1 = op1 + shl(parse_num(params[1]), 4) + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1);wputhw(op2);
-+ if a then a() end
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 714ab59f941bd42211530a906d939e9f632d9bce Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 14:18:07 +0530
-Subject: [PATCH 115/247] Added support for RIE-e, RSI, RXF, SI instructions
-
----
- dynasm/dasm_s390x.lua | 36 ++++++++++++++++++++++++++++++++++++
- 1 file changed, 36 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index e0deef1..9f37bf4 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1197,6 +1197,14 @@ map_op = {
- brcl_2 = "c00400000000RIL-c"
- -- RX-b mode instructions
- bc_2 = "000047000000RX-b",
-+ -- RSI
-+ brxh_3 = "000084000000RSI",
-+ -- RIE-e
-+ brxhg_3 = "ec0000000044RIE-e",
-+ -- SI
-+ ni_2 = "000094000000SI",
-+ -- RXF
-+ madb_3 = "ed000000001eRXF",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1358,6 +1366,34 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1);wputhw(op2);
- if a then a() end
-+ elseif p == "RSI" then
-+ op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ wputhw(op1)
-+ local mode, n, s = parse_label(params[3])
-+ waction("REL_"..mode, n, s)
-+ elseif p == "RIE-e" then
-+ op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ wputhw1(op0)
-+ local mode, n, s = parse_label(params[3])
-+ waction("REL_"..mode, n, s)
-+ wputhw(op2)
-+ elseif p == "SI" then
-+ local imm_val, a = parse_imm8(params[2])
-+ op1 = op1 + imm_val
-+ wputhw(op1)
-+ if a then a() end
-+ local d, b, a = parse_mem_b(params[1])
-+ op2 = op2 + shl(b,12) + d
-+ wputhw(op2)
-+ if a then a() end
-+ elseif p == "RXF" then
-+ local d, x, b, a = parse_mem_bx(params[3])
-+ op0 = op0 + shl(parse_reg(params[2]),4) + x
-+ op1 = op1 + shl(b, 12) + d
-+ wputhw(op0); wputhw(op1);
-+ if a then a() end
-+ op2 = op2 + shl(parse_reg(params[1]),12)
-+ wputhw(op2)
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From d0d2824f3f05fe4aed01d81b16752c63c685b64d Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 14:47:15 +0530
-Subject: [PATCH 116/247] Minor cleanup
-
----
- dynasm/dasm_s390x.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 9f37bf4..5a79a96 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1194,7 +1194,7 @@ map_op = {
- -- RI-c mode instructions
- brc_2 = "0000a7040000RI-c",
- -- RIL-c
-- brcl_2 = "c00400000000RIL-c"
-+ brcl_2 = "c00400000000RIL-c",
- -- RX-b mode instructions
- bc_2 = "000047000000RX-b",
- -- RSI
---
-2.20.1
-
-
-From f34615f910cac724c4b1ccdea1a2937ad1a103d4 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 17:45:32 +0530
-Subject: [PATCH 117/247] Minor Fix, correct the parameter used
-
-Instead of params[2] , params[1] was used, corrected it.
----
- dynasm/dasm_s390x.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 5a79a96..db5aa89 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1316,7 +1316,7 @@ local function parse_template(params, template, nparams, pos)
- parse_imm16(params[2])
- elseif p == "RRF-e" then
- wputhw(op1)
-- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[1]),12) +
parse_reg(params[3])
-+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) +
parse_reg(params[3])
- if params[4] then
- op2 = op2 + shl(parse_mask2(params[4]),8)
- end
---
-2.20.1
-
-
-From 69b320e9d0321a1ca8dc3174d61b592a23a330d1 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 19:07:05 +0530
-Subject: [PATCH 118/247] Added support for RRD addressing mode
-
-We may not require RRD mode but, Added to check working of RRF-e.
----
- dynasm/dasm_s390x.lua | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index db5aa89..fc65b81 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1205,6 +1205,8 @@ map_op = {
- ni_2 = "000094000000SI",
- -- RXF
- madb_3 = "ed000000001eRXF",
-+ --RRD
-+ maebr_3 = "0000b30e0000RRD",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1394,6 +1396,10 @@ local function parse_template(params, template, nparams, pos)
- if a then a() end
- op2 = op2 + shl(parse_reg(params[1]),12)
- wputhw(op2)
-+ elseif p == "RRD" then
-+ wputhw(op1)
-+ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) +
parse_reg(params[3])
-+ wputhw(op2)
- elseif p == "w" then
- local mode, n, s = parse_label(params[1])
- wputhw(op1)
---
-2.20.1
-
-
-From 38bd6ac73dec8189c62a11d2a120d602090e23f1 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 14 Dec 2016 19:14:10 +0530
-Subject: [PATCH 119/247] Added test for RRD and RRF-e
-
-Also have modified the function which can handle 3 arguments now
----
- dynasm/Examples/test_z_inst.c | 57 +++++++++++++++++++++--------------
- 1 file changed, 35 insertions(+), 22 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index c09ae08..42a4674 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -324,35 +324,48 @@ static void sil(dasm_State *state) {
- | br r14
- }
-
-+static void rrfe_rrd(dasm_State *state) {
-+ dasm_State ** Dst = &state;
-+
-+ | cefbr f0,r2
-+ | cefbr f2,r3
-+ | cefbr f4,r4
-+ | maebr f0 ,f2 ,f4
-+ | cfebr r2, 0, f0
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-+ int64_t arg3;
- void (*fn)(dasm_State *);
- int64_t want;
- const char *testname;
- } test_table;
-
- test_table test[] = {
-- { 1, 2, add, 3, "add"},
-- {10, 5, sub, 5, "sub"},
-- { 2, 3, mul, 6, "mul"},
-- { 5, 7, rx, 12298, "rx"},
-- { 5, 7, rxy, 10, "rxy"},
-- { 2, 4, lab, 32, "lab"},
-- { 2, 4, labg, 32, "labg"},
-- { 2, 0, add_imm16, 17, "imm16"},
-- { 2, 0, add_imm32, 16, "imm32"},
-- { 7, 3, save, 480, "save"},
-- { 7, 3, labmul, 21, "labmul0"},
-- { 7, 0, labmul, 0, "labmul1"},
-- { 0, 0, pc, 55, "pc"},
-- { 2,12, jmp_fwd, 12, "jmp_fwd"},
--// { 9,8, add_rrd, 25, "add_rrd"},
--// { 2,4, load_test, 4,"load_test"},
-- {-1, 0, ssa, 65535<<8, "ssa"},
-- {-1, 0, ssa_act, 65535<<8, "ssa_act"},
-- {27, 0, type, 27, "type"},
-- { 0, 0, sil, 23, "sil"}
-+ { 1, 2, 0, add, 3, "add"},
-+ {10, 5, 0, sub, 5, "sub"},
-+ { 2, 3, 0, mul, 6, "mul"},
-+ { 5, 7, 0, rx, 12298, "rx"},
-+ { 5, 7, 0, rxy, 10, "rxy"},
-+ { 2, 4, 0, lab, 32, "lab"},
-+ { 2, 4, 0, labg, 32, "labg"},
-+ { 2, 0, 0, add_imm16, 17, "imm16"},
-+ { 2, 0, 0, add_imm32, 16, "imm32"},
-+ { 7, 3, 0, save, 480, "save"},
-+ { 7, 3, 0, labmul, 21, "labmul0"},
-+ { 7, 0, 0, labmul, 0, "labmul1"},
-+ { 0, 0, 0, pc, 55, "pc"},
-+ { 2,12, 0, jmp_fwd, 12, "jmp_fwd"},
-+// { 9,8, 0, add_rrd, 25, "add_rrd"},
-+// { 2,4, 0, load_test, 4,"load_test"},
-+ {-1, 0, 0, ssa, 65535<<8, "ssa"},
-+ {-1, 0, 0, ssa_act, 65535<<8, "ssa_act"},
-+ {27, 0, 0, type, 27, "type"},
-+ { 0, 0, 0, sil, 23, "sil"},
-+ {15,3,10, rrfe_rrd, 45, "rrfe_rrd"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-@@ -378,8 +391,8 @@ int main(int argc, char *argv[])
- dasm_setup(&state, actions);
- test[i].fn(state);
- size_t size;
-- int64_t (*fptr)(int64_t, int64_t) = jitcode(&state, &size);
-- int64_t got = fptr(test[i].arg1, test[i].arg2);
-+ int64_t (*fptr)(int64_t, int64_t, int64_t) = jitcode(&state, &size);
-+ int64_t got = fptr(test[i].arg1, test[i].arg2, test[i].arg3);
-
- if (got != test[i].want) {
- fprintf(stderr, "FAIL: test %s: want %ld, got %ld\n", test[i].testname,
test[i].want, got);
---
-2.20.1
-
-
-From 3a161b8db61129abefe774f88d0c41e7be93594d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 14 Dec 2016 16:27:38 -0500
-Subject: [PATCH 120/247] Fix decoding of REL_EXT.
-
-REL_EXT has an argument which wasn't being properly jumped,
-resulting in an early STOP (because the argument is 0).
----
- dynasm/dasm_s390x.h | 16 +++++++++++-----
- dynasm/dasm_s390x.lua | 3 ++-
- 2 files changed, 13 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index b98df8f..ea3aa80 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -214,12 +214,17 @@ void dasm_put(Dst_DECL, int start, ...)
- ofs += 2;
- break;
- case DASM_REL_EXT:
-+ p++;
-+ ofs += 4;
- break;
- case DASM_ALIGN:
- ofs += *p++;
- b[pos++] = ofs;
- break;
- case DASM_REL_LG:
-+ if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
-+ ofs += 2;
-+ }
- n = *p++ - 10;
- pl = D->lglabels + n;
- /* Bkwd rel or global. */
-@@ -234,6 +239,9 @@ void dasm_put(Dst_DECL, int start, ...)
- n = 0; /* Start new chain for fwd rel if label exists. */
- goto linkrel;
- case DASM_REL_PC:
-+ if (p[-2] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
-+ ofs += 2;
-+ }
- pl = D->pclabels + n;
- CKPL(pc, PC);
- putrel:
-@@ -246,9 +254,6 @@ void dasm_put(Dst_DECL, int start, ...)
- *pl = pos;
- }
- ofs += 2;
-- if (p[-3] >> 12 == 0xc) { /* RIL instruction needs 32-bit immediate. */
-- ofs += 2;
-- }
- pos++;
- break;
- case DASM_LABEL_LG:
-@@ -359,6 +364,7 @@ int dasm_link(Dst_DECL, size_t * szp)
- p++;
- break;
- case DASM_REL_EXT:
-+ p++;
- break;
- case DASM_ALIGN:
- ofs -= (b[pos++] + ofs) & *p++;
-@@ -430,7 +436,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- *cp++ = *p++;
- break;
- case DASM_REL_EXT:
-- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
-+ n = DASM_EXTERN(Dst, (unsigned char *)cp, *p++, 1) - 4;
- goto patchrel;
- case DASM_ALIGN:
- ins = *p++;
-@@ -443,6 +449,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
-+ p++; /* skip argument */
- patchrel:
- /* Offsets are halfword aligned (so need to be halved). */
- n += 2; /* Offset is relative to start of instruction. */
-@@ -452,7 +459,6 @@ int dasm_encode(Dst_DECL, void *buffer)
- CK(-(1 << 16) <= n && n < (1 << 16) && (n
& 1) == 0, RANGE_LG);
- }
- *cp++ = n >> 1;
-- p++; /* skip argument */
- break;
- case DASM_LABEL_LG:
- ins = *p++;
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index fc65b81..556cfec 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -79,7 +79,8 @@ local function havearg(a)
- return a == "ESC" or
- a == "SECTION" or
- a == "REL_LG" or
-- a == "LABEL_LG"
-+ a == "LABEL_LG" or
-+ a == "REL_EXT"
- end
-
- -- Write action list buffer as a huge static C array.
---
-2.20.1
-
-
-From 3e1453ce5c32dfdb62cce167df848729a9ca9163 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 14 Dec 2016 16:31:52 -0500
-Subject: [PATCH 121/247] Fix extern handling in host vm builder.
-
----
- src/host/buildvm_asm.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
-index 5817091..6743c73 100644
---- a/src/host/buildvm_asm.c
-+++ b/src/host/buildvm_asm.c
-@@ -118,8 +118,8 @@ static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
- int opcode = *(uint16_t*)(&cp[n]);
- int arg = (opcode>>4) & 0xf;
- switch (opcode & 0xff0f) {
-- case 0xa705: opname = "bras"; argt = "r"; break;
-- case 0xc005: opname = "brasl"; argt = "r"; break;
-+ case 0xa705: opname = "bras"; argt = "%r"; break;
-+ case 0xc005: opname = "brasl"; argt = "%r"; break;
- case 0xa704: opname = "brc"; break;
- case 0xc004: opname = "brcl"; break;
- default:
-@@ -360,7 +360,7 @@ void emit_asm(BuildCtx *ctx)
- ofs += n+4;
- #elif LJ_TARGET_S390X
- emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
-- ofs += n;
-+ ofs += n+4;
- #else
- emit_asm_wordreloc(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
- ofs += n;
---
-2.20.1
-
-
-From 8944fc9bb893ad21348675daed0cc6a178d8728d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 14 Dec 2016 18:43:21 -0500
-Subject: [PATCH 122/247] Various fixes for vm_s390x.dasc.
-
----
- src/vm_s390x.dasc | 38 ++++++++++++++++++++++++++++++++------
- 1 file changed, 32 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index a1a4d76..803184b 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -249,11 +249,11 @@
- |.macro load_false, reg; lghi reg, -1; iihl reg, 0x7fff; .endmacro // assumes LJ_TFALSE
== ~(1<<47)
- |.macro load_true, reg; lghi reg, -1; iihh reg, 0xfffe; .endmacro // assumes LJ_TTRUE
== ~(2<<47)
- |
--|.define PC_OP, -4(PC)
--|.define PC_RA, -3(PC)
--|.define PC_RB, -1(PC)
--|.define PC_RC, -2(PC)
--|.define PC_RD, -2(PC)
-+|.define PC_OP, -1(PC)
-+|.define PC_RA, -2(PC)
-+|.define PC_RB, -4(PC)
-+|.define PC_RC, -3(PC)
-+|.define PC_RD, -4(PC)
- |
- |// Set current VM state.
- |.macro set_vmstate, st
-@@ -381,12 +381,37 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_growstack_c: // Grow stack for C function.
-- | stg r0, 0(r0)
-+ | lghi CARG2, LUA_MINSTACK
-+ | j >2
- |
- |->vm_growstack_v: // Grow stack for vararg Lua function.
-+ | aghi RD, -16 // LJ_FR2
-+ | j >1
- |
- |->vm_growstack_f: // Grow stack for fixarg Lua function.
- | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
-+ | sllg RD, NARGS:RD, 3(r0)
-+ | lay RD, -8(RD, BASE)
-+ |1:
-+ | llgc RA, (PC2PROTO(framesize)-4)(PC)
-+ | la PC, 4(PC) // Must point after first instruction.
-+ | stg BASE, L:RB->base
-+ | stg RD, L:RB->top
-+ | stg PC, SAVE_PC
-+ | lgr CARG2, RA
-+ |2:
-+ | // RB = L, L->base = new base, L->top = top
-+ | lgr CARG1, L:RB
-+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
-+ | lg BASE, L:RB->base
-+ | lg RD, L:RB->top
-+ | lg LFUNC:RB, -16(BASE)
-+ | cleartp LFUNC:RB
-+ | sgr RD, BASE
-+ | srlg RD, RD, 3(r0)
-+ | aghi NARGS:RD, 1
-+ | // BASE = new base, RB = LFUNC, RD = nargs+1
-+ | ins_callt // Just retry the call.
- |
- |//-----------------------------------------------------------------------
- |//-- Entry points into the assembler VM ---------------------------------
-@@ -998,6 +1023,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | basr r14, TMP1 // TODO: TMP1==r14, is this ok?
- }
- | // nresults returned in r2 (CRET1).
-+ | lgr RD, CRET1
- | lg BASE, L:RB->base
- | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
- | set_vmstate INTERP
---
-2.20.1
-
-
-From 2ccd9d244bcdcc6d323969832bdc5c800a88b5a2 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 14 Dec 2016 21:16:30 -0500
-Subject: [PATCH 123/247] Add support for clm instruction.
-
----
- dynasm/Examples/test_z_inst.c | 21 ++++++++++++++++++++-
- dynasm/dasm_s390x.lua | 11 ++++++++++-
- 2 files changed, 30 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 42a4674..4820c57 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -335,6 +335,24 @@ static void rrfe_rrd(dasm_State *state) {
- | br r14
- }
-
-+static void rsb(dasm_State *state) {
-+ dasm_State **Dst = &state;
-+
-+ | lay sp, -4(sp)
-+ | lghi r3, 0x0706
-+ | lghi r4, 0
-+ | iill r4, 6
-+ | iilh r4, 7
-+ | st r4, 0(sp)
-+ | lghi r2, 0
-+ | clm r3, 5, 0(sp)
-+ | jne >1
-+ | lghi r2, 1
-+ |1:
-+ | la sp, 4(sp)
-+ | br r14
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -365,7 +383,8 @@ test_table test[] = {
- {-1, 0, 0, ssa_act, 65535<<8, "ssa_act"},
- {27, 0, 0, type, 27, "type"},
- { 0, 0, 0, sil, 23, "sil"},
-- {15,3,10, rrfe_rrd, 45, "rrfe_rrd"}
-+ {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
-+ { 0, 0, 0, rsb, 0, "rsb"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 556cfec..6bb008e 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1206,8 +1206,10 @@ map_op = {
- ni_2 = "000094000000SI",
- -- RXF
- madb_3 = "ed000000001eRXF",
-- --RRD
-+ -- RRD
- maebr_3 = "0000b30e0000RRD",
-+ -- RS-b
-+ clm_3 = "0000bd000000RS-b"
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1418,6 +1420,13 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "z" then
- op2 = op2 + parse_reg(params[1])
- wputhw(op2)
-+ elseif p == "RS-b" then
-+ local m = parse_mask(params[2])
-+ local d, b, a = parse_mem_b(params[3])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + m
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2)
-+ if a then a() end
- else
- werror("unrecognized encoding")
- end
---
-2.20.1
-
-
-From e2f460412b8c137e03ba94d14e47063a1b6b6572 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 14 Dec 2016 22:22:49 -0500
-Subject: [PATCH 124/247] Add vm_call handling code.
-
-Now prints the prompt (!).
----
- src/vm_s390x.dasc | 57 +++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 57 insertions(+)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 803184b..da087ea 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -418,10 +418,67 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_resume: // Setup C frame and resume thread.
-+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
-+ | saveregs
-+ | lgr L:RB, CARG1 // Caveat: CARG1 may be RA.
-+ | stg CARG1, SAVE_L
-+ | lgr RA, CARG2
-+ | lghi PC, FRAME_CP
-+ | lghi RD, 0
-+ | lay KBASE, CFRAME_RESUME(sp)
-+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | aghi DISPATCH, GG_G2DISP
-+ | stg RD, SAVE_PC // Any value outside of bytecode is ok.
-+ | stg RD, SAVE_CFRAME
-+ | stg RD, SAVE_NRES
-+ | stg RD, SAVE_ERRF
-+ | stg KBASE, L:RB->cframe
-+ | clm RD, 1, L:RB->status
-+ | je >2 // Initial resume (like a call).
-+ |
-+ | // Resume after yield (like a return).
-+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
-+ | set_vmstate INTERP
-+ | llgc RD, L:RB->status
-+ | lg BASE, L:RB->base
-+ | lg RD, L:RB->top
-+ | sgr RD, RA
-+ | srlg RD, RD, 3(r0)
-+ | aghi RD, 1 // RD = nresults+1
-+ | sgr RA, BASE // RA = resultofs
-+ | lg PC, -8(BASE)
-+ | stg RD, SAVE_MULTRES
-+ | tmll PC, FRAME_TYPE
-+ | je ->BC_RET_Z
-+ | j ->vm_return
- |
- |->vm_pcall: // Setup protected C frame and enter VM.
-+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
-+ | saveregs
-+ | lghi PC, FRAME_CP
-+ | llgfr CARG4, CARG4
-+ | stg CARG4, SAVE_ERRF
-+ | j >1
- |
- |->vm_call: // Setup C frame and enter VM.
-+ | // (lua_State *L, TValue *base, int nres1)
-+ | saveregs
-+ | lghi PC, FRAME_C
-+ |
-+ |1: // Entry point for vm_pcall above (PC = ftype).
-+ | lgfr CARG3, CARG3
-+ | stg CARG3, SAVE_NRES
-+ | lgr L:RB, CARG1
-+ | stg CARG1, SAVE_L
-+ | lgr RA, CARG2
-+ |
-+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
-+ | stg KBASE, SAVE_CFRAME
-+ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
-+ | aghi DISPATCH, GG_G2DISP
-+ | stg sp, L:RB->cframe
-+ | lgr L:LREG, L:RB // TODO: use RB instead of LREG here?
- |
- |2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
- | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
---
-2.20.1
-
-
-From bd4500e60d0874cb3cabfea048b9f1cae071553d Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Thu, 15 Dec 2016 13:51:46 +0530
-Subject: [PATCH 125/247] Added rre instruction format example
-
-example includes instruction fidr
----
- dynasm/Examples/test_z_inst.c | 16 +++++++++++++++-
- 1 file changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 4820c57..2314606 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -335,6 +335,19 @@ static void rrfe_rrd(dasm_State *state) {
- | br r14
- }
-
-+static void rre(dasm_State *state) {
-+
-+ dasm_State **Dst = &state;
-+
-+ | lay sp , -8(sp)
-+ | cefbr f0 , r2
-+ | cefbr f1 , r3
-+ | fidr f0 , f1
-+ | cfebr r2 ,0,f0
-+ | la sp, 8(sp)
-+ | br r14
-+}
-+
- static void rsb(dasm_State *state) {
- dasm_State **Dst = &state;
-
-@@ -384,7 +397,8 @@ test_table test[] = {
- {27, 0, 0, type, 27, "type"},
- { 0, 0, 0, sil, 23, "sil"},
- {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
-- { 0, 0, 0, rsb, 0, "rsb"}
-+ { 0, 0, 0, rsb, 0, "rsb"},
-+ {12,10, 0, rre, 10, "rre"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 521c6effc835bea3cdb73eef211241ab220497bb Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 15 Dec 2016 11:01:59 -0500
-Subject: [PATCH 126/247] Implement more functions in the VM.
-
-Also adds segmentation faults to stubbed out functions to make it
-easier to work out what the control flow is.
----
- src/vm_s390x.dasc | 317 +++++++++++++++++++++++++++++++++++++++++++++-
- 1 file changed, 311 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index da087ea..01a3b87 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -286,7 +286,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg ITYPE, 0(RA, BASE) // Prepend true to results.
- |
- |->vm_returnc:
-- | ahi RD, 1 // RD = nresults+1
-+ | aghi RD, 1 // RD = nresults+1
- | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
- | stg RD, SAVE_MULTRES
- | tmll PC, FRAME_TYPE
-@@ -305,13 +305,13 @@ static void build_subroutines(BuildCtx *ctx)
- | sgr PC, BASE
- | lcgr PC, PC // Previous base = BASE - delta.
- |
-- | ahi RD, -1
-+ | aghi RD, -1
- | je >2
- |1: // Move results down.
- | lg RB, 0(BASE, RA)
- | stg RB, -16(BASE)
- | la BASE, 8(BASE)
-- | ahi RD, -1
-+ | aghi RD, -1
- | jne <1
- |2:
- | lg L:RB, SAVE_L
-@@ -368,13 +368,31 @@ static void build_subroutines(BuildCtx *ctx)
- | j <3
- |
- |->vm_unwind_yield:
-- | stg r0, 0(r0)
-+ | lghi CRET1, LUA_YIELD
-+ | j ->vm_unwind_c_eh
- |
- |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
-+ | // (void *cframe, int errcode)
-+ | lgr sp, CARG1
-+ | lgfr CARG2, CRET1 // Error return status for vm_pcall.
- |->vm_unwind_c_eh: // Landing pad for external unwinder.
-+ | lg L:RB, SAVE_L
-+ | lg GL:RB, L:RB->glref
-+ | lghi TMP1, ~LJ_VMST_C
-+ | stg TMP1, GL:RB->vmstate
-+ | j ->vm_leave_unw
-+ |
- |->vm_unwind_rethrow:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ |
- |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ |
- |->vm_unwind_ff_eh: // Landing pad for external unwinder.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Grow stack for calls -----------------------------------------------
-@@ -537,69 +555,127 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Continuation dispatch ----------------------------------------------
- |
- |->cont_dispatch:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->cont_cat: // BASE = base, RC = result, RB = mbase
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Table indexing metamethods -----------------------------------------
- |
- |->vmeta_tgets:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_tgetb:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_tgetv:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->cont_ra: // BASE = base, RC = result
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_tgetr:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |
- |->vmeta_tsets:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_tsetb:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_tsetv:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->cont_nop: // BASE = base, (RC = result)
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_tsetr:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Comparison metamethods ---------------------------------------------
- |
- |->cont_condt: // BASE = base, RC = result
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->cont_condf: // BASE = base, RC = result
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_equal:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_equal_cd:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_istype:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Arithmetic metamethods ---------------------------------------------
- |
- |->vmeta_arith_vno:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->vmeta_arith_vn:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_arith_nvo:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->vmeta_arith_nv:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_unm:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_arith_vvo:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->vmeta_arith_vv:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- | // Call metamethod for binary op.
- |->vmeta_binop:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vmeta_len:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Call metamethod ----------------------------------------------------
- |
- |->vmeta_call_ra:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->vmeta_call: // Resolve and call __call metamethod.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Argument coercion for 'for' statement ------------------------------
- |
- |->vmeta_for:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Fast functions -----------------------------------------------------
-@@ -849,6 +925,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro vm_round, name, mode, cond
- |->name:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |.endmacro
- |
- | vm_round vm_floor, 0, 1
-@@ -857,9 +935,13 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// FP modulo x%y. Called by BC_MOD* and vm_arith.
- |->vm_mod:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
- |->vm_powi_sse:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
-@@ -867,12 +949,16 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
- |->vm_cpuid:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Assertions ---------------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->assert_bad_for_arg_type:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- #ifdef LUA_USE_ASSERT
- #endif
- |
-@@ -882,11 +968,17 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// Handler for callback functions. Callback slot number in ah/al.
- |->vm_ffi_callback:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->cont_ffi_callback: // Return from FFI callback.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vm_ffi_call: // Call C function via FFI.
- |// Note: vm_ffi_call must be the last function in this object file!
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- }
-@@ -901,60 +993,220 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- switch (op) {
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISEQV: case BC_ISNEV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISEQS: case BC_ISNES:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISEQN: case BC_ISNEN:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISEQP: case BC_ISNEP:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISTYPE:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISNUM:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_MOV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_NOT:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_UNM:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_LEN:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_MODVN:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_MODNV: case BC_MODVV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_POW:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_CAT:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_KSTR:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_KCDATA:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_KSHORT:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_KNUM:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_KPRI:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_KNIL:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_UGET:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_USETV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_USETS:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_USETN:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_USETP:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_UCLO:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_FNEW:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TNEW:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TDUP:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_GGET:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_GSET:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TGETV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TGETS:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TGETB:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TGETR:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TSETV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TSETS:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TSETB:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TSETR:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_TSETM:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_CALL: case BC_CALLM:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_CALLMT:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_CALLT:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ITERC:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ITERN:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ISNEXT:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_VARG:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_RETM:
-- | stg r0, 0(r0) // not implemented
-+ | stg r0, 0(r0) // not implemented
-+ | stg r0, 0(r0)
- break;
-
- case BC_RET: case BC_RET0: case BC_RET1:
-@@ -1033,24 +1285,76 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <1
- break;
- case BC_FORL:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_JFORI:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_JFORL:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_FORI:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_IFORL:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ITERL:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_JITERL:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_IITERL:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_LOOP:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_ILOOP:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_JLOOP:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_JMP:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_FUNCF:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_FUNCV: /* NYI: compiled vararg functions. */
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_JFUNCF:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_IFUNCF:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_JFUNCV:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
-+ break;
- case BC_IFUNCV:
-- | lg r0, 0(r0) // Not implemented, seg fault.
-+ | stg r0, 0(r0) // Not implemented, seg fault.
-+ | stg r0, 0(r0)
- break;
-
- case BC_FUNCC:
-@@ -1089,6 +1393,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lcgr RA, RA
- | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
- | lg PC, -8(BASE) // Fetch PC of caller.
-+ | // BUG: PC seems to be -1 here sometimes. Not yet sure why.
- | j ->vm_returnc
- break;
-
---
-2.20.1
-
-
-From ffd76321015d4c7d8e8f2d8b70af6d8a36acd1ab Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 15 Dec 2016 11:03:13 -0500
-Subject: [PATCH 127/247] Add debug options to Makefile.
-
-We're going to need these for a while, so better to put in the
-repository. Once we're happy things are working we can disable
-the debug info and enable optimizations again.
----
- src/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index d0f160a..158bfa8 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -56,7 +56,7 @@ CCOPT_mips=
- #
- CCDEBUG=
- # Uncomment the next line to generate debug information:
--#CCDEBUG= -g
-+CCDEBUG= -g -O0
- #
- CCWARN= -Wall
- # Uncomment the next line to enable more warnings:
---
-2.20.1
-
-
-From 35c7685ecc61843d5940e1a5ac42bb2441a0ea3e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 16 Dec 2016 17:23:46 -0500
-Subject: [PATCH 128/247] Add support for global short assignments.
-
-In other words 'a = 1' now works.
----
- dynasm/dasm_s390x.lua | 4 +
- src/lj_arch.h | 2 +-
- src/vm_s390x.dasc | 280 +++++++++++++++++++++++++++++++++++-------
- 3 files changed, 238 insertions(+), 48 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 6bb008e..a4b01cc 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1190,6 +1190,10 @@ map_op = {
- stfl_1 = "0000b2b10000sS",
- -- I- mode instructions
- svc_1 = "000000000a00iI",
-+ -- RI-a mode instructions
-+ -- TODO: change "i" to "RI-a"
-+ mhi_2 = "0000a70c0000i",
-+ mghi_2 = "0000a70d0000i",
- -- RI-b mode instructions
- bras_2 = "0000a7050000RI-b",
- -- RI-c mode instructions
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index b613fab..bceb6de 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -398,7 +398,7 @@
- #define LJ_TARGET_MASKSHIFT 1
- #define LJ_TARGET_MASKROT 1
- #define LJ_TARGET_UNALIGNED 1
--#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
-+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
- #define LJ_TARGET_GC64 1
- #define LJ_ARCH_NOJIT 1 /* NYI */
- #define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 01a3b87..3f4cea6 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -57,7 +57,8 @@
- |.define CRET1, r2
- |
- |.define OP, r2
--|.define TMP1, r14
-+|.define TMPR1, r14
-+|.define TMPR2, r0
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
-@@ -66,9 +67,9 @@
- |.define SAVE_GPRS, 288(sp) // Save area for r6-r15 (10*8 bytes).
- |.define SAVE_GPRS_P, 48(sp) // Save area for r6-r15 (10*8 bytes) in prologue (before
stack frame is allocated).
- |
--|// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
-+|// Argument save area.
- |.define SAVE_ERRF, 280(sp) // Argument 4, in r5.
--|.define SAVE_NRES, 272(sp) // Argument 3, in r4.
-+|.define SAVE_NRES, 272(sp) // Argument 3, in r4. Size is 4-bytes.
- |.define SAVE_CFRAME, 264(sp) // Argument 2, in r3.
- |.define SAVE_L, 256(sp) // Argument 1, in r2.
- |.define RESERVED, 248(sp) // Reserved for compiler use.
-@@ -85,6 +86,7 @@
- |.define SAVE_FPR8, 176(sp)
- |.define SAVE_PC, 168(sp)
- |.define SAVE_MULTRES, 160(sp)
-+|.define TMP_STACK, 160(sp) // Overlaps SAVE_MULTRES
- |
- |// Callee save area (allocated by interpreter).
- |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
-@@ -140,7 +142,7 @@
- |.macro ins_ABC; .endmacro
- |.macro ins_AB_; .endmacro
- |.macro ins_A_C; .endmacro
--|.macro ins_AND; .endmacro
-+|.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
- |
- |// Instruction decode+dispatch.
- | // TODO: tune this, right now we always decode RA-D even if they aren't used.
-@@ -157,9 +159,10 @@
- | srlg RB, RB, 8(r0)
- | llgcr RC, RD
- | la PC, 4(PC)
--| llgfr TMP1, OP
--| sllg TMP1, TMP1, 3(r0) // TMP1=OP*8
--| b 0(TMP1, DISPATCH)
-+| llgfr TMPR1, OP
-+| sllg TMPR1, TMPR1, 3(r0) // TMPR1=OP*8
-+| lg TMPR1, 0(TMPR1, DISPATCH)
-+| br TMPR1
- |.endmacro
- |
- |// Instruction footer.
-@@ -184,10 +187,10 @@
- | lg PC, LFUNC:RB->pc
- | llgf RA, 0(PC) // TODO: combine loads?
- | llgcr OP, RA
--| sllg TMP1, OP, 3(r0)
-+| sllg TMPR1, OP, 3(r0)
- | la PC, 4(PC)
--| lg TMP1, 0(TMP1, DISPATCH)
--| br TMP1
-+| lg TMPR1, 0(TMPR1, DISPATCH)
-+| br TMPR1
- |.endmacro
- |
- |.macro ins_call
-@@ -210,6 +213,11 @@
- | oihh reg, ((tp>>1) &0xffff)
- | oihl reg, ((tp<<15)&0x8000)
- |.endmacro
-+|.macro settp, dst, reg, tp
-+| llihh dst, ((tp>>1) &0xffff)
-+| iihl dst, ((tp<<15)&0x8000)
-+| ogr dst, reg
-+|.endmacro
- |.macro setint, reg
- | settp reg, LJ_TISNUM
- |.endmacro
-@@ -257,10 +265,24 @@
- |
- |// Set current VM state.
- |.macro set_vmstate, st
--| lghi TMP1, ~LJ_VMST_..st
--| stg TMP1, DISPATCH_GL(vmstate)(DISPATCH)
-+| lghi TMPR1, ~LJ_VMST_..st
-+| stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
- |.endmacro
- |
-+|// Move table write barrier back. Overwrites reg.
-+|.macro barrierback, tab, reg
-+| // TODO: more efficient way?
-+| llgc reg, tab->marked
-+| nill reg, (uint16_t)~LJ_GC_BLACK // black2gray(tab)
-+| stc reg, tab->marked
-+| lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH)
-+| stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH)
-+| stg reg, tab->gclist
-+|.endmacro
-+
-+#if !LJ_DUALNUM
-+#error "Only dual-number mode supported for s390x target"
-+#endif
-
- /* Generate subroutines used by opcodes and other parts of the VM. */
- /* The .code_sub section should be last to help static branch prediction. */
-@@ -294,8 +316,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_return:
- | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
-- | lghi TMP1, FRAME_C
-- | xgr PC, TMP1
-+ | lghi TMPR1, FRAME_C
-+ | xgr PC, TMPR1
- | tmll PC, FRAME_TYPE
- | jne ->vm_returnp
- |
-@@ -318,7 +340,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg PC, L:RB->base
- |3:
- | lg RD, SAVE_MULTRES
-- | lg RA, SAVE_NRES // RA = wanted nresults+1
-+ | lgf RA, SAVE_NRES // RA = wanted nresults+1
- |4:
- | cgr RA, RD
- | jne >6 // More/less results wanted?
-@@ -340,8 +362,8 @@ static void build_subroutines(BuildCtx *ctx)
- | // More results wanted. Check stack size and fill up results with nil.
- | cg BASE, L:RB->maxstack
- | jh >8
-- | lghi TMP1, LJ_TNIL
-- | stg TMP1, -16(BASE)
-+ | lghi TMPR1, LJ_TNIL
-+ | stg TMPR1, -16(BASE)
- | la BASE, 8(BASE)
- | aghi RD, 1
- | j <4
-@@ -350,8 +372,8 @@ static void build_subroutines(BuildCtx *ctx)
- | cghi RA, 0
- | je <5 // But check for LUA_MULTRET+1.
- | sgr RA, RD // Negative result!
-- | sllg TMP1, RA, 3(r0)
-- | lay BASE, 0(TMP1, BASE) // Correct top.
-+ | sllg TMPR1, RA, 3(r0)
-+ | lay BASE, 0(TMPR1, BASE) // Correct top.
- | j <5
- |
- |8: // Corner case: need to grow stack for filling up results.
-@@ -378,8 +400,8 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_unwind_c_eh: // Landing pad for external unwinder.
- | lg L:RB, SAVE_L
- | lg GL:RB, L:RB->glref
-- | lghi TMP1, ~LJ_VMST_C
-- | stg TMP1, GL:RB->vmstate
-+ | lghi TMPR1, ~LJ_VMST_C
-+ | stg TMPR1, GL:RB->vmstate
- | j ->vm_leave_unw
- |
- |->vm_unwind_rethrow:
-@@ -448,7 +470,7 @@ static void build_subroutines(BuildCtx *ctx)
- | aghi DISPATCH, GG_G2DISP
- | stg RD, SAVE_PC // Any value outside of bytecode is ok.
- | stg RD, SAVE_CFRAME
-- | stg RD, SAVE_NRES
-+ | st RD, SAVE_NRES
- | stg RD, SAVE_ERRF
- | stg KBASE, L:RB->cframe
- | clm RD, 1, L:RB->status
-@@ -484,8 +506,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lghi PC, FRAME_C
- |
- |1: // Entry point for vm_pcall above (PC = ftype).
-- | lgfr CARG3, CARG3
-- | stg CARG3, SAVE_NRES
-+ | st CARG3, SAVE_NRES
- | lgr L:RB, CARG1
- | stg CARG1, SAVE_L
- | lgr RA, CARG2
-@@ -531,7 +552,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
- | lghi RA, 0
- | stg RA, SAVE_ERRF // No error function.
-- | stg KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
-+ | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
- | aghi DISPATCH, GG_G2DISP
- | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
- |
-@@ -1081,8 +1102,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_KSHORT:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = signed int16 literal
-+ | // Assumes DUALNUM.
-+ | lhr RD, RD // Sign-extend literal to 32-bits.
-+ | setint RD
-+ | sllg TMPR1, RA, 3(r0)
-+ | stg RD, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_KNUM:
- | stg r0, 0(r0)
-@@ -1132,21 +1158,67 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
- case BC_GGET:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AND // RA = dst, RD = str const (~)
-+ | lg LFUNC:RB, -16(BASE)
-+ | cleartp LFUNC:RB
-+ | lg TAB:RB, LFUNC:RB->env
-+ | sllg TMPR1, RD, 3(r0)
-+ | lg STR:RC, 0(TMPR1, KBASE)
-+ | j ->BC_TGETS_Z
- break;
- case BC_GSET:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AND // RA = src, RD = str const (~)
-+ | lg LFUNC:RB, -16(BASE)
-+ | cleartp LFUNC:RB
-+ | lg TAB:RB, LFUNC:RB->env
-+ | sllg TMPR1, RD, 3(r0)
-+ | lg STR:RC, 0(TMPR1, KBASE)
-+ | j ->BC_TSETS_Z
- break;
-+
- case BC_TGETV:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
- case BC_TGETS:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0(r0) // Not yet implemented.
-+ |
-+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
-+ | l TMPR1, TAB:RB->hmask
-+ | n TMPR1, STR:RC->hash
-+ | lgfr TMPR1, TMPR1
-+ | mghi TMPR1, #NODE // TODO: not sure about this one, original: imul TMPRd, #NODE
-+ | ag NODE:TMPR1, TAB:RB->node
-+ | settp ITYPE, STR:RC, LJ_TSTR
-+ |1:
-+ | cg ITYPE, NODE:TMPR1->key
-+ | jne >4
-+ | // Get node value.
-+ | lg ITYPE, NODE:TMPR1->val
-+ | cghi ITYPE, LJ_TNIL
-+ | je >5 // Key found, but nil value?
-+ |2:
-+ | sllg RA, RA, 3(r0)
-+ | stg ITYPE, 0(TMPR1, RA)
-+ | ins_next
-+ |
-+ |4: // Follow hash chain.
-+ | lg NODE:TMPR1, NODE:TMPR1->next
-+ | cghi NODE:TMPR1, 0
-+ | jne <1
-+ | // End of hash chain: key not found, nil result.
-+ | lghi ITYPE, LJ_TNIL
-+ |
-+ |5: // Check for __index if table value is nil.
-+ | lg TAB:TMPR1, TAB:RB->metatable
-+ | cghi TAB:TMPR1, 0
-+ | je <2 // No metatable: done.
-+ | llgc TMPR2, TAB:TMPR1->nomm
-+ | tmll TMPR2, 1<<MM_index
-+ | jne <2 // 'no __index' flag set: done.
-+ | j ->vmeta_tgets // Caveat: preserve STR:RC.
- break;
- case BC_TGETB:
- | stg r0, 0(r0)
-@@ -1162,7 +1234,73 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETS:
- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ |
-+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
-+ | l TMPR1, TAB:RB->hmask
-+ | n TMPR1, STR:RC->hash
-+ | lgfr TMPR1, TMPR1
-+ | mghi TMPR1, #NODE
-+ | xr TMPR2, TMPR2
-+ | stc TMPR2, TAB:RB->nomm // Clear metamethod cache.
-+ | ag NODE:TMPR1, TAB:RB->node
-+ | settp ITYPE, STR:RC, LJ_TSTR
-+ |1:
-+ | cg ITYPE, NODE:TMPR1->key
-+ | jne >5
-+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
-+ | lghi TMPR2, LJ_TNIL
-+ | cg TMPR2, 0(TMPR1)
-+ | je >4 // Previous value is nil?
-+ |2:
-+ | llgc TMPR2, TAB:RB->marked
-+ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | jne >7
-+ |3: // Set node value.
-+ | sllg RA, RA, 3(r0)
-+ | lg ITYPE, 0(RA, BASE)
-+ | stg ITYPE, 0(TMPR1)
-+ | ins_next
-+ |
-+ |4: // Check for __newindex if previous value is nil.
-+ | lg TAB:ITYPE, TAB:RB->metatable
-+ | cghi TAB:ITYPE, 0
-+ | je <2
-+ | llgc TMPR2, TAB:ITYPE->nomm
-+ | tmll TMPR2, 1<<MM_newindex
-+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-+ | j <2
-+ |
-+ |5: // Follow hash chain.
-+ | lg NODE:TMPR1, NODE:TMPR1->next
-+ | cghi NODE:TMPR1, 0
-+ | jne <1
-+ | // End of hash chain: key not found, add a new one.
-+ |
-+ | // But check for __newindex first.
-+ | lg TAB:TMPR1, TAB:RB->metatable
-+ | cghi TAB:TMPR1, 0
-+ | je >6 // No metatable: continue.
-+ | llgc TMPR2, TAB:TMPR1->nomm
-+ | tmll TMPR2, 1<<MM_newindex
-+ | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-+ |6:
-+ | stg ITYPE, TMP_STACK
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base
-+ | la CARG3, TMP_STACK // TODO: lea CARG3, ITYPE... not sure.
-+ | lgr CARG2, TAB:RB
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
-+ | // Handles write barrier for the new key. TValue * returned in r2 (CRET1).
-+ | lgr TMPR1, CRET1
-+ | lg L:CRET1, SAVE_L
-+ | lg BASE, L:CRET1->base
-+ | llgc RA, PC_RA
-+ | j <2 // Must check write barrier for value.
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, ITYPE
-+ | j <3
- break;
- case BC_TSETB:
- | stg r0, 0(r0)
-@@ -1245,8 +1383,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- /* fallthrough */
- case BC_RET0:
- |5:
-- | llgc TMP1, PC_RB
-- | cgr TMP1, RD
-+ | llgc TMPR1, PC_RB
-+ | cgr TMPR1, RD
- | jh >6
- default:
- break;
-@@ -1262,13 +1400,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- |
- |6: // Fill up results with nil.
-- | lghi TMP1, LJ_TNIL
-+ | lghi TMPR1, LJ_TNIL
- if (op == BC_RET) {
-- | stg TMP1, -16(KBASE) // Note: relies on shifted base.
-+ | stg TMPR1, -16(KBASE) // Note: relies on shifted base.
- | la KBASE, 8(KBASE)
- } else {
- | sllg RC, RD, 3(r0) // RC used as temp.
-- | stg TMP1, -24(RC, BASE)
-+ | stg TMPR1, -24(RC, BASE)
- }
- | la RD, 1(RD)
- | j <5
-@@ -1348,13 +1486,61 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
- case BC_JFUNCV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+#if !LJ_HASJIT
- break;
-+#endif
-+ | stg r0, 0(r0) // NYI: compiled vararg functions
-+ break; /* NYI: compiled vararg functions. */
-+
- case BC_IFUNCV:
-- | stg r0, 0(r0) // Not implemented, seg fault.
-- | stg r0, 0(r0)
-+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-+ | sllg TMPR1, NARGS:RD, 3(r0)
-+ | la RB, (FRAME_VARG+8)(TMPR1)
-+ | la RD, 8(TMPR1, BASE)
-+ | lg LFUNC:KBASE, -16(BASE)
-+ | stg RB, -8(RD) // Store delta + FRAME_VARG.
-+ | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC.
-+ | lg L:RB, SAVE_L
-+ | sllg RA, RA, 3(r0)
-+ | la RA, 0(RA, RD)
-+ | cg RA, L:RB->maxstack
-+ | jh ->vm_growstack_v // Need to grow stack.
-+ | lgr RA, BASE
-+ | lgr BASE, RD
-+ | llgc RB, (PC2PROTO(numparams)-4)(PC)
-+ | cghi RB, 0
-+ | je >2
-+ | aghi RA, 8
-+ | lghi TMPR1, LJ_TNIL
-+ |1: // Copy fixarg slots up to new frame.
-+ | la RA, 8(RA)
-+ | cgr RA, BASE
-+ | jnl >3 // Less args than parameters?
-+ | lg KBASE, -16(RA)
-+ | stg KBASE, 0(RD)
-+ | la RD, 8(RD)
-+ | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC).
-+ | aghi RB, -1
-+ | jne <1
-+ | // TODO: brctg instead of decrement/branch
-+ |2:
-+ if (op == BC_JFUNCV) {
-+ | llgh RD, PC_RD
-+ | j =>BC_JLOOP
-+ } else {
-+ | lg KBASE, (PC2PROTO(k)-4)(PC)
-+ | ins_next
-+ }
-+ |
-+ |3: // Clear missing parameters.
-+ | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here.
-+ | la RD, 8(RD)
-+ | aghi RB, -1
-+ | jne <3
-+ | // TODO: brctg instead of decrement/branch
-+ | j <2
- break;
-
- case BC_FUNCC:
-@@ -1380,16 +1566,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | basr r14, KBASE // (lua_State *L)
- } else {
- | // (lua_State *L, lua_CFunction f)
-- | lg TMP1, (DISPATCH_GL(wrapf))(DISPATCH)
-- | basr r14, TMP1 // TODO: TMP1==r14, is this ok?
-+ | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
-+ | basr r14, TMPR1 // TODO: TMPR1==r14, is this ok?
- }
- | // nresults returned in r2 (CRET1).
- | lgr RD, CRET1
- | lg BASE, L:RB->base
- | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
- | set_vmstate INTERP
-- | sllg TMP1, RD, 3(r0)
-- | la RA, 0(TMP1, BASE)
-+ | sllg TMPR1, RD, 3(r0)
-+ | la RA, 0(TMPR1, BASE)
- | lcgr RA, RA
- | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
- | lg PC, -8(BASE) // Fetch PC of caller.
---
-2.20.1
-
-
-From ad997c842c5268a3246212a6d382e9d4dfcbeb5a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Sat, 17 Dec 2016 19:56:56 -0500
-Subject: [PATCH 129/247] Add support for print function call.
-
-Hello world now works.
-
-> print("hello world!")
-hello world!
----
- src/vm_s390x.dasc | 184 +++++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 172 insertions(+), 12 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 3f4cea6..0454e9c 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -37,7 +37,7 @@
- |.define ITYPE, r13 //
- |
- |// The following temporaries are not saved across C calls, except for RD.
--|.define RA, r1 // Cannot be dereferenced.
-+|.define RA, r1
- |.define RB, r12
- |.define RC, r5 // Overlaps CARG4.
- |.define RD, r6 // Overlaps CARG5. Callee-saved.
-@@ -686,11 +686,30 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Call metamethod ----------------------------------------------------
- |
- |->vmeta_call_ra:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | la RA, 16(RA, BASE) // RA previously set to RA*8.
- |->vmeta_call: // Resolve and call __call metamethod.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
-+ | stg NARGS:RD, TMP_STACK // Save RA, RC for us (not sure about this).
-+ | lgr RB, RA
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base
-+ | lay CARG2, -16(RA)
-+ | sllg RD, RD, 3(r0)
-+ | lay CARG3, -8(RA, RD)
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
-+ | lgr RA, RB
-+ | lg L:RB, SAVE_L
-+ | lg BASE, L:RB->base
-+ | lg NARGS:RD, TMP_STACK
-+ | lg LFUNC:RB, -16(RA)
-+ | aghi NARGS:RD, 1 // 32-bit on x64.
-+ | // This is fragile. L->base must not move, KBASE must always be defined.
-+ | cgr KBASE, BASE // Continue with CALLT if flag set.
-+ | je ->BC_CALLT_Z
-+ | cleartp LFUNC:RB
-+ | lgr BASE, RA
-+ | ins_call // Otherwise call resolved metamethod.
- |
- |//-- Argument coercion for 'for' statement ------------------------------
- |
-@@ -704,14 +723,20 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc, name
- |->ff_ .. name:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |.endmacro
- |
- |.macro .ffunc_1, name
- |->ff_ .. name:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |.endmacro
- |
- |.macro .ffunc_2, name
- |->ff_ .. name:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |.endmacro
- |
- |.macro .ffunc_n, name, op
-@@ -733,22 +758,36 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Base library: checks -----------------------------------------------
- |
- |.ffunc_1 assert
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_1 type
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Base library: getters and setters ---------------------------------
- |
- |.ffunc_1 getmetatable
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_2 setmetatable
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_2 rawget
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Base library: conversions ------------------------------------------
- |
- |.ffunc tonumber
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_1 tostring
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Base library: iterators -------------------------------------------
- |
-@@ -764,8 +803,12 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Base library: catch errors ----------------------------------------
- |
- |.ffunc_1 pcall
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_2 xpcall
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Coroutine library --------------------------------------------------
- |
-@@ -843,14 +886,20 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- String library -----------------------------------------------------
- |
- |.ffunc string_byte // Only handle the 1-arg case here.
-+ | stg r0, 0(r0)
- |
- |.ffunc string_char // Only handle the 1-arg case here.
-+ | stg r0, 0(r0)
- |->fff_newstr:
-+ | stg r0, 0(r0)
- |->fff_resstr:
-+ | stg r0, 0(r0)
- |
- |.ffunc string_sub
-+ | stg r0, 0(r0)
- |
- |->fff_emptystr: // Range underflow.
-+ | stg r0, 0(r0)
- |
- |.macro ffstring_op, name
- | .ffunc_1 string_ .. name
-@@ -889,6 +938,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_bit_sh, name, ins
- | .ffunc_bit name, 1, .ffunc_2
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |.endmacro
- |
- |.ffunc_bit_sh bit_lshift, shl
-@@ -900,36 +951,64 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->fff_fallback_2:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->fff_fallback_1:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->fff_fallback: // Call fast function fallback handler.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |// Reconstruct previous base for vmeta_call during tailcall.
- |->vm_call_tail:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->fff_gcstep: // Call GC step function.
- | // BASE = new base, RD = nargs+1
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Special dispatch targets -------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_record: // Dispatch target for recording phase.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vm_rethook: // Dispatch target for return hooks.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vm_inshook: // Dispatch target for instr/line hooks.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->cont_hook: // Continue from hook yield.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vm_hotloop: // Hot loop counter underflow.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vm_callhook: // Dispatch target for call hooks.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vm_hotcall: // Hot call counter underflow.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->cont_stitch: // Trace stitching.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |->vm_profhook: // Dispatch target for profiler hook.
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Trace exit handler -------------------------------------------------
-@@ -938,7 +1017,11 @@ static void build_subroutines(BuildCtx *ctx)
- |// Called from an exit stub with the exit number on the stack.
- |// The 16 bit exit number is stored with two (sign-extended) push imm8.
- |->vm_exit_handler:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->vm_exit_interp:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- |//-- Math helper functions ----------------------------------------------
-@@ -1093,9 +1176,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
-+ /* -- Constant ops ------------------------------------------------------ */
-+
- case BC_KSTR:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AND // RA = dst, RD = str const (~)
-+ | sllg RD, RD, 3(r0)
-+ | lg RD, 0(RD, KBASE)
-+ | settp RD, LJ_TSTR
-+ | sllg RA, RA, 3(r0)
-+ | stg RD, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_KCDATA:
- | stg r0, 0(r0)
-@@ -1201,7 +1292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | je >5 // Key found, but nil value?
- |2:
- | sllg RA, RA, 3(r0)
-- | stg ITYPE, 0(TMPR1, RA)
-+ | stg ITYPE, 0(RA, BASE)
- | ins_next
- |
- |4: // Follow hash chain.
-@@ -1314,18 +1405,87 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
-+ /* -- Calls and vararg handling ----------------------------------------- */
-+
- case BC_CALL: case BC_CALLM:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
-+ if (op == BC_CALLM) {
-+ | ag NARGS:RD, SAVE_MULTRES // TODO: MULTRES is 32-bit on x64
-+ }
-+ | sllg RA, RA, 3(r0)
-+ | lg LFUNC:RB, 0(BASE, RA)
-+ | checkfunc LFUNC:RB, ->vmeta_call_ra
-+ | la BASE, 16(RA, BASE)
-+ | lgr RD, RC
-+ | ins_call
- break;
-+
- case BC_CALLMT:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
- case BC_CALLT:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = base, RD = nargs+1
-+ | sllg RA, RA, 3(r0)
-+ | la RA, 16(RA, BASE)
-+ | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint.
-+ | lg LFUNC:RB, -16(RA)
-+ | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
-+ |->BC_CALLT_Z:
-+ | lg PC, -8(BASE)
-+ | tmll PC, FRAME_TYPE
-+ | jne >7
-+ |1:
-+ | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below.
-+ | stg NARGS:RD, SAVE_MULTRES // 32-bit on x64.
-+ | aghi NARGS:RD, -1
-+ | je >3
-+ |2: // Move args down.
-+ | // TODO: mvc or something here?
-+ | lg RB, 0(RA)
-+ | la RA, 8(RA)
-+ | stg RB, 0(KBASE)
-+ | la KBASE, 8(KBASE)
-+ | // TODO: replace decrement/branch with brctg
-+ | aghi NARGS:RD, -1
-+ | jne <2
-+ |
-+ | lg LFUNC:RB, -16(BASE)
-+ |3:
-+ | cleartp LFUNC:RB
-+ | lg NARGS:RD, SAVE_MULTRES
-+ | llgc TMPR1, LFUNC:RB->ffid
-+ | cghi TMPR1, 1 // (> FF_C) Calling a fast function?
-+ | jh >5
-+ |4:
-+ | ins_callt
-+ |
-+ |5: // Tailcall to a fast function.
-+ | tmll PC, FRAME_TYPE // Lua frame below?
-+ | jne <4
-+ | llgc RA, PC_RA
-+ | lcgr RA, RA
-+ | sllg RA, RA, 3(r0)
-+ | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE.
-+ | cleartp LFUNC:KBASE
-+ | lg KBASE, LFUNC:KBASE->pc
-+ | lg KBASE, (PC2PROTO(k))(KBASE)
-+ | j <4
-+ |
-+ |7: // Tailcall from a vararg function.
-+ | aghi PC, -FRAME_VARG
-+ | tmll PC, FRAME_TYPEP
-+ | jne >8 // Vararg frame below?
-+ | sgr BASE, PC // Need to relocate BASE/KBASE down.
-+ | lgr KBASE, BASE
-+ | lg PC, -8(BASE)
-+ | j <1
-+ |8:
-+ | aghi PC, FRAME_VARG
-+ | j <1
- break;
-+
- case BC_ITERC:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 4e35973e019dff2e4bb909f96a735b2f894d862e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 19 Dec 2016 10:49:21 -0500
-Subject: [PATCH 130/247] Fix KSHORT destination slot address.
-
----
- src/vm_s390x.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 0454e9c..34e6843 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1197,7 +1197,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Assumes DUALNUM.
- | lhr RD, RD // Sign-extend literal to 32-bits.
- | setint RD
-- | sllg TMPR1, RA, 3(r0)
-+ | sllg RA, RA, 3(r0)
- | stg RD, 0(RA, BASE)
- | ins_next
- break;
---
-2.20.1
-
-
-From 64a65a9a395630f641e2d7657cd9f2ea03d36438 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 19 Dec 2016 14:21:24 -0500
-Subject: [PATCH 131/247] Add some more instructions to DynASM.
-
----
- dynasm/dasm_s390x.lua | 16 +++++++++++++++-
- 1 file changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index a4b01cc..dc1f5e5 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -694,7 +694,6 @@ map_op = {
- chlr_2 = "0000b9dd0000h",
- cfi_2 = "c20d00000000n",
- cgfi_2 = "c20c00000000n",
-- cghi_2 = "0000a70f0000i",
- cih_2 = "cc0d00000000n",
- cl_2 = "000055000000j",
- clr_2 = "000000001500g",
-@@ -1183,7 +1182,20 @@ map_op = {
- cfebr_3 = "0000b3980000RRF-e",
- cfebra_4 = "0000b3980000RRF-e",
- -- RXE instructions
-+ adb_2 = "ed000000001aRXE",
-+ aeb_2 = "ed000000000aRXE",
-+ cdb_2 = "ed0000000019RXE",
-+ ceb_2 = "ed0000000009RXE",
-+ ddb_2 = "ed000000001dRXE",
-+ deb_2 = "ed000000000dRXE",
-+ mdb_2 = "ed000000001cRXE",
-+ mdeb_2 = "ed000000000cRXE",
-+ meeb_2 = "ed0000000017RXE",
-+ mxdb_2 = "ed0000000007RXE",
- sqdb_2 = "ed0000000015RXE",
-+ sqeb_2 = "ed0000000014RXE",
-+ sdb_2 = "ed000000001bRXE",
-+ seb_2 = "ed000000000bRXE",
- -- RRF-b instructions
- didbr_4 = "0000b3580000RRF-b",
- -- S mode instructions
-@@ -1192,6 +1204,8 @@ map_op = {
- svc_1 = "000000000a00iI",
- -- RI-a mode instructions
- -- TODO: change "i" to "RI-a"
-+ chi_2 = "0000a70e0000i",
-+ cghi_2 = "0000a70f0000i",
- mhi_2 = "0000a70c0000i",
- mghi_2 = "0000a70d0000i",
- -- RI-b mode instructions
---
-2.20.1
-
-
-From 0794de8b7e6e6099ba96e2f162acb841bab11cee Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 19 Dec 2016 14:21:48 -0500
-Subject: [PATCH 132/247] Add basic integer for loop support.
-
-> for i=1,3 do print(i) end
-1
-2
-3
----
- src/vm_s390x.dasc | 172 ++++++++++++++++++++++++++++++++++++++++++----
- 1 file changed, 158 insertions(+), 14 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 34e6843..c1d6f5f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -263,6 +263,15 @@
- |.define PC_RC, -3(PC)
- |.define PC_RD, -4(PC)
- |
-+|.macro branchPC, reg
-+| // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
-+| // Can't clobber TMPR1 or condition code.
-+| lgr TMPR2, TMPR1 // Workaround because TMPR2 == r0 and can't be used in lay.
-+| sllg TMPR1, reg, 2(r0)
-+| lay PC, (-BCBIAS_J*4)(TMPR1, PC)
-+| lgr TMPR1, TMPR2
-+|.endmacro
-+|
- |// Set current VM state.
- |.macro set_vmstate, st
- | lghi TMPR1, ~LJ_VMST_..st
-@@ -1129,8 +1138,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_MOV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = src
-+ | sllg RD, RD, 3(r0)
-+ | lg RB, 0(RD, BASE)
-+ | sllg RA, RA, 3(r0)
-+ | stg RB, 0(RA, BASE)
-+ | ins_next_
- break;
- case BC_NOT:
- | stg r0, 0(r0)
-@@ -1274,8 +1287,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_TGETS:
-- | stg r0, 0(r0) // Not yet implemented.
-- |
-+ | ins_ABC
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | lghi TMPR1, -1
-+ | xgr RC, TMPR1
-+ | sllg RC, RC, 3(r0)
-+ | lg STR:RC, 0(RC, BASE)
-+ | checktab TAB:RB, ->vmeta_tgets
- |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
- | l TMPR1, TAB:RB->hmask
- | n TMPR1, STR:RC->hash
-@@ -1582,26 +1601,151 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | j <1
- break;
-+
-+ /* -- Loops and branches ------------------------------------------------ */
-+
-+ |.define FOR_IDX, 0(RA)
-+ |.define FOR_STOP, 8(RA)
-+ |.define FOR_STEP, 16(RA)
-+ |.define FOR_EXT, 24(RA)
-+
- case BC_FORL:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
- case BC_JFORI:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- break;
- case BC_JFORL:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+#if !LJ_HASJIT
- break;
-+#endif
- case BC_FORI:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- break;
- case BC_IFORL:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ vk = (op == BC_IFORL || op == BC_JFORL);
-+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
-+ | sllg RA, RA, 3(r0)
-+ | la RA, 0(RA, BASE)
-+ | lg RB, FOR_IDX
-+ | checkint RB, >9
-+ | lg TMPR1, FOR_STOP
-+ if (!vk) {
-+ | checkint TMPR1, ->vmeta_for
-+ | lg ITYPE, FOR_STEP
-+ | chi ITYPE, 0; jl >5
-+ | srag ITYPE, ITYPE, 47(r0)
-+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
-+ } else {
-+#ifdef LUA_USE_ASSERT
-+ | // lg TMPR1, FOR_STOP
-+ | checkinttp TMPR1, ->assert_bad_for_arg_type
-+ | lg TMPR2, FOR_STEP
-+ | checkinttp TMPR2, ->assert_bad_for_arg_type
-+#endif
-+ | lg ITYPE, FOR_STEP
-+ | chi ITYPE, 0; jl >5
-+ | ar RB, ITYPE; jo >1
-+ | setint RB
-+ | stg RB, FOR_IDX
-+ }
-+ | cr RB, TMPR1
-+ | stg RB, FOR_EXT
-+ if (op == BC_FORI) {
-+ | jle >7
-+ |1:
-+ |6:
-+ | branchPC RD
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | llgh RD, PC_RD
-+ | jle =>BC_JLOOP
-+ |1:
-+ |6:
-+ } else if (op == BC_IFORL) {
-+ | jh >7
-+ |6:
-+ | branchPC RD
-+ |1:
-+ } else {
-+ | jle =>BC_JLOOP
-+ |1:
-+ |6:
-+ }
-+ |7:
-+ | ins_next
-+ |
-+ |5: // Invert check for negative step.
-+ if (!vk) {
-+ | srag ITYPE, ITYPE, 47(r0)
-+ | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
-+ } else {
-+ | ar RB, ITYPE; jo <1
-+ | setint RB
-+ | stg RB, FOR_IDX
-+ }
-+ | cr RB, TMPR1
-+ | stg RB, FOR_EXT
-+ if (op == BC_FORI) {
-+ | jhe <7
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | llgh RD, PC_RD
-+ | jhe =>BC_JLOOP
-+ } else if (op == BC_IFORL) {
-+ | jl <7
-+ } else {
-+ | jhe =>BC_JLOOP
-+ }
-+ | j <6
-+ |9: // Fallback to FP variant.
-+ if (!vk) {
-+ | jhe ->vmeta_for
-+ }
-+ if (!vk) {
-+ | lg TMPR2, FOR_STOP
-+ | checknumtp TMPR2, ->vmeta_for
-+ } else {
-+#ifdef LUA_USE_ASSERT
-+ | lg TMPR2, FOR_STOP
-+ | checknumtp TMPR2, ->assert_bad_for_arg_type
-+ | lg TMPR2, FOR_STEP
-+ | checknumtp TMPR2, ->assert_bad_for_arg_type
-+#endif
-+ }
-+ | lg RB, FOR_STEP
-+ if (!vk) {
-+ | checknum RB, ->vmeta_for
-+ }
-+ | ld f0, FOR_IDX
-+ | ld f1, FOR_STOP
-+ if (vk) {
-+ | adb f0, FOR_STEP
-+ | std f0, FOR_IDX
-+ | cghi RB, 0; jl >3
-+ } else {
-+ | // TODO: need cmp here?
-+ | jl >3
-+ }
-+ | cdbr f1, f0
-+ |1:
-+ | std f0, FOR_EXT
-+ if (op == BC_FORI) {
-+ | jnl <7
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | llgh RD, PC_RD
-+ | jnl =>BC_JLOOP
-+ } else if (op == BC_IFORL) {
-+ | jl <7
-+ } else {
-+ | jnl =>BC_JLOOP
-+ }
-+ | j <6
-+ |
-+ |3: // Invert comparison if step is negative.
-+ | cdbr f0, f1
-+ | j <1
- break;
-+
- case BC_ITERL:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From d8886158077e1f73673eea30bc00862a90896d12 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 19 Dec 2016 16:03:21 -0500
-Subject: [PATCH 133/247] Fix floating point fallback code for for loops.
-
-Adds a dependency on clfi. Not sure how to work around (don't
-really want to always introduce a temporary).
----
- src/vm_s390x.dasc | 40 ++++++++++++++++++++++++++++------------
- 1 file changed, 28 insertions(+), 12 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index c1d6f5f..c387975 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -18,6 +18,10 @@
- |// ar0,ar1 | TLS | volatile |
- |// ar2-ar15 | | volatile |
- |
-+|// Instructions used that are not in base z/Architecture:
-+|// clfi (compare logical immediate) [requires z9-109]
-+|// TODO: alternative instructions?
-+|
- |.arch s390x
- |.section code_op, code_sub
- |
-@@ -225,18 +229,18 @@
- |// Macros to test operand types.
- |.macro checktp_nc, reg, tp, target
- | srag ITYPE, reg, 47(r0)
--| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
-+| clfi ITYPE, tp
- | jne target
- |.endmacro
- |.macro checktp, reg, tp, target
- | srag ITYPE, reg, 47(r0)
- | cleartp reg
--| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
-+| clfi ITYPE, tp
- | jne target
- |.endmacro
- |.macro checktptp, src, tp, target
- | srag ITYPE, src, 47(r0)
--| cghi ITYPE, tp // Sign extend tp from 16- -> 64-bits.
-+| clfi ITYPE, tp
- | jne target
- |.endmacro
- |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
-@@ -245,7 +249,7 @@
- |
- |.macro checknumx, reg, target, jump
- | srag ITYPE, reg, 47(r0)
--| cghi ITYPE, LJ_TISNUM // Sign extend LJ_TISNUM tp from 16- to 64-bits.
-+| clfi ITYPE, LJ_TISNUM
- | jump target
- |.endmacro
- |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
-@@ -723,8 +727,19 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Argument coercion for 'for' statement ------------------------------
- |
- |->vmeta_for:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | lgr CARG2, RA
-+ | lgr CARG1, RB
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_for // (lua_State *L, TValue *base)
-+ | lg BASE, L:RB->base
-+ | llgc OP, PC_OP
-+ | llgc RA, PC_RA
-+ | llgh RD, PC_RD
-+ | sllg TMPR1, OP, 3(r0)
-+ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI.
-+ | br TMPR1
- |
- |//-----------------------------------------------------------------------
- |//-- Fast functions -----------------------------------------------------
-@@ -1215,8 +1230,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_KNUM:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = num const
-+ | sllg RD, RD, 3(r0)
-+ | ld f0, 0(RD, KBASE)
-+ | sllg RA, RA, 3(r0)
-+ | std f0, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_KPRI:
- | stg r0, 0(r0)
-@@ -1720,11 +1739,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- if (vk) {
- | adb f0, FOR_STEP
- | std f0, FOR_IDX
-- | cghi RB, 0; jl >3
-- } else {
-- | // TODO: need cmp here?
-- | jl >3
- }
-+ | cghi RB, 0; jl >3
- | cdbr f1, f0
- |1:
- | std f0, FOR_EXT
---
-2.20.1
-
-
-From 5e69e0a4f0126d7f065d98181b875c546581b1db Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 20 Dec 2016 17:50:29 +0530
-Subject: [PATCH 134/247] Updated encoding for instructions
-
-Have replaced the characters used for encoding with their respective addressing modes
----
- dynasm/dasm_s390x.lua | 1204 ++++++++++++++++++++---------------------
- 1 file changed, 602 insertions(+), 602 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index dc1f5e5..a5f280d 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -513,10 +513,12 @@ end
-
- local function parse_mask(mask)
- local m3 = parse_number(mask)
-- if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
-- return m3
-- else
-- werror("Mask value should be 0,1 or 3-7: ", m3)
-+ if m3 then
-+ if ((m3 == 1) or (m3 == 0) or ( m3 >=3 and m3 <=7)) then
-+ return m3
-+ else
-+ werror("Mask value should be 0,1 or 3-7: ", m3)
-+ end
- end
- end
-
-@@ -573,593 +575,594 @@ end
-
- -- Template strings for s390x instructions.
- map_op = {
-- a_2 = "00005a000000j",
-- ar_2 = "000000001a00g",
-- ay_2 = "e3000000005al",
-- ag_2 = "e30000000008l",
-- agr_2 = "0000b9080000h",
-- agf_2 = "e30000000018l",
-- agfr_2 = "0000b9180000h",
-- axbr_2 = "0000b34a0000h",
-- adbr_2 = "0000b31a0000h",
-- aebr_2 = "0000b30a0000h",
-- aghi_2 = "0000a70b0000i",
-- ah_2 = "00004a000000j",
-- ahi_2 = "0000a70a0000i",
-- ahy_2 = "e3000000007al",
-- afi_2 = "c20900000000n",
-- agfi_2 = "c20800000000n",
-- aih_2 = "cc0800000000n",
-- al_2 = "00005e000000j",
-- alr_2 = "000000001e00g",
-- aly_2 = "e3000000005el",
-- alg_2 = "e3000000000al",
-- algr_2 = "0000b90a0000h",
-- algf_2 = "e3000000001al",
-- algfr_2 = "0000b91a0000h",
-- alfi_2 = "c20b00000000n",
-- algfi_2 = "c20a00000000n",
-- alc_2 = "e30000000098l",
-- alcr_2 = "0000b9980000h",
-- alcg_2 = "e30000000088l",
-- alcgr_2 = "0000b9880000h",
-- alsih_2 = "cc0a00000000n",
-- alsihn_2 = "cc0b00000000n",
-- axr_2 = "000000003600g",
-- ad_2 = "00006a000000j",
-- adr_2 = "000000002a00g",
-- ae_2 = "00007a000000j",
-- aer_2 = "000000003a00g",
-- aw_2 = "00006e000000j",
-- awr_2 = "000000002e00g",
-- au_2 = "00007e000000j",
-- aur_2 = "000000003e00g",
-- n_2 = "000054000000j",
-- nr_2 = "000000001400g",
-- ny_2 = "e30000000054l",
-- ng_2 = "e30000000080l",
-- ngr_2 = "0000b9800000h",
-- nihf_2 = "c00a00000000n",
-- nihh_2 = "0000a5040000i",
-- nihl_2 = "0000a5050000i",
-- nilf_2 = "c00b00000000n",
-- nilh_2 = "0000a5060000i",
-- nill_2 = "0000a5070000i",
-- bal_2 = "000045000000j",
-- balr_2 = "000000000500g",
-- bas_2 = "00004d000000j",
-- basr_2 = "000000000d00g",
-- bassm_2 = "000000000c00g",
-- bsa_2 = "0000b25a0000h",
-- bsm_2 = "000000000b00g",
-- bakr_2 = "0000b2400000h",
-- bsg_2 = "0000b2580000h",
-- bc_2 = "000047000000k",
-- bcr_2 = "000000000700g",
-- bct_2 = "000046000000j",
-- bctr_2 = "000000000600g",
-- bctg_2 = "e30000000046l",
-- bctgr_2 = "0000b9460000h",
-- bxh_3 = "000086000000q",
-- bxhg_3 = "eb0000000044s",
-- bxle_3 = "000087000000q",
-- bxleg_3 = "eb0000000045s",
-- brasl_2 = "c00500000000o",
-- brcl_2 = "c00400000000p",
-- brcth_2 = "cc0600000000o",
-- cksm_2 = "0000b2410000h",
-- km_2 = "0000b92e0000h",
-- kmf_2 = "0000b92a0000h",
-- kmc_2 = "0000b92f0000h",
-- kmo_2 = "0000b92b0000h",
-- c_2 = "000059000000j",
-- cr_2 = "000000001900g",
-- cy_2 = "e30000000059l",
-- cg_2 = "e30000000020l",
-- cgr_2 = "0000b9200000h",
-- cgf_2 = "e30000000030l",
-- cgfr_2 = "0000b9300000h",
-- cxbr_2 = "0000b3490000h",
-- cxtr_2 = "0000b3ec0000h",
-- cxr_2 = "0000b3690000h",
-- cdbr_2 = "0000b3190000h",
-- cdtr_2 = "0000b3e40000h",
-- cd_2 = "000069000000j",
-- cdr_2 = "000000002900g",
-- cebr_2 = "0000b3090000h",
-- ce_2 = "000079000000j",
-- cer_2 = "000000003900g",
-- kxbr_2 = "0000b3480000h",
-- kxtr_2 = "0000b3e80000h",
-- kdbr_2 = "0000b3180000h",
-- kdtr_2 = "0000b3e00000h",
-- kebr_2 = "0000b3080000h",
-- cs_3 = "0000ba000000q",
-- csy_3 = "eb0000000014s",
-- csg_3 = "eb0000000030s",
-- csp_2 = "0000b2500000h",
-- cspg_2 = "0000b98a0000h",
-- cextr_2 = "0000b3fc0000h",
-- cedtr_2 = "0000b3f40000h",
-- cds_3 = "0000bb000000q",
-- cdsy_3 = "eb0000000031s",
-- cdsg_3 = "eb000000003es",
-- ch_2 = "000049000000j",
-- chy_2 = "e30000000079l",
-- cgh_2 = "e30000000034l",
-- chrl_2 = "c60500000000o",
-- cghrl_2 = "c60400000000o",
-- chf_2 = "e300000000cdl",
-- chhr_2 = "0000b9cd0000h",
-- chlr_2 = "0000b9dd0000h",
-- cfi_2 = "c20d00000000n",
-- cgfi_2 = "c20c00000000n",
-- cih_2 = "cc0d00000000n",
-- cl_2 = "000055000000j",
-- clr_2 = "000000001500g",
-- cly_2 = "e30000000055l",
-- clg_2 = "e30000000021l",
-- clgr_2 = "0000b9210000h",
-- clgf_2 = "e30000000031l",
-- clgfr_2 = "0000b9310000h",
-- clmh_3 = "eb0000000020t",
-- clm_3 = "0000bd000000r",
-- clmy_3 = "eb0000000021t",
-- clhf_2 = "e300000000cfl",
-- clhhr_2 = "0000b9cf0000h",
-- clhlr_2 = "0000b9df0000h",
-- clfi_2 = "c20f00000000n",
-- clgfi_2 = "c20e00000000n",
-- clih_2 = "cc0f00000000n",
-- clcl_2 = "000000000f00g",
-- clcle_3 = "0000a9000000q",
-- clclu_3 = "eb000000008fs",
-- clrl_2 = "c60f00000000o",
-- clhrl_2 = "c60700000000o",
-- clgrl_2 = "c60a00000000o",
-- clghrl_2 = "c60600000000o",
-- clgfrl_2 = "c60e00000000o",
-- clst_2 = "0000b25d0000h",
-- crl_2 = "c60d00000000o",
-- cgrl_2 = "c60800000000o",
-- cgfrl_2 = "c60c00000000o",
-- cuse_2 = "0000b2570000h",
-- cmpsc_2 = "0000b2630000h",
-- kimd_2 = "0000b93e0000h",
-- klmd_2 = "0000b93f0000h",
-- kmac_2 = "0000b91e0000h",
-- thdr_2 = "0000b3590000h",
-- thder_2 = "0000b3580000h",
-- cxfbr_2 = "0000b3960000h",
-- cxftr_2 = "0000b9590000h",
-- cxfr_2 = "0000b3b60000h",
-- cdfbr_2 = "0000b3950000h",
-- cdftr_2 = "0000b9510000h",
-- cdfr_2 = "0000b3b50000h",
-- cefbr_2 = "0000b3940000h",
-- cefr_2 = "0000b3b40000h",
-- cxgbr_2 = "0000b3a60000h",
-- cxgtr_2 = "0000b3f90000h",
-- cxgr_2 = "0000b3c60000h",
-- cdgbr_2 = "0000b3a50000h",
-- cdgtr_2 = "0000b3f10000h",
-- cdgr_2 = "0000b3c50000h",
-- cegbr_2 = "0000b3a40000h",
-- cegr_2 = "0000b3c40000h",
-- cxstr_2 = "0000b3fb0000h",
-- cdstr_2 = "0000b3f30000h",
-- cxutr_2 = "0000b3fa0000h",
-- cdutr_2 = "0000b3f20000h",
-- cvb_2 = "00004f000000j",
-- cvby_2 = "e30000000006l",
-- cvbg_2 = "e3000000000el",
-- cvd_2 = "00004e000000j",
-- cvdy_2 = "e30000000026l",
-- cvdg_2 = "e3000000002el",
-- cuxtr_2 = "0000b3ea0000h",
-- cudtr_2 = "0000b3e20000h",
-- cu42_2 = "0000b9b30000h",
-- cu41_2 = "0000b9b20000h",
-- cpya_2 = "0000b24d0000h",
-- d_2 = "00005d000000j",
-- dr_2 = "000000001d00g",
-- dxbr_2 = "0000b34d0000h",
-- dxr_2 = "0000b22d0000h",
-- ddbr_2 = "0000b31d0000h",
-- dd_2 = "00006d000000j",
-- ddr_2 = "000000002d00g",
-- debr_2 = "0000b30d0000h",
-- de_2 = "00007d000000j",
-- der_2 = "000000003d00g",
-- dl_2 = "e30000000097l",
-- dlr_2 = "0000b9970000h",
-- dlg_2 = "e30000000087l",
-- dlgr_2 = "0000b9870000h",
-- dsg_2 = "e3000000000dl",
-- dsgr_2 = "0000b90d0000h",
-- dsgf_2 = "e3000000001dl",
-- dsgfr_2 = "0000b91d0000h",
-- x_2 = "000057000000j",
-- xr_2 = "000000001700g",
-- xy_2 = "e30000000057l",
-- xg_2 = "e30000000082l",
-- xgr_2 = "0000b9820000h",
-- xihf_2 = "c00600000000n",
-- xilf_2 = "c00700000000n",
-- ex_2 = "000044000000j",
-- exrl_2 = "c60000000000o",
-- ear_2 = "0000b24f0000h",
-- esea_2 = "0000b99d0000h",
-- eextr_2 = "0000b3ed0000h",
-- eedtr_2 = "0000b3e50000h",
-- ecag_3 = "eb000000004cs",
-- efpc_2 = "0000b38c0000h",
-- epar_2 = "0000b2260000h",
-- epair_2 = "0000b99a0000h",
-- epsw_2 = "0000b98d0000h",
-- esar_2 = "0000b2270000h",
-- esair_2 = "0000b99b0000h",
-- esxtr_2 = "0000b3ef0000h",
-- esdtr_2 = "0000b3e70000h",
-- ereg_2 = "0000b2490000h",
-- eregg_2 = "0000b90e0000h",
-- esta_2 = "0000b24a0000h",
-- flogr_2 = "0000b9830000h",
-- hdr_2 = "000000002400g",
-- her_2 = "000000003400g",
-- iac_2 = "0000b2240000h",
-- ic_2 = "000043000000j",
-- icy_2 = "e30000000073l",
-- icmh_3 = "eb0000000080t",
-- icm_3 = "0000bf000000r",
-- icmy_3 = "eb0000000081t",
-- iihf_2 = "c00800000000n",
-- iihh_2 = "0000a5000000i",
-- iihl_2 = "0000a5010000i",
-- iilf_2 = "c00900000000n",
-- iilh_2 = "0000a5020000i",
-- iill_2 = "0000a5030000i",
-- ipm_2 = "0000b2220000h",
-- iske_2 = "0000b2290000h",
-- ivsk_2 = "0000b2230000h",
-- l_2 = "000058000000j",
-- lr_2 = "000000001800g",
-- ly_2 = "e30000000058l",
-- lg_2 = "e30000000004l",
-- lgr_2 = "0000b9040000h",
-- lgf_2 = "e30000000014l",
-- lgfr_2 = "0000b9140000h",
-- lghi_2 = "0000a7090000i",
-- lxr_2 = "0000b3650000h",
-- ld_2 = "000068000000j",
-- ldr_2 = "000000002800g",
-- ldy_2 = "ed0000000065l",
-- le_2 = "000078000000j",
-- ler_2 = "000000003800g",
-- ley_2 = "ed0000000064l",
-- lam_3 = "00009a000000q",
-- lamy_3 = "eb000000009as",
-- la_2 = "000041000000j",
-- lay_2 = "e30000000071l",
-- lae_2 = "000051000000j",
-- laey_2 = "e30000000075l",
-- larl_2 = "c00000000000o",
-- laa_3 = "eb00000000f8s",
-- laag_3 = "eb00000000e8s",
-- laal_3 = "eb00000000fas",
-- laalg_3 = "eb00000000eas",
-- lan_3 = "eb00000000f4s",
-- lang_3 = "eb00000000e4s",
-- lax_3 = "eb00000000f7s",
-- laxg_3 = "eb00000000e7s",
-- lao_3 = "eb00000000f6s",
-- laog_3 = "eb00000000e6s",
-- lt_2 = "e30000000012l",
-- ltr_2 = "000000001200g",
-- ltg_2 = "e30000000002l",
-- ltgr_2 = "0000b9020000h",
-- ltgf_2 = "e30000000032l",
-- ltgfr_2 = "0000b9120000h",
-- ltxbr_2 = "0000b3420000h",
-- ltxtr_2 = "0000b3de0000h",
-- ltxr_2 = "0000b3620000h",
-- ltdbr_2 = "0000b3120000h",
-- ltdtr_2 = "0000b3d60000h",
-- ltdr_2 = "000000002200g",
-- ltebr_2 = "0000b3020000h",
-- lter_2 = "000000003200g",
-- lb_2 = "e30000000076l",
-- lbr_2 = "0000b9260000h",
-- lgb_2 = "e30000000077l",
-- lgbr_2 = "0000b9060000h",
-- lbh_2 = "e300000000c0l",
-- lcr_2 = "000000001300g",
-- lcgr_2 = "0000b9030000h",
-- lcgfr_2 = "0000b9130000h",
-- lcxbr_2 = "0000b3430000h",
-- lcxr_2 = "0000b3630000h",
-- lcdbr_2 = "0000b3130000h",
-- lcdr_2 = "000000002300g",
-- lcdfr_2 = "0000b3730000h",
-- lcebr_2 = "0000b3030000h",
-- lcer_2 = "000000003300g",
-- lctl_3 = "0000b7000000q",
-- lctlg_3 = "eb000000002fs",
-- fixr_2 = "0000b3670000h",
-- fidr_2 = "0000b37f0000h",
-- fier_2 = "0000b3770000h",
-- ldgr_2 = "0000b3c10000h",
-- lgdr_2 = "0000b3cd0000h",
-- lh_2 = "000048000000j",
-- lhr_2 = "0000b9270000h",
-- lhy_2 = "e30000000078l",
-- lgh_2 = "e30000000015l",
-- lghr_2 = "0000b9070000h",
-- lhh_2 = "e300000000c4l",
-- lhi_2 = "0000a7080000i",
-- lhrl_2 = "c40500000000o",
-- lghrl_2 = "c40400000000o",
-- lfh_2 = "e300000000cal",
-- lgfi_2 = "c00100000000n",
-- lxdbr_2 = "0000b3050000h",
-- lxdr_2 = "0000b3250000h",
-- lxebr_2 = "0000b3060000h",
-- lxer_2 = "0000b3260000h",
-- ldebr_2 = "0000b3040000h",
-- lder_2 = "0000b3240000h",
-- llgf_2 = "e30000000016l",
-- llgfr_2 = "0000b9160000h",
-- llc_2 = "e30000000094l",
-- llcr_2 = "0000b9940000h",
-- llgc_2 = "e30000000090l",
-- llgcr_2 = "0000b9840000h",
-- llch_2 = "e300000000c2l",
-- llh_2 = "e30000000095l",
-- llhr_2 = "0000b9950000h",
-- llgh_2 = "e30000000091l",
-- llghr_2 = "0000b9850000h",
-- llhh_2 = "e300000000c6l",
-- llhrl_2 = "c40200000000o",
-- llghrl_2 = "c40600000000o",
-- llihf_2 = "c00e00000000n",
-- llihh_2 = "0000a50c0000i",
-- llihl_2 = "0000a50d0000i",
-- llilf_2 = "c00f00000000n",
-- llilh_2 = "0000a50e0000i",
-- llill_2 = "0000a50f0000i",
-- llgfrl_2 = "c40e00000000o",
-- llgt_2 = "e30000000017l",
-- llgtr_2 = "0000b9170000h",
-- lm_3 = "000098000000q",
-- lmy_3 = "eb0000000098s",
-- lmg_3 = "eb0000000004s",
-- lmh_3 = "eb0000000096s",
-- lnr_2 = "000000001100g",
-- lngr_2 = "0000b9010000h",
-- lngfr_2 = "0000b9110000h",
-- lnxbr_2 = "0000b3410000h",
-- lnxr_2 = "0000b3610000h",
-- lndbr_2 = "0000b3110000h",
-- lndr_2 = "000000002100g",
-- lndfr_2 = "0000b3710000h",
-- lnebr_2 = "0000b3010000h",
-- lner_2 = "000000003100g",
-- loc_3 = "eb00000000f2t",
-- locg_3 = "eb00000000e2t",
-- lpq_2 = "e3000000008fl",
-- lpr_2 = "000000001000g",
-- lpgr_2 = "0000b9000000h",
-- lpgfr_2 = "0000b9100000h",
-- lpxbr_2 = "0000b3400000h",
-- lpxr_2 = "0000b3600000h",
-- lpdbr_2 = "0000b3100000h",
-- lpdr_2 = "000000002000g",
-- lpdfr_2 = "0000b3700000h",
-- lpebr_2 = "0000b3000000h",
-- lper_2 = "000000003000g",
-- lra_2 = "0000b1000000j",
-- lray_2 = "e30000000013l",
-- lrag_2 = "e30000000003l",
-- lrl_2 = "c40d00000000o",
-- lgrl_2 = "c40800000000o",
-- lgfrl_2 = "c40c00000000o",
-- lrvh_2 = "e3000000001fl",
-- lrv_2 = "e3000000001el",
-- lrvr_2 = "0000b91f0000h",
-- lrvg_2 = "e3000000000fl",
-- lrvgr_2 = "0000b90f0000h",
-- ldxbr_2 = "0000b3450000h",
-- ldxr_2 = "000000002500g",
-- lrdr_2 = "000000002500g",
-- lexbr_2 = "0000b3460000h",
-- lexr_2 = "0000b3660000h",
-- ledbr_2 = "0000b3440000h",
-- ledr_2 = "000000003500g",
-- lrer_2 = "000000003500g",
-- lura_2 = "0000b24b0000h",
-- lurag_2 = "0000b9050000h",
-- lzxr_2 = "0000b3760000h",
-- lzdr_2 = "0000b3750000h",
-- lzer_2 = "0000b3740000h",
-- msta_2 = "0000b2470000h",
-- mvcl_2 = "000000000e00g",
-- mvcle_3 = "0000a8000000q",
-- mvclu_3 = "eb000000008es",
-- mvpg_2 = "0000b2540000h",
-- mvst_2 = "0000b2550000h",
-- m_2 = "00005c000000j",
-- mfy_2 = "e3000000005cl",
-- mr_2 = "000000001c00g",
-- mxbr_2 = "0000b34c0000h",
-- mxr_2 = "000000002600g",
-- mdbr_2 = "0000b31c0000h",
-- md_2 = "00006c000000j",
-- mdr_2 = "000000002c00g",
-- mxdbr_2 = "0000b3070000h",
-- mxd_2 = "000067000000j",
-- mxdr_2 = "000000002700g",
-- meebr_2 = "0000b3170000h",
-- meer_2 = "0000b3370000h",
-- mdebr_2 = "0000b30c0000h",
-- mde_2 = "00007c000000j",
-- mder_2 = "000000003c00g",
-- me_2 = "00007c000000j",
-- mer_2 = "000000003c00g",
-- mh_2 = "00004c000000j",
-- mhy_2 = "e3000000007cl",
-- mlg_2 = "e30000000086l",
-- mlgr_2 = "0000b9860000h",
-- ml_2 = "e30000000096l",
-- mlr_2 = "0000b9960000h",
-- ms_2 = "000071000000j",
-- msr_2 = "0000b2520000h",
-- msy_2 = "e30000000051l",
-- msg_2 = "e3000000000cl",
-- msgr_2 = "0000b90c0000h",
-- msgf_2 = "e3000000001cl",
-- msgfr_2 = "0000b91c0000h",
-- msfi_2 = "c20100000000n",
-- msgfi_2 = "c20000000000n",
-- maer_3 = "0000b32e0000r",
-+ a_2 = "00005a000000RX-a",
-+ ar_2 = "000000001a00RR",
-+ ay_2 = "e3000000005aRXY-a",
-+ ag_2 = "e30000000008RXY-a",
-+ agr_2 = "0000b9080000RRE",
-+ agf_2 = "e30000000018RXY-a",
-+ agfr_2 = "0000b9180000RRE",
-+ axbr_2 = "0000b34a0000RRE",
-+ adbr_2 = "0000b31a0000RRE",
-+ aebr_2 = "0000b30a0000RRE",
-+ aghi_2 = "0000a70b0000RI-a",
-+ ah_2 = "00004a000000RX-a",
-+ ahi_2 = "0000a70a0000RI-a",
-+ ahy_2 = "e3000000007aRXY-a",
-+ afi_2 = "c20900000000RIL-a",
-+ agfi_2 = "c20800000000RIL-a",
-+ aih_2 = "cc0800000000RIL-a",
-+ al_2 = "00005e000000RX-a",
-+ alr_2 = "000000001e00RR",
-+ aly_2 = "e3000000005eRXY-a",
-+ alg_2 = "e3000000000aRXY-a",
-+ algr_2 = "0000b90a0000RRE",
-+ algf_2 = "e3000000001aRXY-a",
-+ algfr_2 = "0000b91a0000RRE",
-+ alfi_2 = "c20b00000000RIL-a",
-+ algfi_2 = "c20a00000000RIL-a",
-+ alc_2 = "e30000000098RXY-a",
-+ alcr_2 = "0000b9980000RRE",
-+ alcg_2 = "e30000000088RXY-a",
-+ alcgr_2 = "0000b9880000RRE",
-+ alsih_2 = "cc0a00000000RIL-a",
-+ alsihn_2 = "cc0b00000000RIL-a",
-+ axr_2 = "000000003600RR",
-+ ad_2 = "00006a000000RX-a",
-+ adr_2 = "000000002a00RR",
-+ ae_2 = "00007a000000RX-a",
-+ aer_2 = "000000003a00RR",
-+ aw_2 = "00006e000000RX-a",
-+ awr_2 = "000000002e00RR",
-+ au_2 = "00007e000000RX-a",
-+ aur_2 = "000000003e00RR",
-+ n_2 = "000054000000RX-a",
-+ nr_2 = "000000001400RR",
-+ ny_2 = "e30000000054RXY-a",
-+ ng_2 = "e30000000080RXY-a",
-+ ngr_2 = "0000b9800000RRE",
-+ nihf_2 = "c00a00000000RIL-a",
-+ nihh_2 = "0000a5040000RI-a",
-+ nihl_2 = "0000a5050000RI-a",
-+ nilf_2 = "c00b00000000RIL-a",
-+ nilh_2 = "0000a5060000RI-a",
-+ nill_2 = "0000a5070000RI-a",
-+ bal_2 = "000045000000RX-a",
-+ balr_2 = "000000000500RR",
-+ bas_2 = "00004d000000RX-a",
-+ basr_2 = "000000000d00RR",
-+ bassm_2 = "000000000c00RR",
-+ bsa_2 = "0000b25a0000RRE",
-+ bsm_2 = "000000000b00RR",
-+ bakr_2 = "0000b2400000RRE",
-+ bsg_2 = "0000b2580000RRE",
-+ bc_2 = "000047000000RX-b",
-+ bcr_2 = "000000000700RR",
-+ bct_2 = "000046000000RX-a",
-+ bctr_2 = "000000000600RR",
-+ bctg_2 = "e30000000046RXY-a",
-+ bctgr_2 = "0000b9460000RRE",
-+ bxh_3 = "000086000000RS-a",
-+ bxhg_3 = "eb0000000044RSY-a",
-+ bxle_3 = "000087000000RS-a",
-+ bxleg_3 = "eb0000000045RSY-a",
-+ brasl_2 = "c00500000000RIL-b",
-+ brcl_2 = "c00400000000RIL-c",
-+ brcth_2 = "cc0600000000RIL-b",
-+ cksm_2 = "0000b2410000RRE",
-+ km_2 = "0000b92e0000RRE",
-+ kmf_2 = "0000b92a0000RRE",
-+ kmc_2 = "0000b92f0000RRE",
-+ kmo_2 = "0000b92b0000RRE",
-+ c_2 = "000059000000RX-a",
-+ cr_2 = "000000001900RR",
-+ cy_2 = "e30000000059RXY-a",
-+ cg_2 = "e30000000020RXY-a",
-+ cgr_2 = "0000b9200000RRE",
-+ cgf_2 = "e30000000030RXY-a",
-+ cgfr_2 = "0000b9300000RRE",
-+ cxbr_2 = "0000b3490000RRE",
-+ cxtr_2 = "0000b3ec0000RRE",
-+ cxr_2 = "0000b3690000RRE",
-+ cdbr_2 = "0000b3190000RRE",
-+ cdtr_2 = "0000b3e40000RRE",
-+ cd_2 = "000069000000RX-a",
-+ cdr_2 = "000000002900RR",
-+ cebr_2 = "0000b3090000RRE",
-+ ce_2 = "000079000000RX-a",
-+ cer_2 = "000000003900RR",
-+ kxbr_2 = "0000b3480000RRE",
-+ kxtr_2 = "0000b3e80000RRE",
-+ kdbr_2 = "0000b3180000RRE",
-+ kdtr_2 = "0000b3e00000RRE",
-+ kebr_2 = "0000b3080000RRE",
-+ cs_3 = "0000ba000000RS-a",
-+ csy_3 = "eb0000000014RSY-a",
-+ csg_3 = "eb0000000030RSY-a",
-+ csp_2 = "0000b2500000RRE",
-+ cspg_2 = "0000b98a0000RRE",
-+ cextr_2 = "0000b3fc0000RRE",
-+ cedtr_2 = "0000b3f40000RRE",
-+ cds_3 = "0000bb000000RS-a",
-+ cdsy_3 = "eb0000000031RSY-a",
-+ cdsg_3 = "eb000000003eRSY-a",
-+ ch_2 = "000049000000RX-a",
-+ chy_2 = "e30000000079RXY-a",
-+ cgh_2 = "e30000000034RXY-a",
-+ chrl_2 = "c60500000000RIL-b",
-+ cghrl_2 = "c60400000000RIL-b",
-+ chf_2 = "e300000000cdRXY-a",
-+ chhr_2 = "0000b9cd0000RRE",
-+ chlr_2 = "0000b9dd0000RRE",
-+ cfi_2 = "c20d00000000RIL-a",
-+ cgfi_2 = "c20c00000000RIL-a",
-+ cih_2 = "cc0d00000000RIL-a",
-+ cl_2 = "000055000000RX-a",
-+ clr_2 = "000000001500RR",
-+ cly_2 = "e30000000055RXY-a",
-+ clg_2 = "e30000000021RXY-a",
-+ clgr_2 = "0000b9210000RRE",
-+ clgf_2 = "e30000000031RXY-a",
-+ clgfr_2 = "0000b9310000RRE",
-+ clmh_3 = "eb0000000020RSY-b",
-+ clm_3 = "0000bd000000RS-b",
-+ clmy_3 = "eb0000000021RSY-b",
-+ clhf_2 = "e300000000cfRXY-a",
-+ clhhr_2 = "0000b9cf0000RRE",
-+ clhlr_2 = "0000b9df0000RRE",
-+ clfi_2 = "c20f00000000RIL-a",
-+ clgfi_2 = "c20e00000000RIL-a",
-+ clih_2 = "cc0f00000000RIL-a",
-+ clcl_2 = "000000000f00RR",
-+ clcle_3 = "0000a9000000RS-a",
-+ clclu_3 = "eb000000008fRSY-a",
-+ clrl_2 = "c60f00000000RIL-b",
-+ clhrl_2 = "c60700000000RIL-b",
-+ clgrl_2 = "c60a00000000RIL-b",
-+ clghrl_2 = "c60600000000RIL-b",
-+ clgfrl_2 = "c60e00000000RIL-b",
-+ clst_2 = "0000b25d0000RRE",
-+ crl_2 = "c60d00000000RIL-b",
-+ cgrl_2 = "c60800000000RIL-b",
-+ cgfrl_2 = "c60c00000000RIL-b",
-+ cuse_2 = "0000b2570000RRE",
-+ cmpsc_2 = "0000b2630000RRE",
-+ kimd_2 = "0000b93e0000RRE",
-+ klmd_2 = "0000b93f0000RRE",
-+ kmac_2 = "0000b91e0000RRE",
-+ thdr_2 = "0000b3590000RRE",
-+ thder_2 = "0000b3580000RRE",
-+ cxfbr_2 = "0000b3960000RRE",
-+ cxftr_2 = "0000b9590000RRE",
-+ cxfr_2 = "0000b3b60000RRE",
-+ cdfbr_2 = "0000b3950000RRE",
-+ cdftr_2 = "0000b9510000RRE",
-+ cdfr_2 = "0000b3b50000RRE",
-+ cefbr_2 = "0000b3940000RRE",
-+ cefr_2 = "0000b3b40000RRE",
-+ cxgbr_2 = "0000b3a60000RRE",
-+ cxgtr_2 = "0000b3f90000RRE",
-+ cxgr_2 = "0000b3c60000RRE",
-+ cdgbr_2 = "0000b3a50000RRE",
-+ cdgtr_2 = "0000b3f10000RRE",
-+ cdgr_2 = "0000b3c50000RRE",
-+ cegbr_2 = "0000b3a40000RRE",
-+ cegr_2 = "0000b3c40000RRE",
-+ cxstr_2 = "0000b3fb0000RRE",
-+ cdstr_2 = "0000b3f30000RRE",
-+ cxutr_2 = "0000b3fa0000RRE",
-+ cdutr_2 = "0000b3f20000RRE",
-+ cvb_2 = "00004f000000RX-a",
-+ cvby_2 = "e30000000006RXY-a",
-+ cvbg_2 = "e3000000000eRXY-a",
-+ cvd_2 = "00004e000000RX-a",
-+ cvdy_2 = "e30000000026RXY-a",
-+ cvdg_2 = "e3000000002eRXY-a",
-+ cuxtr_2 = "0000b3ea0000RRE",
-+ cudtr_2 = "0000b3e20000RRE",
-+ cu42_2 = "0000b9b30000RRE",
-+ cu41_2 = "0000b9b20000RRE",
-+ cpya_2 = "0000b24d0000RRE",
-+ d_2 = "00005d000000RX-a",
-+ dr_2 = "000000001d00RR",
-+ dxbr_2 = "0000b34d0000RRE",
-+ dxr_2 = "0000b22d0000RRE",
-+ ddbr_2 = "0000b31d0000RRE",
-+ dd_2 = "00006d000000RX-a",
-+ ddr_2 = "000000002d00RR",
-+ debr_2 = "0000b30d0000RRE",
-+ de_2 = "00007d000000RX-a",
-+ der_2 = "000000003d00RR",
-+ dl_2 = "e30000000097RXY-a",
-+ dlr_2 = "0000b9970000RRE",
-+ dlg_2 = "e30000000087RXY-a",
-+ dlgr_2 = "0000b9870000RRE",
-+ dsg_2 = "e3000000000dRXY-a",
-+ dsgr_2 = "0000b90d0000RRE",
-+ dsgf_2 = "e3000000001dRXY-a",
-+ dsgfr_2 = "0000b91d0000RRE",
-+ x_2 = "000057000000RX-a",
-+ xr_2 = "000000001700RR",
-+ xy_2 = "e30000000057RXY-a",
-+ xg_2 = "e30000000082RXY-a",
-+ xgr_2 = "0000b9820000RRE",
-+ xihf_2 = "c00600000000RIL-a",
-+ xilf_2 = "c00700000000RIL-a",
-+ ex_2 = "000044000000RX-a",
-+ exrl_2 = "c60000000000RIL-b",
-+ ear_2 = "0000b24f0000RRE",
-+ esea_2 = "0000b99d0000RRE",
-+ eextr_2 = "0000b3ed0000RRE",
-+ eedtr_2 = "0000b3e50000RRE",
-+ ecag_3 = "eb000000004cRSY-a",
-+ efpc_2 = "0000b38c0000RRE",
-+ epar_2 = "0000b2260000RRE",
-+ epair_2 = "0000b99a0000RRE",
-+ epsw_2 = "0000b98d0000RRE",
-+ esar_2 = "0000b2270000RRE",
-+ esair_2 = "0000b99b0000RRE",
-+ esxtr_2 = "0000b3ef0000RRE",
-+ esdtr_2 = "0000b3e70000RRE",
-+ ereg_2 = "0000b2490000RRE",
-+ eregg_2 = "0000b90e0000RRE",
-+ esta_2 = "0000b24a0000RRE",
-+ flogr_2 = "0000b9830000RRE",
-+ hdr_2 = "000000002400RR",
-+ her_2 = "000000003400RR",
-+ iac_2 = "0000b2240000RRE",
-+ ic_2 = "000043000000RX-a",
-+ icy_2 = "e30000000073RXY-a",
-+ icmh_3 = "eb0000000080RSY-b",
-+ icm_3 = "0000bf000000RS-b",
-+ icmy_3 = "eb0000000081RSY-b",
-+ iihf_2 = "c00800000000RIL-a",
-+ iihh_2 = "0000a5000000RI-a",
-+ iihl_2 = "0000a5010000RI-a",
-+ iilf_2 = "c00900000000RIL-a",
-+ iilh_2 = "0000a5020000RI-a",
-+ iill_2 = "0000a5030000RI-a",
-+ ipm_2 = "0000b2220000RRE",
-+ iske_2 = "0000b2290000RRE",
-+ ivsk_2 = "0000b2230000RRE",
-+ l_2 = "000058000000RX-a",
-+ lr_2 = "000000001800RR",
-+ ly_2 = "e30000000058RXY-a",
-+ lg_2 = "e30000000004RXY-a",
-+ lgr_2 = "0000b9040000RRE",
-+ lgf_2 = "e30000000014RXY-a",
-+ lgfr_2 = "0000b9140000RRE",
-+ lghi_2 = "0000a7090000RI-a",
-+ lxr_2 = "0000b3650000RRE",
-+ ld_2 = "000068000000RX-a",
-+ ldr_2 = "000000002800RR",
-+ ldy_2 = "ed0000000065RXY-a",
-+ le_2 = "000078000000RX-a",
-+ ler_2 = "000000003800RR",
-+ ley_2 = "ed0000000064RXY-a",
-+ lam_3 = "00009a000000RS-a",
-+ lamy_3 = "eb000000009aRSY-a",
-+ la_2 = "000041000000RX-a",
-+ lay_2 = "e30000000071RXY-a",
-+ lae_2 = "000051000000RX-a",
-+ laey_2 = "e30000000075RXY-a",
-+ larl_2 = "c00000000000RIL-b",
-+ laa_3 = "eb00000000f8RSY-a",
-+ laag_3 = "eb00000000e8RSY-a",
-+ laal_3 = "eb00000000faRSY-a",
-+ laalg_3 = "eb00000000eaRSY-a",
-+ lan_3 = "eb00000000f4RSY-a",
-+ lang_3 = "eb00000000e4RSY-a",
-+ lax_3 = "eb00000000f7RSY-a",
-+ laxg_3 = "eb00000000e7RSY-a",
-+ lao_3 = "eb00000000f6RSY-a",
-+ laog_3 = "eb00000000e6RSY-a",
-+ lt_2 = "e30000000012RXY-a",
-+ ltr_2 = "000000001200RR",
-+ ltg_2 = "e30000000002RXY-a",
-+ ltgr_2 = "0000b9020000RRE",
-+ ltgf_2 = "e30000000032RXY-a",
-+ ltgfr_2 = "0000b9120000RRE",
-+ ltxbr_2 = "0000b3420000RRE",
-+ ltxtr_2 = "0000b3de0000RRE",
-+ ltxr_2 = "0000b3620000RRE",
-+ ltdbr_2 = "0000b3120000RRE",
-+ ltdtr_2 = "0000b3d60000RRE",
-+ ltdr_2 = "000000002200RR",
-+ ltebr_2 = "0000b3020000RRE",
-+ lter_2 = "000000003200RR",
-+ lb_2 = "e30000000076RXY-a",
-+ lbr_2 = "0000b9260000RRE",
-+ lgb_2 = "e30000000077RXY-a",
-+ lgbr_2 = "0000b9060000RRE",
-+ lbh_2 = "e300000000c0RXY-a",
-+ lcr_2 = "000000001300RR",
-+ lcgr_2 = "0000b9030000RRE",
-+ lcgfr_2 = "0000b9130000RRE",
-+ lcxbr_2 = "0000b3430000RRE",
-+ lcxr_2 = "0000b3630000RRE",
-+ lcdbr_2 = "0000b3130000RRE",
-+ lcdr_2 = "000000002300RR",
-+ lcdfr_2 = "0000b3730000RRE",
-+ lcebr_2 = "0000b3030000RRE",
-+ lcer_2 = "000000003300RR",
-+ lctl_3 = "0000b7000000RS-a",
-+ lctlg_3 = "eb000000002fRSY-a",
-+ fixr_2 = "0000b3670000RRE",
-+ fidr_2 = "0000b37f0000RRE",
-+ fier_2 = "0000b3770000RRE",
-+ ldgr_2 = "0000b3c10000RRE",
-+ lgdr_2 = "0000b3cd0000RRE",
-+ lh_2 = "000048000000RX-a",
-+ lhr_2 = "0000b9270000RRE",
-+ lhy_2 = "e30000000078RXY-a",
-+ lgh_2 = "e30000000015RXY-a",
-+ lghr_2 = "0000b9070000RRE",
-+ lhh_2 = "e300000000c4RXY-a",
-+ lhi_2 = "0000a7080000RI-a",
-+ lhrl_2 = "c40500000000RIL-b",
-+ lghrl_2 = "c40400000000RIL-b",
-+ lfh_2 = "e300000000caRXY-a",
-+ lgfi_2 = "c00100000000RIL-a",
-+ lxdbr_2 = "0000b3050000RRE",
-+ lxdr_2 = "0000b3250000RRE",
-+ lxebr_2 = "0000b3060000RRE",
-+ lxer_2 = "0000b3260000RRE",
-+ ldebr_2 = "0000b3040000RRE",
-+ lder_2 = "0000b3240000RRE",
-+ llgf_2 = "e30000000016RXY-a",
-+ llgfr_2 = "0000b9160000RRE",
-+ llc_2 = "e30000000094RXY-a",
-+ llcr_2 = "0000b9940000RRE",
-+ llgc_2 = "e30000000090RXY-a",
-+ llgcr_2 = "0000b9840000RRE",
-+ llch_2 = "e300000000c2RXY-a",
-+ llh_2 = "e30000000095RXY-a",
-+ llhr_2 = "0000b9950000RRE",
-+ llgh_2 = "e30000000091RXY-a",
-+ llghr_2 = "0000b9850000RRE",
-+ llhh_2 = "e300000000c6RXY-a",
-+ llhrl_2 = "c40200000000RIL-b",
-+ llghrl_2 = "c40600000000RIL-b",
-+ llihf_2 = "c00e00000000RIL-a",
-+ llihh_2 = "0000a50c0000RI-a",
-+ llihl_2 = "0000a50d0000RI-a",
-+ llilf_2 = "c00f00000000RIL-a",
-+ llilh_2 = "0000a50e0000RI-a",
-+ llill_2 = "0000a50f0000RI-a",
-+ llgfrl_2 = "c40e00000000RIL-b",
-+ llgt_2 = "e30000000017RXY-a",
-+ llgtr_2 = "0000b9170000RRE",
-+ lm_3 = "000098000000RS-a",
-+ lmy_3 = "eb0000000098RSY-a",
-+ lmg_3 = "eb0000000004RSY-a",
-+ lmh_3 = "eb0000000096RSY-a",
-+ lnr_2 = "000000001100RR",
-+ lngr_2 = "0000b9010000RRE",
-+ lngfr_2 = "0000b9110000RRE",
-+ lnxbr_2 = "0000b3410000RRE",
-+ lnxr_2 = "0000b3610000RRE",
-+ lndbr_2 = "0000b3110000RRE",
-+ lndr_2 = "000000002100RR",
-+ lndfr_2 = "0000b3710000RRE",
-+ lnebr_2 = "0000b3010000RRE",
-+ lner_2 = "000000003100RR",
-+ loc_3 = "eb00000000f2RSY-b",
-+ locg_3 = "eb00000000e2RSY-b",
-+ lpq_2 = "e3000000008fRXY-a",
-+ lpr_2 = "000000001000RR",
-+ lpgr_2 = "0000b9000000RRE",
-+ lpgfr_2 = "0000b9100000RRE",
-+ lpxbr_2 = "0000b3400000RRE",
-+ lpxr_2 = "0000b3600000RRE",
-+ lpdbr_2 = "0000b3100000RRE",
-+ lpdr_2 = "000000002000RR",
-+ lpdfr_2 = "0000b3700000RRE",
-+ lpebr_2 = "0000b3000000RRE",
-+ lper_2 = "000000003000RR",
-+ lra_2 = "0000b1000000RX-a",
-+ lray_2 = "e30000000013RXY-a",
-+ lrag_2 = "e30000000003RXY-a",
-+ lrl_2 = "c40d00000000RIL-b",
-+ lgrl_2 = "c40800000000RIL-b",
-+ lgfrl_2 = "c40c00000000RIL-b",
-+ lrvh_2 = "e3000000001fRXY-a",
-+ lrv_2 = "e3000000001eRXY-a",
-+ lrvr_2 = "0000b91f0000RRE",
-+ lrvg_2 = "e3000000000fRXY-a",
-+ lrvgr_2 = "0000b90f0000RRE",
-+ ldxbr_2 = "0000b3450000RRE",
-+ ldxr_2 = "000000002500RR",
-+ lrdr_2 = "000000002500RR",
-+ lexbr_2 = "0000b3460000RRE",
-+ lexr_2 = "0000b3660000RRE",
-+ ledbr_2 = "0000b3440000RRE",
-+ ledr_2 = "000000003500RR",
-+ lrer_2 = "000000003500RR",
-+ lura_2 = "0000b24b0000RRE",
-+ lurag_2 = "0000b9050000RRE",
-+ lzxr_2 = "0000b3760000RRE",
-+ lzdr_2 = "0000b3750000RRE",
-+ lzer_2 = "0000b3740000RRE",
-+ msta_2 = "0000b2470000RRE",
-+ mvcl_2 = "000000000e00RR",
-+ mvcle_3 = "0000a8000000RS-a",
-+ mvclu_3 = "eb000000008eRSY-a",
-+ mvpg_2 = "0000b2540000RRE",
-+ mvst_2 = "0000b2550000RRE",
-+ m_2 = "00005c000000RX-a",
-+ mfy_2 = "e3000000005cRXY-a",
-+ mr_2 = "000000001c00RR",
-+ mxbr_2 = "0000b34c0000RRE",
-+ mxr_2 = "000000002600RR",
-+ mdbr_2 = "0000b31c0000RRE",
-+ md_2 = "00006c000000RX-a",
-+ mdr_2 = "000000002c00RR",
-+ mxdbr_2 = "0000b3070000RRE",
-+ mxd_2 = "000067000000RX-a",
-+ mxdr_2 = "000000002700RR",
-+ meebr_2 = "0000b3170000RRE",
-+ meer_2 = "0000b3370000RRE",
-+ mdebr_2 = "0000b30c0000RRE",
-+ mde_2 = "00007c000000RX-a",
-+ mder_2 = "000000003c00RR",
-+ me_2 = "00007c000000RX-a",
-+ mer_2 = "000000003c00RR",
-+ mh_2 = "00004c000000RX-a",
-+ mhy_2 = "e3000000007cRXY-a",
-+ mlg_2 = "e30000000086RXY-a",
-+ mlgr_2 = "0000b9860000RRE",
-+ ml_2 = "e30000000096RXY-a",
-+ mlr_2 = "0000b9960000RRE",
-+ ms_2 = "000071000000RX-a",
-+ msr_2 = "0000b2520000RRE",
-+ msy_2 = "e30000000051RXY-a",
-+ msg_2 = "e3000000000cRXY-a",
-+ msgr_2 = "0000b90c0000RRE",
-+ msgf_2 = "e3000000001cRXY-a",
-+ msgfr_2 = "0000b91c0000RRE",
-+ msfi_2 = "c20100000000RIL-a",
-+ msgfi_2 = "c20000000000RIL-a",
-+ maer_3 = "0000b32e0000RRD",
- mvhhi_2 = "e54400000000SIL",
- mvhi_2 = "e54c00000000SIL",
- mvghi_2 = "e54800000000SIL",
-- o_2 = "000056000000j",
-- or_2 = "000000001600g",
-- oy_2 = "e30000000056l",
-- og_2 = "e30000000081l",
-- ogr_2 = "0000b9810000h",
-- oihf_2 = "c00c00000000n",
-- oihh_2 = "0000a5080000i",
-- oihl_2 = "0000a5090000i",
-- oilf_2 = "c00d00000000n",
-- oilh_2 = "0000a50a0000i",
-- oill_2 = "0000a50b0000i",
-- pgin_2 = "0000b22e0000h",
-- pgout_2 = "0000b22f0000h",
-- pcc_2 = "0000b92c0000h",
-- pckmo_2 = "0000b9280000h",
-- pfmf_2 = "0000b9af0000h",
-- ptf_2 = "0000b9a20000h",
-- popcnt_2 = "0000b9e10000h",
-+ o_2 = "000056000000RX-a",
-+ or_2 = "000000001600RR",
-+ oy_2 = "e30000000056RXY-a",
-+ og_2 = "e30000000081RXY-a",
-+ ogr_2 = "0000b9810000RRE",
-+ oihf_2 = "c00c00000000RIL-a",
-+ oihh_2 = "0000a5080000RI-a",
-+ oihl_2 = "0000a5090000RI-a",
-+ oilf_2 = "c00d00000000RIL-a",
-+ oilh_2 = "0000a50a0000RI-a",
-+ oill_2 = "0000a50b0000RI-a",
-+ pgin_2 = "0000b22e0000RRE",
-+ pgout_2 = "0000b22f0000RRE",
-+ pcc_2 = "0000b92c0000RRE",
-+ pckmo_2 = "0000b9280000RRE",
-+ pfmf_2 = "0000b9af0000RRE",
-+ ptf_2 = "0000b9a20000RRE",
-+ popcnt_2 = "0000b9e10000RRE",
- pfd_2 = "e30000000036m",
-- pfdrl_2 = "c60200000000p",
-- pt_2 = "0000b2280000h",
-- pti_2 = "0000b99e0000h",
-- palb_2 = "0000b2480000h",
-- rrbe_2 = "0000b22a0000h",
-- rrbm_2 = "0000b9ae0000h",
-- rll_3 = "eb000000001ds",
-- rllg_3 = "eb000000001cs",
-- srst_2 = "0000b25e0000h",
-- srstu_2 = "0000b9be0000h",
-- sar_2 = "0000b24e0000h",
-- sfpc_2 = "0000b3840000h",
-- sfasr_2 = "0000b3850000h",
-- spm_2 = "000000000400g",
-- ssar_2 = "0000b2250000h",
-- ssair_2 = "0000b99f0000h",
-- slda_3 = "00008f000000q",
-- sldl_3 = "00008d000000q",
-- sla_3 = "00008b000000q",
-- slak_3 = "eb00000000dds",
-- slag_3 = "eb000000000bs",
-- sll_3 = "000089000000q",
-- sllk_3 = "eb00000000dfs",
-- sllg_3 = "eb000000000ds",
-- srda_3 = "00008e000000q",
-- srdl_3 = "00008c000000q",
-- sra_3 = "00008a000000q",
-- srak_3 = "eb00000000dcs",
-- srag_3 = "eb000000000as",
-- srl_3 = "000088000000q",
-- srlk_3 = "eb00000000des",
-- srlg_3 = "eb000000000cs",
-- sqxbr_2 = "0000b3160000h",
-- sqxr_2 = "0000b3360000h",
-- sqdbr_2 = "0000b3150000h",
-- sqdr_2 = "0000b2440000h",
-- sqebr_2 = "0000b3140000h",
-- sqer_2 = "0000b2450000h",
-- st_2 = "000050000000j",
-- sty_2 = "e30000000050l",
-- stg_2 = "e30000000024l",
-- std_2 = "000060000000j",
-- stdy_2 = "ed0000000067l",
-- ste_2 = "000070000000j",
-- stey_2 = "ed0000000066l",
-- stam_3 = "00009b000000q",
-- stamy_3 = "eb000000009bs",
-- stc_2 = "000042000000j",
-- stcy_2 = "e30000000072l",
-- stch_2 = "e300000000c3l",
-- stcmh_3 = "eb000000002ct",
-- stcm_3 = "0000be000000r",
-- stcmy_3 = "eb000000002dt",
-- stctl_3 = "0000b6000000q",
-- stctg_3 = "eb0000000025s",
-- sth_2 = "000040000000j",
-- sthy_2 = "e30000000070l",
-- sthh_2 = "e300000000c7l",
-- sthrl_2 = "c40700000000o",
-- stfh_2 = "e300000000cbl",
-- stm_3 = "000090000000q",
-- stmy_3 = "eb0000000090s",
-- stmg_3 = "eb0000000024s",
-- stmh_3 = "eb0000000026s",
-- stoc_3 = "eb00000000f3t",
-- stocg_3 = "eb00000000e3t",
-- stpq_2 = "e3000000008el",
-- strl_2 = "c40f00000000o",
-- stgrl_2 = "c40b00000000o",
-- strvh_2 = "e3000000003fl",
-- strv_2 = "e3000000003el",
-- strvg_2 = "e3000000002fl",
-- stura_2 = "0000b2460000h",
-- sturg_2 = "0000b9250000h",
-- s_2 = "00005b000000j",
-- sr_2 = "000000001b00g",
-- sy_2 = "e3000000005bl",
-- sg_2 = "e30000000009l",
-- sgr_2 = "0000b9090000h",
-- sgf_2 = "e30000000019l",
-- sgfr_2 = "0000b9190000h",
-- sxbr_2 = "0000b34b0000h",
-- sdbr_2 = "0000b31b0000h",
-- sebr_2 = "0000b30b0000h",
-- sh_2 = "00004b000000j",
-- shy_2 = "e3000000007bl",
-- sl_2 = "00005f000000j",
-- slr_2 = "000000001f00g",
-- sly_2 = "e3000000005fl",
-- slg_2 = "e3000000000bl",
-- slgr_2 = "0000b90b0000h",
-- slgf_2 = "e3000000001bl",
-- slgfr_2 = "0000b91b0000h",
-- slfi_2 = "c20500000000n",
-- slgfi_2 = "c20400000000n",
-- slb_2 = "e30000000099l",
-- slbr_2 = "0000b9990000h",
-- slbg_2 = "e30000000089l",
-- slbgr_2 = "0000b9890000h",
-- sxr_2 = "000000003700g",
-- sd_2 = "00006b000000j",
-- sdr_2 = "000000002b00g",
-- se_2 = "00007b000000j",
-- ser_2 = "000000003b00g",
-- su_2 = "00007f000000j",
-- sur_2 = "000000003f00g",
-- sw_2 = "00006f000000j",
-- swr_2 = "000000002f00g",
-- tar_2 = "0000b24c0000h",
-- tb_2 = "0000b22c0000h",
-- tmhh_2 = "0000a7020000i",
-- tmhl_2 = "0000a7030000i",
-- tmlh_2 = "0000a7000000i",
-- tmll_2 = "0000a7010000i",
-- trace_3 = "000099000000q",
-- tracg_3 = "eb000000000fs",
-- tre_2 = "0000b2a50000h",
-+ pfdrl_2 = "c60200000000RIL-c",
-+ pt_2 = "0000b2280000RRE",
-+ pti_2 = "0000b99e0000RRE",
-+ palb_2 = "0000b2480000RRE",
-+ rrbe_2 = "0000b22a0000RRE",
-+ rrbm_2 = "0000b9ae0000RRE",
-+ rll_3 = "eb000000001dRSY-a",
-+ rllg_3 = "eb000000001cRSY-a",
-+ srst_2 = "0000b25e0000RRE",
-+ srstu_2 = "0000b9be0000RRE",
-+ sar_2 = "0000b24e0000RRE",
-+ sfpc_2 = "0000b3840000RRE",
-+ sfasr_2 = "0000b3850000RRE",
-+ spm_2 = "000000000400RR",
-+ ssar_2 = "0000b2250000RRE",
-+ ssair_2 = "0000b99f0000RRE",
-+ slda_3 = "00008f000000RS-a",
-+ sldl_3 = "00008d000000RS-a",
-+ sla_3 = "00008b000000RS-a",
-+ slak_3 = "eb00000000ddRSY-a",
-+ slag_3 = "eb000000000bRSY-a",
-+ sll_3 = "000089000000RS-a",
-+ sllk_3 = "eb00000000dfRSY-a",
-+ sllg_3 = "eb000000000dRSY-a",
-+ srda_3 = "00008e000000RS-a",
-+ srdl_3 = "00008c000000RS-a",
-+ sra_3 = "00008a000000RS-a",
-+ srak_3 = "eb00000000dcRSY-a",
-+ srag_3 = "eb000000000aRSY-a",
-+ srl_3 = "000088000000RS-a",
-+ srlk_3 = "eb00000000deRSY-a",
-+ srlg_3 = "eb000000000cRSY-a",
-+ sqxbr_2 = "0000b3160000RRE",
-+ sqxr_2 = "0000b3360000RRE",
-+ sqdbr_2 = "0000b3150000RRE",
-+ sqdr_2 = "0000b2440000RRE",
-+ sqebr_2 = "0000b3140000RRE",
-+ sqer_2 = "0000b2450000RRE",
-+ st_2 = "000050000000RX-a",
-+ sty_2 = "e30000000050RXY-a",
-+ stg_2 = "e30000000024RXY-a",
-+ std_2 = "000060000000RX-a",
-+ stdy_2 = "ed0000000067RXY-a",
-+ ste_2 = "000070000000RX-a",
-+ stey_2 = "ed0000000066RXY-a",
-+ stam_3 = "00009b000000RS-a",
-+ stamy_3 = "eb000000009bRSY-a",
-+ stc_2 = "000042000000RX-a",
-+ stcy_2 = "e30000000072RXY-a",
-+ stch_2 = "e300000000c3RXY-a",
-+ stcmh_3 = "eb000000002cRSY-b",
-+ stcm_3 = "0000be000000RS-b",
-+ stcmy_3 = "eb000000002dRSY-b",
-+ stctl_3 = "0000b6000000RS-a",
-+ stctg_3 = "eb0000000025RSY-a",
-+ sth_2 = "000040000000RX-a",
-+ sthy_2 = "e30000000070RXY-a",
-+ sthh_2 = "e300000000c7RXY-a",
-+ sthrl_2 = "c40700000000RIL-b",
-+ stfh_2 = "e300000000cbRXY-a",
-+ stm_3 = "000090000000RS-a",
-+ stmy_3 = "eb0000000090RSY-a",
-+ stmg_3 = "eb0000000024RSY-a",
-+ stmh_3 = "eb0000000026RSY-a",
-+ stoc_3 = "eb00000000f3RSY-b",
-+ stocg_3 = "eb00000000e3RSY-b",
-+ stpq_2 = "e3000000008eRXY-a",
-+ strl_2 = "c40f00000000RIL-b",
-+ stgrl_2 = "c40b00000000RIL-b",
-+ strvh_2 = "e3000000003fRXY-a",
-+ strv_2 = "e3000000003eRXY-a",
-+ strvg_2 = "e3000000002fRXY-a",
-+ stura_2 = "0000b2460000RRE",
-+ sturg_2 = "0000b9250000RRE",
-+ s_2 = "00005b000000RX-a",
-+ sr_2 = "000000001b00RR",
-+ sy_2 = "e3000000005bRXY-a",
-+ sg_2 = "e30000000009RXY-a",
-+ sgr_2 = "0000b9090000RRE",
-+ sgf_2 = "e30000000019RXY-a",
-+ sgfr_2 = "0000b9190000RRE",
-+ sxbr_2 = "0000b34b0000RRE",
-+ sdbr_2 = "0000b31b0000RRE",
-+ sebr_2 = "0000b30b0000RRE",
-+ sh_2 = "00004b000000RX-a",
-+ shy_2 = "e3000000007bRXY-a",
-+ sl_2 = "00005f000000RX-a",
-+ slr_2 = "000000001f00RR",
-+ sly_2 = "e3000000005fRXY-a",
-+ slg_2 = "e3000000000bRXY-a",
-+ slgr_2 = "0000b90b0000RRE",
-+ slgf_2 = "e3000000001bRXY-a",
-+ slgfr_2 = "0000b91b0000RRE",
-+ slfi_2 = "c20500000000RIL-a",
-+ slgfi_2 = "c20400000000RIL-a",
-+ slb_2 = "e30000000099RXY-a",
-+ slbr_2 = "0000b9990000RRE",
-+ slbg_2 = "e30000000089RXY-a",
-+ slbgr_2 = "0000b9890000RRE",
-+ sxr_2 = "000000003700RR",
-+ sd_2 = "00006b000000RX-a",
-+ sdr_2 = "000000002b00RR",
-+ se_2 = "00007b000000RX-a",
-+ ser_2 = "000000003b00RR",
-+ su_2 = "00007f000000RX-a",
-+ sur_2 = "000000003f00RR",
-+ sw_2 = "00006f000000RX-a",
-+ swr_2 = "000000002f00RR",
-+ tar_2 = "0000b24c0000RRE",
-+ tb_2 = "0000b22c0000RRE",
-+ tmhh_2 = "0000a7020000RI-a",
-+ tmhl_2 = "0000a7030000RI-a",
-+ tmlh_2 = "0000a7000000RI-a",
-+ tmll_2 = "0000a7010000RI-a",
-+ trace_3 = "000099000000RS-a",
-+ tracg_3 = "eb000000000fRSY-a",
-+ tre_2 = "0000b2a50000RRE",
-+
-
- -- SS-a instructions
- clc_2 = "d50000000000SS-a",
-@@ -1252,24 +1255,24 @@ local function parse_template(params, template, nparams, pos)
-
- -- Process each character.
- local p = sub(template, 13)
-- if p == "g" then
-+ if p == "RR" then
- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
- wputhw(op2)
-- elseif p == "h" then
-+ elseif p == "RRE" then
- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
- wputhw(op1); wputhw(op2)
-- elseif p == "i" then
-+ elseif p == "RI-a" then
- op1 = op1 + shl(parse_reg(params[1]),4)
- wputhw(op1);
- parse_imm16(params[2])
-- elseif p == "j" then
-+ elseif p == "RX-a" then
- local d, x, b, a = parse_mem_bx(params[2])
- op1 = op1 + shl(parse_reg(params[1]), 4) + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
- if a then a() end
-- elseif p == "k" then
-- elseif p == "l" then
-+ elseif p == "RX-b" then
-+ elseif p == "RXY-a" then
- local d, x, b, a = parse_mem_bxy(params[2])
- op0 = op0 + shl(parse_reg(params[1]), 4) + x
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
-@@ -1278,25 +1281,22 @@ local function parse_template(params, template, nparams, pos)
- if a then a() end
- elseif p == "m" then
-
-- elseif p == "n" then
-+ elseif p == "RIL-a" then
- op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
- parse_imm32(params[2])
-- elseif p == "o" then
-+ elseif p == "RIL-b" then
- op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
- local mode, n, s = parse_label(params[2])
- waction("REL_"..mode, n, s)
-- elseif p == "q" then
-+ elseif p == "RS-a" then
- local d, b, a = parse_mem_b(params[3])
- op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2)
- if a then a() end -- a() emits action.
-- elseif p == "r" then
-- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) +
parse_reg(params[3])
-- wputhw(op1); wputhw(op2)
-- elseif p == "s" then
-+ elseif p == "RSY-a" then
- local d, b, a = parse_mem_by(params[3])
- op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
-@@ -1357,13 +1357,13 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1);
- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) +
parse_reg(params[3]) + parse_mask(params[4])
- wputhw(op2)
-- elseif p =="sS" then
-+ elseif p =="S" then
- wputhw(op1);
- local d, b, a = parse_mem_b(params[1])
- op2 = op2 + shl(b,12) + d;
- wputhw(op2)
- if a then a() end
-- elseif p =="iI" then
-+ elseif p =="I" then
- local imm_val, a = parse_imm8(params[1])
- op2 = op2 + imm_val;
- wputhw(op2);
---
-2.20.1
-
-
-From 16ff796f05258b750eb819b96bebcf9ca43fadc0 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 09:27:40 -0500
-Subject: [PATCH 135/247] Fix some DynASM instructions.
-
----
- dynasm/dasm_s390x.lua | 12 ++++++------
- 1 file changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index a5f280d..9012c3a 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1202,15 +1202,15 @@ map_op = {
- -- RRF-b instructions
- didbr_4 = "0000b3580000RRF-b",
- -- S mode instructions
-- stfl_1 = "0000b2b10000sS",
-+ stfl_1 = "0000b2b10000S",
- -- I- mode instructions
-- svc_1 = "000000000a00iI",
-+ svc_1 = "000000000a00I",
- -- RI-a mode instructions
- -- TODO: change "i" to "RI-a"
-- chi_2 = "0000a70e0000i",
-- cghi_2 = "0000a70f0000i",
-- mhi_2 = "0000a70c0000i",
-- mghi_2 = "0000a70d0000i",
-+ chi_2 = "0000a70e0000RI-a",
-+ cghi_2 = "0000a70f0000RI-a",
-+ mhi_2 = "0000a70c0000RI-a",
-+ mghi_2 = "0000a70d0000RI-a",
- -- RI-b mode instructions
- bras_2 = "0000a7050000RI-b",
- -- RI-c mode instructions
---
-2.20.1
-
-
-From 8e5243de262516fc12cf7a771b35c5ec3d95fa6a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 09:41:19 -0500
-Subject: [PATCH 136/247] Add support for integer add/subtract.
-
-Still need to support floating point operations. Multiplication is a
-little more complicated because it doesn't set the overflow flag.
----
- src/vm_s390x.dasc | 53 +++++++++++++++++++++++++++++++++++++++++++----
- 1 file changed, 49 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index c387975..ea554c7 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1172,13 +1172,58 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
-+ /* -- Binary ops -------------------------------------------------------- */
-+
-+ |.macro ins_arithpre
-+ | ins_ABC
-+ | sllg RB, RB, 3(r0)
-+ | sllg RC, RC, 3(r0)
-+ | sllg RA, RA, 3(r0)
-+ |.endmacro
-+ |
-+ |.macro ins_arithdn, intins
-+ | ins_arithpre
-+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-+ ||switch (vk) {
-+ ||case 0:
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, KBASE)
-+ | checkint RB, ->vmeta_arith_vno
-+ | checkint RC, ->vmeta_arith_vno
-+ | intins RB, RC; jo ->vmeta_arith_vno
-+ || break;
-+ ||case 1:
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, KBASE)
-+ | checkint RB, ->vmeta_arith_nvo
-+ | checkint RC, ->vmeta_arith_nvo
-+ | intins RC, RB; jo ->vmeta_arith_nvo
-+ || break;
-+ ||default:
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, BASE)
-+ | checkint RB, ->vmeta_arith_vvo
-+ | checkint RC, ->vmeta_arith_vvo
-+ | intins RB, RC; jo ->vmeta_arith_vvo
-+ || break;
-+ ||}
-+ ||if (vk == 1) {
-+ | // setint RC
-+ | stg RC, 0(RA, BASE)
-+ ||} else {
-+ | // setint RB
-+ | stg RB, 0(RA, BASE)
-+ ||}
-+ | ins_next
-+ |.endmacro
-+
-+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
- case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_arithdn ar
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_arithdn sr
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 7c95a50b03c5e02a5a45c3368cf6e6a92d010015 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 12:09:32 -0500
-Subject: [PATCH 137/247] Add support for floating point add/sub.
-
----
- src/vm_s390x.dasc | 62 +++++++++++++++++++++++++++++++++++------------
- 1 file changed, 46 insertions(+), 16 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index ea554c7..3750de0 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -631,8 +631,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- |->cont_nop: // BASE = base, (RC = result)
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_next
- |
- |->vmeta_tsetr:
- | stg r0, 0(r0)
-@@ -663,34 +662,65 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Arithmetic metamethods ---------------------------------------------
- |
- |->vmeta_arith_vno:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RB, PC_RB
-+ | llgc RC, PC_RC
- |->vmeta_arith_vn:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | sllg RB, RB, 3(r0)
-+ | sllg RC, RC, 3(r0)
-+ | lay RB, 0(RB, BASE)
-+ | lay RC, 0(RC, KBASE)
-+ | j >1
- |
- |->vmeta_arith_nvo:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RC, PC_RC
-+ | llgc RB, PC_RB
- |->vmeta_arith_nv:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | sllg RC, RC, 3(r0)
-+ | sllg RB, RB, 3(r0)
-+ | lay TMPR1, 0(RC, KBASE)
-+ | lay RC, 0(RB, BASE)
-+ | lgr RB, TMPR1
-+ | j >1
- |
- |->vmeta_unm:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- |
- |->vmeta_arith_vvo:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RB, PC_RB
-+ | llgc RC, PC_RC
- |->vmeta_arith_vv:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | sllg RC, RC, 3(r0)
-+ | sllg RB, RB, 3(r0)
-+ | lay RB, 0(RB, BASE)
-+ | lay RC, 0(RC, BASE)
-+ |1:
-+ | sllg RA, RA, 3(r0)
-+ | lay RA, 0(RA, BASE)
-+ | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
-+ | lgr CARG2, RA
-+ | lgr CARG3, RB
-+ | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out).
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base
-+ | lgr L:RB, L:CARG1
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
-+ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | cghi CRET1, 0
-+ | lgr RC, CRET1
-+ | je ->cont_nop
- |
- | // Call metamethod for binary op.
- |->vmeta_binop:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
-+ | lgr RA, RC
-+ | sgr RC, BASE
-+ | stg PC, -24(RA) // [cont|PC]
-+ | la PC, FRAME_CONT(RC)
-+ | lghi NARGS:RD, 2+1 // 2 args for func(o1, o2).
-+ | j ->vm_call_dispatch
- |
- |->vmeta_len:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From cb0b5492a027bcc5deeb2718a8e6a3c6bbbd9f82 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 13:14:53 -0500
-Subject: [PATCH 138/247] Add support for multiplication.
-
-Multiplication instructions don't set the overflow flag so we need
-to manually check, which is why this is more complicated than
-addition.
----
- src/vm_s390x.dasc | 41 +++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 39 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 3750de0..37aa29b 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1256,8 +1256,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_arithdn sr
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_arithpre
-+ | // For multiplication we use msgfr and check if the result
-+ | // fits in an int32_t.
-+ switch(op) {
-+ case BC_MULVN:
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, KBASE)
-+ | checkint RB, ->vmeta_arith_vno
-+ | checkint RC, ->vmeta_arith_vno
-+ | lgfr RB, RB
-+ | msgfr RB, RC
-+ | lgfr RC, RB
-+ | cgr RB, RC; jne ->vmeta_arith_vno
-+ break;
-+ case BC_MULNV:
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, KBASE)
-+ | checkint RB, ->vmeta_arith_nvo
-+ | checkint RC, ->vmeta_arith_nvo
-+ | lgfr RB, RB
-+ | msgfr RB, RC
-+ | lgfr RC, RB
-+ | cgr RB, RC; jne ->vmeta_arith_nvo
-+ break;
-+ default:
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, BASE)
-+ | checkint RB, ->vmeta_arith_vvo
-+ | checkint RC, ->vmeta_arith_vvo
-+ | lgfr RB, RB
-+ | msgfr RB, RC
-+ | lgfr RC, RB
-+ | cgr RB, RC; jne ->vmeta_arith_vvo
-+ break;
-+ }
-+ | llgfr RB, RB
-+ | setint RB
-+ | stg RB, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 8fab235acd85c40586bcf6baf1104c35af60df71 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 13:37:42 -0500
-Subject: [PATCH 139/247] Add support for division.
-
----
- src/vm_s390x.dasc | 39 +++++++++++++++++++++++++++++++++++++--
- 1 file changed, 37 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 37aa29b..955fe9f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1212,6 +1212,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | sllg RA, RA, 3(r0)
- |.endmacro
- |
-+ |.macro ins_arithfp, ins
-+ | ins_arithpre
-+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-+ ||switch (vk) {
-+ ||case 0:
-+ | ld f0, 0(RB, BASE)
-+ | ld f1, 0(RC, KBASE)
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, KBASE)
-+ | checknumtp RB, ->vmeta_arith_vno
-+ | checknumtp RC, ->vmeta_arith_vno
-+ | ins f0, f1
-+ || break;
-+ ||case 1:
-+ | ld f1, 0(RB, BASE)
-+ | ld f0, 0(RC, KBASE)
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, KBASE)
-+ | checknumtp RB, ->vmeta_arith_nvo
-+ | checknumtp RC, ->vmeta_arith_nvo
-+ | ins f0, f1
-+ || break;
-+ ||default:
-+ | ld f0, 0(RB, BASE)
-+ | ld f1, 0(RC, BASE)
-+ | lg RB, 0(RB, BASE)
-+ | lg RC, 0(RC, BASE)
-+ | checknumtp RB, ->vmeta_arith_vvo
-+ | checknumtp RC, ->vmeta_arith_vvo
-+ | ins f0, f1
-+ || break;
-+ ||}
-+ | std f0, 0(RA, BASE)
-+ | ins_next
-+ |.endmacro
-+ |
- |.macro ins_arithdn, intins
- | ins_arithpre
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-@@ -1297,8 +1333,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_arithfp ddbr
- break;
- case BC_MODVN:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 7cf4e8c2508b80dac958c5fb24a4724aeed8b9c9 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 14:53:31 -0500
-Subject: [PATCH 140/247] Fixes for DynASM.
-
- * s/hle/nle/
- * Fix RRF-b encoding (didbr instruction)
----
- dynasm/dasm_s390x.lua | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 9012c3a..f8c45fa 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -239,7 +239,7 @@ function _M.revdef(s)
- end
-
- local map_cond = {
-- o = 1, h = 2, hle = 3, l = 4,
-+ o = 1, h = 2, nle = 3, l = 4,
- nhe = 5, lh = 6, ne = 7, e = 8,
- nlh = 9, he = 10, nl = 11, le = 12,
- nh = 13, no = 14, [""] = 15,
-@@ -1200,7 +1200,7 @@ map_op = {
- sdb_2 = "ed000000001bRXE",
- seb_2 = "ed000000000bRXE",
- -- RRF-b instructions
-- didbr_4 = "0000b3580000RRF-b",
-+ didbr_4 = "0000b35b0000RRF-b",
- -- S mode instructions
- stfl_1 = "0000b2b10000S",
- -- I- mode instructions
-@@ -1355,7 +1355,7 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op2);
- elseif p == "RRF-b" then
- wputhw(op1);
-- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + parse_mask(params[4])
-+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
- wputhw(op2)
- elseif p =="S" then
- wputhw(op1);
---
-2.20.1
-
-
-From d359319d3a81244902098b8d6599b2741b8f3105 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 14:54:26 -0500
-Subject: [PATCH 141/247] Add support for modulo (%) operations.
-
-Only the slow path for now.
----
- src/vm_s390x.dasc | 32 ++++++++++++++++++++++----------
- 1 file changed, 22 insertions(+), 10 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 955fe9f..2e404aa 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1081,15 +1081,24 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Math helper functions ----------------------------------------------
- |//-----------------------------------------------------------------------
- |
-- |.macro vm_round, name, mode, cond
-+ |// FP value rounding. Called by math.floor/math.ceil fast functions.
-+ |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
-+ |.macro vm_round, name, mask
- |->name:
-- | stg r0, 0(r0)
-+ | // TODO: handle edge cases?
-+ | lghi r0, 1
-+ | cdfbr f1, r0
-+ | didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
-+ | jnle >1
-+ | ldr f0, f2
-+ | br r14
-+ |1: // partial remainder (sanity check)
- | stg r0, 0(r0)
- |.endmacro
- |
-- | vm_round vm_floor, 0, 1
-- | vm_round vm_ceil, 1, JIT
-- | vm_round vm_trunc, 2, JIT
-+ | vm_round vm_floor, 7 // Round towards -inf.
-+ | vm_round vm_ceil, 6 // Round towards +inf.
-+ | vm_round vm_trunc, 5 // Round towards 0.
- |
- |// FP modulo x%y. Called by BC_MOD* and vm_arith.
- |->vm_mod:
-@@ -1335,13 +1344,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arithfp ddbr
- break;
-+ // TODO: implement fast mod operation.
-+ // x86_64 does floating point mod, however it might be better to use integer mod.
- case BC_MODVN:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | j ->vmeta_arith_vno
- break;
-- case BC_MODNV: case BC_MODVV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ case BC_MODNV:
-+ | j ->vmeta_arith_nvo
-+ break;
-+ case BC_MODVV:
-+ | j ->vmeta_arith_vvo
- break;
- case BC_POW:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 6f0798631c312d77da04a33c61d09ede6539ab83 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 15:49:21 -0500
-Subject: [PATCH 142/247] Add support for function definitions.
-
----
- src/vm_s390x.dasc | 71 ++++++++++++++++++++++++++++++++++++++++-------
- 1 file changed, 61 insertions(+), 10 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 2e404aa..018ec27 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1429,8 +1429,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_FNEW:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | lg CARG3, -16(BASE)
-+ | cleartp CARG3
-+ | sllg RD, RD, 3(r0)
-+ | lg CARG2, 0(RD, KBASE) // Fetch GCproto *.
-+ | lgr CARG1, L:RB
-+ | stg PC, SAVE_PC
-+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
-+ | brasl r14, extern lj_func_newL_gc
-+ | // GCfuncL * returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | llgc RA, PC_RA
-+ | sllg RA, RA, 3(r0)
-+ | settp LFUNC:CRET1, LJ_TFUNC
-+ | stg LFUNC:CRET1, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_TNEW:
- | stg r0, 0(r0)
-@@ -1949,21 +1965,56 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
-+ /* -- Function headers -------------------------------------------------- */
-+
-+ /*
-+ ** Reminder: A function may be called with func/args above L->maxstack,
-+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
-+ ** too. This means all FUNC* ops (including fast functions) must check
-+ ** for stack overflow _before_ adding more slots!
-+ */
-+
- case BC_FUNCF:
-+ |.if JIT
- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- break;
-+ |.endif
- case BC_FUNCV: /* NYI: compiled vararg functions. */
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
- break;
-+
- case BC_JFUNCF:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+#if !LJ_HASJIT
- break;
-+#endif
- case BC_IFUNCF:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-+ | lg KBASE, (PC2PROTO(k)-4)(PC)
-+ | lg L:RB, SAVE_L
-+ | sllg RA, RA, 3(r0)
-+ | la RA, 0(RA, BASE) // Top of frame.
-+ | clg RA, L:RB->maxstack
-+ | jh ->vm_growstack_f
-+ | llgc RA, (PC2PROTO(numparams)-4)(PC)
-+ | clgr NARGS:RD, RA // Check for missing parameters.
-+ | jle >3
-+ |2:
-+ if (op == BC_JFUNCF) {
-+ | llgh RD, PC_RD
-+ | j =>BC_JLOOP
-+ } else {
-+ | ins_next
-+ }
-+ |
-+ |3: // Clear missing parameters.
-+ | // TODO: optimize this. Some of this can be hoisted.
-+ | sllg TMPR1, NARGS:RD, 3(r0)
-+ | lghi TMPR2, LJ_TNIL
-+ | stg TMPR2, -8(TMPR1, BASE)
-+ | la RD, 1(RD)
-+ | clgr RD, RA
-+ | jle <3
-+ | j <2
- break;
-
- case BC_JFUNCV:
---
-2.20.1
-
-
-From e4e2aa6c498bf6fff55fed72366875364286a2eb Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 17:10:38 -0500
-Subject: [PATCH 143/247] Add support for if statements.
-
----
- src/vm_s390x.dasc | 100 ++++++++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 96 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 018ec27..83c26db 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -639,6 +639,33 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |//-- Comparison metamethods ---------------------------------------------
- |
-+ |->vmeta_comp:
-+ | llgh RD, PC_RD
-+ | sllg RD, RD, 3(r0)
-+ | llgc RA, PC_RA
-+ | sllg RA, RA, 3(r0)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | la CARG2, 0(RA, BASE)
-+ | la CARG3, 0(RD, BASE)
-+ | lgr CARG1, L:RB
-+ | llgc CARG4, PC_OP
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
-+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
-+ |3:
-+ | lg BASE, L:RB->base
-+ | clgfi CRET1, 1
-+ | jh ->vmeta_binop
-+ |4:
-+ | la PC, 4(PC)
-+ | jl >6
-+ |5:
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |6:
-+ | ins_next
-+ |
- |->cont_condt: // BASE = base, RC = result
- | stg r0, 0(r0)
- | stg r0, 0(r0)
-@@ -1159,10 +1186,73 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |=>defop:
-
- switch (op) {
-+
-+ /* -- Comparison ops ---------------------------------------------------- */
-+
-+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
-+
-+ |.macro jmp_comp, lt, ge, le, gt, target
-+ ||switch (op) {
-+ ||case BC_ISLT:
-+ | lt target
-+ ||break;
-+ ||case BC_ISGE:
-+ | ge target
-+ ||break;
-+ ||case BC_ISLE:
-+ | le target
-+ ||break;
-+ ||case BC_ISGT:
-+ | gt target
-+ ||break;
-+ ||default: break; /* Shut up GCC. */
-+ ||}
-+ |.endmacro
-+
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // RA = src1, RD = src2, JMP with RD = target
-+ | ins_AD
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | ld f0, 0(RA, BASE)
-+ | ld f1, 0(RD, BASE)
-+ | lg RA, 0(RA, BASE)
-+ | lg RD, 0(RD, BASE)
-+ | srag ITYPE, RA, 47(r0)
-+ | srag RB, RD, 47(r0)
-+ |
-+ | clfi ITYPE, LJ_TISNUM; jne >7
-+ | clfi RB, LJ_TISNUM; jne >8
-+ | // Both are integers.
-+ | la PC, 4(PC)
-+ | cr RA, RD
-+ | jmp_comp jhe, jl, jh, jle, >9
-+ |6:
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RA is not an integer.
-+ | jh ->vmeta_comp
-+ | // RA is a number.
-+ | clfi RB, LJ_TISNUM; jl >1; jne ->vmeta_comp
-+ | // RA is a number, RD is an integer.
-+ | cdfbr f1, RD
-+ | j >1
-+ |
-+ |8: // RA is an integer, RD is not an integer.
-+ | jh ->vmeta_comp
-+ | // RA is an integer, RD is a number.
-+ | cdfbr f0, RA
-+ |1:
-+ | la PC, 4(PC)
-+ | cdbr f0, f1
-+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
-+ | jmp_comp jnl, jl, jnle, jle, <9
-+ | j <6
- break;
-+
- case BC_ISEQV: case BC_ISNEV:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
-@@ -1961,9 +2051,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
- case BC_JMP:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AJ // RA = unused, RD = target
-+ | branchPC RD
-+ | ins_next
- break;
-
- /* -- Function headers -------------------------------------------------- */
---
-2.20.1
-
-
-From ae00d3ad2578b7c4e7c95370224d53fd5302b509 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 20 Dec 2016 17:26:33 -0500
-Subject: [PATCH 144/247] Fix bug in division.
-
----
- src/vm_s390x.dasc | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 83c26db..0f538d0 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -722,6 +722,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lay RB, 0(RB, BASE)
- | lay RC, 0(RC, BASE)
- |1:
-+ | llgc RA, PC_RA
- | sllg RA, RA, 3(r0)
- | lay RA, 0(RA, BASE)
- | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
---
-2.20.1
-
-
-From ca7782ebcf74c18e3d7e54edef6f6e531a935c2e Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 21 Dec 2016 18:43:25 +0530
-Subject: [PATCH 145/247] Added test example for RXE mode
-
----
- dynasm/Examples/test_z_inst.c | 17 ++++++++++++++++-
- 1 file changed, 16 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 2314606..20b2045 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -366,6 +366,20 @@ static void rsb(dasm_State *state) {
- | br r14
- }
-
-+static void sqrt_rxe(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | lay sp , -8(sp)
-+ | cefbr f0 , r2
-+ | stdy f0 , 0(sp)
-+ | sqeb f0 ,0(r4,sp)
-+ | cfebr r2 ,0, f0
-+ | la sp, 8(sp)
-+ | br r14
-+
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -398,7 +412,8 @@ test_table test[] = {
- { 0, 0, 0, sil, 23, "sil"},
- {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
- { 0, 0, 0, rsb, 0, "rsb"},
-- {12,10, 0, rre, 10, "rre"}
-+ {12,10, 0, rre, 10, "rre"},
-+ {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 46c3314ea73bd0ab9daf665d49d58a1e79652831 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 21 Dec 2016 11:02:53 -0500
-Subject: [PATCH 146/247] Add support for numeric equality checks.
-
----
- src/vm_s390x.dasc | 114 +++++++++++++++++++++++++++++++++++++++++++++-
- 1 file changed, 112 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 0f538d0..d1db7b9 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1255,8 +1255,118 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_ISEQV: case BC_ISNEV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ vk = op == BC_ISEQV;
-+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
-+ | sllg RD, RD, 3(r0)
-+ | ld f1, 0(RD, BASE)
-+ | lg RD, 0(RD, BASE)
-+ | sllg RA, RA, 3(r0)
-+ | ld f0, 0(RA, BASE)
-+ | lg RA, 0(RA, BASE)
-+ | la PC, 4(PC)
-+ | srag RB, RD, 47(r0)
-+ | srag ITYPE, RA, 47(r0)
-+ | clfi RB, LJ_TISNUM; jne >7
-+ | clfi ITYPE, LJ_TISNUM; jne >8
-+ | cr RD, RA
-+ if (vk) {
-+ | jne >9
-+ } else {
-+ | je >9
-+ }
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RD is not an integer.
-+ | jh >5
-+ | // RD is a number.
-+ | clfi ITYPE, LJ_TISNUM; jl >1; jne >5
-+ | // RD is a number, RA is an integer.
-+ | cdfbr f0, RA
-+ | j >1
-+ |
-+ |8: // RD is an integer, RA is not an integer.
-+ | jh >5
-+ | // RD is an integer, RA is a number.
-+ | cdfbr f1, RD
-+ | j >1
-+ |
-+ |1:
-+ | cdbr f0, f1
-+ |4:
-+ iseqne_fp:
-+ if (vk) {
-+ | jne >2 // Unordered means not equal.
-+ } else {
-+ | je >1 // Unordered means not equal.
-+ }
-+ iseqne_end:
-+ if (vk) {
-+ |1: // EQ: Branch to the target.
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |2: // NE: Fallthrough to next instruction.
-+ |.if not FFI
-+ |3:
-+ |.endif
-+ } else {
-+ |.if not FFI
-+ |3:
-+ |.endif
-+ |2: // NE: Branch to the target.
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |1: // EQ: Fallthrough to next instruction.
-+ }
-+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
-+ op == BC_ISEQN || op == BC_ISNEN)) {
-+ | j <9
-+ } else {
-+ | ins_next
-+ }
-+ |
-+ if (op == BC_ISEQV || op == BC_ISNEV) {
-+ |5: // Either or both types are not numbers.
-+ |.if FFI
-+ | clfi RB, LJ_TCDATA; je ->vmeta_equal_cd
-+ | clfi ITYPE, LJ_TCDATA; je ->vmeta_equal_cd
-+ |.endif
-+ | cgr RA, RD
-+ | je <1 // Same GCobjs or pvalues?
-+ | cr RB, ITYPE
-+ | jne <2 // Not the same type?
-+ | clfi RB, LJ_TISTABUD
-+ | jh <2 // Different objects and not table/ud?
-+ |
-+ | // Different tables or userdatas. Need to check __eq metamethod.
-+ | // Field metatable must be at same offset for GCtab and GCudata!
-+ | cleartp TAB:RA
-+ | lg TAB:RB, TAB:RA->metatable
-+ | cghi TAB:RB, 0
-+ | je <2 // No metatable?
-+ | llgc TMPR2, TAB:RB->nomm
-+ | tmll TMPR2, 1<<MM_eq
-+ | jne <2 // Or 'no __eq' flag set?
-+ if (vk) {
-+ | lghi RB, 0 // ne = 0 // TODO: should be 32-bit?
-+ } else {
-+ | lghi RB, 1 // ne = 1 // TODO: should be 32-bit?
-+ }
-+ | j ->vmeta_equal // Handle __eq metamethod.
-+ } else {
-+ |.if FFI
-+ |3:
-+ | clfi ITYPE, LJ_TCDATA
-+ if (LJ_DUALNUM && vk) {
-+ | jne <9
-+ } else {
-+ | jne <2
-+ }
-+ | j ->vmeta_equal_cd
-+ |.endif
-+ }
- break;
- case BC_ISEQS: case BC_ISNES:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 8cb661762b3ac955a6203f0f0053121356bb82c7 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 21 Dec 2016 12:49:53 -0500
-Subject: [PATCH 147/247] Implement some boolean operations.
-
----
- src/vm_s390x.dasc | 47 +++++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 41 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index d1db7b9..e243074 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1381,8 +1381,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
-+ | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3(r0)
-+ | lg ITYPE, 0(RD, BASE)
-+ | la PC, 4(PC)
-+ if (op == BC_ISTC || op == BC_ISFC) {
-+ | lgr RB, ITYPE
-+ }
-+ | srag ITYPE, ITYPE, 47(r0)
-+ | clfi ITYPE, LJ_TISTRUECOND
-+ if (op == BC_IST || op == BC_ISTC) {
-+ | jhe >1
-+ } else {
-+ | jl >1
-+ }
-+ if (op == BC_ISTC || op == BC_ISFC) {
-+ | stg RB, 0(RA, BASE)
-+ }
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |1: // Fallthrough to the next instruction.
-+ | ins_next
- break;
- case BC_ISTYPE:
- | stg r0, 0(r0)
-@@ -1401,8 +1421,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next_
- break;
- case BC_NOT:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = src
-+ | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3(r0)
-+ | lg RB, 0(RD, BASE)
-+ | srag RB, RB, 47(r0)
-+ | load_false RC
-+ | cghi RB, LJ_TTRUE
-+ | je >1 // TODO: Maybe do something fancy to avoid the jump?
-+ | load_true RC
-+ |1:
-+ | stg RC, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_UNM:
- | stg r0, 0(r0)
-@@ -1598,8 +1628,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_KPRI:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = primitive type (~)
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 47(r0)
-+ | lghi TMPR2, -1
-+ | xgr RD, TMPR2 // not
-+ | stg RD, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_KNIL:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 2e843b084a495a3687e40087866a4e55400c2bbe Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 21 Dec 2016 13:21:12 -0500
-Subject: [PATCH 148/247] Implement more equality checks.
-
----
- src/vm_s390x.dasc | 89 ++++++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 81 insertions(+), 8 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index e243074..59ce184 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1369,17 +1369,90 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- break;
- case BC_ISEQS: case BC_ISNES:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- break;
-+ vk = op == BC_ISEQS;
-+ | ins_AND // RA = src, RD = str const, JMP with RD = target
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | lg RB, 0(RA, BASE)
-+ | la PC, 4(PC)
-+ | checkstr RB, >3
-+ | cg RB, 0(RD, KBASE)
-+ iseqne_test:
-+ if (vk) {
-+ | jne >2
-+ } else {
-+ | je >1
-+ }
-+ goto iseqne_end;
- case BC_ISEQN: case BC_ISNEN:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- break;
-+ vk = op == BC_ISEQN;
-+ | ins_AD // RA = src, RD = num const, JMP with RD = target
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | ld f0, 0(RA, BASE)
-+ | lg RB, 0(RA, BASE)
-+ | ld f1, 0(RD, KBASE)
-+ | lg RD, 0(RD, KBASE)
-+ | la PC, 4(PC)
-+ | checkint RB, >7
-+ | checkint RD, >8
-+ | cr RB, RD
-+ if (vk) {
-+ | jne >9
-+ } else {
-+ | je >9
-+ }
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RA is not an integer.
-+ | jh >3
-+ | // RA is a number.
-+ | checkint RD, >1
-+ | // RA is a number, RD is an integer.
-+ | cdfbr f1, RD
-+ | j >1
-+ |
-+ |8: // RA is an integer, RD is a number.
-+ | cdfbr f0, RB
-+ | cdbr f0, f1
-+ | j >4
-+ |1:
-+ | cdbr f0, f1
-+ |4:
-+ goto iseqne_fp;
- case BC_ISEQP: case BC_ISNEP:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ vk = op == BC_ISEQP;
-+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
-+ | sllg RA, RA, 3(r0)
-+ | lg RB, 0(RA, BASE)
-+ | srag RB, RB, 47(r0)
-+ | la PC, 4(PC)
-+ | cr RB, RD
-+ if (!LJ_HASFFI) goto iseqne_test;
-+ if (vk) {
-+ | jne >3
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |2:
-+ | ins_next
-+ |3:
-+ | cghi RB, LJ_TCDATA; jne <2
-+ | j ->vmeta_equal_cd
-+ } else {
-+ | je >2
-+ | cghi RB, LJ_TCDATA; je ->vmeta_equal_cd
-+ | llgh RD, PC_RD
-+ | branchPC RD
-+ |2:
-+ | ins_next
-+ }
- break;
-+
-+ /* -- Unary test and copy ops ------------------------------------------- */
-+
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
- | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
- | sllg RD, RD, 3(r0)
---
-2.20.1
-
-
-From e96efa0225c5352c51e8e64d1d6117c1afcd67c0 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 21 Dec 2016 14:03:58 -0500
-Subject: [PATCH 149/247] Implement unary minus.
-
----
- src/vm_s390x.dasc | 27 +++++++++++++++++++++++----
- 1 file changed, 23 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 59ce184..9a45446 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -710,8 +710,11 @@ static void build_subroutines(BuildCtx *ctx)
- | j >1
- |
- |->vmeta_unm:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgh RD, PC_RD
-+ | sllg RD, RD, 3(r0)
-+ | la RC, 0(RD, BASE)
-+ | lgr RB, RC
-+ | j >1
- |
- |->vmeta_arith_vvo:
- | llgc RB, PC_RB
-@@ -1508,8 +1511,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_UNM:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = src
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | lg RB, 0(RD, BASE)
-+ | checkint RB, >3
-+ | lcr RB, RB; jo >2
-+ |1:
-+ | stg RB, 0(RA, BASE)
-+ | ins_next
-+ |2:
-+ | llihh RB, 0x41e0 // (double)2^31
-+ | j <1
-+ |3:
-+ | jh ->vmeta_unm
-+ | // Toggle sign bit.
-+ | llihh TMPR2, 0x8000
-+ | xgr RB, TMPR2
-+ | j <1
- break;
- case BC_LEN:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 7eb003e607d06805beba7930e2fd5474610548ff Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 21 Dec 2016 18:02:43 -0500
-Subject: [PATCH 150/247] Implement table creation and printing.
-
----
- src/vm_s390x.dasc | 140 ++++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 123 insertions(+), 17 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 9a45446..8cf4b3f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -808,20 +808,16 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc, name
- |->ff_ .. name:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
- |.endmacro
- |
- |.macro .ffunc_1, name
- |->ff_ .. name:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | clfi NARGS:RD, 1+1; jl ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_2, name
- |->ff_ .. name:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | clfi NARGS:RD, 2+1; jl ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_n, name, op
-@@ -867,12 +863,40 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Base library: conversions ------------------------------------------
- |
- |.ffunc tonumber
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // Only handles the number case inline (without a base argument).
-+ | clfi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-+ | lg RB, 0(BASE)
-+ | checknumber RB, ->fff_fallback
-+ | lg PC, -8(BASE)
-+ | stg RB, -16(BASE)
-+ | j ->fff_res1
- |
- |.ffunc_1 tostring
-+ | // Only handles the string or number case inline.
-+ | lg PC, -8(BASE)
-+ | lg STR:RB, 0(BASE)
-+ | checktp_nc STR:RB, LJ_TSTR, >3
-+ | // A __tostring method in the string base metatable is ignored.
-+ |2:
-+ | stg STR:RB, -16(BASE)
-+ | j ->fff_res1
-+ |3: // Handle numbers inline, unless a number base metatable is present.
-+ | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
-+ | lghi TMPR2, 0
-+ | cg TMPR2, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
-+ | jne ->fff_fallback
-+ | ffgccheck // Caveat: uses label 1.
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base // Add frame since C call can throw.
-+ | stg PC, SAVE_PC // Redundant (but a defined value).
-+ | lgr CARG2, BASE // Otherwise: CARG2 == BASE
-+ | lgr L:CARG1, L:RB
-+ | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o)
-+ | // GCstr returned in r2 (CRET1).
- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg BASE, L:RB->base
-+ | settp STR:RB, CRET1, LJ_TSTR
-+ | j <2
- |
- |//-- Base library: iterators -------------------------------------------
- |
-@@ -921,8 +945,33 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_resxmm0:
- |
- |->fff_res1:
-+ | lghi RD, 1+1
- |->fff_res:
-+ | stg RD, SAVE_MULTRES
- |->fff_res_:
-+ | tmll PC, FRAME_TYPE
-+ | jne >7
-+ |5:
-+ | llgc TMPR1, PC_RB
-+ | clgr TMPR1, RD // More results expected?
-+ | jh >6
-+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
-+ | llgc RA, PC_RA
-+ | lcgr RA, RA
-+ | sllg RA, RA, 3(r0)
-+ | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
-+ | ins_next
-+ |
-+ |6: // Fill up results with nil.
-+ | sllg TMPR1, RD, 3(r0)
-+ | lghi TMPR2, LJ_TNIL
-+ | stg TMPR2, -24(TMPR1, BASE)
-+ | la RD, 1(RD)
-+ | j <5
-+ |
-+ |7: // Non-standard return case.
-+ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
-+ | j ->vm_return
- |
- |.macro math_round, func
- | .ffunc math_ .. func
-@@ -1036,14 +1085,41 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->fff_fallback_2:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lghi NARGS:RD, 1+2 // Other args are ignored, anyway.
-+ | j ->fff_fallback
- |->fff_fallback_1:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lghi NARGS:RD, 1+1 // Other args are ignored, anyway.
- |->fff_fallback: // Call fast function fallback handler.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // BASE = new base, RD = nargs+1
-+ | lg L:RB, SAVE_L
-+ | lg PC, -8(BASE) // Fallback may overwrite PC.
-+ | stg PC, SAVE_PC // Redundant (but a defined value).
-+ | stg BASE, L:RB->base
-+ | sllg RD, NARGS:RD, 3(r0)
-+ | lay RD, -8(RD, BASE)
-+ | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler.
-+ | stg RD, L:RB->top
-+ | lg CFUNC:RD, -16(BASE)
-+ | cleartp CFUNC:RD
-+ | clg RA, L:RB->maxstack
-+ | jh >5 // Need to grow stack.
-+ | lgr CARG1, L:RB
-+ | lg TMPR1, CFUNC:RD->f
-+ | basr r14, TMPR1 // (lua_State *L)
-+ | lg BASE, L:RB->base
-+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
-+ | lgr RD, CRET1
-+ | cghi RD, 0; jh ->fff_res // Returned nresults+1?
-+ |1:
-+ | lg RA, L:RB->top
-+ | sgr RA, BASE
-+ | srlg RA, RA, 3(r0)
-+ | cghi RD, 0
-+ | la NARGS:RD, 1(RA)
-+ | lg LFUNC:RB, -16(BASE)
-+ | jne ->vm_call_tail // Returned -1?
-+ | cleartp LFUNC:RB
-+ | ins_callt // Returned 0: retry fast path.
- |
- |// Reconstruct previous base for vmeta_call during tailcall.
- |->vm_call_tail:
-@@ -1777,8 +1853,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_TNEW:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = hbits|asize
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
-+ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
-+ | stg PC, SAVE_PC
-+ | jhe >5
-+ |1:
-+ | srlg CARG3, RD, 11(r0)
-+ | llill TMPR2, 0x7ff
-+ | nr RD, TMPR2
-+ | cr RD, TMPR2
-+ | je >3
-+ |2:
-+ | lgr L:CARG1, L:RB
-+ | llgfr CARG2, RD
-+ | brasl r14, extern lj_tab_new // (lua_State *L, uint32_t asize, uint32_t hbits)
-+ | // Table * returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | llgc RA, PC_RA
-+ | sllg RA, RA, 3(r0)
-+ | settp TAB:CRET1, LJ_TTAB
-+ | stg TAB:CRET1, 0(RA, BASE)
-+ | ins_next
-+ |3: // Turn 0x7ff into 0x801.
-+ | llill RD, 0x801
-+ | j <2
-+ |5:
-+ | lgr L:CARG1, L:RB
-+ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
-+ | llgh RD, PC_RD
-+ | j <1
- break;
- case BC_TDUP:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 7c4428d400c59805726ead3ed88e44ca5e4931b5 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 22 Dec 2016 12:41:00 -0500
-Subject: [PATCH 151/247] Fix BC_MCALL
-
-RC and RD are the same register on x64, so sometimes it uses them
-interchangeably. Probably we should make them the same register
-on s390x, but that would involve changing the instruction decode
-code which I would rather leave until we have a test suite passing.
----
- src/vm_s390x.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 8cf4b3f..c714568 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2057,6 +2057,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_CALL: case BC_CALLM:
- | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
-+ | lgr RD, RC
- if (op == BC_CALLM) {
- | ag NARGS:RD, SAVE_MULTRES // TODO: MULTRES is 32-bit on x64
- }
-@@ -2064,7 +2065,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg LFUNC:RB, 0(BASE, RA)
- | checkfunc LFUNC:RB, ->vmeta_call_ra
- | la BASE, 16(RA, BASE)
-- | lgr RD, RC
- | ins_call
- break;
-
---
-2.20.1
-
-
-From 7660250f71be03e12746b0778273fadac61d3687 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 22 Dec 2016 13:16:02 -0500
-Subject: [PATCH 152/247] Implement TDUP, TGETB and TSETB.
-
-Allows some simple table operations, for example:
-
-t = {1, 2}
-print(t[1]) -- prints 1
-t[1] = 3
-print(t[1]) -- prints 3
----
- src/vm_s390x.dasc | 90 +++++++++++++++++++++++++++++++++++++++++++----
- 1 file changed, 84 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index c714568..260c576 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1887,8 +1887,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <1
- break;
- case BC_TDUP:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
-+ | lg L:RB, SAVE_L
-+ | lg RA, (DISPATCH_GL(gc.total))(DISPATCH)
-+ | stg PC, SAVE_PC
-+ | clg RA, (DISPATCH_GL(gc.threshold))(DISPATCH)
-+ | stg BASE, L:RB->base
-+ | jhe >3
-+ |2:
-+ | sllg RD, RD, 3(r0)
-+ | lg TAB:CARG2, 0(RD, KBASE)
-+ | lgr L:CARG1, L:RB
-+ | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt)
-+ | // Table * returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | llgc RA, PC_RA
-+ | settp TAB:CRET1, LJ_TTAB
-+ | sllg RA, RA, 3(r0)
-+ | stg TAB:CRET1, 0(RA, BASE)
-+ | ins_next
-+ |3:
-+ | lgr L:CARG1, L:RB
-+ | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
-+ | llgh RD, PC_RD // Need to reload RD.
-+ | lghi TMPR2, -1
-+ | xgr RD, TMPR2 // not RD
-+ | j <2
- break;
-
- case BC_GGET:
-@@ -1959,8 +1983,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j ->vmeta_tgets // Caveat: preserve STR:RC.
- break;
- case BC_TGETB:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC // RA = dst, RB = table, RC = byte literal
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | checktab TAB:RB, ->vmeta_tgetb
-+ | cl RC, TAB:RB->asize
-+ | jhe ->vmeta_tgetb
-+ | sllg RC, RC, 3(r0)
-+ | ag RC, TAB:RB->array
-+ | // Get array slot.
-+ | lg ITYPE, 0(RC)
-+ | cghi ITYPE, LJ_TNIL
-+ | je >2
-+ |1:
-+ | sllg RA, RA, 3(r0)
-+ | stg ITYPE, 0(RA, BASE)
-+ | ins_next
-+ |
-+ |2: // Check for __index if table value is nil.
-+ | lg TAB:TMPR1, TAB:RB->metatable
-+ | cghi TAB:TMPR1, 0
-+ | je <1
-+ | llgc TMPR2, TAB:TMPR1->nomm
-+ | tmll TMPR2, 1<<MM_index
-+ | je ->vmeta_tgetb // 'no __index' flag NOT set: check.
-+ | j <1
- break;
- case BC_TGETR:
- | stg r0, 0(r0)
-@@ -2041,8 +2088,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <3
- break;
- case BC_TSETB:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC // RA = src, RB = table, RC = byte literal
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | checktab TAB:RB, ->vmeta_tsetb
-+ | cl RC, TAB:RB->asize
-+ | jhe ->vmeta_tsetb
-+ | sllg RC, RC, 3(r0)
-+ | ag RC, TAB:RB->array
-+ | lghi TMPR2, LJ_TNIL
-+ | cg TMPR2, 0(RC)
-+ | je >3 // Previous value is nil?
-+ |1:
-+ | llgc TMPR1, TAB:RB->marked
-+ | tmll TMPR1, LJ_GC_BLACK // isblack(table)
-+ | jne >7
-+ |2: // Set array slot.
-+ | sllg RA, RA, 3(r0)
-+ | lg ITYPE, 0(RA, BASE)
-+ | stg ITYPE, 0(RC)
-+ | ins_next
-+ |
-+ |3: // Check for __newindex if previous value is nil.
-+ | lg TAB:TMPR1, TAB:RB->metatable
-+ | cghi TAB:TMPR1, 0
-+ | je <1
-+ | llgc TMPR2, TAB:TMPR1->nomm
-+ | tmll TMPR2, 1<<MM_newindex
-+ | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
-+ | j <1
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, TMPR1
-+ | j <2
- break;
- case BC_TSETR:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From cbc49f5562d56bfe953e3c587026d50c8ec9ccf4 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 22 Dec 2016 13:50:59 -0500
-Subject: [PATCH 153/247] Implement more tset and tget metamethods.
-
-This allows table entries to be get and set even if they don't
-already exist, for example:
-
-t = {}
-print(t[1]) -- prints nil
-t[1] = 3
-print(t[1]) -- prints 3
----
- src/vm_s390x.dasc | 119 ++++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 105 insertions(+), 14 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 260c576..7051370 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -599,19 +599,61 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Table indexing metamethods -----------------------------------------
- |
- |->vmeta_tgets:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
-+ | stg STR:RC, TMP_STACK
-+ | la RC, TMP_STACK
-+ | llgc TMPR1, PC_OP
-+ | cghi TMPR1, BC_GGET
-+ | jne >1
-+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
-+ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
-+ | stg TAB:RA, 0(RB)
-+ | j >2
- |
- |->vmeta_tgetb:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RC, PC_RC
-+ | setint RC
-+ | stg RC, TMP_STACK
-+ | la RC, TMP_STACK
-+ | j >1
- |
- |->vmeta_tgetv:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RC, PC_RC // Reload TValue *k from RC.
-+ | sllg RC, RC, 3(r0)
-+ | la RC, 0(RC, BASE)
-+ |1:
-+ | llgc RB, PC_RB // Reload TValue *t from RB.
-+ | sllg RB, RB, 3(r0)
-+ | la RB, 0(RB, BASE)
-+ |2:
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base
-+ | lgr CARG2, RB
-+ | lgr CARG3, RC
-+ | lgr L:RB, L:CARG1
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
-+ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | ltgr RC, CRET1
-+ | je >3
- |->cont_ra: // BASE = base, RC = result
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RA, PC_RA
-+ | sllg RA, RA, 3(r0)
-+ | lg RB, 0(RC)
-+ | stg RB, 0(RA, BASE)
-+ | ins_next
-+ |
-+ |3: // Call __index metamethod.
-+ | // BASE = base, L->top = new base, stack = cont/func/t/k
-+ | lg RA, L:RB->top
-+ | stg PC, -24(PC) // [cont|PC]
-+ | lay PC, FRAME_CONT(RA)
-+ | sgr PC, BASE
-+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
-+ | lghi NARGS:RD, 2+1 // 2 args for func(t, k).
-+ | cleartp LFUNC:RB
-+ | j ->vm_call_dispatch_f
- |
- |->vmeta_tgetr:
- | stg r0, 0(r0)
-@@ -620,19 +662,68 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vmeta_tsets:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
-+ | stg STR:RC, TMP_STACK
-+ | la RC, TMP_STACK
-+ | llgc TMPR2, PC_OP
-+ | cghi TMPR2, BC_GSET
-+ | jne >1
-+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
-+ | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
-+ | stg TAB:RA, 0(RB)
-+ | j >2
- |
- |->vmeta_tsetb:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RC, PC_RC
-+ | setint RC
-+ | stg RC, TMP_STACK
-+ | la RC, TMP_STACK
-+ | j >1
- |
- |->vmeta_tsetv:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RC, PC_RC // Reload TValue *k from RC.
-+ | sllg RC, RC, 3(r0)
-+ | la RC, 0(RC, BASE)
-+ |1:
-+ | llgc RB, PC_RB // Reload TValue *t from RB.
-+ | sllg RB, RB, 3(r0)
-+ | la RB, 0(RB, BASE)
-+ |2:
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base // Caveat: CARG2/CARG3 may be BASE.
-+ | lgr CARG2, RB
-+ | lgr CARG3, RC
-+ | lgr L:RB, L:CARG1
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
-+ | // TValue * (finished) or NULL (metamethod) returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | ltgr RC, CRET1
-+ | je >3
-+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
-+ | llgc RA, PC_RA
-+ | sllg RA, RA, 3(r0)
-+ | lg RB, 0(RA, BASE)
-+ | stg RB, 0(RC)
- |->cont_nop: // BASE = base, (RC = result)
- | ins_next
- |
-+ |3: // Call __newindex metamethod.
-+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
-+ | lg RA, L:RB->top
-+ | stg PC, -24(PC) // [cont|PC]
-+ | llgc RC, PC_RA
-+ | // Copy value to third argument.
-+ | sllg RB, RC, 3(r0)
-+ | lg RB, 0(RB, BASE)
-+ | stg RB, 16(RA)
-+ | la PC, FRAME_CONT(RA)
-+ | sgr PC, BASE
-+ | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
-+ | lghi NARGS:RD, 3+1 // 3 args for func(t, k, v).
-+ | cleartp LFUNC:RB
-+ | j ->vm_call_dispatch_f
-+ |
- |->vmeta_tsetr:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From ba2361329f910df48f3f3a8db76036abe1a79384 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 22 Dec 2016 14:20:47 -0500
-Subject: [PATCH 154/247] Implement TGETV and TSETV.
-
-Allows table entries to be get and set using variables, for example:
-
-t = {4,5}
-i = 1
-print(t[i]) -- prints 4
-t[i] = 3
-print(t[i]) -- prints 3
----
- src/vm_s390x.dasc | 85 ++++++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 81 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 7051370..3b0b3ac 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2026,8 +2026,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_TGETV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC // RA = dst, RB = table, RC = key
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | sllg RC, RC, 3(r0)
-+ | lg RC, 0(RC, BASE)
-+ | checktab TAB:RB, ->vmeta_tgetv
-+ |
-+ | // Integer key?
-+ | checkint RC, >5
-+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
-+ | jhe ->vmeta_tgetv // Not in array part? Use fallback.
-+ | llgfr RC, RC
-+ | sllg RC, RC, 3(r0)
-+ | ag RC, TAB:RB->array
-+ | // Get array slot.
-+ | lg ITYPE, 0(RC)
-+ | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
-+ | je >2
-+ |1:
-+ | sllg RA, RA, 3(r0)
-+ | stg ITYPE, 0(RA, BASE)
-+ | ins_next
-+ |
-+ |2: // Check for __index if table value is nil.
-+ | lg TAB:TMPR1, TAB:RB->metatable
-+ | cghi TAB:TMPR1, 0
-+ | je <1
-+ | llgc TMPR2, TAB:TMPR1->nomm
-+ | tmll TMPR2, 1<<MM_index
-+ | je ->vmeta_tgetv // 'no __index' flag NOT set: check.
-+ | j <1
-+ |
-+ |5: // String key?
-+ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tgetv
-+ | cleartp STR:RC
-+ | j ->BC_TGETS_Z
- break;
- case BC_TGETS:
- | ins_ABC
-@@ -2104,9 +2138,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
- case BC_TSETV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC // RA = src, RB = table, RC = key
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | sllg RC, RC, 3(r0)
-+ | lg RC, 0(RC, BASE)
-+ | checktab TAB:RB, ->vmeta_tsetv
-+ |
-+ | // Integer key?
-+ | checkint RC, >5
-+ | cl RC, TAB:RB->asize // Takes care of unordered, too.
-+ | jhe ->vmeta_tsetv
-+ | llgfr RC, RC
-+ | sllg RC, RC, 3(r0)
-+ | ag RC, TAB:RB->array
-+ | lghi TMPR2, LJ_TNIL
-+ | cg TMPR2, 0(RC)
-+ | je >3 // Previous value is nil?
-+ |1:
-+ | llgc TMPR1, TAB:RB->marked
-+ | tmll TMPR1, LJ_GC_BLACK // isblack(table)
-+ | jne >7
-+ |2: // Set array slot.
-+ | sllg RA, RA, 3(r0)
-+ | lg RB, 0(RA, BASE)
-+ | stg RB, 0(RC)
-+ | ins_next
-+ |
-+ |3: // Check for __newindex if previous value is nil.
-+ | lg TAB:TMPR1, TAB:RB->metatable
-+ | cghi TAB:TMPR1, 0
-+ | je <1
-+ | llgc TMPR2, TAB:TMPR1->nomm
-+ | tmll TMPR2, 1<<MM_newindex
-+ | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
-+ | j <1
-+ |
-+ |5: // String key?
-+ | cghi ITYPE, LJ_TSTR; jne ->vmeta_tsetv
-+ | cleartp STR:RC
-+ | j ->BC_TSETS_Z
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, TMPR1
-+ | j <2
- break;
- case BC_TSETS:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 482916bd1adbbd7185d8ae3519d321c6b6fc55dc Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 22 Dec 2016 14:40:31 -0500
-Subject: [PATCH 155/247] Implement/fix TGETS and TSETS.
-
-Allows string keys in tables, for example:
-
-t = {}
-t["hello"] = 1
-print(t["hello"]) -- prints 1
----
- src/vm_s390x.dasc | 12 +++++++++---
- 1 file changed, 9 insertions(+), 3 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 3b0b3ac..2bba69f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2070,7 +2070,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lghi TMPR1, -1
- | xgr RC, TMPR1
- | sllg RC, RC, 3(r0)
-- | lg STR:RC, 0(RC, BASE)
-+ | lg STR:RC, 0(RC, KBASE)
- | checktab TAB:RB, ->vmeta_tgets
- |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
- | l TMPR1, TAB:RB->hmask
-@@ -2186,8 +2186,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <2
- break;
- case BC_TSETS:
-- | stg r0, 0(r0)
-- |
-+ | ins_ABC // RA = src, RB = table, RC = str const (~)
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | lghi TMPR2, -1
-+ | xgr RC, TMPR2 // ~RC
-+ | sllg RC, RC, 3(r0)
-+ | lg STR:RC, 0(RC, KBASE)
-+ | checktab TAB:RB, ->vmeta_tsets
- |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
- | l TMPR1, TAB:RB->hmask
- | n TMPR1, STR:RC->hash
---
-2.20.1
-
-
-From 982dfa978ef1c8ae8b22fbffef61049817d6a9a5 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 22 Dec 2016 14:59:37 -0500
-Subject: [PATCH 156/247] Implement LEN.
-
-Enables length of tables and strings to be taken, for example:
-
-t = "hello"
-print(#t) -- prints 5
-t = {1,2}
-print(#t) -- prints 2
----
- src/vm_s390x.dasc | 60 +++++++++++++++++++++++++++++++++++++++++++----
- 1 file changed, 56 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 2bba69f..dedccf2 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -845,8 +845,28 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->vm_call_dispatch
- |
- |->vmeta_len:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgh RD, PC_RD
-+ | sllg RD, RD, 3(r0)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | la CARG2, 0(RD, BASE)
-+ | lgr L:CARG1, L:RB
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_len // (lua_State *L, TValue *o)
-+ | // NULL (retry) or TValue * (metamethod) returned in r2 (CRET1).
-+ | lgr RC, CRET1
-+ | lg BASE, L:RB->base
-+#if LJ_52
-+ | cghi RC, 0
-+ | jne ->vmeta_binop // Binop call for compatibility.
-+ | llgh RD, PC_RD
-+ | sllg RD, RD, 3(r0)
-+ | lg TAB:CARG1, 0(RD, BASE)
-+ | cleartp TAB:CARG1
-+ | j ->BC_LEN_Z
-+#else
-+ | j ->vmeta_binop // Binop call for compatibility.
-+#endif
- |
- |//-- Call metamethod ----------------------------------------------------
- |
-@@ -1698,8 +1718,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <1
- break;
- case BC_LEN:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = src
-+ | sllg RD, RD, 3(r0)
-+ | lg RD, 0(RD, BASE)
-+ | checkstr RD, >2
-+ | llgf RD, STR:RD->len
-+ |1:
-+ | sllg RA, RA, 3(r0)
-+ | setint RD
-+ | stg RD, 0(RA, BASE)
-+ | ins_next
-+ |2:
-+ | cghi ITYPE, LJ_TTAB; jne ->vmeta_len
-+ | lgr TAB:CARG1, TAB:RD
-+#if LJ_52
-+ | lg TAB:RB, TAB:RD->metatable
-+ | cghi TAB:RB, 0
-+ | jne >9
-+ |3:
-+#endif
-+ |->BC_LEN_Z:
-+ | lgr RB, BASE // Save BASE.
-+ | brasl r14, extern lj_tab_len // (GCtab *t)
-+ | // Length of table returned in r2 (CRET1).
-+ | lgr RD, CRET1
-+ | lgr BASE, RB // Restore BASE.
-+ | llgc RA, PC_RA
-+ | j <1
-+#if LJ_52
-+ |9: // Check for __len.
-+ | llgc TMPR2, TAB:RB->nomm
-+ | tmll TMPR2, 1<<MM_len
-+ | jne <3
-+ | j ->vmeta_len // 'no __len' flag NOT set: check.
-+#endif
- break;
-
- /* -- Binary ops -------------------------------------------------------- */
---
-2.20.1
-
-
-From 2e5aacd37966d51a8330d3041d62fc72d354ef18 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 22 Dec 2016 15:40:25 -0500
-Subject: [PATCH 157/247] Implement a UCLO, ff_assert and a couple of other
- bits.
-
-Needed to get -bl working, still more to do though.
----
- src/vm_s390x.dasc | 46 ++++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 40 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index dedccf2..c862e42 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -950,8 +950,26 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Base library: checks -----------------------------------------------
- |
- |.ffunc_1 assert
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg RB, 0(BASE)
-+ | srag ITYPE, RB, 47(r0)
-+ | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
-+ | lg PC, -8(BASE)
-+ | stg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
-+ | lg RB, 0(BASE)
-+ | stg RB, -16(BASE)
-+ | ahi RD, -2
-+ | je >2
-+ | lgr RA, BASE
-+ |1:
-+ | la RA, 8(RA)
-+ | lg RB, 0(RA)
-+ | stg RB, -16(RA)
-+ | ahi RD, -1
-+ | jne <1
-+ | // TODO: replace with branch on count (brctg).
-+ |2:
-+ | lg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
-+ | j ->fff_res_
- |
- |.ffunc_1 type
- | stg r0, 0(r0)
-@@ -1972,9 +1990,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_UCLO:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = level, RD = target
-+ | branchPC RD // Do this first to free RD.
-+ | lg L:RB, SAVE_L
-+ | ltg TMPR2, L:RB->openupval
-+ | je >1
-+ | stg BASE, L:RB->base
-+ | sllg RA, RA, 3(r0)
-+ | la CARG2, 0(RA, BASE)
-+ | lgr L:CARG1, L:RB
-+ | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level)
-+ | lg BASE, L:RB->base
-+ |1:
-+ | ins_next
- break;
-+
- case BC_FNEW:
- | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
- | lg L:RB, SAVE_L
-@@ -2453,9 +2483,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
-+ /* -- Returns ----------------------------------------------------------- */
-+
- case BC_RETM:
-- | stg r0, 0(r0) // not implemented
-- | stg r0, 0(r0)
-+ | ins_AD // RA = results, RD = extra_nresults
-+ | ag RD, SAVE_MULTRES // MULTRES >=1, so RD >=1. // TODO: needs to be 32-bit.
-+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
- break;
-
- case BC_RET: case BC_RET0: case BC_RET1:
---
-2.20.1
-
-
-From 1ec8b47c7ce4cb9818e563912125d22e3ebf9668 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 28 Dec 2016 10:56:09 -0500
-Subject: [PATCH 158/247] Implement UGET.
-
-Allows simple closures, for example:
-
-function f(x)
- return function() return x end
-end
-
-y = f(1)
-print(y()) -- prints 1
----
- src/vm_s390x.dasc | 12 ++++++++++--
- 1 file changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index c862e42..7754c45 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1970,8 +1970,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_UGET:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst, RD = upvalue #
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | lg LFUNC:RB, -16(BASE)
-+ | cleartp LFUNC:RB
-+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB)
-+ | lg RB, UPVAL:RB->v
-+ | lg RD, 0(RB)
-+ | stg RD, 0(RA, BASE)
-+ | ins_next
- break;
- case BC_USETV:
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From c13b924104e09f7d98e874b905db1c407d5ba13a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 28 Dec 2016 13:21:06 -0500
-Subject: [PATCH 159/247] Implement TSETM and VARG.
-
-Allows varargs to be used, for example:
-
-function sel(n, ...)
- local arg = {...}
- return arg[n]
-end
-
-print(sel(2, 3, 4, 5)) -- prints 4
----
- src/vm_s390x.dasc | 149 ++++++++++++++++++++++++++++++++++++++++------
- 1 file changed, 132 insertions(+), 17 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 7754c45..450fce8 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -91,6 +91,7 @@
- |.define SAVE_PC, 168(sp)
- |.define SAVE_MULTRES, 160(sp)
- |.define TMP_STACK, 160(sp) // Overlaps SAVE_MULTRES
-+|.define TMP_STACK_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES).
- |
- |// Callee save area (allocated by interpreter).
- |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
-@@ -323,7 +324,7 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_returnc:
- | aghi RD, 1 // RD = nresults+1
- | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
-- | stg RD, SAVE_MULTRES
-+ | st RD, SAVE_MULTRES
- | tmll PC, FRAME_TYPE
- | je ->BC_RET_Z // Handle regular return to Lua.
- |
-@@ -352,7 +353,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg L:RB, SAVE_L
- | stg PC, L:RB->base
- |3:
-- | lg RD, SAVE_MULTRES
-+ | llgf RD, SAVE_MULTRES
- | lgf RA, SAVE_NRES // RA = wanted nresults+1
- |4:
- | cgr RA, RD
-@@ -395,7 +396,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // - The GC shrinks the stack in between.
- | // - A return back from a lua_call() with (high) nresults adjustment.
- | stg BASE, L:RB->top // Save current top held in BASE (yes).
-- | stg RD, SAVE_MULTRES // Need to fill only remainder with nil.
-+ | st RD, SAVE_MULTRES // Need to fill only remainder with nil.
- | lgr CARG2, RA
- | lgr CARG1, L:RB
- | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
-@@ -500,7 +501,7 @@ static void build_subroutines(BuildCtx *ctx)
- | aghi RD, 1 // RD = nresults+1
- | sgr RA, BASE // RA = resultofs
- | lg PC, -8(BASE)
-- | stg RD, SAVE_MULTRES
-+ | st RD, SAVE_MULTRES
- | tmll PC, FRAME_TYPE
- | je ->BC_RET_Z
- | j ->vm_return
-@@ -954,7 +955,7 @@ static void build_subroutines(BuildCtx *ctx)
- | srag ITYPE, RB, 47(r0)
- | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
- | lg PC, -8(BASE)
-- | stg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
-+ | st RD, SAVE_MULTRES
- | lg RB, 0(BASE)
- | stg RB, -16(BASE)
- | ahi RD, -2
-@@ -968,7 +969,7 @@ static void build_subroutines(BuildCtx *ctx)
- | jne <1
- | // TODO: replace with branch on count (brctg).
- |2:
-- | lg RD, SAVE_MULTRES // TODO: needs to be 32-bit.
-+ | llgf RD, SAVE_MULTRES
- | j ->fff_res_
- |
- |.ffunc_1 type
-@@ -1076,7 +1077,7 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_res1:
- | lghi RD, 1+1
- |->fff_res:
-- | stg RD, SAVE_MULTRES
-+ | st RD, SAVE_MULTRES
- |->fff_res_:
- | tmll PC, FRAME_TYPE
- | jne >7
-@@ -2391,8 +2392,54 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_TSETM:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
-+ |1:
-+ | sllg RA, RA, 3(r0)
-+ | sllg TMPR1, RD, 3(r0)
-+ | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word.
-+ | la RA, 0(RA, BASE)
-+ | lg TAB:RB, -8(RA) // Guaranteed to be a table.
-+ | cleartp TAB:RB
-+ | llgc TMPR2, TAB:RB->marked
-+ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | jne >7
-+ |2:
-+ | llgf RD, SAVE_MULTRES
-+ | aghi RD, -1
-+ | je >4 // Nothing to copy?
-+ | agr RD, TMPR1 // Compute needed size.
-+ | clgf RD, TAB:RB->asize
-+ | jh >5 // Doesn't fit into array part?
-+ | sgr RD, TMPR1
-+ | sllg TMPR1, TMPR1, 3(r0)
-+ | ag TMPR1, TAB:RB->array
-+ |3: // Copy result slots to table.
-+ | lg RB, 0(RA)
-+ | la RA, 8(RA)
-+ | stg RB, 0(TMPR1)
-+ | la TMPR1, 8(TMPR1)
-+ | aghi RD, -1
-+ | jne <3
-+ | // TODO: replace decrement/branch with branch on count.
-+ |4:
-+ | ins_next
-+ |
-+ |5: // Need to resize array part.
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base
-+ | lgr CARG2, TAB:RB
-+ | lgfr CARG3, RD
-+ | lgr L:RB, L:CARG1
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
-+ | lg BASE, L:RB->base
-+ | llgc RA, PC_RA // Restore RA.
-+ | llgh RD, PC_RD // Restore RD.
-+ | j <1 // Retry.
-+ |
-+ |7: // Possible table write barrier for any value. Skip valiswhite check.
-+ | barrierback TAB:RB, RD
-+ | j <2
- break;
-
- /* -- Calls and vararg handling ----------------------------------------- */
-@@ -2401,7 +2448,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
- | lgr RD, RC
- if (op == BC_CALLM) {
-- | ag NARGS:RD, SAVE_MULTRES // TODO: MULTRES is 32-bit on x64
-+ | agf NARGS:RD, SAVE_MULTRES
- }
- | sllg RA, RA, 3(r0)
- | lg LFUNC:RB, 0(BASE, RA)
-@@ -2427,7 +2474,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | jne >7
- |1:
- | stg LFUNC:RB, -16(BASE) // Copy func+tag down, reloaded below.
-- | stg NARGS:RD, SAVE_MULTRES // 32-bit on x64.
-+ | st NARGS:RD, SAVE_MULTRES
- | aghi NARGS:RD, -1
- | je >3
- |2: // Move args down.
-@@ -2443,7 +2490,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg LFUNC:RB, -16(BASE)
- |3:
- | cleartp LFUNC:RB
-- | lg NARGS:RD, SAVE_MULTRES
-+ | llgf NARGS:RD, SAVE_MULTRES
- | llgc TMPR1, LFUNC:RB->ffid
- | cghi TMPR1, 1 // (> FF_C) Calling a fast function?
- | jh >5
-@@ -2488,15 +2535,83 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- break;
- case BC_VARG:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // TODO: some opportunities for branch on index in here.
-+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
-+ | sllg RA, RA, 3(r0)
-+ | sllg RB, RB, 3(r0)
-+ | sllg RC, RC, 3(r0)
-+ | la TMPR1, (16+FRAME_VARG)(RC, BASE)
-+ | la RA, 0(RA, BASE)
-+ | sg TMPR1, -8(BASE)
-+ | // Note: TMPR1 may now be even _above_ BASE if nargs was < numparams.
-+ | cghi RB, 0
-+ | je >5 // Copy all varargs?
-+ | lay RB, -8(RA, RB)
-+ | clgr TMPR1, BASE // No vararg slots?
-+ | jnl >2
-+ |1: // Copy vararg slots to destination slots.
-+ | lg RC, -16(TMPR1)
-+ | la TMPR1, 8(TMPR1)
-+ | stg RC, 0(RA)
-+ | la RA, 8(RA)
-+ | clgr RA, RB // All destination slots filled?
-+ | jnl >3
-+ | clgr TMPR1, BASE // No more vararg slots?
-+ | jl <1
-+ | lghi TMPR2, LJ_TNIL
-+ |2: // Fill up remainder with nil.
-+ | stg TMPR2, 0(RA)
-+ | la RA, 8(RA)
-+ | clgr RA, RB
-+ | jl <2
-+ |3:
-+ | ins_next
-+ |
-+ |5: // Copy all varargs.
-+ | lghi TMPR2, 1
-+ | st TMPR2, SAVE_MULTRES // MULTRES = 0+1
-+ | lgr RC, BASE
-+ | slgr RC, TMPR1
-+ | jno <3 // No vararg slots? (borrow or zero)
-+ | llgfr RB, RC
-+ | srlg RB, RB, 3(r0)
-+ | ahi RB, 1
-+ | st RB, SAVE_MULTRES // MULTRES = #varargs+1
-+ | lg L:RB, SAVE_L
-+ | agr RC, RA
-+ | clg RC, L:RB->maxstack
-+ | jh >7 // Need to grow stack?
-+ |6: // Copy all vararg slots.
-+ | lg RC, -16(TMPR1)
-+ | la TMPR1, 8(TMPR1)
-+ | stg RC, 0(RA)
-+ | la RA, 8(RA)
-+ | clgr TMPR1, BASE // No more vararg slots?
-+ | jl <6
-+ | j <3
-+ |
-+ |7: // Grow stack for varargs.
-+ | stg BASE, L:RB->base
-+ | stg RA, L:RB->top
-+ | stg PC, SAVE_PC
-+ | sgr TMPR1, BASE // Need delta, because BASE may change.
-+ | st TMPR1, TMP_STACK_HI
-+ | llgf CARG2, SAVE_MULTRES
-+ | aghi CARG2, -1
-+ | lgr CARG1, L:RB
-+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
-+ | lg BASE, L:RB->base
-+ | llgf TMPR1, TMP_STACK_HI
-+ | lg RA, L:RB->top
-+ | agr TMPR1, BASE
-+ | j <6
- break;
-
- /* -- Returns ----------------------------------------------------------- */
-
- case BC_RETM:
- | ins_AD // RA = results, RD = extra_nresults
-- | ag RD, SAVE_MULTRES // MULTRES >=1, so RD >=1. // TODO: needs to be 32-bit.
-+ | agf RD, SAVE_MULTRES // MULTRES >=1, so RD >=1.
- | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
- break;
-
-@@ -2507,7 +2622,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- |1:
- | lg PC, -8(BASE)
-- | stg RD, SAVE_MULTRES // Save nresults+1.
-+ | st RD, SAVE_MULTRES // Save nresults+1.
- | tmll PC, FRAME_TYPE // Check frame type marker.
- | jne >7 // Not returning to a fixarg Lua func?
- switch (op) {
-@@ -2524,7 +2639,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | aghi RD, -1
- | jne <2
- |3:
-- | lg RD, SAVE_MULTRES // Note: MULTRES may be >255.
-+ | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256.
- | llgc RB, PC_RB
- |5:
- | cgr RB, RD // More results expected?
---
-2.20.1
-
-
-From 9d01ddc5df803fcecafd88668a1dbe90c19e275e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 28 Dec 2016 14:13:08 -0500
-Subject: [PATCH 160/247] Implement LOOP.
-
-Allows for while and repeat loops, for example:
-
-x = 0
-while x < 5 do
- print(x)
- x = x + 1
-end
-
--- prints:
--- 0
--- 1
--- 2
--- 3
--- 4
----
- src/vm_s390x.dasc | 16 ++++++++++++----
- 1 file changed, 12 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 450fce8..990ae92 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2844,14 +2844,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- break;
-+
- case BC_LOOP:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_A // RA = base, RD = target (loop extent)
-+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
-+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
-+ |.if JIT
-+ | hotloop RBd
-+ |.endif
-+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
- break;
-+
- case BC_ILOOP:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_A // RA = base, RD = target (loop extent)
-+ | ins_next
- break;
-+
- case BC_JLOOP:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 1638d7ef1568a16e2a0e45d66075ca443814ed2b Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 28 Dec 2016 15:42:30 -0500
-Subject: [PATCH 161/247] Implement ISNUM, ISTYPE, TGETR and TSETR.
-
----
- src/vm_s390x.dasc | 223 +++++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 202 insertions(+), 21 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 990ae92..bcb8e3f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -657,8 +657,17 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->vm_call_dispatch_f
- |
- |->vmeta_tgetr:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lgr CARG1, TAB:RB
-+ | lgr RB, BASE // Save BASE.
-+ | lgfr CARG2, RC
-+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
-+ | // cTValue * or NULL returned in r2 (CRET1).
-+ | llgc RA, PC_RA
-+ | lgr BASE, RB // Restore BASE.
-+ | ltgr RC, CRET1
-+ | jne ->BC_TGETR_Z
-+ | lghi ITYPE, LJ_TNIL
-+ | j ->BC_TGETR2_Z
- |
- |//-----------------------------------------------------------------------
- |
-@@ -726,8 +735,18 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->vm_call_dispatch_f
- |
- |->vmeta_tsetr:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg L:CARG1, SAVE_L
-+ | lgr CARG2, TAB:RB
-+ | stg BASE, L:CARG1->base
-+ | lgr RB, BASE // Save BASE (TODO: BASE is callee-saved anyway on s390x).
-+ | lgfr CARG3, RC
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
-+ | // TValue * returned in r2 (CRET1).
-+ | lgr RC, CRET1
-+ | llgh RA, PC_RA
-+ | lgr BASE, RB // Restore BASE.
-+ | j ->BC_TSETR_Z
- |
- |//-- Comparison metamethods ---------------------------------------------
- |
-@@ -775,8 +794,15 @@ static void build_subroutines(BuildCtx *ctx)
- | stg r0, 0(r0)
- |
- |->vmeta_istype:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | llgfr CARG2, RA
-+ | llgfr CARG3, RD
-+ | lgr L:CARG1, L:RB
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
-+ | lg BASE, L:RB->base
-+ | j <6
- |
- |//-- Arithmetic metamethods ---------------------------------------------
- |
-@@ -946,6 +972,11 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// Inlined GC threshold check. Caveat: uses label 1.
- |.macro ffgccheck
-+ | lg RB, (DISPATCH_GL(gc.total))(DISPATCH)
-+ | clg RB, (DISPATCH_GL(gc.threshold))(DISPATCH)
-+ | jl >1
-+ | brasl r14, ->fff_gcstep
-+ |1:
- |.endmacro
- |
- |//-- Base library: checks -----------------------------------------------
-@@ -973,8 +1004,24 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_res_
- |
- |.ffunc_1 type
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg RC, 0(BASE)
-+ | srag RC, RC, 47(r0)
-+ | lghi RB, LJ_TISNUM
-+ | clgr RC, RB
-+ | jnl >1
-+ | lgr RC, RB
-+ |1:
-+ | lghi TMPR2, -1
-+ | xgr RC, TMPR2
-+ |2:
-+ | lg CFUNC:RB, -16(BASE)
-+ | cleartp CFUNC:RB
-+ | sllg RC, RC, 3(r0)
-+ | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)
-+ | lg PC, -8(BASE)
-+ | settp STR:RC, LJ_TSTR
-+ | stg STR:RC, -16(BASE)
-+ | j ->fff_res1
- |
- |//-- Base library: getters and setters ---------------------------------
- |
-@@ -1155,18 +1202,89 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc string_char // Only handle the 1-arg case here.
- | stg r0, 0(r0)
- |->fff_newstr:
-- | stg r0, 0(r0)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | llgfr CARG3, TMPR1 // Zero-extended to size_t.
-+ | lgr CARG2, RD
-+ | lgr CARG1, L:RB
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_str_new // (lua_State *L, char *str, size_t l)
- |->fff_resstr:
-- | stg r0, 0(r0)
-+ | // GCstr * returned in r2 (CRET1).
-+ | lgr STR:RD, CRET1
-+ | lg BASE, L:RB->base
-+ | lg PC, -8(BASE)
-+ | settp STR:RD, LJ_TSTR
-+ | stg STR:RD, -16(BASE)
-+ | j ->fff_res1
- |
- |.ffunc string_sub
-- | stg r0, 0(r0)
-+ | ffgccheck
-+ | lghi TMPR1, -1
-+ | clfi NARGS:RD, 1+2; jl ->fff_fallback
-+ | jnh >1
-+ | lg TMPR1, 16(BASE)
-+ | checkint TMPR1, ->fff_fallback
-+ |1:
-+ | lg STR:RB, 0(BASE)
-+ | checkstr STR:RB, ->fff_fallback
-+ | lg ITYPE, 8(BASE)
-+ | llgfr RA, ITYPE // Must clear hiword for lea below.
-+ | srag ITYPE, ITYPE, 47(r0)
-+ | cghi ITYPE, LJ_TISNUM
-+ | jne ->fff_fallback
-+ | llgf RC, STR:RB->len
-+ | clr RC, TMPR1 // len < end? (unsigned compare)
-+ | jl >5
-+ |2:
-+ | cghi RA, 0 // start <= 0?
-+ | jle >7
-+ |3:
-+ | sr TMPR1, RA // start > end?
-+ | jnhe ->fff_emptystr // TODO: not sure about this, was jl in x64.
-+ | la RD, (#STR-1)(RA, STR:RB)
-+ | ahi TMPR1, 1
-+ |4:
-+ | j ->fff_newstr
-+ |
-+ |5: // Negative end or overflow.
-+ | chi TMPR1, 0
-+ | jnl >6
-+ | ahi TMPR1, 1
-+ | ar TMPR1, RC // end = end+(len+1)
-+ | j <2
-+ |6: // Overflow.
-+ | lr TMPR1, RC // end = len
-+ | j <2
-+ |
-+ |7: // Negative start or underflow.
-+ | je >8
-+ | agr RA, RC // start = start+(len+1)
-+ | aghi RA, 1
-+ | jh <3 // start > 0?
-+ |8: // Underflow.
-+ | lghi RA, 1 // start = 1
-+ | j <3
- |
- |->fff_emptystr: // Range underflow.
- | stg r0, 0(r0)
- |
- |.macro ffstring_op, name
- | .ffunc_1 string_ .. name
-+ | ffgccheck
-+ | lg STR:CARG2, 0(BASE)
-+ | checkstr STR:CARG2, ->fff_fallback
-+ | lg L:RB, SAVE_L
-+ | lay SBUF:CARG1, (DISPATCH_GL(tmpbuf))(DISPATCH)
-+ | stg BASE, L:RB->base
-+ | lg RC, SBUF:CARG1->b
-+ | stg L:RB, SBUF:CARG1->L
-+ | stg RC, SBUF:CARG1->p
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_buf_putstr_ .. name
-+ | // lgr CARG1, CRET1 (nop, CARG1==CRET1)
-+ | brasl r14, extern lj_buf_tostr
-+ | j ->fff_resstr
- |.endmacro
- |
- |ffstring_op reverse
-@@ -1258,8 +1376,22 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->fff_gcstep: // Call GC step function.
- | // BASE = new base, RD = nargs+1
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r14, TMP_STACK // Save return address
-+ | lg L:RB, SAVE_L
-+ | stg PC, SAVE_PC // Redundant (but a defined value).
-+ | stg BASE, L:RB->base
-+ | sllg RD, NARGS:RD, 3(r0)
-+ | lay RD, -8(RD, BASE)
-+ | lgr CARG1, L:RB
-+ | stg RD, L:RB->top
-+ | brasl r14, extern lj_gc_step // (lua_State *L)
-+ | lg BASE, L:RB->base
-+ | lg RD, L:RB->top
-+ | sgr RD, BASE
-+ | srlg RD, RD, 3(r0)
-+ | aghi NARGS:RD, 1
-+ | lg r14, TMP_STACK // Restore return address.
-+ | br r14
- |
- |//-----------------------------------------------------------------------
- |//-- Special dispatch targets -------------------------------------------
-@@ -1686,13 +1818,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |1: // Fallthrough to the next instruction.
- | ins_next
- break;
-+
- case BC_ISTYPE:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = src, RD = -type
-+ | lghr RD, RD // TODO: always sign extend RD?
-+ | sllg RA, RA, 3(r0)
-+ | lg RB, 0(RA, BASE)
-+ | srag RB, RB, 47(r0)
-+ | agr RB, RD
-+ | jne ->vmeta_istype
-+ | ins_next
- break;
- case BC_ISNUM:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = src, RD = -(TISNUM-1)
-+ | sllg TMPR1, RA, 3(r0)
-+ | lg TMPR1, 0(TMPR1, BASE)
-+ | checknumtp TMPR1, ->vmeta_istype
-+ | ins_next
- break;
- case BC_MOV:
- | ins_AD // RA = dst, RD = src
-@@ -2226,8 +2368,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <1
- break;
- case BC_TGETR:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC // RA = dst, RB = table, RC = key
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | cleartp TAB:RB
-+ | sllg RC, RC, 3(r0)
-+ | llgf RC, 4(RC, BASE) // Load low word (big endian).
-+ | cl RC, TAB:RB->asize
-+ | jhe ->vmeta_tgetr // Not in array part? Use fallback.
-+ | sllg RC, RC, 3(r0)
-+ | ag RC, TAB:RB->array
-+ | // Get array slot.
-+ |->BC_TGETR_Z:
-+ | lg ITYPE, 0(RC)
-+ |->BC_TGETR2_Z:
-+ | sllg RA, RA, 3(r0)
-+ | stg ITYPE, 0(RA, BASE)
-+ | ins_next
- break;
-
- case BC_TSETV:
-@@ -2388,9 +2545,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <2
- break;
- case BC_TSETR:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC // RA = src, RB = table, RC = key
-+ | sllg RB, RB, 3(r0)
-+ | lg TAB:RB, 0(RB, BASE)
-+ | cleartp TAB:RB
-+ | sllg RC, RC, 3(r0)
-+ | lg RC, 0(RC, BASE)
-+ | llgc TMPR2, TAB:RB->marked
-+ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | jne >7
-+ |2:
-+ | cl RC, TAB:RB->asize
-+ | jhe ->vmeta_tsetr
-+ | llgfr RC, RC
-+ | sllg RC, RC, 3(r0)
-+ | ag RC, TAB:RB->array
-+ | // Set array slot.
-+ |->BC_TSETR_Z:
-+ | sllg RA, RA, 3(r0)
-+ | lg ITYPE, 0(RA, BASE)
-+ | stg ITYPE, 0(RC)
-+ | ins_next
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, TMPR1
-+ | j <2
- break;
-+
- case BC_TSETM:
- | ins_AD // RA = base (table at base-1), RD = num const (start index)
- |1:
---
-2.20.1
-
-
-From 236bcd1ead24e43e224d491a1b9bccb74a0e2cb1 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 28 Dec 2016 17:53:38 -0500
-Subject: [PATCH 162/247] Implement KNIL and CALLMT.
-
----
- src/vm_s390x.dasc | 23 +++++++++++++++++++----
- 1 file changed, 19 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index bcb8e3f..f5055a4 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2109,9 +2109,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_KNIL:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = dst_start, RD = dst_end
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | la RA, 8(RA, BASE)
-+ | la RD, 0(RD, BASE)
-+ | lghi RB, LJ_TNIL
-+ | stg RB, -8(RA) // Sets minimum 2 slots.
-+ |1:
-+ | stg RB, 0(RA)
-+ | la RA, 8(RA)
-+ | clgr RA, RD
-+ | jle <1
-+ | ins_next
- break;
-+
-+/* -- Upvalue and function ops ------------------------------------------ */
-+
- case BC_UGET:
- | ins_AD // RA = dst, RD = upvalue #
- | sllg RA, RA, 3(r0)
-@@ -2639,8 +2653,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_CALLMT:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = base, RD = extra_nargs
-+ | a NARGS:RD, SAVE_MULTRES
-+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
- break;
- case BC_CALLT:
- | ins_AD // RA = base, RD = nargs+1
---
-2.20.1
-
-
-From c2b3733ef1c68be9c0ad7d2eed3f241bc216f3ae Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 29 Dec 2016 11:10:18 -0500
-Subject: [PATCH 163/247] Implement POW.
-
-Allows use of the '^' operator, for example:
-
-x = 2
-y = 3
-print(x ^ y) -- prints 8
----
- src/vm_s390x.dasc | 19 +++++++++++++++++--
- 1 file changed, 17 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f5055a4..99200bc 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2059,9 +2059,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j ->vmeta_arith_vvo
- break;
- case BC_POW:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC
-+ | sllg RB, RB, 3(r0)
-+ | sllg RC, RC, 3(r0)
-+ | ld FARG1, 0(RB, BASE)
-+ | ld FARG2, 0(RC, BASE)
-+ | lg TMPR2, 0(RB, BASE)
-+ | checknumtp TMPR2, ->vmeta_arith_vvo
-+ | lg TMPR2, 0(RC, BASE)
-+ | checknumtp TMPR2, ->vmeta_arith_vvo
-+ | lgr RB, BASE // TODO: redundant, BASE is currently callee-saved.
-+ | brasl r14, extern pow // double pow(double x, double y), result in f0.
-+ | llgc RA, PC_RA
-+ | lgr BASE, RB
-+ | sllg RA, RA, 3(r0)
-+ | std f0, 0(RA, BASE)
-+ | ins_next
- break;
-+
- case BC_CAT:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 5573bd5668bdce652abc28056407a50b63a9c6f8 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 29 Dec 2016 11:23:45 -0500
-Subject: [PATCH 164/247] Implement CAT.
-
-Allows the use of the '..' operator, for example:
-
-x = "hello"
-y = " "
-z = "world!"
-print(x..y..z) -- prints 'hello world!'
----
- src/vm_s390x.dasc | 24 ++++++++++++++++++++++--
- 1 file changed, 22 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 99200bc..cf9a8cc 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2078,8 +2078,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_CAT:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base
-+ | lgr CARG3, RC
-+ | sgr CARG3, RB
-+ | sllg RC, RC, 3(r0)
-+ | la CARG2, 0(RC, BASE)
-+ |->BC_CAT_Z:
-+ | lgr L:RB, L:CARG1
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_cat // (lua_State *L, TValue *top, int left)
-+ | // NULL (finished) or TValue * (metamethod) returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | ltgr RC, CRET1
-+ | jne ->vmeta_binop
-+ | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
-+ | sllg RB, RB, 3(r0)
-+ | llgc RA, PC_RA
-+ | sllg RA, RA, 3(r0)
-+ | lg RC, 0(RB, BASE)
-+ | stg RC, 0(RA, BASE)
-+ | ins_next
- break;
-
- /* -- Constant ops ------------------------------------------------------ */
---
-2.20.1
-
-
-From b0a0516b3ad36fa6d2d0994cea1d655676449b64 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 29 Dec 2016 14:50:45 -0500
-Subject: [PATCH 165/247] Implement bit operations.
-
-See http://bitop.luajit.org/api.html for more information.
-
-Bytecode listing is now supported, for example:
-
-$ ./luajit -bl -e 'a=1'
--- BYTECODE -- "a=1":0-1
-0001 KSHORT 0 1
-0002 GSET 0 0 ; "a"
-0003 RET0 0 1
----
- src/vm_s390x.dasc | 114 +++++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 103 insertions(+), 11 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index cf9a8cc..3526139 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -20,6 +20,8 @@
- |
- |// Instructions used that are not in base z/Architecture:
- |// clfi (compare logical immediate) [requires z9-109]
-+|// ldgr (load FPR from GPR) [requires z9-109 GA3]
-+|// lgdr (load GPR from FPR) [requires z9-109 GA3]
- |// TODO: alternative instructions?
- |
- |.arch s390x
-@@ -283,6 +285,12 @@
- | stg TMPR1, DISPATCH_GL(vmstate)(DISPATCH)
- |.endmacro
- |
-+|// Synthesize binary floating-point constants.
-+|.macro bfpconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
-+| llihh tmp, 0x4338
-+| ldgr reg, tmp
-+|.endmacro
-+|
- |// Move table write barrier back. Overwrites reg.
- |.macro barrierback, tab, reg
- | // TODO: more efficient way?
-@@ -1113,10 +1121,26 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |//-- Math library -------------------------------------------------------
- |
-- | .ffunc_1 math_abs
-+ |.ffunc_1 math_abs
-+ | lg RB, 0(BASE)
-+ | checkint RB, >3
-+ | lpr RB, RB; jo >2
- |->fff_resbit:
- |->fff_resi:
-+ | setint RB
- |->fff_resRB:
-+ | lg PC, -8(BASE)
-+ | stg RB, -16(BASE)
-+ | j ->fff_res1
-+ |2:
-+ | llihh RB, 0x41e0 // 2^31
-+ | j ->fff_resRB
-+ |3:
-+ | jh ->fff_fallback
-+ | nihh RB, 0x7fff // Clear sign bit.
-+ | lg PC, -8(BASE)
-+ | stg RB, -16(BASE)
-+ | j ->fff_res1
- |
- |.ffunc_n math_sqrt, sqrtsd
- |->fff_resxmm0:
-@@ -1295,6 +1319,26 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_bit, name, kind, fdef
- | fdef name
-+ |.if kind == 2
-+ | bfpconst_tobit f1, RB
-+ |.endif
-+ | lg RB, 0(BASE)
-+ | ld f0, 0(BASE)
-+ | checkint RB, >1
-+ |.if kind > 0
-+ | j >2
-+ |.else
-+ | j ->fff_resbit
-+ |.endif
-+ |1:
-+ | jh ->fff_fallback
-+ |.if kind < 2
-+ | bfpconst_tobit f1, RB
-+ |.endif
-+ | adbr f0, f1
-+ | lgdr RB, f0
-+ | llgfr RB, RB
-+ |2:
- |.endmacro
- |
- |.macro .ffunc_bit, name, kind
-@@ -1302,33 +1346,81 @@ static void build_subroutines(BuildCtx *ctx)
- |.endmacro
- |
- |.ffunc_bit bit_tobit, 0
-+ | j ->fff_resbit
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name, 2
-+ | lgr TMPR1, NARGS:RD // Save for fallback.
-+ | sllg RD, NARGS:RD, 3(r0)
-+ | lay RD, -16(RD, BASE)
-+ |1:
-+ | clgr RD, BASE
-+ | jle ->fff_resbit
-+ | lg RA, 0(RD)
-+ | checkint RA, >2
-+ | ins RB, RA
-+ | aghi RD, -8
-+ | j <1
-+ |2:
-+ | jh ->fff_fallback_bit_op
-+ | ldgr f0, RA
-+ | adbr f0, f1
-+ | lgdr RA, f0
-+ | ins RB, RA
-+ | aghi RD, -8
-+ | j <1
- |.endmacro
- |
-- |.ffunc_bit_op bit_band, and
-+ |.ffunc_bit_op bit_band, nr
- |.ffunc_bit_op bit_bor, or
-- |.ffunc_bit_op bit_bxor, xor
-+ |.ffunc_bit_op bit_bxor, xr
- |
- |.ffunc_bit bit_bswap, 1
-+ | lrvr RB, RB
-+ | j ->fff_resbit
- |
- |.ffunc_bit bit_bnot, 1
-- |->fff_resbit:
-+ | lhi TMPR2, -1
-+ | xr RB, TMPR2 // TODO: use xilf on newer models?
-+ | j ->fff_resbit
- |
- |->fff_fallback_bit_op:
-+ | lgr NARGS:RD, TMPR1 // Restore for fallback
-+ | j ->fff_fallback
- |
- |.macro .ffunc_bit_sh, name, ins
- | .ffunc_bit name, 1, .ffunc_2
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // Note: no inline conversion from number for 2nd argument!
-+ | lg RA, 8(BASE)
-+ | checkint RA, ->fff_fallback
-+ | nill RA, 0x1f // Limit shift to 5-bits.
-+ | ins RB, r0, 0(RA) // TODO: fix shift args in DynASM.
-+ | j ->fff_resbit
- |.endmacro
- |
-- |.ffunc_bit_sh bit_lshift, shl
-- |.ffunc_bit_sh bit_rshift, shr
-- |.ffunc_bit_sh bit_arshift, sar
-- |.ffunc_bit_sh bit_rol, rol
-- |.ffunc_bit_sh bit_ror, ror
-+ |.ffunc_bit_sh bit_lshift, sll
-+ |.ffunc_bit_sh bit_rshift, srl
-+ |.ffunc_bit_sh bit_arshift, sra
-+ |
-+ |.ffunc_bit bit_rol, 1, .ffunc_2
-+ | // Note: no inline conversion from number for 2nd argument!
-+ | lg RA, 8(BASE)
-+ | checkint RA, ->fff_fallback
-+ | // Note: no need to limit rotate to 5-bits (wraps).
-+ | rll RB, RB, 0(RA)
-+ | j ->fff_resbit
-+ |
-+ |.ffunc_bit bit_ror, 1, .ffunc_2
-+ | // Note: no inline conversion from number for 2nd argument!
-+ | lg RA, 8(BASE)
-+ | checkint RA, ->fff_fallback
-+ | // TODO: shorter sequence of instructions to convert right rotate into left rotate.
-+ | nill RA, 0x1f
-+ | lghi TMPR2, 32
-+ | sr TMPR2, RA
-+ | lr RA, TMPR2
-+ | rll RB, RB, 0(RA)
-+ | j ->fff_resbit
- |
- |//-----------------------------------------------------------------------
- |
---
-2.20.1
-
-
-From ab9229335499f1d626d859aa7fc1ce62f0b24006 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 29 Dec 2016 16:50:58 -0500
-Subject: [PATCH 166/247] Fix for DynASM buffer overflow.
-
-Need to include all actions with arguments against MAXSECPOS.
----
- dynasm/dasm_s390x.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index f8c45fa..9b9d3f4 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -117,7 +117,7 @@ local function waction(action, val, a, num)
- wputxhw(w)
- if val then wputxhw(val) end -- Not sure about this, do we always have one arg?
- if a then actargs[#actargs+1] = a end
-- if a or num then secpos = secpos + (num or 1) end
-+ if val or a or num then secpos = secpos + (num or 1) end
- end
-
- -- Flush action list (intervening C code or buffer pos overflow).
---
-2.20.1
-
-
-From 3159fa7245fc003f9ca945a1560d39ceb2a8eb37 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 29 Dec 2016 17:37:11 -0500
-Subject: [PATCH 167/247] Partially implement ipairs.
-
-Still need to handle ipairs_aux.
----
- src/vm_s390x.dasc | 109 +++++++++++++++++++++++++++++++++++++++-------
- 1 file changed, 94 insertions(+), 15 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 3526139..9985f1a 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -431,12 +431,23 @@ static void build_subroutines(BuildCtx *ctx)
- | stg r0, 0(r0)
- |
- |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- |
-+ | // (void *cframe)
-+ | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK.
-+ | lgr sp, CARG1
- |->vm_unwind_ff_eh: // Landing pad for external unwinder.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg L:RB, SAVE_L
-+ | lghi RD, 1+1 // Really 1+2 results, incr. later.
-+ | lg BASE, L:RB->base
-+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | lay DISPATCH, GG_G2DISP(DISPATCH)
-+ | lg PC, -8(BASE) // Fetch PC of previous frame.
-+ | load_false RA
-+ | lg RB, 0(BASE)
-+ | stg RA, -16(BASE) // Prepend false to error message.
-+ | stg RB, -8(BASE)
-+ | lghi RA, -16 // Results start at BASE+RA = BASE-16.
-+ | set_vmstate INTERP
-+ | j ->vm_returnc // Increments RD/MULTRES and returns.
- |
- |//-----------------------------------------------------------------------
- |//-- Grow stack for calls -----------------------------------------------
-@@ -1086,19 +1097,60 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Base library: iterators -------------------------------------------
- |
- |.ffunc_1 next
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_1 pairs
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_2 ipairs_aux
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |->fff_res0:
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |.ffunc_1 ipairs
-+ | lg TAB:RB, 0(BASE)
-+ | lgr TMPR1, TAB:RB
-+ | checktab TAB:RB, ->fff_fallback
-+#if LJ_52
-+ | lghi TMPR2, 0
-+ | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
-+#endif
-+ | lg CFUNC:RD, -16(BASE)
-+ | cleartp CFUNC:RD
-+ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
-+ | settp CFUNC:RD, LJ_TFUNC
-+ | lg PC, -8(BASE)
-+ | stg CFUNC:RD, -16(BASE)
-+ | stg TMPR1, -8(BASE)
-+ | llihh RD, ((int)LJ_TISNUM)>>1 // mov64 RD, ((int64_t)LJ_TISNUM<<47) // TODO: write mov64-macro, use all of TISNUM (currently this is very fragile).
-+ | stg RD, 0(BASE)
-+ | lghi RD, 1+3
-+ | j ->fff_res
- |
- |//-- Base library: catch errors ----------------------------------------
- |
- |.ffunc_1 pcall
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | la RA, 16(BASE)
-+ | aghi NARGS:RD, -1
-+ | lghi PC, 16+FRAME_PCALL
-+ |1:
-+ | llgc RB, (DISPATCH_GL(hookmask))(DISPATCH)
-+ | srlg RB, RB, HOOK_ACTIVE_SHIFT(r0)
-+ | nill RB, 1 // High bits already zero (from load).
-+ | agr PC, RB // Remember active hook before pcall.
-+ | // Note: this does a (harmless) copy of the function to the PC slot, too.
-+ | lgr KBASE, RD
-+ |2:
-+ | sllg TMPR1, KBASE, 3(r0)
-+ | lg RB, -24(TMPR1, RA)
-+ | stg RB, -16(TMPR1, RA)
-+ | aghi KBASE, -1
-+ | jh <2
-+ | j ->vm_call_dispatch
- |
- |.ffunc_2 xpcall
- | stg r0, 0(r0)
-@@ -2846,9 +2898,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_ITERC:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
-+ | sllg RA, RA, 3(r0)
-+ | la RA, 16(RA, BASE) // fb = base+2
-+ | lg RB, -32(RA) // Copy state. fb[0] = fb[-4].
-+ | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3].
-+ | stg RB, 0(RA)
-+ | stg RC, 8(RA)
-+ | lg LFUNC:RB, -40(RA) // Copy callable. fb[-2] = fb[-5]
-+ | stg LFUNC:RB, -16(RA)
-+ | lghi NARGS:RD, 2+1 // Handle like a regular 2-arg call.
-+ | checkfunc LFUNC:RB, ->vmeta_call
-+ | lgr BASE, RA
-+ | ins_call
- break;
-+
- case BC_ITERN:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
-@@ -3156,16 +3220,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_ITERL:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
- break;
-+
- case BC_JITERL:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+#if !LJ_HASJIT
- break;
-+#endif
- case BC_IITERL:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AJ // RA = base, RD = target
-+ | sllg RA, RA, 3(r0)
-+ | la RA, 0(RA, BASE)
-+ | lg RB, 0(RA)
-+ | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
-+ if (op == BC_JITERL) {
-+ | stg RB, -8(RA)
-+ | j =>BC_JLOOP
-+ } else {
-+ | branchPC RD // Otherwise save control var + branch.
-+ | stg RB, -8(RA)
-+ }
-+ |1:
-+ | ins_next
- break;
-
- case BC_LOOP:
---
-2.20.1
-
-
-From 38a631404f3ff948c0a5d4836c77ead1cabf6416 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 30 Dec 2016 11:40:39 -0500
-Subject: [PATCH 168/247] Implement ipairs.
-
-Allows the use of the ipairs iterator, for example:
-
-t = { "i", "robot" }
-for i,v in ipairs(t) do
- print(i, v)
-end
--- prints:
--- 1 i
--- 2 robot
----
- src/vm_s390x.dasc | 72 +++++++++++++++++++++++++++++++++++++++++++----
- 1 file changed, 66 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 9985f1a..2db4123 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -228,6 +228,9 @@
- |.macro setint, reg
- | settp reg, LJ_TISNUM
- |.endmacro
-+|.macro setint, dst, reg
-+| settp dst, reg, LJ_TISNUM
-+|.endmacro
- |
- |// Macros to test operand types.
- |.macro checktp_nc, reg, tp, target
-@@ -1097,19 +1100,76 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Base library: iterators -------------------------------------------
- |
- |.ffunc_1 next
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | je >2 // Missing 2nd arg?
-+ |1:
-+ | lg CARG2, 0(BASE)
-+ | checktab CARG2, ->fff_fallback
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base // Add frame since C call can throw.
-+ | stg BASE, L:RB->top // Dummy frame length is ok.
-+ | lg PC, -8(BASE)
-+ | la CARG3, 8(BASE)
-+ | lgr CARG1, L:RB
-+ | stg PC, SAVE_PC // Needed for ITERN fallback.
-+ | brasl r14, extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
-+ | // Flag returned in r2 (CRET1).
-+ | lg BASE, L:RB->base
-+ | lgr RD, CRET1 // TODO: high bits needed? low bits load/test (ltr) enough?
-+ | ltr RD, CRET1; je >3 // End of traversal?
-+ | // Copy key and value to results.
-+ | lg RB, 8(BASE)
-+ | lg RD, 16(BASE)
-+ | stg RB, -16(BASE)
-+ | stg RD, -8(BASE)
-+ |->fff_res2:
-+ | lghi RD, 1+2
-+ | j ->fff_res
-+ |2: // Set missing 2nd arg to nil.
-+ | lghi TMPR2, LJ_TNIL
-+ | stg TMPR2, 8(BASE)
-+ | j <1
-+ |3: // End of traversal: return nil.
-+ | lghi TMPR2, LJ_TNIL
-+ | stg TMPR2, -16(BASE)
-+ | j ->fff_res1
- |
- |.ffunc_1 pairs
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- |
- |.ffunc_2 ipairs_aux
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg TAB:RB, 0(BASE)
-+ | checktab TAB:RB, ->fff_fallback
-+ | lg RA, 8(BASE)
-+ | checkint RA, ->fff_fallback
-+ | lg PC, -8(BASE)
-+ | aghi RA, 1
-+ | setint ITYPE, RA
-+ | stg ITYPE, -16(BASE)
-+ | cl RA, TAB:RB->asize; jhe >2 // Not in array part?
-+ | lg RD, TAB:RB->array
-+ | lgfr TMPR1, RA
-+ | sllg TMPR1, TMPR1, 3(r0)
-+ | la RD, 0(TMPR1, RD)
-+ |1:
-+ | lg TMPR2, 0(RD)
-+ | cghi TMPR2, LJ_TNIL; je ->fff_res0
-+ | // Copy array slot.
-+ | stg TMPR2, -8(BASE)
-+ | j ->fff_res2
-+ |2: // Check for empty hash part first. Otherwise call C function.
-+ | lt TMPR2, TAB:RB->hmask; je ->fff_res0
-+ | lgr CARG1, TAB:RB
-+ | lgr RB, BASE // Save BASE. // TODO: needed?
-+ | lgfr CARG2, RA
-+ | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
-+ | // cTValue * or NULL returned in r2 (CRET1).
-+ | lgr BASE, RB
-+ | ltgr RD, CRET1
-+ | jne <1
- |->fff_res0:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lghi RD, 1+0
-+ | j ->fff_res
- |
- |.ffunc_1 ipairs
- | lg TAB:RB, 0(BASE)
---
-2.20.1
-
-
-From 7f3a637a5f1637570605d9f75605c690823b336e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 30 Dec 2016 12:27:08 -0500
-Subject: [PATCH 169/247] Add FORL implementation (just fallthrough).
-
----
- src/vm_s390x.dasc | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 2db4123..8e02c79 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -3146,8 +3146,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.define FOR_EXT, 24(RA)
-
- case BC_FORL:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
- break;
-
- case BC_JFORI:
---
-2.20.1
-
-
-From 429f7e8b9dfae9dbcabf81d8134bd617003512ac Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 30 Dec 2016 13:00:38 -0500
-Subject: [PATCH 170/247] Implement USETV.
-
-Allows upvalues to be set in closures, for example:
-
-function f(x)
- local y = x
- local j = function(z)
- y = y + z
- end
- for i=1,3 do
- j(i)
- print(y)
- end
-end
-
-f(2) -- prints: 3 5 8
----
- src/vm_s390x.dasc | 43 +++++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 41 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 8e02c79..182cfef 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2378,9 +2378,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_USETV:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+#define TV2MARKOFS \
-+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
-+ | ins_AD // RA = upvalue #, RD = src
-+ | lg LFUNC:RB, -16(BASE)
-+ | cleartp LFUNC:RB
-+ | sllg RA, RA, 3(r0)
-+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
-+ | // TODO: (instead of next 2 instructions) tm UPVAL:RB->closed, 0xff
-+ | llgc TMPR2, UPVAL:RB->closed
-+ | tmll TMPR2, 0xff
-+ | lg RB, UPVAL:RB->v
-+ | sllg TMPR1, RD, 3(r0)
-+ | lg RA, 0(TMPR1, BASE)
-+ | stg RA, 0(RB)
-+ | je >1
-+ | // Check barrier for closed upvalue.
-+ | // TODO: tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
-+ | llgc TMPR2, TV2MARKOFS(RB)
-+ | tmll TMPR2, LJ_GC_BLACK
-+ | jne >2
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Upvalue is black. Check if new value is collectable and white.
-+ | srag RD, RA, 47(r0)
-+ | ahi RD, -LJ_TISGCV
-+ | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
-+ | jle <1
-+ | cleartp GCOBJ:RA
-+ | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
-+ | llgc TMPR2, GCOBJ:RA->gch.marked
-+ | tmll TMPR2, LJ_GC_WHITES
-+ | je <1
-+ | // Crossed a write barrier. Move the barrier forward.
-+ | lgr CARG2, RB
-+ | lgr RB, BASE // Save BASE.
-+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
-+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
-+ | lgr BASE, RB // Restore BASE.
-+ | j <1
- break;
-+#undef TV2MARKOFS
- case BC_USETS:
- | stg r0, 0(r0)
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From 16aa7d2cd2046e4efbb2f3dcb37743d22ac53c7b Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 30 Dec 2016 13:33:31 -0500
-Subject: [PATCH 171/247] Implement USETN, USETP and USETS.
-
-Allows constant numbers, primitives (nil, true, false) and strings
-to be assigned to upvalues in closures.
----
- src/vm_s390x.dasc | 60 ++++++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 54 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 182cfef..7b35afb 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2421,16 +2421,64 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- #undef TV2MARKOFS
- case BC_USETS:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AND // RA = upvalue #, RD = str const (~)
-+ | lg LFUNC:RB, -16(BASE)
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | cleartp LFUNC:RB
-+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
-+ | lg STR:RA, 0(RD, KBASE)
-+ | lg RD, UPVAL:RB->v
-+ | settp STR:ITYPE, STR:RA, LJ_TSTR
-+ | stg STR:ITYPE, 0(RD)
-+ | // TODO: tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
-+ | llgc TMPR2, UPVAL:RB->marked
-+ | tmll TMPR2, LJ_GC_BLACK
-+ | jne >2
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Check if string is white and ensure upvalue is closed.
-+ | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
-+ | llgc TMPR2, GCOBJ:RA->gch.marked
-+ | tmll TMPR2, LJ_GC_WHITES
-+ | je <1
-+ | // TODO: tm UPVAL:RB->closed, 0xff
-+ | llgc TMPR2, UPVAL:RB->closed
-+ | tmll TMPR2, 0xff
-+ | je <1
-+ | // Crossed a write barrier. Move the barrier forward.
-+ | lgr RB, BASE
-+ | lgr CARG2, RD
-+ | lay GL:CARG1, GG_DISP2G(DISPATCH)
-+ | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
-+ | lgr BASE, RB // Restore BASE.
-+ | j <1
- break;
- case BC_USETN:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = upvalue #, RD = num const
-+ | lg LFUNC:RB, -16(BASE)
-+ | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3(r0)
-+ | cleartp LFUNC:RB
-+ | ld f0, 0(RD, KBASE)
-+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
-+ | lg RA, UPVAL:RB->v
-+ | std f0, 0(RA)
-+ | ins_next
- break;
- case BC_USETP:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = upvalue #, RD = primitive type (~)
-+ | lg LFUNC:RB, -16(BASE)
-+ | sllg RA, RA, 3(r0)
-+ | cleartp LFUNC:RB
-+ | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
-+ | sllg RD, RD, 47(r0)
-+ | lghi TMPR2, -1
-+ | xgr RD, TMPR2
-+ | lg RA, UPVAL:RB->v
-+ | stg RD, 0(RA)
-+ | ins_next
- break;
- case BC_UCLO:
- | ins_AD // RA = level, RD = target
---
-2.20.1
-
-
-From 1bc42a641643f346da1ffcf86c03c318c9266acd Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 30 Dec 2016 14:48:48 -0500
-Subject: [PATCH 172/247] Implement pairs (including ISNEXT and ITERN).
-
-Allows use of the pairs iterator, for example:
-
-t = { alpha = 1, beta = 2 }
-for k,v in pairs(t)
- print(k, v)
-end
-
--- prints:
--- alpha 1
--- beta 2
----
- src/vm_s390x.dasc | 104 +++++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 98 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 7b35afb..4d5729d 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1134,8 +1134,23 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_res1
- |
- |.ffunc_1 pairs
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg TAB:RB, 0(BASE)
-+ | lgr TMPR1, TAB:RB
-+ | checktab TAB:RB, ->fff_fallback
-+#if LJ_52
-+ | ltg TMPR2, TAB:RB->metatable; jne ->fff_fallback
-+#endif
-+ | lg CFUNC:RD, -16(BASE)
-+ | cleartp CFUNC:RD
-+ | lg CFUNC:RD, CFUNC:RD->upvalue[0]
-+ | settp CFUNC:RD, LJ_TFUNC
-+ | lg PC, -8(BASE)
-+ | stg CFUNC:RD, -16(BASE)
-+ | stg TMPR1, -8(BASE)
-+ | lghi TMPR2, LJ_TNIL
-+ | stg TMPR2, 0(BASE)
-+ | lghi RD, 1+3
-+ | j ->fff_res
- |
- |.ffunc_2 ipairs_aux
- | lg TAB:RB, 0(BASE)
-@@ -3061,13 +3076,90 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_ITERN:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
-+ |.if JIT
-+ | // NYI: add hotloop, record BC_ITERN.
-+ |.endif
-+ | sllg RA, RA, 3(r0)
-+ | lg TAB:RB, -16(RA, BASE)
-+ | cleartp TAB:RB
-+ | llgf RC, -4(RA, BASE) // Get index from control var. // TODO: ENDIANNESS DRAGONS.
-+ | llgf TMPR1, TAB:RB->asize
-+ | la PC, 4(PC)
-+ | lg ITYPE, TAB:RB->array
-+ |1: // Traverse array part.
-+ | clr RC, TMPR1; jhe >5 // Index points after array part?
-+ | sllg RD, RC, 3(r0) // Warning: won't work if RD==RC!
-+ | lg TMPR2, 0(RD, ITYPE)
-+ | cghi TMPR2, LJ_TNIL; je >4
-+ | // Copy array slot to returned value.
-+ | lgr RB, TMPR2
-+ | stg RB, 8(RA, BASE)
-+ | // Return array index as a numeric key.
-+ | setint ITYPE, RC
-+ | stg ITYPE, 0(RA, BASE)
-+ | ahi RC, 1
-+ | sty RC, -4(RA, BASE) // Update control var. // TODO: ENDIANNESS DRAGONS
-+ |2:
-+ | llgh RD, PC_RD // Get target from ITERL.
-+ | branchPC RD
-+ |3:
-+ | ins_next
-+ |
-+ |4: // Skip holes in array part.
-+ | ahi RC, 1
-+ | j <1
-+ |
-+ |5: // Traverse hash part.
-+ | sr RC, TMPR1
-+ |6:
-+ | cl RC, TAB:RB->hmask; jh <3 // End of iteration? Branch to ITERL+1.
-+ | llgfr ITYPE, RC
-+ | mghi ITYPE, #NODE
-+ | ag NODE:ITYPE, TAB:RB->node
-+ | lghi TMPR2, LJ_TNIL
-+ | cg TMPR2, NODE:ITYPE->val; je >7
-+ | ar TMPR1, RC
-+ | ahi TMPR1, 1
-+ | // Copy key and value from hash slot.
-+ | lg RB, NODE:ITYPE->key
-+ | lg RC, NODE:ITYPE->val
-+ | stg RB, 0(RA, BASE)
-+ | stg RC, 8(RA, BASE)
-+ | sty TMPR1, -4(RA, BASE) // TODO: ENDIANNESS DRAGONS
-+ | j <2
-+ |
-+ |7: // Skip holes in hash part.
-+ | ahi RC, 1
-+ | j <6
- break;
-+
- case BC_ISNEXT:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | ins_AD // RA = base, RD = target (points to ITERN)
-+ | sllg RA, RA, 3(r0)
-+ | lg CFUNC:RB, -24(RA, BASE)
-+ | checkfunc CFUNC:RB, >5
-+ | lg TMPR1, -16(RA, BASE)
-+ | checktptp TMPR1, LJ_TTAB, >5
-+ | lghi TMPR2, LJ_TNIL
-+ | cg TMPR2, -8(RA, BASE); jne >5
-+ | llgc TMPR1, CFUNC:RB->ffid
-+ | clfi TMPR1, (uint8_t)FF_next_N; jne >5
-+ | branchPC RD
-+ | llihl TMPR1, 0x7fff
-+ | iihh TMPR1, 0xfffe
-+ | stg TMPR1, -8(RA, BASE) // Initialize control var.
-+ |1:
-+ | ins_next
-+ |5: // Despecialize bytecode if any of the checks fail.
-+ | lghi TMPR2, BC_JMP
-+ | stcy TMPR2, PC_OP
-+ | branchPC RD
-+ | lghi TMPR2, BC_ITERC
-+ | stc TMPR2, 3(PC)
-+ | j <1
- break;
-+
- case BC_VARG:
- | // TODO: some opportunities for branch on index in here.
- | ins_ABC // RA = base, RB = nresults+1, RC = numparams
---
-2.20.1
-
-
-From 8fe2705b02d832b904fe6a89ef935d0bbc4f83f3 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 30 Dec 2016 16:33:35 -0500
-Subject: [PATCH 173/247] Remove debug code from tostring.
-
----
- src/vm_s390x.dasc | 1 -
- 1 file changed, 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 4d5729d..6120341 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1092,7 +1092,6 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr L:CARG1, L:RB
- | brasl r14, extern lj_strfmt_number // (lua_State *L, cTValue *o)
- | // GCstr returned in r2 (CRET1).
-- | stg r0, 0(r0)
- | lg BASE, L:RB->base
- | settp STR:RB, CRET1, LJ_TSTR
- | j <2
---
-2.20.1
-
-
-From ac695184127407dc2b0813111623267a5dd4df47 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 3 Jan 2017 15:51:23 +0530
-Subject: [PATCH 174/247] Added SIY addressing mode support
-
-Added SIY add mode, and Updated the number of parameters for few of the instructions of RS-a mode
----
- dynasm/dasm_s390x.lua | 43 +++++++++++++++++++++++++++++++------------
- 1 file changed, 31 insertions(+), 12 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 9b9d3f4..1383669 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1061,20 +1061,20 @@ map_op = {
- spm_2 = "000000000400RR",
- ssar_2 = "0000b2250000RRE",
- ssair_2 = "0000b99f0000RRE",
-- slda_3 = "00008f000000RS-a",
-- sldl_3 = "00008d000000RS-a",
-- sla_3 = "00008b000000RS-a",
-+ slda_2 = "00008f000000RS-a",
-+ sldl_2 = "00008d000000RS-a",
-+ sla_2 = "00008b000000RS-a",
- slak_3 = "eb00000000ddRSY-a",
- slag_3 = "eb000000000bRSY-a",
-- sll_3 = "000089000000RS-a",
-+ sll_2 = "000089000000RS-a",
- sllk_3 = "eb00000000dfRSY-a",
- sllg_3 = "eb000000000dRSY-a",
-- srda_3 = "00008e000000RS-a",
-- srdl_3 = "00008c000000RS-a",
-- sra_3 = "00008a000000RS-a",
-+ srda_2 = "00008e000000RS-a",
-+ srdl_2 = "00008c000000RS-a",
-+ sra_2 = "00008a000000RS-a",
- srak_3 = "eb00000000dcRSY-a",
- srag_3 = "eb000000000aRSY-a",
-- srl_3 = "000088000000RS-a",
-+ srl_2 = "000088000000RS-a",
- srlk_3 = "eb00000000deRSY-a",
- srlg_3 = "eb000000000cRSY-a",
- sqxbr_2 = "0000b3160000RRE",
-@@ -1225,6 +1225,9 @@ map_op = {
- brxhg_3 = "ec0000000044RIE-e",
- -- SI
- ni_2 = "000094000000SI",
-+ tm_2 = "000091000000SI",
-+ -- SIY
-+ tmy_2 = "eb0000000051SIY",
- -- RXF
- madb_3 = "ed000000001eRXF",
- -- RRD
-@@ -1291,11 +1294,17 @@ local function parse_template(params, template, nparams, pos)
- local mode, n, s = parse_label(params[2])
- waction("REL_"..mode, n, s)
- elseif p == "RS-a" then
-- local d, b, a = parse_mem_b(params[3])
-- op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-- op2 = op2 + shl(b, 12) + d
-+ if (params[3]) then
-+ local d, b, a = parse_mem_b(params[3])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-+ op2 = op2 + shl(b, 12) + d
-+ else
-+ local d, b, a = parse_mem_b(params[2])
-+ op1 = op1 + shl(parse_reg(params[1]), 4)
-+ op2 = op2 + shl(b, 12) + d
-+ end
- wputhw(op1); wputhw(op2)
-- if a then a() end -- a() emits action.
-+ if a then a() end
- elseif p == "RSY-a" then
- local d, b, a = parse_mem_by(params[3])
- op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-@@ -1445,6 +1454,16 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2)
- if a then a() end
-+ elseif p == "SIY" then
-+ local imm8,iact = parse_imm8(params[2])
-+ op0 = op0 + shl(imm8, 8)
-+ wputhw(op0);
-+ if iact then iact() end
-+ local d, b, a = parse_mem_by(params[1])
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op1); wputhw(op2)
-+ if a then a() end
- else
- werror("unrecognized encoding")
- end
---
-2.20.1
-
-
-From 5135fa1e284df1d05ddb70cff10e3dcc1b755a9a Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 3 Jan 2017 15:55:25 +0530
-Subject: [PATCH 175/247] Minor fix, for arguments in shift operations
-
----
- src/vm_s390x.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 6120341..12cb0e5 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1520,7 +1520,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg RA, 8(BASE)
- | checkint RA, ->fff_fallback
- | nill RA, 0x1f // Limit shift to 5-bits.
-- | ins RB, r0, 0(RA) // TODO: fix shift args in DynASM.
-+ | ins RB, 0(RA) // TODO: fix shift args in DynASM.
- | j ->fff_resbit
- |.endmacro
- |
---
-2.20.1
-
-
-From 55ab87c072d93aa843c058d16b4628de727e1088 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Tue, 3 Jan 2017 16:16:29 +0530
-Subject: [PATCH 176/247] Added couple of instructions required by test-case
-
-maeb(RXF) and cegbra(RRF-e) have been added
----
- dynasm/dasm_s390x.lua | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 1383669..7d260fe 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1184,6 +1184,7 @@ map_op = {
- -- RRF-e instructions
- cfebr_3 = "0000b3980000RRF-e",
- cfebra_4 = "0000b3980000RRF-e",
-+ cegbra_4 = "0000b3a40000RRF-e",
- -- RXE instructions
- adb_2 = "ed000000001aRXE",
- aeb_2 = "ed000000000aRXE",
-@@ -1230,6 +1231,7 @@ map_op = {
- tmy_2 = "eb0000000051SIY",
- -- RXF
- madb_3 = "ed000000001eRXF",
-+ maeb_3 = "ed000000000eRXF",
- -- RRD
- maebr_3 = "0000b30e0000RRD",
- -- RS-b
---
-2.20.1
-
-
-From 313576e159dab8b33482a65d15b0947d7850883e Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Tue, 3 Jan 2017 17:08:30 +0530
-Subject: [PATCH 177/247] Added example for RX-f based instruction mode
-
----
- dynasm/Examples/test_z_inst.c | 17 ++++++++++++++++-
- 1 file changed, 16 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 20b2045..8558aae 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -380,6 +380,20 @@ static void sqrt_rxe(dasm_State *state)
-
- }
-
-+static void rxf(dasm_State *state) {
-+ dasm_State **Dst = &state;
-+
-+ | lay sp , -8(sp)
-+ | cegbra f1 ,0, r2,0
-+ | cegbra f2 ,0,r3,0
-+ | ste f2 ,0(sp)
-+ | maeb f1, f2, 0(sp)
-+ | cfebr r2 ,0, f1
-+ | la sp, 8(sp)
-+ | br r14
-+
-+}
-+
- typedef struct {
- int64_t arg1;
- int64_t arg2;
-@@ -413,7 +427,8 @@ test_table test[] = {
- {15, 3,10, rrfe_rrd, 45, "rrfe_rrd"},
- { 0, 0, 0, rsb, 0, "rsb"},
- {12,10, 0, rre, 10, "rre"},
-- {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"}
-+ {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"},
-+ {16,10, 0, rxf, 116, "rxf"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From ea2c0e9e8b9eb515c64b83c96d2c80bbc9b0695c Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 3 Jan 2017 12:17:34 -0500
-Subject: [PATCH 178/247] Implement metamethod support.
-
-Allows metamethod tables to be get and set.
----
- src/vm_s390x.dasc | 139 +++++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 124 insertions(+), 15 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 12cb0e5..e68c095 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -320,11 +320,12 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_returnp:
-- | cghi PC, 0
-+ | lghi TMPR2, FRAME_P
-+ | nr TMPR2, PC
- | je ->cont_dispatch
- |
- | // Return from pcall or xpcall fast func.
-- | nill PC, -7
-+ | nill PC, -8
- | sgr BASE, PC // Restore caller base.
- | lay RA, -8(RA, PC) // Rebase RA and prepend one result.
- | lg PC, -8(BASE) // Fetch PC of previous frame.
-@@ -612,8 +613,40 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Continuation dispatch ----------------------------------------------
- |
- |->cont_dispatch:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
-+ | agr RA, BASE
-+ | nill PC, -8
-+ | lgr RB, BASE
-+ | sgr BASE, PC // Restore caller BASE.
-+ | sllg TMPR1, RD, 3(r0)
-+ | lghi TMPR2, LJ_TNIL
-+ | stg TMPR2, -8(RA, TMPR1) // Ensure one valid arg.
-+ | lgr RC, RA // ... in [RC]
-+ | lg PC, -24(RB) // Restore PC from [cont|PC].
-+ | lg RA, -32(RB)
-+ |.if FFI
-+ | stg r0, 0(r0) // TODO: remove once tested.
-+ | clfi RA, 1
-+ | jle >1
-+ |.endif
-+ | lg LFUNC:KBASE, -16(BASE)
-+ | cleartp LFUNC:KBASE
-+ | lg KBASE, LFUNC:KBASE->pc
-+ | lg KBASE, (PC2PROTO(k))(KBASE)
-+ | // BASE = base, RC = result, RB = meta base
-+ | br RA // Jump to continuation.
-+ |
-+ |.if FFI
-+ |1:
-+ | stg r0, 0(r0) // TODO: remove once tested.
-+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
-+ | // cont = 0: Tail call from C function.
-+ | sgr RB, BASE
-+ | srl RB, 3(r0)
-+ | ahi RB, -3
-+ | llgf RD, RB
-+ | j ->vm_call_tail
-+ |.endif
- |
- |->cont_cat: // BASE = base, RC = result, RB = mbase
- | stg r0, 0(r0)
-@@ -787,8 +820,9 @@ static void build_subroutines(BuildCtx *ctx)
- | brasl r14, extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
- | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
- |3:
-+ | lgr RC, CRET1
- | lg BASE, L:RB->base
-- | clgfi CRET1, 1
-+ | clgfi RC, 1
- | jh ->vmeta_binop
- |4:
- | la PC, 4(PC)
-@@ -800,16 +834,34 @@ static void build_subroutines(BuildCtx *ctx)
- | ins_next
- |
- |->cont_condt: // BASE = base, RC = result
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | la PC, 4(PC)
-+ | lg ITYPE, 0(RC)
-+ | srag ITYPE, ITYPE, 47(r0)
-+ | lghi TMPR2, LJ_TISTRUECOND
-+ | clr ITYPE, TMPR2 // Branch if result is true.
-+ | jl <5
-+ | j <6
- |
- |->cont_condf: // BASE = base, RC = result
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg ITYPE, 0(RC)
-+ | srag ITYPE, ITYPE, 47(r0)
-+ | lghi TMPR2, LJ_TISTRUECOND
-+ | clr ITYPE, TMPR2 // Branch if result is false.
-+ | j <4
- |
- |->vmeta_equal:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | cleartp TAB:RD
-+ | lay PC, -4(PC)
-+ | lgr CARG2, RA
-+ | lgfr CARG4, RB
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | lgr CARG3, RD
-+ | lgr CARG1, L:RB
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
-+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
-+ | j <3
- |
- |->vmeta_equal_cd:
- | stg r0, 0(r0)
-@@ -1048,12 +1100,69 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Base library: getters and setters ---------------------------------
- |
- |.ffunc_1 getmetatable
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg TAB:RB, 0(BASE)
-+ | lg PC, -8(BASE)
-+ | checktab TAB:RB, >6
-+ |1: // Field metatable must be at same offset for GCtab and GCudata!
-+ | lg TAB:RB, TAB:RB->metatable
-+ |2:
-+ | lghi TMPR2, LJ_TNIL
-+ | stg TMPR2, -16(BASE)
-+ | cghi TAB:RB, 0
-+ | je ->fff_res1
-+ | settp TAB:RC, TAB:RB, LJ_TTAB
-+ | stg TAB:RC, -16(BASE) // Store metatable as default result.
-+ | lg STR:RC, (DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable))(DISPATCH)
-+ | llgf RA, TAB:RB->hmask
-+ | n RA, STR:RC->hash
-+ | settp STR:RC, LJ_TSTR
-+ | mghi RA, #NODE
-+ | ag NODE:RA, TAB:RB->node
-+ |3: // Rearranged logic, because we expect _not_ to find the key.
-+ | cg STR:RC, NODE:RA->key
-+ | je >5
-+ |4:
-+ | ltg NODE:RA, NODE:RA->next
-+ | jne <3
-+ | j ->fff_res1 // Not found, keep default result.
-+ |5:
-+ | lg RB, NODE:RA->val
-+ | cghi RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
-+ | stg RB, -16(BASE) // Return value of mt.__metatable.
-+ | j ->fff_res1
-+ |
-+ |6:
-+ | clfi ITYPE, LJ_TUDATA; je <1
-+ | clfi ITYPE, LJ_TISNUM; jh >7
-+ | lhi ITYPE, LJ_TISNUM
-+ |7:
-+ | lhi TMPR2, -1
-+ | xr ITYPE, TMPR2 // not ITYPE
-+ | llgfr ITYPE, ITYPE
-+ | sllg ITYPE, ITYPE, 3(r0)
-+ | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
-+ | j <2
- |
- |.ffunc_2 setmetatable
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg TAB:RB, 0(BASE)
-+ | lgr TAB:TMPR1, TAB:RB
-+ | checktab TAB:RB, ->fff_fallback
-+ | // Fast path: no mt for table yet and not clearing the mt.
-+ | lghi TMPR2, 0
-+ | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
-+ | lg TAB:RA, 8(BASE)
-+ | checktab TAB:RA, ->fff_fallback
-+ | stg TAB:RA, TAB:RB->metatable
-+ | lg PC, -8(BASE)
-+ | stg TAB:TMPR1, -16(BASE) // Return original table.
-+ | // TODO: change to tm
-+ | llgc TMPR2, TAB:RB->marked
-+ | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | je >1
-+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
-+ | barrierback TAB:RB, RC
-+ |1:
-+ | j ->fff_res1
- |
- |.ffunc_2 rawget
- | stg r0, 0(r0)
---
-2.20.1
-
-
-From cae264fc344e99b0cb99454a3fc873d4fa1656e2 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 3 Jan 2017 16:12:22 -0500
-Subject: [PATCH 179/247] Implement more math functions.
-
-Everything apart from min/max should now be working.
----
- src/vm_s390x.dasc | 68 ++++++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 64 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index e68c095..4ecc824 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -22,6 +22,8 @@
- |// clfi (compare logical immediate) [requires z9-109]
- |// ldgr (load FPR from GPR) [requires z9-109 GA3]
- |// lgdr (load GPR from FPR) [requires z9-109 GA3]
-+|// ldy (load (long bfp)) [requires z900 GA2]
-+|// stdy (store (long bfp)) [requires z900 GA2]
- |// TODO: alternative instructions?
- |
- |.arch s390x
-@@ -180,7 +182,7 @@
- |.else
- | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
- | .macro ins_next
--| jmp ->ins_next
-+| j ->ins_next
- | .endmacro
- | .macro ins_next_
- | ->ins_next:
-@@ -1034,14 +1036,23 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_n, name, op
- | .ffunc_1 name
-+ | lg TMPR2, 0(BASE)
-+ | checknumtp TMPR2, ->fff_fallback
-+ | op f0, 0(BASE) // TODO: might be better to unconditionally load into f1.
- |.endmacro
- |
- |.macro .ffunc_n, name
-- | .ffunc_n name, mvc
-+ | .ffunc_n name, ld
- |.endmacro
- |
- |.macro .ffunc_nn, name
- | .ffunc_2 name
-+ | lg TMPR1, 0(BASE)
-+ | lg TMPR2, 8(BASE)
-+ | ld f0, 0(BASE)
-+ | ld f1, 8(BASE)
-+ | checknumtp TMPR1, ->fff_fallback
-+ | checknumtp TMPR2, ->fff_fallback
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses label 1.
-@@ -1377,8 +1388,11 @@ static void build_subroutines(BuildCtx *ctx)
- | stg RB, -16(BASE)
- | j ->fff_res1
- |
-- |.ffunc_n math_sqrt, sqrtsd
-- |->fff_resxmm0:
-+ |.ffunc_n math_sqrt, sqdb
-+ |->fff_resf0:
-+ | lg PC, -8(BASE)
-+ | stdy f0, -16(BASE)
-+ | // fallthrough
- |
- |->fff_res1:
- | lghi RD, 1+1
-@@ -1417,13 +1431,29 @@ static void build_subroutines(BuildCtx *ctx)
- | math_round ceil
- |
- |.ffunc math_log
-+ | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-+ | lg TMPR2, 0(BASE)
-+ | ld f0, 0(BASE)
-+ | checknumtp TMPR2, ->fff_fallback
-+ | lgr RB, BASE
-+ | brasl r14, extern log
-+ | lgr BASE, RB
-+ | j ->fff_resf0
- |
- |.macro math_extern, func
- | .ffunc_n math_ .. func
-+ | lgr RB, BASE
-+ | brasl r14, extern func
-+ | lgr BASE, RB
-+ | j ->fff_resf0
- |.endmacro
- |
- |.macro math_extern2, func
- | .ffunc_nn math_ .. func
-+ | lgr RB, BASE
-+ | brasl r14, extern func
-+ | lgr BASE, RB
-+ | j ->fff_resf0
- |.endmacro
- |
- | math_extern log10
-@@ -1442,10 +1472,40 @@ static void build_subroutines(BuildCtx *ctx)
- | math_extern2 fmod
- |
- |.ffunc_2 math_ldexp
-+ | lg TMPR2, 0(BASE)
-+ | ld f0, 0(BASE)
-+ | lg CARG1, 8(BASE)
-+ | checknumtp TMPR2, ->fff_fallback
-+ | checkinttp CARG1, ->fff_fallback
-+ | lgfr CARG1, CARG1
-+ | lgr RB, BASE
-+ | brasl r14, extern ldexp // (double, int)
-+ | lgr BASE, RB
-+ | j ->fff_resf0
- |
- |.ffunc_n math_frexp
-+ | lgr RB, BASE
-+ | la CARG1, TMP_STACK
-+ | brasl r14, extern frexp
-+ | lgr BASE, RB
-+ | llgf RB, TMP_STACK
-+ | lg PC, -8(BASE)
-+ | stdy f0, -16(BASE)
-+ | setint RB
-+ | stg RB, -8(BASE)
-+ | lghi RD, 1+2
-+ | j ->fff_res
- |
- |.ffunc_n math_modf
-+ | lgr RB, BASE
-+ | lay CARG1, -16(BASE)
-+ | brasl r14, extern modf // (double, double*)
-+ | lgr BASE, RB
-+ | lg PC, -8(BASE)
-+ | stdy f0, -8(BASE)
-+ | lghi RD, 1+2
-+ | j ->fff_res
-+ |
- |.macro math_minmax, name, cmovop, sseop
- | .ffunc name
- |.endmacro
---
-2.20.1
-
-
-From 5b96068ae76302fca219fd1c63bf11534902b63f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 3 Jan 2017 16:36:34 -0500
-Subject: [PATCH 180/247] Implement string.byte and string.char.
-
----
- src/vm_s390x.dasc | 18 ++++++++++++++++--
- 1 file changed, 16 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 4ecc824..b987766 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1516,10 +1516,24 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- String library -----------------------------------------------------
- |
- |.ffunc string_byte // Only handle the 1-arg case here.
-- | stg r0, 0(r0)
-+ | chi NARGS:RD, 1+1; jne ->fff_fallback
-+ | lg STR:RB, 0(BASE)
-+ | checkstr STR:RB, ->fff_fallback
-+ | lg PC, -8(BASE)
-+ | ltg TMPR2, STR:RB->len
-+ | je ->fff_res0 // Return no results for empty string.
-+ | llgc RB, STR:RB[1]
-+ | j ->fff_resi
- |
- |.ffunc string_char // Only handle the 1-arg case here.
-- | stg r0, 0(r0)
-+ | ffgccheck
-+ | chi NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
-+ | lg RB, 0(BASE)
-+ | checkint RB, ->fff_fallback
-+ | clfi RB, 255; jh ->fff_fallback
-+ | strvh RB, TMP_STACK // Store [c,0].
-+ | lghi TMPR1, 1
-+ | la RD, TMP_STACK // Points to stack. Little-endian.
- |->fff_newstr:
- | lg L:RB, SAVE_L
- | stg BASE, L:RB->base
---
-2.20.1
-
-
-From 8942fb8b098968f2b511fc5376b598187a371e24 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 3 Jan 2017 16:44:36 -0500
-Subject: [PATCH 181/247] Add emptystr implementation and stub out co-routine
- functions.
-
----
- src/vm_s390x.dasc | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index b987766..ef5420f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1358,12 +1358,16 @@ static void build_subroutines(BuildCtx *ctx)
- |.else
- |.ffunc coroutine_wrap_aux
- |.endif
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |.endmacro
- |
- | coroutine_resume_wrap 1 // coroutine.resume
- | coroutine_resume_wrap 0 // coroutine.wrap
- |
- |.ffunc coroutine_yield
-+ | stg r0, 0(r0)
-+ | stg r0, 0(r0)
- |
- |//-- Math library -------------------------------------------------------
- |
-@@ -1600,7 +1604,8 @@ static void build_subroutines(BuildCtx *ctx)
- | j <3
- |
- |->fff_emptystr: // Range underflow.
-- | stg r0, 0(r0)
-+ | lghi RD, 0
-+ | j <3
- |
- |.macro ffstring_op, name
- | .ffunc_1 string_ .. name
---
-2.20.1
-
-
-From 8195a9e86c42afdb1007b651fd3196dbb1005244 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 4 Jan 2017 16:20:56 +0530
-Subject: [PATCH 182/247] Updated the memory parsing
-
-It accepts 2 registers, without the displacement
----
- dynasm/dasm_s390x.lua | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 7d260fe..95c6927 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -324,6 +324,11 @@ local function split_memop(arg)
- if d then
- return d, 0, parse_reg(b)
- end
-+ -- Assuming the two registers are passed as "(r1,r2)", and displacement(d) is not specified
-+ local x, b = string.match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
-+ if b then
-+ return 0, parse_reg(x), parse_reg(b)
-+ end
- local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
- if reg then
- local r, tp = parse_reg(reg)
-@@ -332,7 +337,7 @@ local function split_memop(arg)
- end
- end
- -- TODO: handle values without registers?
-- -- TODO: handle registers without a displacement?
-+ -- TODO: handle registers without a displacement? -- done, above ,needs to be tested
- werror("bad memory operand: "..arg)
- return nil
- end
---
-2.20.1
-
-
-From 4b565e7709e18b4991a25fccf42294f957f95567 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 4 Jan 2017 17:43:53 +0530
-Subject: [PATCH 183/247] Updated memory parsing
-
-The values of base and index registers have been passed as 0, if only displacement is passed
-the displacement is assumed to be alphanumeric (since label might be used)
----
- dynasm/dasm_s390x.lua | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 95c6927..a62fe21 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -325,10 +325,15 @@ local function split_memop(arg)
- return d, 0, parse_reg(b)
- end
- -- Assuming the two registers are passed as "(r1,r2)", and displacement(d) is not specified
-- local x, b = string.match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
-+ local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
- if b then
- return 0, parse_reg(x), parse_reg(b)
- end
-+ -- Assuming that only displacement is passed, as either digit or label "45 or label1"
-+ local d = match(arg,"[%w_]+")
-+ if d then
-+ return d, 0, 0
-+ end
- local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
- if reg then
- local r, tp = parse_reg(reg)
---
-2.20.1
-
-
-From b1d645b2e4983cac766054078fa29d310d7c427f Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Wed, 4 Jan 2017 18:15:57 +0530
-Subject: [PATCH 184/247] Reverting the changes, as its breaking the build
-
-The above expression works on CLI, but its failing here, not sure whats going wrong , Please let me know your comments on it
----
- dynasm/dasm_s390x.lua | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index a62fe21..b3cda6f 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -330,10 +330,10 @@ local function split_memop(arg)
- return 0, parse_reg(x), parse_reg(b)
- end
- -- Assuming that only displacement is passed, as either digit or label "45 or label1"
-- local d = match(arg,"[%w_]+")
-- if d then
-- return d, 0, 0
-- end
-+ -- local d = match(arg,"[%w_]+")
-+ -- if d then
-+ -- return d, 0, 0
-+ -- end
- local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
- if reg then
- local r, tp = parse_reg(reg)
---
-2.20.1
-
-
-From 226ef35e3cfe26f616dc2ebf0c3c015513538423 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 4 Jan 2017 11:42:22 -0500
-Subject: [PATCH 185/247] Implement math.min and math.max.
-
-Replicates the standard Lua behaviour in the presence of NaNs.
----
- src/vm_s390x.dasc | 43 ++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 40 insertions(+), 3 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index ef5420f..56d5c02 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1510,12 +1510,49 @@ static void build_subroutines(BuildCtx *ctx)
- | lghi RD, 1+2
- | j ->fff_res
- |
-- |.macro math_minmax, name, cmovop, sseop
-+ |.macro math_minmax, name, cjmp
- | .ffunc name
-+ | lghi RA, 2*8
-+ | sllg TMPR1, RD, 3(r0)
-+ | lg RB, 0(BASE)
-+ | ld f0, 0(BASE)
-+ | checkint RB, >4
-+ |1: // Handle integers.
-+ | clgr RA, TMPR1; jhe ->fff_resRB
-+ | lg TMPR2, -8(RA, BASE)
-+ | checkint TMPR2, >3
-+ | cr RB, TMPR2
-+ | cjmp >2
-+ | lgr RB, TMPR2
-+ |2:
-+ | aghi RA, 8
-+ | j <1
-+ |3:
-+ | jh ->fff_fallback
-+ | // Convert intermediate result to number and continue below.
-+ | cdfbr f0, RB
-+ | ldgr f1, TMPR2
-+ | j >6
-+ |4:
-+ | jh ->fff_fallback
-+ |5: // Handle numbers or integers.
-+ | clgr RA, TMPR1; jhe ->fff_resf0
-+ | lg RB, -8(RA, BASE)
-+ | ldy f1, -8(RA, BASE)
-+ | checknumx RB, >6, jl
-+ | jh ->fff_fallback
-+ | cdfbr f1, RB
-+ |6:
-+ | cdbr f0, f1
-+ | cjmp >7
-+ | ldr f0, f1
-+ |7:
-+ | aghi RA, 8
-+ | j <5
- |.endmacro
- |
-- | math_minmax math_min, cmovg, minsd
-- | math_minmax math_max, cmovl, maxsd
-+ | math_minmax math_min, jnh
-+ | math_minmax math_max, jnl
- |
- |//-- String library -----------------------------------------------------
- |
---
-2.20.1
-
-
-From c5526af2c7de2f8065caf073403f6809c260ea78 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 4 Jan 2017 12:25:15 -0500
-Subject: [PATCH 186/247] Fixes for negative string.sub arguments and __index
- metamethod calls.
-
----
- src/vm_s390x.dasc | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 56d5c02..7e369f0 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -705,7 +705,7 @@ static void build_subroutines(BuildCtx *ctx)
- |3: // Call __index metamethod.
- | // BASE = base, L->top = new base, stack = cont/func/t/k
- | lg RA, L:RB->top
-- | stg PC, -24(PC) // [cont|PC]
-+ | stg PC, -24(RA) // [cont|PC]
- | lay PC, FRAME_CONT(RA)
- | sgr PC, BASE
- | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
-@@ -1603,7 +1603,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg STR:RB, 0(BASE)
- | checkstr STR:RB, ->fff_fallback
- | lg ITYPE, 8(BASE)
-- | llgfr RA, ITYPE // Must clear hiword for lea below.
-+ | lgfr RA, ITYPE
- | srag ITYPE, ITYPE, 47(r0)
- | cghi ITYPE, LJ_TISNUM
- | jne ->fff_fallback
-@@ -1641,8 +1641,8 @@ static void build_subroutines(BuildCtx *ctx)
- | j <3
- |
- |->fff_emptystr: // Range underflow.
-- | lghi RD, 0
-- | j <3
-+ | lghi TMPR1, 0
-+ | j <4
- |
- |.macro ffstring_op, name
- | .ffunc_1 string_ .. name
---
-2.20.1
-
-
-From 68e48b94cfc066646ae38e5f3d4af610094548bb Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 4 Jan 2017 15:34:30 -0500
-Subject: [PATCH 187/247] Implement cont_cat.
-
-Required to pass cat tests.
----
- src/vm_s390x.dasc | 28 ++++++++++++++++++++++++----
- 1 file changed, 24 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 7e369f0..51c6002 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -651,8 +651,21 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- |
- |->cont_cat: // BASE = base, RC = result, RB = mbase
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | llgc RA, PC_RB
-+ | sllg RA, RA, 3(r0)
-+ | aghi RB, -32
-+ | la RA, 0(RA, BASE)
-+ | sgr RA, RB
-+ | je ->cont_ra
-+ | lcgr RA, RA
-+ | srlg RA, RA, 3(r0)
-+ | lg L:CARG1, SAVE_L
-+ | stg BASE, L:CARG1->base
-+ | lgfr CARG3, RA
-+ | lg RA, 0(RC)
-+ | stg RA, 0(RB)
-+ | lgr CARG2, RB
-+ | j ->BC_CAT_Z
- |
- |//-- Table indexing metamethods -----------------------------------------
- |
-@@ -1347,8 +1360,15 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->vm_call_dispatch
- |
- |.ffunc_2 xpcall
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg LFUNC:RA, 8(BASE)
-+ | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
-+ | lg LFUNC:RB, 0(BASE) // Swap function and traceback.
-+ | stg LFUNC:RA, 0(BASE)
-+ | stg LFUNC:RB, 8(BASE)
-+ | la RA, 24(BASE)
-+ | aghi NARGS:RD, -2
-+ | lghi PC, 24+FRAME_PCALL
-+ | j <1
- |
- |//-- Coroutine library --------------------------------------------------
- |
---
-2.20.1
-
-
-From 996ad03a93a26bfd65519ce616460a06551f8ca3 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 4 Jan 2017 15:54:21 -0500
-Subject: [PATCH 188/247] Fix for __newindex metamethod.
-
----
- src/vm_s390x.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 51c6002..f411c43 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -791,7 +791,7 @@ static void build_subroutines(BuildCtx *ctx)
- |3: // Call __newindex metamethod.
- | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
- | lg RA, L:RB->top
-- | stg PC, -24(PC) // [cont|PC]
-+ | stg PC, -24(RA) // [cont|PC]
- | llgc RC, PC_RA
- | // Copy value to third argument.
- | sllg RB, RC, 3(r0)
---
-2.20.1
-
-
-From 391b3121470ca3bf87fb2e2b8743e213fe581152 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 4 Jan 2017 16:05:55 -0500
-Subject: [PATCH 189/247] Implement call_tail.
-
----
- src/vm_s390x.dasc | 23 +++++++++++++++++++++--
- 1 file changed, 21 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f411c43..84dffec 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1834,8 +1834,27 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// Reconstruct previous base for vmeta_call during tailcall.
- |->vm_call_tail:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lgr RA, BASE
-+ | tmll PC, FRAME_TYPE
-+ | jne >3
-+ | llgc RB, PC_RA
-+ | lcgr RB, RB
-+ | sllg RB, RB, 3(r0)
-+ | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8
-+ | j ->vm_call_dispatch // Resolve again for tailcall.
-+ |3:
-+ | lgr RB, PC
-+ | nill RB, -8
-+ | sgr BASE, RB
-+ | j ->vm_call_dispatch // Resolve again for tailcall.
-+ |
-+ |5: // Grow stack for fallback handler.
-+ | lghi CARG2, LUA_MINSTACK
-+ | lgr CARG1, L:RB
-+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
-+ | lg BASE, L:RB->base
-+ | lghi RD, 0 // Simulate a return 0.
-+ | j <1 // Dumb retry (goes through ff first).
- |
- |->fff_gcstep: // Call GC step function.
- | // BASE = new base, RD = nargs+1
---
-2.20.1
-
-
-From fe975aee8efda43c818cd56554fc1e66a194211f Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb(a)gmail.com>
-Date: Thu, 5 Jan 2017 14:48:09 +0530
-Subject: [PATCH 190/247] Updated the memory parsing
-
-The order matters here, so just moved displacement check to end
----
- dynasm/dasm_s390x.lua | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index b3cda6f..0c1263c 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -329,11 +329,6 @@ local function split_memop(arg)
- if b then
- return 0, parse_reg(x), parse_reg(b)
- end
-- -- Assuming that only displacement is passed, as either digit or label "45 or label1"
-- -- local d = match(arg,"[%w_]+")
-- -- if d then
-- -- return d, 0, 0
-- -- end
- local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
- if reg then
- local r, tp = parse_reg(reg)
-@@ -341,6 +336,11 @@ local function split_memop(arg)
- return format(tp.ctypefmt, tailr), 0, r
- end
- end
-+ -- Assuming that only displacement is passed, as either digit or label "45 or label1"
-+ local d = match(arg,"[%w_]+")
-+ if d then
-+ return d, 0, 0
-+ end
- -- TODO: handle values without registers?
- -- TODO: handle registers without a displacement? -- done, above ,needs to be tested
- werror("bad memory operand: "..arg)
---
-2.20.1
-
-
-From 70d68cb7e98e5d4e5a939324d78d610eb7f42085 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 5 Jan 2017 10:50:17 -0500
-Subject: [PATCH 191/247] Fix math.pow.
-
-The second floating point argument is placed into f2, not f1.
-Use the macros FARG{1,2} instead of using the registers directly.
----
- src/vm_s390x.dasc | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 84dffec..9efd5b0 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1062,8 +1062,8 @@ static void build_subroutines(BuildCtx *ctx)
- | .ffunc_2 name
- | lg TMPR1, 0(BASE)
- | lg TMPR2, 8(BASE)
-- | ld f0, 0(BASE)
-- | ld f1, 8(BASE)
-+ | ld FARG1, 0(BASE)
-+ | ld FARG2, 8(BASE)
- | checknumtp TMPR1, ->fff_fallback
- | checknumtp TMPR2, ->fff_fallback
- |.endmacro
-@@ -1457,7 +1457,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc math_log
- | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
- | lg TMPR2, 0(BASE)
-- | ld f0, 0(BASE)
-+ | ld FARG1, 0(BASE)
- | checknumtp TMPR2, ->fff_fallback
- | lgr RB, BASE
- | brasl r14, extern log
-@@ -1497,7 +1497,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc_2 math_ldexp
- | lg TMPR2, 0(BASE)
-- | ld f0, 0(BASE)
-+ | ld FARG1, 0(BASE)
- | lg CARG1, 8(BASE)
- | checknumtp TMPR2, ->fff_fallback
- | checkinttp CARG1, ->fff_fallback
---
-2.20.1
-
-
-From e24098c2d5a2c6acbd48459251ac42891609f9b4 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 5 Jan 2017 11:02:03 -0500
-Subject: [PATCH 192/247] Implement rawget.
-
----
- src/vm_s390x.dasc | 15 +++++++++++++--
- 1 file changed, 13 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 9efd5b0..d377738 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1189,8 +1189,19 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_res1
- |
- |.ffunc_2 rawget
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg TAB:CARG2, 0(BASE)
-+ | checktab TAB:CARG2, ->fff_fallback
-+ | lgr RB, BASE // Save BASE.
-+ | la CARG3, 8(BASE)
-+ | lg CARG1, SAVE_L
-+ | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
-+ | // cTValue * returned in r2 (CRET1).
-+ | lgr BASE, RB // Restore BASE.
-+ | // Copy table slot.
-+ | lg RB, 0(CRET1)
-+ | lg PC, -8(BASE)
-+ | stg RB, -16(BASE)
-+ | j ->fff_res1
- |
- |//-- Base library: conversions ------------------------------------------
- |
---
-2.20.1
-
-
-From d5ec0a1f1f75e86f92dc08358d5a9dec0e7b4fec Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 5 Jan 2017 17:32:51 -0500
-Subject: [PATCH 193/247] Implement coroutines.
-
-TODO: delete LREG, caused problems while implementing this (x64
-doesn't have LREG).
----
- src/vm_s390x.dasc | 161 ++++++++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 156 insertions(+), 5 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index d377738..5b21924 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -512,6 +512,7 @@ static void build_subroutines(BuildCtx *ctx)
- | st RD, SAVE_NRES
- | stg RD, SAVE_ERRF
- | stg KBASE, L:RB->cframe
-+ | lgr LREG, L:RB
- | clm RD, 1, L:RB->status
- | je >2 // Initial resume (like a call).
- |
-@@ -1386,19 +1387,169 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro coroutine_resume_wrap, resume
- |.if resume
- |.ffunc_1 coroutine_resume
-+ | lg L:RB, 0(BASE)
-+ | lgr L:TMPR2, L:RB // Save type for checktptp.
-+ | cleartp L:RB
- |.else
- |.ffunc coroutine_wrap_aux
-+ | lg CFUNC:RB, -16(BASE)
-+ | cleartp CFUNC:RB
-+ | lg L:RB, CFUNC:RB->upvalue[0].gcr
-+ | cleartp L:RB
- |.endif
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg PC, -8(BASE)
-+ | stg PC, SAVE_PC
-+ | stg L:RB, TMP_STACK
-+ |.if resume
-+ | checktptp L:TMPR2, LJ_TTHREAD, ->fff_fallback
-+ |.endif
-+ | ltg TMPR2, L:RB->cframe; jne ->fff_fallback
-+ | // TODO: replace with cli.
-+ | llgc TMPR1, L:RB->status
-+ | cghi TMPR1, (uint8_t)LUA_YIELD; jh ->fff_fallback
-+ | lg RA, L:RB->top
-+ | je >1 // Status != LUA_YIELD (i.e. 0)?
-+ | cg RA, L:RB->base // Check for presence of initial func.
-+ | je ->fff_fallback
-+ | lg PC, -8(RA) // Move initial function up.
-+ | stg PC, 0(RA)
-+ | la RA, 8(RA)
-+ |1:
-+ | sllg TMPR1, NARGS:RD, 3(r0)
-+ |.if resume
-+ | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread).
-+ |.else
-+ | lay PC, -8(TMPR1, RA) // Check stack space (-1).
-+ |.endif
-+ | clg PC, L:RB->maxstack; jh ->fff_fallback
-+ | stg PC, L:RB->top
-+ |
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ |.if resume
-+ | la BASE, 8(BASE) // Keep resumed thread in stack for GC.
-+ |.endif
-+ | stg BASE, L:RB->top
-+ |.if resume
-+ | lay RB, -24(TMPR1, BASE) // RB = end of source for stack move.
-+ |.else
-+ | lay RB, -16(TMPR1, BASE) // RB = end of source for stack move.
-+ |.endif
-+ | sgr RB, PC // Relative to PC.
-+ |
-+ | cgr PC, RA
-+ | je >3
-+ |2: // Move args to coroutine.
-+ | lg RC, 0(RB, PC)
-+ | stg RC, -8(PC)
-+ | // TODO: replace with branch on count/index?
-+ | lay PC, -8(PC)
-+ | cgr PC, RA
-+ | jne <2
-+ |3:
-+ | lgr CARG2, RA
-+ | lg L:CARG1, TMP_STACK
-+ | lghi CARG3, 0
-+ | lghi CARG4, 0
-+ | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0)
-+ |
-+ | lg L:RB, SAVE_L
-+ | lg L:PC, TMP_STACK
-+ | lg BASE, L:RB->base
-+ | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
-+ | set_vmstate INTERP
-+ |
-+ | clfi CRET1, LUA_YIELD
-+ | jh >8
-+ |4:
-+ | lg RA, L:PC->base
-+ | lg KBASE, L:PC->top
-+ | stg RA, L:PC->top // Clear coroutine stack.
-+ | lgr PC, KBASE
-+ | sgr PC, RA
-+ | je >6 // No results?
-+ | la RD, 0(PC, BASE)
-+ | llgfr PC, PC
-+ | srlg PC, PC, 3(r0)
-+ | clg RD, L:RB->maxstack
-+ | jh >9 // Need to grow stack?
-+ |
-+ | lgr RB, BASE
-+ | sgr RB, RA
-+ |5: // Move results from coroutine.
-+ | lg RD, 0(RA)
-+ | stg RD, 0(RA, RB)
-+ | // TODO: branch on count/index?
-+ | la RA, 8(RA)
-+ | cgr RA, KBASE
-+ | jne <5
-+ |6:
-+ |.if resume
-+ | la RD, 2(PC) // nresults+1 = 1 + true + results.
-+ | load_true ITYPE // Prepend true to results.
-+ | stg ITYPE, -8(BASE)
-+ |.else
-+ | la RD, 1(PC) // nresults+1 = 1 + results.
-+ |.endif
-+ |7:
-+ | lg PC, SAVE_PC
-+ | st RD, SAVE_MULTRES
-+ |.if resume
-+ | lghi RA, -8
-+ |.else
-+ | lghi RA, 0
-+ |.endif
-+ | tmll PC, FRAME_TYPE
-+ | je ->BC_RET_Z
-+ | j ->vm_return
-+ |
-+ |8: // Coroutine returned with error (at co->top-1).
-+ |.if resume
-+ | load_false ITYPE // Prepend false to results.
-+ | stg ITYPE, -8(BASE)
-+ | lg RA, L:PC->top
-+ | aghi RA, -8
-+ | stg RA, L:PC->top // Clear error from coroutine stack.
-+ | // Copy error message.
-+ | lg RD, 0(RA)
-+ | stg RD, 0(BASE)
-+ | lghi RD, 1+2 // nresults+1 = 1 + false + error.
-+ | j <7
-+ |.else
-+ | lgr CARG2, L:PC
-+ | lgr CARG1, L:RB
-+ | brasl r14, extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
-+ | // Error function does not return.
-+ |.endif
-+ |
-+ |9: // Handle stack expansion on return from yield.
-+ | lg L:RA, TMP_STACK
-+ | stg KBASE, L:RA->top // Undo coroutine stack clearing.
-+ | lgr CARG2, PC
-+ | lgr CARG1, L:RB
-+ | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
-+ | lg L:PC, TMP_STACK
-+ | lg BASE, L:RB->base
-+ | j <4 // Retry the stack move.
- |.endmacro
- |
- | coroutine_resume_wrap 1 // coroutine.resume
- | coroutine_resume_wrap 0 // coroutine.wrap
- |
- |.ffunc coroutine_yield
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | lg L:RB, SAVE_L
-+ | lg TMPR2, L:RB->cframe
-+ | tmll TMPR2, CFRAME_RESUME
-+ | je ->fff_fallback
-+ | stg BASE, L:RB->base
-+ | sllg TMPR1, NARGS:RD, 3(r0)
-+ | lay RD, -8(TMPR1, BASE)
-+ | stg RD, L:RB->top
-+ | lghi RD, 0
-+ | stg RD, L:RB->cframe
-+ | lhi RA, LUA_YIELD
-+ | stc RA, L:RB->status
-+ | j ->vm_leave_unw
- |
- |//-- Math library -------------------------------------------------------
- |
-@@ -3906,7 +4057,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lay RD, -8(RD,BASE)
- | stg BASE, L:RB->base
- | lay RA, (8*LUA_MINSTACK)(RD)
-- | cg RA, L:RB->maxstack
-+ | clg RA, L:RB->maxstack
- | stg RD, L:RB->top
- | lgr CARG1, L:RB // Caveat: CARG1 may be RA.
- if (op != BC_FUNCC) {
---
-2.20.1
-
-
-From 6b78282aa0317ba49ffba05e1f11b0a6e47b31d0 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 5 Jan 2017 23:33:10 -0500
-Subject: [PATCH 194/247] Various fixes for coroutines.
-
-Now passing the tests.
----
- src/vm_s390x.dasc | 12 ++++++------
- 1 file changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 5b21924..6b80f41 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -337,7 +337,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_returnc:
- | aghi RD, 1 // RD = nresults+1
-- | jo ->vm_unwind_yield // TODO: !!! NOT SURE, jz on x64, overflow? !!!
-+ | je ->vm_unwind_yield
- | st RD, SAVE_MULTRES
- | tmll PC, FRAME_TYPE
- | je ->BC_RET_Z // Handle regular return to Lua.
-@@ -519,7 +519,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // Resume after yield (like a return).
- | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
- | set_vmstate INTERP
-- | llgc RD, L:RB->status
-+ | stc RD, L:RB->status
- | lg BASE, L:RB->base
- | lg RD, L:RB->top
- | sgr RD, RA
-@@ -1542,13 +1542,13 @@ static void build_subroutines(BuildCtx *ctx)
- | tmll TMPR2, CFRAME_RESUME
- | je ->fff_fallback
- | stg BASE, L:RB->base
-- | sllg TMPR1, NARGS:RD, 3(r0)
-- | lay RD, -8(TMPR1, BASE)
-+ | sllg RD, NARGS:RD, 3(r0)
-+ | lay RD, -8(RD, BASE)
- | stg RD, L:RB->top
- | lghi RD, 0
- | stg RD, L:RB->cframe
-- | lhi RA, LUA_YIELD
-- | stc RA, L:RB->status
-+ | lghi CRET1, LUA_YIELD
-+ | stc CRET1, L:RB->status
- | j ->vm_leave_unw
- |
- |//-- Math library -------------------------------------------------------
---
-2.20.1
-
-
-From 2193ef32d324fc5d06565213f101bd711c77be03 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 6 Jan 2017 11:16:04 -0500
-Subject: [PATCH 195/247] Add more convert to/from fixed instructions to
- DynASM.
-
----
- dynasm/dasm_s390x.lua | 15 +++++++++++++++
- 1 file changed, 15 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 0c1263c..7d95f78 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1194,7 +1194,22 @@ map_op = {
- -- RRF-e instructions
- cfebr_3 = "0000b3980000RRF-e",
- cfebra_4 = "0000b3980000RRF-e",
-+ cfdbr_3 = "0000b3990000RRF-e",
-+ cfdbra_4 = "0000b3990000RRF-e",
-+ cfxbr_3 = "0000b39a0000RRF-e",
-+ cfxbra_4 = "0000b39a0000RRF-e",
-+ cgebr_3 = "0000b3a80000RRF-e",
-+ cgebra_4 = "0000b3a80000RRF-e",
-+ cgdbr_3 = "0000b3a90000RRF-e",
-+ cgdbra_4 = "0000b3a90000RRF-e",
-+ cgxbr_3 = "0000b3aa0000RRF-e",
-+ cgxbra_4 = "0000b3aa0000RRF-e",
-+ cefbra_4 = "0000b3940000RRF-e",
-+ cdfbra_4 = "0000b3950000RRF-e",
-+ cxfbra_4 = "0000b3960000RRF-e",
- cegbra_4 = "0000b3a40000RRF-e",
-+ cdgbra_4 = "0000b3a50000RRF-e",
-+ cxgbra_4 = "0000b3a60000RRF-e",
- -- RXE instructions
- adb_2 = "ed000000001aRXE",
- aeb_2 = "ed000000000aRXE",
---
-2.20.1
-
-
-From b05045d65613cb4c81025fbfb6c213c267aebaee Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 6 Jan 2017 11:16:33 -0500
-Subject: [PATCH 196/247] Implement math.floor/math.ceil.
-
----
- src/vm_s390x.dasc | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 6b80f41..274ca11 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1611,6 +1611,15 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro math_round, func
- | .ffunc math_ .. func
-+ | lg RB, 0(BASE)
-+ | ld f0, 0(BASE)
-+ | checknumx RB, ->fff_resRB, je
-+ | jh ->fff_fallback
-+ | brasl r14, ->vm_ .. func
-+ | cfdbr RB, 0, f0
-+ | jo ->fff_resf0
-+ | llgfr RB, RB
-+ | j ->fff_resi
- |.endmacro
- |
- | math_round floor
---
-2.20.1
-
-
-From 005c2c5a4ed18a474e5eb746d4e5545b58857acc Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 6 Jan 2017 11:47:56 -0500
-Subject: [PATCH 197/247] Fix VARG.
-
----
- src/vm_s390x.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 274ca11..a9bc10d 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -3618,8 +3618,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | jnl >3
- | clgr TMPR1, BASE // No more vararg slots?
- | jl <1
-- | lghi TMPR2, LJ_TNIL
- |2: // Fill up remainder with nil.
-+ | lghi TMPR2, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
- | stg TMPR2, 0(RA)
- | la RA, 8(RA)
- | clgr RA, RB
---
-2.20.1
-
-
-From 0b7db3980d5f6bbfde2ce2d4ff72a2de2bddc4cf Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 6 Jan 2017 13:53:31 -0500
-Subject: [PATCH 198/247] Fix vm_tsetr (needed by table.remove).
-
-The A argument was being loaded as 2-bytes instead of 1.
----
- src/vm_s390x.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index a9bc10d..8fa928b 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -815,7 +815,7 @@ static void build_subroutines(BuildCtx *ctx)
- | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
- | // TValue * returned in r2 (CRET1).
- | lgr RC, CRET1
-- | llgh RA, PC_RA
-+ | llgc RA, PC_RA
- | lgr BASE, RB // Restore BASE.
- | j ->BC_TSETR_Z
- |
---
-2.20.1
-
-
-From 8a7581015b318978d1c15be650c04c3e2a78fe0a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Fri, 6 Jan 2017 16:19:56 -0500
-Subject: [PATCH 199/247] Add partial FFI support.
-
-Interestingly, enough to pass all the FFI tests. So s390x now
-passes all the tests in LuaJIT-test-cleanup.
----
- src/Makefile | 2 +-
- src/lj_arch.h | 1 -
- src/lj_ccall.c | 35 +++++++++++
- src/lj_ccall.h | 6 +-
- src/lj_ccallback.c | 9 +++
- src/lj_target.h | 2 +
- src/lj_target_s390x.h | 139 +++++-------------------------------------
- src/vm_s390x.dasc | 81 +++++++++++++++++++++---
- 8 files changed, 139 insertions(+), 136 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index 158bfa8..d0f160a 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -56,7 +56,7 @@ CCOPT_mips=
- #
- CCDEBUG=
- # Uncomment the next line to generate debug information:
--CCDEBUG= -g -O0
-+#CCDEBUG= -g
- #
- CCWARN= -Wall
- # Uncomment the next line to enable more warnings:
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index bceb6de..32d8bb3 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -401,7 +401,6 @@
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
- #define LJ_TARGET_GC64 1
- #define LJ_ARCH_NOJIT 1 /* NYI */
--#define LJ_ARCH_NOFFI 1 /* Disable FFI for now. */
-
- #else
- #error "No target architecture defined"
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index 25e938c..d2fb19c 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -572,6 +572,41 @@
- goto done; \
- }
-
-+#elif LJ_TARGET_S390X
-+/* -- POSIX/s390x calling conventions --------------------------------------- */
-+
-+#define CCALL_HANDLE_STRUCTRET \
-+ /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \
-+ cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
-+ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
-+
-+#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
-+
-+#define CCALL_HANDLE_COMPLEXRET2 \
-+ if (!cc->retref) \
-+ *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */
-+
-+#define CCALL_HANDLE_STRUCTARG \
-+ /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
-+ if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
-+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
-+ sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \
-+ }
-+
-+#define CCALL_HANDLE_COMPLEXARG \
-+ /* Pass complex float in a GPR and complex double by reference. */ \
-+ if (sz != 2*sizeof(float)) { \
-+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
-+ sz = CTSIZE_PTR; \
-+ }
-+
-+#define CCALL_HANDLE_REGARG \
-+ if (isfp) { \
-+ if (nfpr < maxgpr) { dp = &cc->fpr[nfpr++]; goto done; } \
-+ } else { \
-+ if (ngpr < CCALL_NARG_FPR) { dp = &cc->gpr[ngpr++]; goto done; } \
-+ }
-+
- #else
- #error "Missing calling convention definitions for this architecture"
- #endif
-diff --git a/src/lj_ccall.h b/src/lj_ccall.h
-index 9f023fc..f237eaa 100644
---- a/src/lj_ccall.h
-+++ b/src/lj_ccall.h
-@@ -136,7 +136,11 @@ typedef union FPRArg {
- #define CCALL_SPS_FREE 0
-
- typedef intptr_t GPRArg;
--typedef double FPRArg;
-+typedef union FPRArg {
-+ double d;
-+ float f;
-+} FPRArg;
-+
- #else
- #error "Missing calling convention definitions for this architecture"
- #endif
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index 412dbf8..965f5d6 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -507,6 +507,15 @@ void lj_ccallback_mcode_free(CTState *cts)
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- ((float *)dp)[1] = *(float *)dp;
-
-+#elif LJ_TARGET_S390X
-+
-+#define CALLBACK_HANDLE_REGARG \
-+ if (isfp) { \
-+ if (nfpr < maxgpr) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
-+ } else { \
-+ if (ngpr < CCALL_NARG_FPR) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
-+ }
-+
- #else
- #error "Missing calling convention definitions for this architecture"
- #endif
-diff --git a/src/lj_target.h b/src/lj_target.h
-index 8dcae95..0b9763b 100644
---- a/src/lj_target.h
-+++ b/src/lj_target.h
-@@ -144,6 +144,8 @@ typedef uint32_t RegCost;
- #include "lj_target_ppc.h"
- #elif LJ_TARGET_MIPS
- #include "lj_target_mips.h"
-+#elif LJ_TARGET_S390X
-+#include "lj_target_s390x.h"
- #else
- #error "Missing include for target CPU"
- #endif
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-index 4e35891..6e0245f 100644
---- a/src/lj_target_s390x.h
-+++ b/src/lj_target_s390x.h
-@@ -1,26 +1,22 @@
- /*
--** Definitions for S390 CPUs.
-+** Definitions for IBM z/Architecture (s390x) CPUs.
- ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- */
-
--#ifndef _LJ_TARGET_S390_H
--#define _LJ_TARGET_S390_H
-+#ifndef _LJ_TARGET_S390X_H
-+#define _LJ_TARGET_S390X_H
-
- /* -- Registers IDs ------------------------------------------------------- */
-
- #define GPRDEF(_) \
- _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _R(15) \
--#if LJ_SOFTFP
--#define FPRDEF(_)
--#else
-+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15)
- #define FPRDEF(_) \
- _(F0) _(F1) _(F2) _(F3) \
- _(F4) _(F5) _(F6) _(F7) \
- _(F8) _(F9) _(F10) _(F11) \
- _(F12) _(F13) _(F14) _(F15)
--#endif
--#define VRIDDEF(_)
-+// TODO: VREG?
-
- #define RIDENUM(name) RID_##name,
-
-@@ -28,84 +24,28 @@ enum {
- GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
- FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
- RID_MAX,
-- RID_TMP = RID_LR,
-
- /* Calling conventions. */
-- RID_RET = RID_R0,
-- RID_RETLO = RID_R0,
-- RID_RETHI = RID_R1,
--#if LJ_SOFTFP
-- RID_FPRET = RID_R0,
--#else
-- RID_FPRET = RID_D0,
--#endif
-+ RID_SP = RID_R15,
-+ RID_RET = RID_R2,
-+ RID_FPRET = RID_F0,
-
- /* These definitions must match with the *.dasc file(s): */
-- RID_BASE = RID_R9, /* Interpreter BASE. */
-- RID_LPC = RID_R6, /* Interpreter PC. */
-- RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
-- RID_LREG = RID_R8, /* Interpreter L. */
-+ RID_BASE = RID_R7, /* Interpreter BASE. */
-+ RID_LPC = RID_R9, /* Interpreter PC. */
-+ RID_DISPATCH = RID_R10, /* Interpreter DISPATCH table. */
-
- /* Register ranges [min, max) and number of registers. */
- RID_MIN_GPR = RID_R0,
-- RID_MAX_GPR = RID_PC+1,
-- RID_MIN_FPR = RID_MAX_GPR,
--#if LJ_SOFTFP
-- RID_MAX_FPR = RID_MIN_FPR,
--#else
-- RID_MAX_FPR = RID_D15+1,
--#endif
-+ RID_MIN_FPR = RID_F0,
-+ RID_MAX_GPR = RID_MIN_FPR,
-+ RID_MAX_FPR = RID_MAX,
- RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
-- RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
-+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
- };
-
--#define RID_NUM_KREF RID_NUM_GPR
--#define RID_MIN_KREF RID_R0
--
- /* -- Register sets ------------------------------------------------------- */
-
--/* Make use of all registers, except sp, lr and pc. */
--#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
--#define RSET_GPREVEN \
-- (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
-- RID2RSET(RID_R8)|RID2RSET(RID_R10))
--#define RSET_GPRODD \
-- (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
-- RID2RSET(RID_R9)|RID2RSET(RID_R11))
--#if LJ_SOFTFP
--#define RSET_FPR 0
--#else
--#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
--#endif
--#define RSET_ALL (RSET_GPR|RSET_FPR)
--#define RSET_INIT RSET_ALL
--
--/* ABI-specific register sets. lr is an implicit scratch register. */
--#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
--#ifdef __APPLE__
--#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
--#else
--#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
--#endif
--#if LJ_SOFTFP
--#define RSET_SCRATCH_FPR 0
--#else
--#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
--#endif
--#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
--#define REGARG_FIRSTGPR RID_R0
--#define REGARG_LASTGPR RID_R3
--#define REGARG_NUMGPR 4
--#if LJ_ABI_SOFTFP
--#define REGARG_FIRSTFPR 0
--#define REGARG_LASTFPR 0
--#define REGARG_NUMFPR 0
--#else
--#define REGARG_FIRSTFPR RID_D0
--#define REGARG_LASTFPR RID_D7
--#define REGARG_NUMFPR 8
--#endif
--
- /* -- Spill slots --------------------------------------------------------- */
-
- /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
-@@ -127,63 +67,14 @@ enum {
-
- /* This definition must match with the *.dasc file(s). */
- typedef struct {
--#if !LJ_SOFTFP
- lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
--#endif
- int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
- int32_t spill[256]; /* Spill slots. */
- } ExitState;
-
--/* PC after instruction that caused an exit. Used to find the trace number. */
--#define EXITSTATE_PCREG RID_PC
--/* Highest exit + 1 indicates stack check. */
--#define EXITSTATE_CHECKEXIT 1
--
- #define EXITSTUB_SPACING 4
- #define EXITSTUBS_PER_GROUP 32
-
- /* -- Instructions -------------------------------------------------------- */
-
--/* Instruction fields. */
--#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
--#define ARMF_N(r) ((r) << 16)
--#define ARMF_D(r) ((r) << 12)
--#define ARMF_S(r) ((r) << 8)
--#define ARMF_M(r) (r)
--#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
--#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
--
--typedef enum S390xIns {
-- S390I_SR = 0x1B00000000000000,
-- S390I_AR = 0x1A00000000000000,
-- S390I_NR = 0x1400000000000000,
-- S390I_XR = 0x1700000000000000,
-- S390I_MR = 0x1C00000000000000,
-- S390I_LR = 0x1800000000000000,
-- S390I_C = 0x5900000000000000,
-- S390I_LH = 0x4800000000000000,
-- S390I_BASR = 0x0D00000000000000,
-- S390I_MVCL = 0x0e00000000000000,
-- S390I_ST = 0x5000000000000000,
-- S390I_TM = 0x9100000000000000,
-- S390I_MP = 0xbd00009000000000,
-- S390I_CLR = 0x1500000000000000,
--} S390xIns;
--
--typedef enum S390xShift {
-- S390SH_SLL, S390SH_SRL, S390SH_SRA
--} S390xShift;
--
--/* S390x condition codes. */
--typedef enum S390xCC {
-- /* Z- Zero , LZ - Less thena Zero , GZ - Greater than Zero
-- O - Overflow , NZ - Not Zero , ZC - Zero with carry
-- NZC - No Zero with carry , ZNC - Zero with No Carry
-- EQ - Equal , NE - Not Equal , LO - Loq , HI - High
-- */
-- CC_Z , CC_LZ , CC_GZ , CC_O ,
-- CC_NZ , CC_ZC , CC_NZC ,
-- CC_ZNC , CC_EQ , CC_NE , CC_LO , CC_HI
--} S390xCC;
--
- #endif
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 8fa928b..6ca7e13 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -628,7 +628,6 @@ static void build_subroutines(BuildCtx *ctx)
- | lg PC, -24(RB) // Restore PC from [cont|PC].
- | lg RA, -32(RB)
- |.if FFI
-- | stg r0, 0(r0) // TODO: remove once tested.
- | clfi RA, 1
- | jle >1
- |.endif
-@@ -641,13 +640,12 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.if FFI
- |1:
-- | stg r0, 0(r0) // TODO: remove once tested.
- | je ->cont_ffi_callback // cont = 1: return from FFI callback.
- | // cont = 0: Tail call from C function.
- | sgr RB, BASE
- | srl RB, 3(r0)
- | ahi RB, -3
-- | llgf RD, RB
-+ | llgfr RD, RB
- | j ->vm_call_tail
- |.endif
- |
-@@ -880,8 +878,17 @@ static void build_subroutines(BuildCtx *ctx)
- | j <3
- |
- |->vmeta_equal_cd:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ |.if FFI
-+ | lay PC, -4(PC)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | lgr CARG1, L:RB
-+ | llgf CARG2, -4(PC)
-+ | stg PC, SAVE_PC
-+ | brasl r14, extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
-+ | // 0/1 or TValue * (metamethod) returned in r2 (CRET1).
-+ | j <3
-+ |.endif
- |
- |->vmeta_istype:
- | lg L:RB, SAVE_L
-@@ -2165,9 +2172,58 @@ static void build_subroutines(BuildCtx *ctx)
- | stg r0, 0(r0)
- |
- |->vm_ffi_call: // Call C function via FFI.
-+ | // Caveat: needs special frame unwinding, see below.
-+ |.if FFI
-+ | .type CCSTATE, CCallState, r10
-+ | stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
-+ | lgr CCSTATE, CARG1
-+ |
-+ | // Readjust stack.
-+ | sgf sp, CCSTATE->spadj
-+ |
-+ | // Copy stack slots.
-+ | llgc r0, CCSTATE->nsp
-+ | cghi r0, 0
-+ | jle >3
-+ | lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
-+ | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
-+ |1:
-+ | cghi r0, 256
-+ | jl >2
-+ | mvc 0(256, r11), 0(r1)
-+ | aghi r1, 256*8
-+ | aghi r11, 256*8
-+ | aghi r0, -256
-+ | j <1
-+ |2:
-+ | cghi r0, 0
-+ | je >3
-+ | // TODO: exrl mvc rather than loop.
-+ | mvc 0(8, r11), 0(r1)
-+ | aghi r1, 8
-+ | aghi r11, 8
-+ | aghi r0, -1
-+ | j <2
-+ |3:
-+ |
-+ | lmg CARG1, CARG5, CCSTATE->gpr[0]
-+ | // TODO: conditionally load FPRs?
-+ | ld FARG1, CCSTATE->fpr[0]
-+ | ld FARG2, CCSTATE->fpr[1]
-+ | ld FARG3, CCSTATE->fpr[2]
-+ | ld FARG4, CCSTATE->fpr[3]
-+ |5:
-+ | lg r1, CCSTATE->func // TODO: move further up?
-+ | basr r14, r1
-+ |
-+ | stg CRET1, CCSTATE->gpr[0]
-+ | stg f0, CCSTATE->fpr[0]
-+ |
-+ | agf sp, CCSTATE->spadj
-+ | lmg r6, r15, 48(sp)
-+ | br r14
-+ |.endif
- |// Note: vm_ffi_call must be the last function in this object file!
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
- |
- |//-----------------------------------------------------------------------
- }
-@@ -2767,8 +2823,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- break;
- case BC_KCDATA:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ |.if FFI
-+ | ins_AND // RA = dst, RD = cdata const (~)
-+ | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3(r0)
-+ | lg RD, 0(RD, KBASE)
-+ | settp RD, LJ_TCDATA
-+ | stg RD, 0(RA, BASE)
-+ | ins_next
-+ |.endif
- break;
- case BC_KSHORT:
- | ins_AD // RA = dst, RD = signed int16 literal
---
-2.20.1
-
-
-From 1236fb05380374001d4d8771020d19f0b70aa7f9 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 9 Jan 2017 11:20:13 -0500
-Subject: [PATCH 200/247] Add stub dis_s390x.lua file to allow make install to
- work.
-
----
- src/jit/dis_s390x.lua | 1 +
- 1 file changed, 1 insertion(+)
- create mode 100644 src/jit/dis_s390x.lua
-
-diff --git a/src/jit/dis_s390x.lua b/src/jit/dis_s390x.lua
-new file mode 100644
-index 0000000..3c63033
---- /dev/null
-+++ b/src/jit/dis_s390x.lua
-@@ -0,0 +1 @@
-+-- Not yet implemented.
---
-2.20.1
-
-
-From 83e747ed75eb9e3ee280673441d08def190cd4dd Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 9 Jan 2017 14:16:44 -0500
-Subject: [PATCH 201/247] Improve ins_NEXT performance.
-
-Prioritise critical path and reduce number of instructions. About
-10% improvement on md5 benchmark.
----
- src/vm_s390x.dasc | 22 ++++++++--------------
- 1 file changed, 8 insertions(+), 14 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 6ca7e13..723efe2 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -148,29 +148,23 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; .endmacro
--|.macro ins_AB_; .endmacro
--|.macro ins_A_C; .endmacro
-+|.macro ins_ABC; srlg RB, RD, 8(r0); llgcr RC, RD; .endmacro
-+|.macro ins_AB_; srlg RB, RD, 8(r0); .endmacro
-+|.macro ins_A_C; llgcr RC, RD; .endmacro
- |.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
- |
- |// Instruction decode+dispatch.
- | // TODO: tune this, right now we always decode RA-D even if they aren't used.
- |.macro ins_NEXT
--| llgf RD, 0(PC)
- | // 32 63
- | // [ B | C | A | OP ]
- | // [ D | A | OP ]
--| llghr RA, RD
--| srlg RA, RA, 8(r0)
--| llgcr OP, RD
--| srlg RD, RD, 16(r0)
--| lgr RB, RD
--| srlg RB, RB, 8(r0)
--| llgcr RC, RD
--| la PC, 4(PC)
--| llgfr TMPR1, OP
--| sllg TMPR1, TMPR1, 3(r0) // TMPR1=OP*8
-+| llgc OP, 3(PC)
-+| llgh RD, 0(PC)
-+| llgc RA, 2(PC)
-+| sllg TMPR1, OP, 3(r0)
- | lg TMPR1, 0(TMPR1, DISPATCH)
-+| la PC, 4(PC)
- | br TMPR1
- |.endmacro
- |
---
-2.20.1
-
-
-From 1738161b8053da979c28951152fd793daaf42875 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 9 Jan 2017 15:57:37 -0500
-Subject: [PATCH 202/247] Delete some unused function stubs.
-
----
- src/vm_s390x.dasc | 20 +-------------------
- 1 file changed, 1 insertion(+), 19 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 723efe2..86909a9 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -426,10 +426,6 @@ static void build_subroutines(BuildCtx *ctx)
- | stg TMPR1, GL:RB->vmstate
- | j ->vm_leave_unw
- |
-- |->vm_unwind_rethrow:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- |
- |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
- | // (void *cframe)
- | nill CARG1, CFRAME_RAWMASK // Assumes high 48-bits set in CFRAME_RAWMASK.
-@@ -2124,21 +2120,7 @@ static void build_subroutines(BuildCtx *ctx)
- | vm_round vm_trunc, 5 // Round towards 0.
- |
- |// FP modulo x%y. Called by BC_MOD* and vm_arith.
-- |->vm_mod:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- |
-- |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
-- |->vm_powi_sse:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-- |
-- |//-----------------------------------------------------------------------
-- |//-- Miscellaneous functions --------------------------------------------
-- |//-----------------------------------------------------------------------
-- |
-- |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
-- |->vm_cpuid:
-+ |->vm_mod: // NYI.
- | stg r0, 0(r0)
- | stg r0, 0(r0)
- |
---
-2.20.1
-
-
-From fa259cd983b4637118a9c6d227ddefe7ba8c7c32 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 10:50:41 -0500
-Subject: [PATCH 203/247] Allow displacements to be used directly without
- register values.
-
-Allows sllg r1, r1, 3(r0,r0) to be written as sllg r1, r1, 3.
----
- dynasm/dasm_s390x.lua | 14 +-
- src/vm_s390x.dasc | 440 +++++++++++++++++++++---------------------
- 2 files changed, 226 insertions(+), 228 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 7d95f78..b175593 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -324,11 +324,16 @@ local function split_memop(arg)
- if d then
- return d, 0, parse_reg(b)
- end
-- -- Assuming the two registers are passed as "(r1,r2)", and displacement(d) is not specified
-+ -- Assume the two registers are passed as "(r1,r2)", and displacement(d) is not specified. TODO: not sure if we want to do this, GAS doesn't.
- local x, b = match(arg,"%(%s*("..reg..")%s*,%s*("..reg..")%s*%)$")
- if b then
- return 0, parse_reg(x), parse_reg(b)
- end
-+ -- Accept a lone integer as a displacement. TODO: allow expressions/variables here? Interacts badly with the other rules currently.
-+ local d = match(arg,"^(-?[%d]+)$")
-+ if d then
-+ return d, 0, 0
-+ end
- local reg, tailr = match(arg, "^([%w_:]+)%s*(.*)$")
- if reg then
- local r, tp = parse_reg(reg)
-@@ -336,13 +341,6 @@ local function split_memop(arg)
- return format(tp.ctypefmt, tailr), 0, r
- end
- end
-- -- Assuming that only displacement is passed, as either digit or label "45 or label1"
-- local d = match(arg,"[%w_]+")
-- if d then
-- return d, 0, 0
-- end
-- -- TODO: handle values without registers?
-- -- TODO: handle registers without a displacement? -- done, above ,needs to be tested
- werror("bad memory operand: "..arg)
- return nil
- end
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 86909a9..a441498 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -148,8 +148,8 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; srlg RB, RD, 8(r0); llgcr RC, RD; .endmacro
--|.macro ins_AB_; srlg RB, RD, 8(r0); .endmacro
-+|.macro ins_ABC; srlg RB, RD, 8; llgcr RC, RD; .endmacro
-+|.macro ins_AB_; srlg RB, RD, 8; .endmacro
- |.macro ins_A_C; llgcr RC, RD; .endmacro
- |.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
- |
-@@ -162,7 +162,7 @@
- | llgc OP, 3(PC)
- | llgh RD, 0(PC)
- | llgc RA, 2(PC)
--| sllg TMPR1, OP, 3(r0)
-+| sllg TMPR1, OP, 3
- | lg TMPR1, 0(TMPR1, DISPATCH)
- | la PC, 4(PC)
- | br TMPR1
-@@ -190,7 +190,7 @@
- | lg PC, LFUNC:RB->pc
- | llgf RA, 0(PC) // TODO: combine loads?
- | llgcr OP, RA
--| sllg TMPR1, OP, 3(r0)
-+| sllg TMPR1, OP, 3
- | la PC, 4(PC)
- | lg TMPR1, 0(TMPR1, DISPATCH)
- | br TMPR1
-@@ -211,7 +211,7 @@
- |//-----------------------------------------------------------------------
- |
- |// Macros to clear or set tags.
--|.macro cleartp, reg; sllg reg, reg, 17(r0); srlg reg, reg, 17(r0); .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
-+|.macro cleartp, reg; sllg reg, reg, 17; srlg reg, reg, 17; .endmacro // TODO: use nihf
instead? would introduce dependence on z9-109.
- |.macro settp, reg, tp
- | oihh reg, ((tp>>1) &0xffff)
- | oihl reg, ((tp<<15)&0x8000)
-@@ -230,18 +230,18 @@
- |
- |// Macros to test operand types.
- |.macro checktp_nc, reg, tp, target
--| srag ITYPE, reg, 47(r0)
-+| srag ITYPE, reg, 47
- | clfi ITYPE, tp
- | jne target
- |.endmacro
- |.macro checktp, reg, tp, target
--| srag ITYPE, reg, 47(r0)
-+| srag ITYPE, reg, 47
- | cleartp reg
- | clfi ITYPE, tp
- | jne target
- |.endmacro
- |.macro checktptp, src, tp, target
--| srag ITYPE, src, 47(r0)
-+| srag ITYPE, src, 47
- | clfi ITYPE, tp
- | jne target
- |.endmacro
-@@ -250,7 +250,7 @@
- |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
- |
- |.macro checknumx, reg, target, jump
--| srag ITYPE, reg, 47(r0)
-+| srag ITYPE, reg, 47
- | clfi ITYPE, LJ_TISNUM
- | jump target
- |.endmacro
-@@ -273,7 +273,7 @@
- | // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
- | // Can't clobber TMPR1 or condition code.
- | lgr TMPR2, TMPR1 // Workaround because TMPR2 == r0 and can't be used in lay.
--| sllg TMPR1, reg, 2(r0)
-+| sllg TMPR1, reg, 2
- | lay PC, (-BCBIAS_J*4)(TMPR1, PC)
- | lgr TMPR1, TMPR2
- |.endmacro
-@@ -394,7 +394,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cghi RA, 0
- | je <5 // But check for LUA_MULTRET+1.
- | sgr RA, RD // Negative result!
-- | sllg TMPR1, RA, 3(r0)
-+ | sllg TMPR1, RA, 3
- | lay BASE, 0(TMPR1, BASE) // Correct top.
- | j <5
- |
-@@ -459,7 +459,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_growstack_f: // Grow stack for fixarg Lua function.
- | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
-- | sllg RD, NARGS:RD, 3(r0)
-+ | sllg RD, NARGS:RD, 3
- | lay RD, -8(RD, BASE)
- |1:
- | llgc RA, (PC2PROTO(framesize)-4)(PC)
-@@ -477,7 +477,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg LFUNC:RB, -16(BASE)
- | cleartp LFUNC:RB
- | sgr RD, BASE
-- | srlg RD, RD, 3(r0)
-+ | srlg RD, RD, 3
- | aghi NARGS:RD, 1
- | // BASE = new base, RB = LFUNC, RD = nargs+1
- | ins_callt // Just retry the call.
-@@ -513,7 +513,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg BASE, L:RB->base
- | lg RD, L:RB->top
- | sgr RD, RA
-- | srlg RD, RD, 3(r0)
-+ | srlg RD, RD, 3
- | aghi RD, 1 // RD = nresults+1
- | sgr RA, BASE // RA = resultofs
- | lg PC, -8(BASE)
-@@ -558,7 +558,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- | lg RD, L:LREG->top
- | sgr RD, RA
-- | srlg NARGS:RD, NARGS:RD, 3(r0) // TODO: support '3' on its own in dynasm.
-+ | srlg NARGS:RD, NARGS:RD, 3 // TODO: support '3' on its own in dynasm.
- | aghi NARGS:RD, 1 // RD = nargs+1
- |
- |->vm_call_dispatch:
-@@ -611,7 +611,7 @@ static void build_subroutines(BuildCtx *ctx)
- | nill PC, -8
- | lgr RB, BASE
- | sgr BASE, PC // Restore caller BASE.
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg TMPR1, RD, 3
- | lghi TMPR2, LJ_TNIL
- | stg TMPR2, -8(RA, TMPR1) // Ensure one valid arg.
- | lgr RC, RA // ... in [RC]
-@@ -633,7 +633,7 @@ static void build_subroutines(BuildCtx *ctx)
- | je ->cont_ffi_callback // cont = 1: return from FFI callback.
- | // cont = 0: Tail call from C function.
- | sgr RB, BASE
-- | srl RB, 3(r0)
-+ | srl RB, 3
- | ahi RB, -3
- | llgfr RD, RB
- | j ->vm_call_tail
-@@ -641,13 +641,13 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_cat: // BASE = base, RC = result, RB = mbase
- | llgc RA, PC_RB
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | aghi RB, -32
- | la RA, 0(RA, BASE)
- | sgr RA, RB
- | je ->cont_ra
- | lcgr RA, RA
-- | srlg RA, RA, 3(r0)
-+ | srlg RA, RA, 3
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
- | lgfr CARG3, RA
-@@ -679,11 +679,11 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_tgetv:
- | llgc RC, PC_RC // Reload TValue *k from RC.
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | la RC, 0(RC, BASE)
- |1:
- | llgc RB, PC_RB // Reload TValue *t from RB.
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | la RB, 0(RB, BASE)
- |2:
- | lg L:CARG1, SAVE_L
-@@ -699,7 +699,7 @@ static void build_subroutines(BuildCtx *ctx)
- | je >3
- |->cont_ra: // BASE = base, RC = result
- | llgc RA, PC_RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg RB, 0(RC)
- | stg RB, 0(RA, BASE)
- | ins_next
-@@ -751,11 +751,11 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_tsetv:
- | llgc RC, PC_RC // Reload TValue *k from RC.
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | la RC, 0(RC, BASE)
- |1:
- | llgc RB, PC_RB // Reload TValue *t from RB.
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | la RB, 0(RB, BASE)
- |2:
- | lg L:CARG1, SAVE_L
-@@ -771,7 +771,7 @@ static void build_subroutines(BuildCtx *ctx)
- | je >3
- | // NOBARRIER: lj_meta_tset ensures the table is not black.
- | llgc RA, PC_RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg RB, 0(RA, BASE)
- | stg RB, 0(RC)
- |->cont_nop: // BASE = base, (RC = result)
-@@ -783,7 +783,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg PC, -24(RA) // [cont|PC]
- | llgc RC, PC_RA
- | // Copy value to third argument.
-- | sllg RB, RC, 3(r0)
-+ | sllg RB, RC, 3
- | lg RB, 0(RB, BASE)
- | stg RB, 16(RA)
- | la PC, FRAME_CONT(RA)
-@@ -811,9 +811,9 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_comp:
- | llgh RD, PC_RD
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | llgc RA, PC_RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg L:RB, SAVE_L
- | stg BASE, L:RB->base
- | la CARG2, 0(RA, BASE)
-@@ -840,7 +840,7 @@ static void build_subroutines(BuildCtx *ctx)
- |->cont_condt: // BASE = base, RC = result
- | la PC, 4(PC)
- | lg ITYPE, 0(RC)
-- | srag ITYPE, ITYPE, 47(r0)
-+ | srag ITYPE, ITYPE, 47
- | lghi TMPR2, LJ_TISTRUECOND
- | clr ITYPE, TMPR2 // Branch if result is true.
- | jl <5
-@@ -848,7 +848,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_condf: // BASE = base, RC = result
- | lg ITYPE, 0(RC)
-- | srag ITYPE, ITYPE, 47(r0)
-+ | srag ITYPE, ITYPE, 47
- | lghi TMPR2, LJ_TISTRUECOND
- | clr ITYPE, TMPR2 // Branch if result is false.
- | j <4
-@@ -897,8 +897,8 @@ static void build_subroutines(BuildCtx *ctx)
- | llgc RB, PC_RB
- | llgc RC, PC_RC
- |->vmeta_arith_vn:
-- | sllg RB, RB, 3(r0)
-- | sllg RC, RC, 3(r0)
-+ | sllg RB, RB, 3
-+ | sllg RC, RC, 3
- | lay RB, 0(RB, BASE)
- | lay RC, 0(RC, KBASE)
- | j >1
-@@ -907,8 +907,8 @@ static void build_subroutines(BuildCtx *ctx)
- | llgc RC, PC_RC
- | llgc RB, PC_RB
- |->vmeta_arith_nv:
-- | sllg RC, RC, 3(r0)
-- | sllg RB, RB, 3(r0)
-+ | sllg RC, RC, 3
-+ | sllg RB, RB, 3
- | lay TMPR1, 0(RC, KBASE)
- | lay RC, 0(RB, BASE)
- | lgr RB, TMPR1
-@@ -916,7 +916,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_unm:
- | llgh RD, PC_RD
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | la RC, 0(RD, BASE)
- | lgr RB, RC
- | j >1
-@@ -925,13 +925,13 @@ static void build_subroutines(BuildCtx *ctx)
- | llgc RB, PC_RB
- | llgc RC, PC_RC
- |->vmeta_arith_vv:
-- | sllg RC, RC, 3(r0)
-- | sllg RB, RB, 3(r0)
-+ | sllg RC, RC, 3
-+ | sllg RB, RB, 3
- | lay RB, 0(RB, BASE)
- | lay RC, 0(RC, BASE)
- |1:
- | llgc RA, PC_RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lay RA, 0(RA, BASE)
- | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
- | lgr CARG2, RA
-@@ -960,7 +960,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_len:
- | llgh RD, PC_RD
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lg L:RB, SAVE_L
- | stg BASE, L:RB->base
- | la CARG2, 0(RD, BASE)
-@@ -974,7 +974,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cghi RC, 0
- | jne ->vmeta_binop // Binop call for compatibility.
- | llgh RD, PC_RD
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lg TAB:CARG1, 0(RD, BASE)
- | cleartp TAB:CARG1
- | j ->BC_LEN_Z
-@@ -993,7 +993,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
- | lay CARG2, -16(RA)
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lay CARG3, -8(RA, RD)
- | stg PC, SAVE_PC
- | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
-@@ -1023,7 +1023,7 @@ static void build_subroutines(BuildCtx *ctx)
- | llgc OP, PC_OP
- | llgc RA, PC_RA
- | llgh RD, PC_RD
-- | sllg TMPR1, OP, 3(r0)
-+ | sllg TMPR1, OP, 3
- | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH) // Retry FORI or JFORI.
- | br TMPR1
- |
-@@ -1079,7 +1079,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc_1 assert
- | lg RB, 0(BASE)
-- | srag ITYPE, RB, 47(r0)
-+ | srag ITYPE, RB, 47
- | clfi ITYPE, LJ_TISTRUECOND; jhe ->fff_fallback
- | lg PC, -8(BASE)
- | st RD, SAVE_MULTRES
-@@ -1101,7 +1101,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc_1 type
- | lg RC, 0(BASE)
-- | srag RC, RC, 47(r0)
-+ | srag RC, RC, 47
- | lghi RB, LJ_TISNUM
- | clgr RC, RB
- | jnl >1
-@@ -1112,7 +1112,7 @@ static void build_subroutines(BuildCtx *ctx)
- |2:
- | lg CFUNC:RB, -16(BASE)
- | cleartp CFUNC:RB
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | lg STR:RC, ((char *)(&((GCfuncC *)0)->upvalue))(RC, CFUNC:RB)
- | lg PC, -8(BASE)
- | settp STR:RC, LJ_TSTR
-@@ -1161,7 +1161,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lhi TMPR2, -1
- | xr ITYPE, TMPR2 // not ITYPE
- | llgfr ITYPE, ITYPE
-- | sllg ITYPE, ITYPE, 3(r0)
-+ | sllg ITYPE, ITYPE, 3
- | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
- | j <2
- |
-@@ -1305,7 +1305,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cl RA, TAB:RB->asize; jhe >2 // Not in array part?
- | lg RD, TAB:RB->array
- | lgfr TMPR1, RA
-- | sllg TMPR1, TMPR1, 3(r0)
-+ | sllg TMPR1, TMPR1, 3
- | la RD, 0(TMPR1, RD)
- |1:
- | lg TMPR2, 0(RD)
-@@ -1361,7 +1361,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // Note: this does a (harmless) copy of the function to the PC slot, too.
- | lgr KBASE, RD
- |2:
-- | sllg TMPR1, KBASE, 3(r0)
-+ | sllg TMPR1, KBASE, 3
- | lg RB, -24(TMPR1, RA)
- | stg RB, -16(TMPR1, RA)
- | aghi KBASE, -1
-@@ -1412,7 +1412,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg PC, 0(RA)
- | la RA, 8(RA)
- |1:
-- | sllg TMPR1, NARGS:RD, 3(r0)
-+ | sllg TMPR1, NARGS:RD, 3
- |.if resume
- | lay PC, -16(TMPR1, RA) // Check stack space (-1-thread).
- |.else
-@@ -1467,7 +1467,7 @@ static void build_subroutines(BuildCtx *ctx)
- | je >6 // No results?
- | la RD, 0(PC, BASE)
- | llgfr PC, PC
-- | srlg PC, PC, 3(r0)
-+ | srlg PC, PC, 3
- | clg RD, L:RB->maxstack
- | jh >9 // Need to grow stack?
- |
-@@ -1539,7 +1539,7 @@ static void build_subroutines(BuildCtx *ctx)
- | tmll TMPR2, CFRAME_RESUME
- | je ->fff_fallback
- | stg BASE, L:RB->base
-- | sllg RD, NARGS:RD, 3(r0)
-+ | sllg RD, NARGS:RD, 3
- | lay RD, -8(RD, BASE)
- | stg RD, L:RB->top
- | lghi RD, 0
-@@ -1591,12 +1591,12 @@ static void build_subroutines(BuildCtx *ctx)
- | // Adjust BASE. KBASE is assumed to be set for the calling frame.
- | llgc RA, PC_RA
- | lcgr RA, RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
- | ins_next
- |
- |6: // Fill up results with nil.
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg TMPR1, RD, 3
- | lghi TMPR2, LJ_TNIL
- | stg TMPR2, -24(TMPR1, BASE)
- | la RD, 1(RD)
-@@ -1701,7 +1701,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro math_minmax, name, cjmp
- | .ffunc name
- | lghi RA, 2*8
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg TMPR1, RD, 3
- | lg RB, 0(BASE)
- | ld f0, 0(BASE)
- | checkint RB, >4
-@@ -1792,7 +1792,7 @@ static void build_subroutines(BuildCtx *ctx)
- | checkstr STR:RB, ->fff_fallback
- | lg ITYPE, 8(BASE)
- | lgfr RA, ITYPE
-- | srag ITYPE, ITYPE, 47(r0)
-+ | srag ITYPE, ITYPE, 47
- | cghi ITYPE, LJ_TISNUM
- | jne ->fff_fallback
- | llgf RC, STR:RB->len
-@@ -1890,7 +1890,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name, 2
- | lgr TMPR1, NARGS:RD // Save for fallback.
-- | sllg RD, NARGS:RD, 3(r0)
-+ | sllg RD, NARGS:RD, 3
- | lay RD, -16(RD, BASE)
- |1:
- | clgr RD, BASE
-@@ -1974,7 +1974,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg PC, -8(BASE) // Fallback may overwrite PC.
- | stg PC, SAVE_PC // Redundant (but a defined value).
- | stg BASE, L:RB->base
-- | sllg RD, NARGS:RD, 3(r0)
-+ | sllg RD, NARGS:RD, 3
- | lay RD, -8(RD, BASE)
- | la RA, (8*LUA_MINSTACK)(RD) // Ensure enough space for handler.
- | stg RD, L:RB->top
-@@ -1992,7 +1992,7 @@ static void build_subroutines(BuildCtx *ctx)
- |1:
- | lg RA, L:RB->top
- | sgr RA, BASE
-- | srlg RA, RA, 3(r0)
-+ | srlg RA, RA, 3
- | cghi RD, 0
- | la NARGS:RD, 1(RA)
- | lg LFUNC:RB, -16(BASE)
-@@ -2007,7 +2007,7 @@ static void build_subroutines(BuildCtx *ctx)
- | jne >3
- | llgc RB, PC_RA
- | lcgr RB, RB
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lay BASE, -16(RB, BASE) // base = base - (RB+2)*8
- | j ->vm_call_dispatch // Resolve again for tailcall.
- |3:
-@@ -2030,7 +2030,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg L:RB, SAVE_L
- | stg PC, SAVE_PC // Redundant (but a defined value).
- | stg BASE, L:RB->base
-- | sllg RD, NARGS:RD, 3(r0)
-+ | sllg RD, NARGS:RD, 3
- | lay RD, -8(RD, BASE)
- | lgr CARG1, L:RB
- | stg RD, L:RB->top
-@@ -2038,7 +2038,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg BASE, L:RB->base
- | lg RD, L:RB->top
- | sgr RD, BASE
-- | srlg RD, RD, 3(r0)
-+ | srlg RD, RD, 3
- | aghi NARGS:RD, 1
- | lg r14, TMP_STACK // Restore return address.
- | br r14
-@@ -2048,40 +2048,40 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_record: // Dispatch target for recording phase.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->vm_rethook: // Dispatch target for return hooks.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->vm_inshook: // Dispatch target for instr/line hooks.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->cont_hook: // Continue from hook yield.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->vm_hotloop: // Hot loop counter underflow.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->vm_callhook: // Dispatch target for call hooks.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->vm_hotcall: // Hot call counter underflow.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->cont_stitch: // Trace stitching.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->vm_profhook: // Dispatch target for profiler hook.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |//-----------------------------------------------------------------------
- |//-- Trace exit handler -------------------------------------------------
-@@ -2090,11 +2090,11 @@ static void build_subroutines(BuildCtx *ctx)
- |// Called from an exit stub with the exit number on the stack.
- |// The 16 bit exit number is stored with two (sign-extended) push imm8.
- |->vm_exit_handler:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |->vm_exit_interp:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |//-----------------------------------------------------------------------
- |//-- Math helper functions ----------------------------------------------
-@@ -2112,7 +2112,7 @@ static void build_subroutines(BuildCtx *ctx)
- | ldr f0, f2
- | br r14
- |1: // partial remainder (sanity check)
-- | stg r0, 0(r0)
-+ | stg r0, 0
- |.endmacro
- |
- | vm_round vm_floor, 7 // Round towards -inf.
-@@ -2121,16 +2121,16 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// FP modulo x%y. Called by BC_MOD* and vm_arith.
- |->vm_mod: // NYI.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |//-----------------------------------------------------------------------
- |//-- Assertions ---------------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->assert_bad_for_arg_type:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- #ifdef LUA_USE_ASSERT
- #endif
- |
-@@ -2140,12 +2140,12 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |// Handler for callback functions. Callback slot number in ah/al.
- |->vm_ffi_callback:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->cont_ffi_callback: // Return from FFI callback.
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- |
- |->vm_ffi_call: // Call C function via FFI.
- | // Caveat: needs special frame unwinding, see below.
-@@ -2239,14 +2239,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1, RD = src2, JMP with RD = target
- | ins_AD
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | ld f0, 0(RA, BASE)
- | ld f1, 0(RD, BASE)
- | lg RA, 0(RA, BASE)
- | lg RD, 0(RD, BASE)
-- | srag ITYPE, RA, 47(r0)
-- | srag RB, RD, 47(r0)
-+ | srag ITYPE, RA, 47
-+ | srag RB, RD, 47
- |
- | clfi ITYPE, LJ_TISNUM; jne >7
- | clfi RB, LJ_TISNUM; jne >8
-@@ -2283,15 +2283,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQV: case BC_ISNEV:
- vk = op == BC_ISEQV;
- | ins_AD // RA = src1, RD = src2, JMP with RD = target
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | ld f1, 0(RD, BASE)
- | lg RD, 0(RD, BASE)
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | ld f0, 0(RA, BASE)
- | lg RA, 0(RA, BASE)
- | la PC, 4(PC)
-- | srag RB, RD, 47(r0)
-- | srag ITYPE, RA, 47(r0)
-+ | srag RB, RD, 47
-+ | srag ITYPE, RA, 47
- | clfi RB, LJ_TISNUM; jne >7
- | clfi ITYPE, LJ_TISNUM; jne >8
- | cr RD, RA
-@@ -2397,8 +2397,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQS: case BC_ISNES:
- vk = op == BC_ISEQS;
- | ins_AND // RA = src, RD = str const, JMP with RD = target
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | lg RB, 0(RA, BASE)
- | la PC, 4(PC)
- | checkstr RB, >3
-@@ -2413,8 +2413,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQN: case BC_ISNEN:
- vk = op == BC_ISEQN;
- | ins_AD // RA = src, RD = num const, JMP with RD = target
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | ld f0, 0(RA, BASE)
- | lg RB, 0(RA, BASE)
- | ld f1, 0(RD, KBASE)
-@@ -2452,9 +2452,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
- | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg RB, 0(RA, BASE)
-- | srag RB, RB, 47(r0)
-+ | srag RB, RB, 47
- | la PC, 4(PC)
- | cr RB, RD
- if (!LJ_HASFFI) goto iseqne_test;
-@@ -2481,14 +2481,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
- | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
-- | sllg RD, RD, 3(r0)
-- | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3
-+ | sllg RA, RA, 3
- | lg ITYPE, 0(RD, BASE)
- | la PC, 4(PC)
- if (op == BC_ISTC || op == BC_ISFC) {
- | lgr RB, ITYPE
- }
-- | srag ITYPE, ITYPE, 47(r0)
-+ | srag ITYPE, ITYPE, 47
- | clfi ITYPE, LJ_TISTRUECOND
- if (op == BC_IST || op == BC_ISTC) {
- | jhe >1
-@@ -2507,34 +2507,34 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISTYPE:
- | ins_AD // RA = src, RD = -type
- | lghr RD, RD // TODO: always sign extend RD?
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg RB, 0(RA, BASE)
-- | srag RB, RB, 47(r0)
-+ | srag RB, RB, 47
- | agr RB, RD
- | jne ->vmeta_istype
- | ins_next
- break;
- case BC_ISNUM:
- | ins_AD // RA = src, RD = -(TISNUM-1)
-- | sllg TMPR1, RA, 3(r0)
-+ | sllg TMPR1, RA, 3
- | lg TMPR1, 0(TMPR1, BASE)
- | checknumtp TMPR1, ->vmeta_istype
- | ins_next
- break;
- case BC_MOV:
- | ins_AD // RA = dst, RD = src
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lg RB, 0(RD, BASE)
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg RB, 0(RA, BASE)
- | ins_next_
- break;
- case BC_NOT:
- | ins_AD // RA = dst, RD = src
-- | sllg RD, RD, 3(r0)
-- | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3
-+ | sllg RA, RA, 3
- | lg RB, 0(RD, BASE)
-- | srag RB, RB, 47(r0)
-+ | srag RB, RB, 47
- | load_false RC
- | cghi RB, LJ_TTRUE
- | je >1 // TODO: Maybe do something fancy to avoid the jump?
-@@ -2545,8 +2545,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_UNM:
- | ins_AD // RA = dst, RD = src
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | lg RB, 0(RD, BASE)
- | checkint RB, >3
- | lcr RB, RB; jo >2
-@@ -2565,12 +2565,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_LEN:
- | ins_AD // RA = dst, RD = src
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lg RD, 0(RD, BASE)
- | checkstr RD, >2
- | llgf RD, STR:RD->len
- |1:
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | setint RD
- | stg RD, 0(RA, BASE)
- | ins_next
-@@ -2604,9 +2604,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- |.macro ins_arithpre
- | ins_ABC
-- | sllg RB, RB, 3(r0)
-- | sllg RC, RC, 3(r0)
-- | sllg RA, RA, 3(r0)
-+ | sllg RB, RB, 3
-+ | sllg RC, RC, 3
-+ | sllg RA, RA, 3
- |.endmacro
- |
- |.macro ins_arithfp, ins
-@@ -2745,8 +2745,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_POW:
- | ins_ABC
-- | sllg RB, RB, 3(r0)
-- | sllg RC, RC, 3(r0)
-+ | sllg RB, RB, 3
-+ | sllg RC, RC, 3
- | ld FARG1, 0(RB, BASE)
- | ld FARG2, 0(RC, BASE)
- | lg TMPR2, 0(RB, BASE)
-@@ -2757,7 +2757,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | brasl r14, extern pow // double pow(double x, double y), result in f0.
- | llgc RA, PC_RA
- | lgr BASE, RB
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | std f0, 0(RA, BASE)
- | ins_next
- break;
-@@ -2768,7 +2768,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg BASE, L:CARG1->base
- | lgr CARG3, RC
- | sgr CARG3, RB
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | la CARG2, 0(RC, BASE)
- |->BC_CAT_Z:
- | lgr L:RB, L:CARG1
-@@ -2779,9 +2779,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ltgr RC, CRET1
- | jne ->vmeta_binop
- | llgc RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | llgc RA, PC_RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg RC, 0(RB, BASE)
- | stg RC, 0(RA, BASE)
- | ins_next
-@@ -2791,18 +2791,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_KSTR:
- | ins_AND // RA = dst, RD = str const (~)
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lg RD, 0(RD, KBASE)
- | settp RD, LJ_TSTR
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg RD, 0(RA, BASE)
- | ins_next
- break;
- case BC_KCDATA:
- |.if FFI
- | ins_AND // RA = dst, RD = cdata const (~)
-- | sllg RD, RD, 3(r0)
-- | sllg RA, RA, 3(r0)
-+ | sllg RD, RD, 3
-+ | sllg RA, RA, 3
- | lg RD, 0(RD, KBASE)
- | settp RD, LJ_TCDATA
- | stg RD, 0(RA, BASE)
-@@ -2814,22 +2814,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Assumes DUALNUM.
- | lhr RD, RD // Sign-extend literal to 32-bits.
- | setint RD
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg RD, 0(RA, BASE)
- | ins_next
- break;
- case BC_KNUM:
- | ins_AD // RA = dst, RD = num const
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | ld f0, 0(RD, KBASE)
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | std f0, 0(RA, BASE)
- | ins_next
- break;
- case BC_KPRI:
- | ins_AD // RA = dst, RD = primitive type (~)
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 47(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 47
- | lghi TMPR2, -1
- | xgr RD, TMPR2 // not
- | stg RD, 0(RA, BASE)
-@@ -2837,8 +2837,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_KNIL:
- | ins_AD // RA = dst_start, RD = dst_end
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | la RA, 8(RA, BASE)
- | la RD, 0(RD, BASE)
- | lghi RB, LJ_TNIL
-@@ -2855,8 +2855,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_UGET:
- | ins_AD // RA = dst, RD = upvalue #
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | lg LFUNC:RB, -16(BASE)
- | cleartp LFUNC:RB
- | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RD, LFUNC:RB)
-@@ -2871,13 +2871,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_AD // RA = upvalue #, RD = src
- | lg LFUNC:RB, -16(BASE)
- | cleartp LFUNC:RB
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
- | // TODO: (instead of next 2 instructions) tm UPVAL:RB->closed, 0xff
- | llgc TMPR2, UPVAL:RB->closed
- | tmll TMPR2, 0xff
- | lg RB, UPVAL:RB->v
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg TMPR1, RD, 3
- | lg RA, 0(TMPR1, BASE)
- | stg RA, 0(RB)
- | je >1
-@@ -2890,7 +2890,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- |
- |2: // Upvalue is black. Check if new value is collectable and white.
-- | srag RD, RA, 47(r0)
-+ | srag RD, RA, 47
- | ahi RD, -LJ_TISGCV
- | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
- | jle <1
-@@ -2911,8 +2911,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_USETS:
- | ins_AND // RA = upvalue #, RD = str const (~)
- | lg LFUNC:RB, -16(BASE)
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | cleartp LFUNC:RB
- | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
- | lg STR:RA, 0(RD, KBASE)
-@@ -2946,8 +2946,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_USETN:
- | ins_AD // RA = upvalue #, RD = num const
- | lg LFUNC:RB, -16(BASE)
-- | sllg RA, RA, 3(r0)
-- | sllg RD, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RD, RD, 3
- | cleartp LFUNC:RB
- | ld f0, 0(RD, KBASE)
- | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
-@@ -2958,10 +2958,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_USETP:
- | ins_AD // RA = upvalue #, RD = primitive type (~)
- | lg LFUNC:RB, -16(BASE)
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | cleartp LFUNC:RB
- | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
-- | sllg RD, RD, 47(r0)
-+ | sllg RD, RD, 47
- | lghi TMPR2, -1
- | xgr RD, TMPR2
- | lg RA, UPVAL:RB->v
-@@ -2975,7 +2975,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ltg TMPR2, L:RB->openupval
- | je >1
- | stg BASE, L:RB->base
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | la CARG2, 0(RA, BASE)
- | lgr L:CARG1, L:RB
- | brasl r14, extern lj_func_closeuv // (lua_State *L, TValue *level)
-@@ -2990,7 +2990,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg BASE, L:RB->base
- | lg CARG3, -16(BASE)
- | cleartp CARG3
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lg CARG2, 0(RD, KBASE) // Fetch GCproto *.
- | lgr CARG1, L:RB
- | stg PC, SAVE_PC
-@@ -2999,7 +2999,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // GCfuncL * returned in r2 (CRET1).
- | lg BASE, L:RB->base
- | llgc RA, PC_RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | settp LFUNC:CRET1, LJ_TFUNC
- | stg LFUNC:CRET1, 0(RA, BASE)
- | ins_next
-@@ -3013,7 +3013,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg PC, SAVE_PC
- | jhe >5
- |1:
-- | srlg CARG3, RD, 11(r0)
-+ | srlg CARG3, RD, 11
- | llill TMPR2, 0x7ff
- | nr RD, TMPR2
- | cr RD, TMPR2
-@@ -3025,7 +3025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Table * returned in r2 (CRET1).
- | lg BASE, L:RB->base
- | llgc RA, PC_RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | settp TAB:CRET1, LJ_TTAB
- | stg TAB:CRET1, 0(RA, BASE)
- | ins_next
-@@ -3047,7 +3047,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg BASE, L:RB->base
- | jhe >3
- |2:
-- | sllg RD, RD, 3(r0)
-+ | sllg RD, RD, 3
- | lg TAB:CARG2, 0(RD, KBASE)
- | lgr L:CARG1, L:RB
- | brasl r14, extern lj_tab_dup // (lua_State *L, Table *kt)
-@@ -3055,7 +3055,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg BASE, L:RB->base
- | llgc RA, PC_RA
- | settp TAB:CRET1, LJ_TTAB
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg TAB:CRET1, 0(RA, BASE)
- | ins_next
- |3:
-@@ -3072,7 +3072,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg LFUNC:RB, -16(BASE)
- | cleartp LFUNC:RB
- | lg TAB:RB, LFUNC:RB->env
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg TMPR1, RD, 3
- | lg STR:RC, 0(TMPR1, KBASE)
- | j ->BC_TGETS_Z
- break;
-@@ -3081,16 +3081,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg LFUNC:RB, -16(BASE)
- | cleartp LFUNC:RB
- | lg TAB:RB, LFUNC:RB->env
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg TMPR1, RD, 3
- | lg STR:RC, 0(TMPR1, KBASE)
- | j ->BC_TSETS_Z
- break;
-
- case BC_TGETV:
- | ins_ABC // RA = dst, RB = table, RC = key
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | lg RC, 0(RC, BASE)
- | checktab TAB:RB, ->vmeta_tgetv
- |
-@@ -3099,14 +3099,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cl RC, TAB:RB->asize // Takes care of unordered, too.
- | jhe ->vmeta_tgetv // Not in array part? Use fallback.
- | llgfr RC, RC
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | ag RC, TAB:RB->array
- | // Get array slot.
- | lg ITYPE, 0(RC)
- | cghi ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
- | je >2
- |1:
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg ITYPE, 0(RA, BASE)
- | ins_next
- |
-@@ -3126,11 +3126,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETS:
- | ins_ABC
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
- | lghi TMPR1, -1
- | xgr RC, TMPR1
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | lg STR:RC, 0(RC, KBASE)
- | checktab TAB:RB, ->vmeta_tgets
- |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
-@@ -3148,7 +3148,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cghi ITYPE, LJ_TNIL
- | je >5 // Key found, but nil value?
- |2:
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg ITYPE, 0(RA, BASE)
- | ins_next
- |
-@@ -3170,19 +3170,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETB:
- | ins_ABC // RA = dst, RB = table, RC = byte literal
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
- | checktab TAB:RB, ->vmeta_tgetb
- | cl RC, TAB:RB->asize
- | jhe ->vmeta_tgetb
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | ag RC, TAB:RB->array
- | // Get array slot.
- | lg ITYPE, 0(RC)
- | cghi ITYPE, LJ_TNIL
- | je >2
- |1:
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg ITYPE, 0(RA, BASE)
- | ins_next
- |
-@@ -3197,29 +3197,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETR:
- | ins_ABC // RA = dst, RB = table, RC = key
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
- | cleartp TAB:RB
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | llgf RC, 4(RC, BASE) // Load low word (big endian).
- | cl RC, TAB:RB->asize
- | jhe ->vmeta_tgetr // Not in array part? Use fallback.
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | ag RC, TAB:RB->array
- | // Get array slot.
- |->BC_TGETR_Z:
- | lg ITYPE, 0(RC)
- |->BC_TGETR2_Z:
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | stg ITYPE, 0(RA, BASE)
- | ins_next
- break;
-
- case BC_TSETV:
- | ins_ABC // RA = src, RB = table, RC = key
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | lg RC, 0(RC, BASE)
- | checktab TAB:RB, ->vmeta_tsetv
- |
-@@ -3228,7 +3228,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cl RC, TAB:RB->asize // Takes care of unordered, too.
- | jhe ->vmeta_tsetv
- | llgfr RC, RC
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | ag RC, TAB:RB->array
- | lghi TMPR2, LJ_TNIL
- | cg TMPR2, 0(RC)
-@@ -3238,7 +3238,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | tmll TMPR1, LJ_GC_BLACK // isblack(table)
- | jne >7
- |2: // Set array slot.
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg RB, 0(RA, BASE)
- | stg RB, 0(RC)
- | ins_next
-@@ -3263,11 +3263,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETS:
- | ins_ABC // RA = src, RB = table, RC = str const (~)
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
- | lghi TMPR2, -1
- | xgr RC, TMPR2 // ~RC
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | lg STR:RC, 0(RC, KBASE)
- | checktab TAB:RB, ->vmeta_tsets
- |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
-@@ -3291,7 +3291,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
- | jne >7
- |3: // Set node value.
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg ITYPE, 0(RA, BASE)
- | stg ITYPE, 0(TMPR1)
- | ins_next
-@@ -3339,12 +3339,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETB:
- | ins_ABC // RA = src, RB = table, RC = byte literal
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
- | checktab TAB:RB, ->vmeta_tsetb
- | cl RC, TAB:RB->asize
- | jhe ->vmeta_tsetb
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | ag RC, TAB:RB->array
- | lghi TMPR2, LJ_TNIL
- | cg TMPR2, 0(RC)
-@@ -3354,7 +3354,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | tmll TMPR1, LJ_GC_BLACK // isblack(table)
- | jne >7
- |2: // Set array slot.
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg ITYPE, 0(RA, BASE)
- | stg ITYPE, 0(RC)
- | ins_next
-@@ -3374,10 +3374,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETR:
- | ins_ABC // RA = src, RB = table, RC = key
-- | sllg RB, RB, 3(r0)
-+ | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
- | cleartp TAB:RB
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | lg RC, 0(RC, BASE)
- | llgc TMPR2, TAB:RB->marked
- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-@@ -3386,11 +3386,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cl RC, TAB:RB->asize
- | jhe ->vmeta_tsetr
- | llgfr RC, RC
-- | sllg RC, RC, 3(r0)
-+ | sllg RC, RC, 3
- | ag RC, TAB:RB->array
- | // Set array slot.
- |->BC_TSETR_Z:
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg ITYPE, 0(RA, BASE)
- | stg ITYPE, 0(RC)
- | ins_next
-@@ -3403,8 +3403,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_TSETM:
- | ins_AD // RA = base (table at base-1), RD = num const (start index)
- |1:
-- | sllg RA, RA, 3(r0)
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg TMPR1, RD, 3
- | llgf TMPR1, 4(TMPR1, KBASE) // Integer constant is in lo-word.
- | la RA, 0(RA, BASE)
- | lg TAB:RB, -8(RA) // Guaranteed to be a table.
-@@ -3420,7 +3420,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | clgf RD, TAB:RB->asize
- | jh >5 // Doesn't fit into array part?
- | sgr RD, TMPR1
-- | sllg TMPR1, TMPR1, 3(r0)
-+ | sllg TMPR1, TMPR1, 3
- | ag TMPR1, TAB:RB->array
- |3: // Copy result slots to table.
- | lg RB, 0(RA)
-@@ -3459,7 +3459,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- if (op == BC_CALLM) {
- | agf NARGS:RD, SAVE_MULTRES
- }
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg LFUNC:RB, 0(BASE, RA)
- | checkfunc LFUNC:RB, ->vmeta_call_ra
- | la BASE, 16(RA, BASE)
-@@ -3473,7 +3473,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_CALLT:
- | ins_AD // RA = base, RD = nargs+1
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | la RA, 16(RA, BASE)
- | lgr KBASE, BASE // Use KBASE for move + vmeta_call hint.
- | lg LFUNC:RB, -16(RA)
-@@ -3512,7 +3512,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | jne <4
- | llgc RA, PC_RA
- | lcgr RA, RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg LFUNC:KBASE, -32(RA, BASE) // Need to prepare KBASE.
- | cleartp LFUNC:KBASE
- | lg KBASE, LFUNC:KBASE->pc
-@@ -3534,7 +3534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ITERC:
- | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | la RA, 16(RA, BASE) // fb = base+2
- | lg RB, -32(RA) // Copy state. fb[0] = fb[-4].
- | lg RC, -24(RA) // Copy control var. fb[1] = fb[-3].
-@@ -3553,7 +3553,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.if JIT
- | // NYI: add hotloop, record BC_ITERN.
- |.endif
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg TAB:RB, -16(RA, BASE)
- | cleartp TAB:RB
- | llgf RC, -4(RA, BASE) // Get index from control var. // TODO: ENDIANNESS DRAGONS.
-@@ -3562,7 +3562,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg ITYPE, TAB:RB->array
- |1: // Traverse array part.
- | clr RC, TMPR1; jhe >5 // Index points after array part?
-- | sllg RD, RC, 3(r0) // Warning: won't work if RD==RC!
-+ | sllg RD, RC, 3 // Warning: won't work if RD==RC!
- | lg TMPR2, 0(RD, ITYPE)
- | cghi TMPR2, LJ_TNIL; je >4
- | // Copy array slot to returned value.
-@@ -3609,7 +3609,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISNEXT:
- | ins_AD // RA = base, RD = target (points to ITERN)
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lg CFUNC:RB, -24(RA, BASE)
- | checkfunc CFUNC:RB, >5
- | lg TMPR1, -16(RA, BASE)
-@@ -3636,9 +3636,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_VARG:
- | // TODO: some opportunities for branch on index in here.
- | ins_ABC // RA = base, RB = nresults+1, RC = numparams
-- | sllg RA, RA, 3(r0)
-- | sllg RB, RB, 3(r0)
-- | sllg RC, RC, 3(r0)
-+ | sllg RA, RA, 3
-+ | sllg RB, RB, 3
-+ | sllg RC, RC, 3
- | la TMPR1, (16+FRAME_VARG)(RC, BASE)
- | la RA, 0(RA, BASE)
- | sg TMPR1, -8(BASE)
-@@ -3673,7 +3673,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slgr RC, TMPR1
- | jno <3 // No vararg slots? (borrow or zero)
- | llgfr RB, RC
-- | srlg RB, RB, 3(r0)
-+ | srlg RB, RB, 3
- | ahi RB, 1
- | st RB, SAVE_MULTRES // MULTRES = #varargs+1
- | lg L:RB, SAVE_L
-@@ -3717,7 +3717,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_RET: case BC_RET0: case BC_RET1:
- | ins_AD // RA = results, RD = nresults+1
- if (op != BC_RET0) {
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- }
- |1:
- | lg PC, -8(BASE)
-@@ -3758,7 +3758,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | llgc RA, PC_RA
- | lcgr RA, RA
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | lay BASE, -16(RA, BASE) // base = base - (RA+2)*8
- | lg LFUNC:KBASE, -16(BASE)
- | cleartp LFUNC:KBASE
-@@ -3772,7 +3772,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg TMPR1, -16(KBASE) // Note: relies on shifted base.
- | la KBASE, 8(KBASE)
- } else {
-- | sllg RC, RD, 3(r0) // RC used as temp.
-+ | sllg RC, RD, 3 // RC used as temp.
- | stg TMPR1, -24(RC, BASE)
- }
- | la RD, 1(RD)
-@@ -3813,7 +3813,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_IFORL:
- vk = (op == BC_IFORL || op == BC_JFORL);
- | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | la RA, 0(RA, BASE)
- | lg RB, FOR_IDX
- | checkint RB, >9
-@@ -3822,7 +3822,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | checkint TMPR1, ->vmeta_for
- | lg ITYPE, FOR_STEP
- | chi ITYPE, 0; jl >5
-- | srag ITYPE, ITYPE, 47(r0)
-+ | srag ITYPE, ITYPE, 47
- | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
- } else {
- #ifdef LUA_USE_ASSERT
-@@ -3865,7 +3865,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |
- |5: // Invert check for negative step.
- if (!vk) {
-- | srag ITYPE, ITYPE, 47(r0)
-+ | srag ITYPE, ITYPE, 47
- | cghi ITYPE, LJ_TISNUM; jne ->vmeta_for
- } else {
- | ar RB, ITYPE; jo <1
-@@ -3946,7 +3946,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- #endif
- case BC_IITERL:
- | ins_AJ // RA = base, RD = target
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | la RA, 0(RA, BASE)
- | lg RB, 0(RA)
- | cghi RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
-@@ -3977,8 +3977,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_JLOOP:
-- | stg r0, 0(r0)
-- | stg r0, 0(r0)
-+ | stg r0, 0
-+ | stg r0, 0
- break;
-
- case BC_JMP:
-@@ -3998,7 +3998,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_FUNCF:
- |.if JIT
-- | stg r0, 0(r0)
-+ | stg r0, 0
- |.endif
- case BC_FUNCV: /* NYI: compiled vararg functions. */
- | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
-@@ -4012,7 +4012,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
- | lg KBASE, (PC2PROTO(k)-4)(PC)
- | lg L:RB, SAVE_L
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | la RA, 0(RA, BASE) // Top of frame.
- | clg RA, L:RB->maxstack
- | jh ->vm_growstack_f
-@@ -4029,7 +4029,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |
- |3: // Clear missing parameters.
- | // TODO: optimize this. Some of this can be hoisted.
-- | sllg TMPR1, NARGS:RD, 3(r0)
-+ | sllg TMPR1, NARGS:RD, 3
- | lghi TMPR2, LJ_TNIL
- | stg TMPR2, -8(TMPR1, BASE)
- | la RD, 1(RD)
-@@ -4042,19 +4042,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- #if !LJ_HASJIT
- break;
- #endif
-- | stg r0, 0(r0) // NYI: compiled vararg functions
-+ | stg r0, 0 // NYI: compiled vararg functions
- break; /* NYI: compiled vararg functions. */
-
- case BC_IFUNCV:
- | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-- | sllg TMPR1, NARGS:RD, 3(r0)
-+ | sllg TMPR1, NARGS:RD, 3
- | la RB, (FRAME_VARG+8)(TMPR1)
- | la RD, 8(TMPR1, BASE)
- | lg LFUNC:KBASE, -16(BASE)
- | stg RB, -8(RD) // Store delta + FRAME_VARG.
- | stg LFUNC:KBASE, -16(RD) // Store copy of LFUNC.
- | lg L:RB, SAVE_L
-- | sllg RA, RA, 3(r0)
-+ | sllg RA, RA, 3
- | la RA, 0(RA, RD)
- | cg RA, L:RB->maxstack
- | jh ->vm_growstack_v // Need to grow stack.
-@@ -4101,7 +4101,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cleartp CFUNC:RB
- | lg KBASE, CFUNC:RB->f
- | lg L:RB, SAVE_L
-- | sllg RD, NARGS:RD, 3(r0)
-+ | sllg RD, NARGS:RD, 3
- | lay RD, -8(RD,BASE)
- | stg BASE, L:RB->base
- | lay RA, (8*LUA_MINSTACK)(RD)
-@@ -4125,7 +4125,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg BASE, L:RB->base
- | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
- | set_vmstate INTERP
-- | sllg TMPR1, RD, 3(r0)
-+ | sllg TMPR1, RD, 3
- | la RA, 0(TMPR1, BASE)
- | lcgr RA, RA
- | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
---
-2.20.1
-
-
-From b13cc96c0be2b73d7a3407825d9a409b2e7d9e6d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 11:15:26 -0500
-Subject: [PATCH 204/247] Fix SI (tm) action parsing.
-
----
- dynasm/dasm_s390x.lua | 11 +++++++----
- 1 file changed, 7 insertions(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index b175593..dc83c9f 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -312,6 +312,10 @@ local function is_int8(num)
- return -128 <= num and num < 128
- end
-
-+local function is_uint8(num)
-+ return 0 <= num and num < 256
-+end
-+
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
-@@ -510,13 +514,12 @@ end
- local function parse_imm8(imm)
- local imm_val = tonumber(imm)
- if imm_val then
-- if not is_int8(imm_val) then
-+ if not is_int8(imm_val) and not is_uint8(imm_val) then
- werror("Immediate value out of range: ", imm_val)
- end
-- else
-- iact = function() waction("IMM8",nil,imm) end
-+ return imm_val, nil
- end
-- return imm_val, iact
-+ return 0, function() waction("IMM8",nil,imm) end
- end
-
- local function parse_mask(mask)
---
-2.20.1
-
-
-From 350747cc88f3ed005064b0bfd70408586ccdbe68 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 11:44:27 -0500
-Subject: [PATCH 205/247] Use tm{,y} instructions where possible.
-
----
- src/vm_s390x.dasc | 72 +++++++++++++++--------------------------------
- 1 file changed, 22 insertions(+), 50 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index a441498..7e7915b 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1177,9 +1177,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg TAB:RA, TAB:RB->metatable
- | lg PC, -8(BASE)
- | stg TAB:TMPR1, -16(BASE) // Return original table.
-- | // TODO: change to tm
-- | llgc TMPR2, TAB:RB->marked
-- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | je >1
- | // Possible write barrier. Table is black, but skip iswhite(mt) check.
- | barrierback TAB:RB, RC
-@@ -2372,8 +2370,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:RB, TAB:RA->metatable
- | cghi TAB:RB, 0
- | je <2 // No metatable?
-- | llgc TMPR2, TAB:RB->nomm
-- | tmll TMPR2, 1<<MM_eq
-+ | tm TAB:RB->nomm, 1<<MM_eq
- | jne <2 // Or 'no __eq' flag set?
- if (vk) {
- | lghi RB, 0 // ne = 0 // TODO: should be 32-bit?
-@@ -2593,8 +2590,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | j <1
- #if LJ_52
- |9: // Check for __len.
-- | llgc TMPR2, TAB:RB->nomm
-- | tmll TMPR2, 1<<MM_len
-+ | tm TAB:RB->nomm, 1<<MM_len
- | jne <3
- | j ->vmeta_len // 'no __len' flag NOT set: check.
- #endif
-@@ -2873,18 +2869,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cleartp LFUNC:RB
- | sllg RA, RA, 3
- | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
-- | // TODO: (instead of next 2 instructions) tm UPVAL:RB->closed, 0xff
-- | llgc TMPR2, UPVAL:RB->closed
-- | tmll TMPR2, 0xff
-+ | tm UPVAL:RB->closed, 0xff
- | lg RB, UPVAL:RB->v
- | sllg TMPR1, RD, 3
- | lg RA, 0(TMPR1, BASE)
- | stg RA, 0(RB)
- | je >1
- | // Check barrier for closed upvalue.
-- | // TODO: tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
-- | llgc TMPR2, TV2MARKOFS(RB)
-- | tmll TMPR2, LJ_GC_BLACK
-+ | tmy TV2MARKOFS(RB), LJ_GC_BLACK // isblack(uv)
- | jne >2
- |1:
- | ins_next
-@@ -2892,12 +2884,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |2: // Upvalue is black. Check if new value is collectable and white.
- | srag RD, RA, 47
- | ahi RD, -LJ_TISGCV
-- | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
-+ | clfi RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
- | jle <1
- | cleartp GCOBJ:RA
-- | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
-- | llgc TMPR2, GCOBJ:RA->gch.marked
-- | tmll TMPR2, LJ_GC_WHITES
-+ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
- | je <1
- | // Crossed a write barrier. Move the barrier forward.
- | lgr CARG2, RB
-@@ -2919,21 +2909,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg RD, UPVAL:RB->v
- | settp STR:ITYPE, STR:RA, LJ_TSTR
- | stg STR:ITYPE, 0(RD)
-- | // TODO: tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
-- | llgc TMPR2, UPVAL:RB->marked
-- | tmll TMPR2, LJ_GC_BLACK
-+ | tm UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
- | jne >2
- |1:
- | ins_next
- |
- |2: // Check if string is white and ensure upvalue is closed.
-- | // TODO: tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
-- | llgc TMPR2, GCOBJ:RA->gch.marked
-- | tmll TMPR2, LJ_GC_WHITES
-+ | tm GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
- | je <1
-- | // TODO: tm UPVAL:RB->closed, 0xff
-- | llgc TMPR2, UPVAL:RB->closed
-- | tmll TMPR2, 0xff
-+ | tm UPVAL:RB->closed, 0xff
- | je <1
- | // Crossed a write barrier. Move the barrier forward.
- | lgr RB, BASE
-@@ -3114,8 +3098,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:TMPR1, TAB:RB->metatable
- | cghi TAB:TMPR1, 0
- | je <1
-- | llgc TMPR2, TAB:TMPR1->nomm
-- | tmll TMPR2, 1<<MM_index
-+ | tm TAB:TMPR1->nomm, 1<<MM_index
- | je ->vmeta_tgetv // 'no __index' flag NOT set: check.
- | j <1
- |
-@@ -3163,8 +3146,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:TMPR1, TAB:RB->metatable
- | cghi TAB:TMPR1, 0
- | je <2 // No metatable: done.
-- | llgc TMPR2, TAB:TMPR1->nomm
-- | tmll TMPR2, 1<<MM_index
-+ | tm TAB:TMPR1->nomm, 1<<MM_index
- | jne <2 // 'no __index' flag set: done.
- | j ->vmeta_tgets // Caveat: preserve STR:RC.
- break;
-@@ -3190,8 +3172,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:TMPR1, TAB:RB->metatable
- | cghi TAB:TMPR1, 0
- | je <1
-- | llgc TMPR2, TAB:TMPR1->nomm
-- | tmll TMPR2, 1<<MM_index
-+ | tm TAB:TMPR1->nomm, 1<<MM_index
- | je ->vmeta_tgetb // 'no __index' flag NOT set: check.
- | j <1
- break;
-@@ -3234,8 +3215,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cg TMPR2, 0(RC)
- | je >3 // Previous value is nil?
- |1:
-- | llgc TMPR1, TAB:RB->marked
-- | tmll TMPR1, LJ_GC_BLACK // isblack(table)
-+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | jne >7
- |2: // Set array slot.
- | sllg RA, RA, 3
-@@ -3247,8 +3227,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:TMPR1, TAB:RB->metatable
- | cghi TAB:TMPR1, 0
- | je <1
-- | llgc TMPR2, TAB:TMPR1->nomm
-- | tmll TMPR2, 1<<MM_newindex
-+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
- | je ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
- | j <1
- |
-@@ -3287,8 +3266,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cg TMPR2, 0(TMPR1)
- | je >4 // Previous value is nil?
- |2:
-- | llgc TMPR2, TAB:RB->marked
-- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | jne >7
- |3: // Set node value.
- | sllg RA, RA, 3
-@@ -3300,8 +3278,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:ITYPE, TAB:RB->metatable
- | cghi TAB:ITYPE, 0
- | je <2
-- | llgc TMPR2, TAB:ITYPE->nomm
-- | tmll TMPR2, 1<<MM_newindex
-+ | tm TAB:ITYPE->nomm, 1<<MM_newindex
- | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
- | j <2
- |
-@@ -3315,8 +3292,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:TMPR1, TAB:RB->metatable
- | cghi TAB:TMPR1, 0
- | je >6 // No metatable: continue.
-- | llgc TMPR2, TAB:TMPR1->nomm
-- | tmll TMPR2, 1<<MM_newindex
-+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
- | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
- |6:
- | stg ITYPE, TMP_STACK
-@@ -3350,8 +3326,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cg TMPR2, 0(RC)
- | je >3 // Previous value is nil?
- |1:
-- | llgc TMPR1, TAB:RB->marked
-- | tmll TMPR1, LJ_GC_BLACK // isblack(table)
-+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | jne >7
- |2: // Set array slot.
- | sllg RA, RA, 3
-@@ -3363,8 +3338,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg TAB:TMPR1, TAB:RB->metatable
- | cghi TAB:TMPR1, 0
- | je <1
-- | llgc TMPR2, TAB:TMPR1->nomm
-- | tmll TMPR2, 1<<MM_newindex
-+ | tm TAB:TMPR1->nomm, 1<<MM_newindex
- | je ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
- | j <1
- |
-@@ -3379,8 +3353,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cleartp TAB:RB
- | sllg RC, RC, 3
- | lg RC, 0(RC, BASE)
-- | llgc TMPR2, TAB:RB->marked
-- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | jne >7
- |2:
- | cl RC, TAB:RB->asize
-@@ -3409,8 +3382,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | la RA, 0(RA, BASE)
- | lg TAB:RB, -8(RA) // Guaranteed to be a table.
- | cleartp TAB:RB
-- | llgc TMPR2, TAB:RB->marked
-- | tmll TMPR2, LJ_GC_BLACK // isblack(table)
-+ | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
- | jne >7
- |2:
- | llgf RD, SAVE_MULTRES
---
-2.20.1
-
-
-From e3feb72542242b72d2602c213ef9665336dc2c57 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 11:58:24 -0500
-Subject: [PATCH 206/247] Minor instruction changes.
-
----
- src/vm_s390x.dasc | 8 ++------
- 1 file changed, 2 insertions(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 7e7915b..d16abc0 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -292,10 +292,7 @@
- |
- |// Move table write barrier back. Overwrites reg.
- |.macro barrierback, tab, reg
--| // TODO: more efficient way?
--| llgc reg, tab->marked
--| nill reg, (uint16_t)~LJ_GC_BLACK // black2gray(tab)
--| stc reg, tab->marked
-+| ni tab->marked, ~LJ_GC_BLACK // black2gray(tab)
- | lg reg, (DISPATCH_GL(gc.grayagain))(DISPATCH)
- | stg tab, (DISPATCH_GL(gc.grayagain))(DISPATCH)
- | stg reg, tab->gclist
-@@ -316,8 +313,7 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |->vm_returnp:
-- | lghi TMPR2, FRAME_P
-- | nr TMPR2, PC
-+ | tmll PC, FRAME_P
- | je ->cont_dispatch
- |
- | // Return from pcall or xpcall fast func.
---
-2.20.1
-
-
-From 64bcc38ad762a3cf078cfb25947e9665a3dd814a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 12:04:17 -0500
-Subject: [PATCH 207/247] Remove TODO.
-
----
- src/vm_s390x.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index d16abc0..4c8e3a3 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -554,7 +554,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- | lg RD, L:LREG->top
- | sgr RD, RA
-- | srlg NARGS:RD, NARGS:RD, 3 // TODO: support '3' on its own in dynasm.
-+ | srlg NARGS:RD, NARGS:RD, 3
- | aghi NARGS:RD, 1 // RD = nargs+1
- |
- |->vm_call_dispatch:
---
-2.20.1
-
-
-From 0ec72b993ccfa4bc3d78a36571a9d66a93f67e61 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 12:52:02 -0500
-Subject: [PATCH 208/247] Add remaining (useful) SI instructions to DynASM.
-
----
- dynasm/dasm_s390x.lua | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index dc83c9f..bff135b 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1251,8 +1251,12 @@ map_op = {
- -- RIE-e
- brxhg_3 = "ec0000000044RIE-e",
- -- SI
-+ cli_2 = "000095000000SI",
-+ mvi_2 = "000092000000SI",
- ni_2 = "000094000000SI",
- tm_2 = "000091000000SI",
-+ xi_2 = "000097000000SI",
-+ oi_2 = "000096000000SI",
- -- SIY
- tmy_2 = "eb0000000051SIY",
- -- RXF
---
-2.20.1
-
-
-From ffdc1df8b907acf4c9579df624bc51bb01022153 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 12:52:45 -0500
-Subject: [PATCH 209/247] Replace some instruction sequences with their SI
- equivalents.
-
----
- src/vm_s390x.dasc | 10 +++-------
- 1 file changed, 3 insertions(+), 7 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 4c8e3a3..bb53757 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1395,9 +1395,7 @@ static void build_subroutines(BuildCtx *ctx)
- | checktptp L:TMPR2, LJ_TTHREAD, ->fff_fallback
- |.endif
- | ltg TMPR2, L:RB->cframe; jne ->fff_fallback
-- | // TODO: replace with cli.
-- | llgc TMPR1, L:RB->status
-- | cghi TMPR1, (uint8_t)LUA_YIELD; jh ->fff_fallback
-+ | cli L:RB->status, LUA_YIELD; jh ->fff_fallback
- | lg RA, L:RB->top
- | je >1 // Status != LUA_YIELD (i.e. 0)?
- | cg RA, L:RB->base // Check for presence of initial func.
-@@ -3250,8 +3248,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | n TMPR1, STR:RC->hash
- | lgfr TMPR1, TMPR1
- | mghi TMPR1, #NODE
-- | xr TMPR2, TMPR2
-- | stc TMPR2, TAB:RB->nomm // Clear metamethod cache.
-+ | mvi TAB:RB->nomm, 0 // Clear metamethod cache.
- | ag NODE:TMPR1, TAB:RB->node
- | settp ITYPE, STR:RC, LJ_TSTR
- |1:
-@@ -3596,8 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lghi TMPR2, BC_JMP
- | stcy TMPR2, PC_OP
- | branchPC RD
-- | lghi TMPR2, BC_ITERC
-- | stc TMPR2, 3(PC)
-+ | mvi 3(PC), BC_ITERC
- | j <1
- break;
-
---
-2.20.1
-
-
-From b6c6ea574fe90f5513c85e3898cd4edf3439c86f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 14:12:06 -0500
-Subject: [PATCH 210/247] Use execute rather than loop for mvc and avoid jumps
- in fast path.
-
-Not sure if this works, the tests don't exercise the stack code.
----
- src/Makefile | 2 +-
- src/vm_s390x.dasc | 58 ++++++++++++++++++++++++-----------------------
- 2 files changed, 31 insertions(+), 29 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index d0f160a..21a67d8 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -56,7 +56,7 @@ CCOPT_mips=
- #
- CCDEBUG=
- # Uncomment the next line to generate debug information:
--#CCDEBUG= -g
-+CCDEBUG= -g
- #
- CCWARN= -Wall
- # Uncomment the next line to enable more warnings:
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index bb53757..f8be284 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2142,47 +2142,26 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_ffi_call: // Call C function via FFI.
- | // Caveat: needs special frame unwinding, see below.
- |.if FFI
-- | .type CCSTATE, CCallState, r10
-- | stmg r6, r15, 48(sp) // TODO: need to save r6, but might be better in separate store?
-+ | .type CCSTATE, CCallState, r8
-+ | stmg r6, r15, 48(sp)
- | lgr CCSTATE, CARG1
-+ | lg r7, CCSTATE->func // TODO: move further up?
- |
- | // Readjust stack.
- | sgf sp, CCSTATE->spadj
- |
- | // Copy stack slots.
-- | llgc r0, CCSTATE->nsp
-- | cghi r0, 0
-- | jle >3
-- | lay r1, (offsetof(CCallState, stack))(CCSTATE) // Source.
-- | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
-+ | llgc r1, CCSTATE->nsp
-+ | chi r1, 0
-+ | jh >2
- |1:
-- | cghi r0, 256
-- | jl >2
-- | mvc 0(256, r11), 0(r1)
-- | aghi r1, 256*8
-- | aghi r11, 256*8
-- | aghi r0, -256
-- | j <1
-- |2:
-- | cghi r0, 0
-- | je >3
-- | // TODO: exrl mvc rather than loop.
-- | mvc 0(8, r11), 0(r1)
-- | aghi r1, 8
-- | aghi r11, 8
-- | aghi r0, -1
-- | j <2
-- |3:
-- |
- | lmg CARG1, CARG5, CCSTATE->gpr[0]
- | // TODO: conditionally load FPRs?
- | ld FARG1, CCSTATE->fpr[0]
- | ld FARG2, CCSTATE->fpr[1]
- | ld FARG3, CCSTATE->fpr[2]
- | ld FARG4, CCSTATE->fpr[3]
-- |5:
-- | lg r1, CCSTATE->func // TODO: move further up?
-- | basr r14, r1
-+ | basr r14, r7
- |
- | stg CRET1, CCSTATE->gpr[0]
- | stg f0, CCSTATE->fpr[0]
-@@ -2190,6 +2169,29 @@ static void build_subroutines(BuildCtx *ctx)
- | agf sp, CCSTATE->spadj
- | lmg r6, r15, 48(sp)
- | br r14
-+ |
-+ |2:
-+ | lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
-+ | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
-+ |3:
-+ | chi r1, 256
-+ | jl >4
-+ | mvc 0(256, r11), 0(r10)
-+ | la r10, 256*8(r10)
-+ | la r11, 256*8(r11)
-+ | ahi r1, -256
-+ | j <3
-+ |
-+ |4:
-+ | ahi r1, -1
-+ | jl <1
-+ | larl r9, >5
-+ | ex r1, 0(r9) // TODO: exrl is faster but needs z10.
-+ | j <1
-+ |
-+ |5:
-+ | // exrl target
-+ | mvc 0(1, r11), 0(r10)
- |.endif
- |// Note: vm_ffi_call must be the last function in this object file!
- |
---
-2.20.1
-
-
-From ccc804325d1987980ba84a054fc1d9f593e81ec9 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 16:11:30 -0500
-Subject: [PATCH 211/247] Various fixes for FFI calls.
-
-Fixes the following scenarios:
- * Returning floating point value.
- * More than 4 GPR arguments.
----
- src/lj_ccall.c | 13 +++++++++++--
- src/vm_s390x.dasc | 7 ++++---
- 2 files changed, 15 insertions(+), 5 deletions(-)
-
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index d2fb19c..e31bdb6 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -602,9 +602,9 @@
-
- #define CCALL_HANDLE_REGARG \
- if (isfp) { \
-- if (nfpr < maxgpr) { dp = &cc->fpr[nfpr++]; goto done; } \
-+ if (nfpr < CCALL_NARG_FPR) { dp = &cc->fpr[nfpr++]; goto done; } \
- } else { \
-- if (ngpr < CCALL_NARG_FPR) { dp = &cc->gpr[ngpr++]; goto done; } \
-+ if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
- }
-
- #else
-@@ -1099,6 +1099,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
- }
- #endif
-+#if LJ_TARGET_S390X
-+ /* Arguments need to be sign-/zero-extended to 64-bits. */
-+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) && d->size <= 4) {
-+ if (d->info & CTF_UNSIGNED)
-+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
-+ else
-+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
-+ }
-+#endif
- #if LJ_TARGET_X64 && LJ_ABI_WIN
- if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
- if (nfpr == ngpr)
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f8be284..f0289de 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2164,21 +2164,22 @@ static void build_subroutines(BuildCtx *ctx)
- | basr r14, r7
- |
- | stg CRET1, CCSTATE->gpr[0]
-- | stg f0, CCSTATE->fpr[0]
-+ | std f0, CCSTATE->fpr[0]
- |
- | agf sp, CCSTATE->spadj
- | lmg r6, r15, 48(sp)
- | br r14
- |
- |2:
-+ | sll r1, 3
- | lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
- | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
- |3:
- | chi r1, 256
- | jl >4
- | mvc 0(256, r11), 0(r10)
-- | la r10, 256*8(r10)
-- | la r11, 256*8(r11)
-+ | la r10, 256(r10)
-+ | la r11, 256(r11)
- | ahi r1, -256
- | j <3
- |
---
-2.20.1
-
-
-From f270015a5f1f051e5497250b5d079c4dc5391e35 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 16:56:43 -0500
-Subject: [PATCH 212/247] Fix single-precision floating point parameters passed
- on stack.
-
-The opposite way round to the registers for some reason.
----
- src/lj_ccall.c | 8 +++++---
- 1 file changed, 5 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index e31bdb6..ace52df 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -1018,7 +1018,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- CTypeID did;
- CType *d;
- CTSize sz;
-- MSize n, isfp = 0, isva = 0;
-+ MSize n, isfp = 0, isva = 0, onstack = 0;
- void *dp, *rp = NULL;
-
- if (fid) { /* Get argument type from field. */
-@@ -1058,6 +1058,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- CCALL_HANDLE_REGARG /* Handle register arguments. */
-
- /* Otherwise pass argument on stack. */
-+ onstack = 1;
- if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
- MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
- nsp = (nsp + align) & ~align; /* Align argument on stack. */
-@@ -1101,8 +1102,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- #endif
- #if LJ_TARGET_S390X
- /* Arguments need to be sign-/zero-extended to 64-bits. */
-- if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) && d->size <= 4) {
-- if (d->info & CTF_UNSIGNED)
-+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) ||
-+ (isfp && onstack)) && d->size <= 4) {
-+ if (d->info & CTF_UNSIGNED || isfp)
- *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
- else
- *(int64_t *)dp = (int64_t)*(int32_t *)dp;
---
-2.20.1
-
-
-From ff9ac8036ec5b34565b82ea97d13984dbed040e0 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584(a)gmail.com>
-Date: Wed, 11 Jan 2017 17:04:09 +0530
-Subject: [PATCH 213/247] Added example for 'TM' instruction
-
----
- dynasm/Examples/test_z_inst.c | 21 ++++++++++++++++++++-
- 1 file changed, 20 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 8558aae..4d6ee41 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -258,6 +258,24 @@ static void load_test(dasm_State *state)
- }
- */
-
-+
-+static void test_mask(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ |lay sp , -8(sp)
-+ |stg r2, 4(sp)
-+ |tm 4(sp),0x04
-+ |je >2
-+ |jne >1
-+|1:
-+ |ar r2,r3
-+ |br r14
-+|2:
-+ |sr r2,r3
-+ |br r14
-+}
-+
- static void ssa(dasm_State *state) {
- dasm_State **Dst = &state;
-
-@@ -428,7 +446,8 @@ test_table test[] = {
- { 0, 0, 0, rsb, 0, "rsb"},
- {12,10, 0, rre, 10, "rre"},
- {16,10, 0, sqrt_rxe, 4,"sqrt_rxe"},
-- {16,10, 0, rxf, 116, "rxf"}
-+ {16,10, 0, rxf, 116, "rxf"},
-+ { 4, 3, 0, test_mask, 1,"test_mask"}
- };
-
- static void *jitcode(dasm_State **state, size_t *size)
---
-2.20.1
-
-
-From 61fbe12a18ca56589ef6ec5227fea3e810aac6b7 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 10 Jan 2017 17:09:59 -0500
-Subject: [PATCH 214/247] Fix ffi calls with complex parameters.
-
----
- src/lj_ccall.c | 15 +++++++--------
- 1 file changed, 7 insertions(+), 8 deletions(-)
-
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index ace52df..b5e987b 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -580,11 +580,12 @@
- cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
- if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
-
--#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
-+#define CCALL_HANDLE_COMPLEXRET \
-+ cc->retref = 1; /* Return all complex values by reference. */ \
-+ cc->gpr[ngpr++] = (GPRArg)dp;
-
- #define CCALL_HANDLE_COMPLEXRET2 \
-- if (!cc->retref) \
-- *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */
-+ UNUSED(dp); /* Nothing to do. */
-
- #define CCALL_HANDLE_STRUCTARG \
- /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
-@@ -594,11 +595,9 @@
- }
-
- #define CCALL_HANDLE_COMPLEXARG \
-- /* Pass complex float in a GPR and complex double by reference. */ \
-- if (sz != 2*sizeof(float)) { \
-- rp = cdataptr(lj_cdata_new(cts, did, sz)); \
-- sz = CTSIZE_PTR; \
-- }
-+ /* Pass complex numbers by reference. */ \
-+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
-+ sz = CTSIZE_PTR; \
-
- #define CCALL_HANDLE_REGARG \
- if (isfp) { \
---
-2.20.1
-
-
-From dfecffa99efa817266caab7260fe4fa0153986a1 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 11 Jan 2017 12:13:12 -0500
-Subject: [PATCH 215/247] Fix ffi calls returning structs.
-
----
- src/lj_ccall.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index b5e987b..1c6ed1c 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -576,9 +576,8 @@
- /* -- POSIX/s390x calling conventions --------------------------------------- */
-
- #define CCALL_HANDLE_STRUCTRET \
-- /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \
-- cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
-- if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
-+ cc->retref = 1; /* Return all structs by reference. */ \
-+ cc->gpr[ngpr++] = (GPRArg)dp;
-
- #define CCALL_HANDLE_COMPLEXRET \
- cc->retref = 1; /* Return all complex values by reference. */ \
-@@ -596,6 +595,7 @@
-
- #define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex numbers by reference. */ \
-+ /* TODO: not sure why this is different to structs. */ \
- rp = cdataptr(lj_cdata_new(cts, did, sz)); \
- sz = CTSIZE_PTR; \
-
---
-2.20.1
-
-
-From d504ff399043a0d9ed014d9926b5b120b68c2491 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 11 Jan 2017 12:46:30 -0500
-Subject: [PATCH 216/247] Fix typo in lj_ccallback.h
-
-For future reference only, we aren't using this bit of code yet.
----
- src/lj_ccallback.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index 965f5d6..d6174e7 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -511,9 +511,9 @@ void lj_ccallback_mcode_free(CTState *cts)
-
- #define CALLBACK_HANDLE_REGARG \
- if (isfp) { \
-- if (nfpr < maxgpr) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
-+ if (nfpr < CCALL_NARG_FPR) { sp = &cts->cb.fpr[nfpr++]; goto done; } \
- } else { \
-- if (ngpr < CCALL_NARG_FPR) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
-+ if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
- }
-
- #else
---
-2.20.1
-
-
-From c2a7b610774eda70af15c959cca401105e48f992 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 11 Jan 2017 14:42:24 -0500
-Subject: [PATCH 217/247] Delete LREG and replace its uses with RB.
-
-This frees up a register and fixes a bug where RB was not loaded
-correctly into LREG.
----
- src/vm_s390x.dasc | 33 +++++++++++++++------------------
- 1 file changed, 15 insertions(+), 18 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f0289de..0e9709f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -41,8 +41,7 @@
- |.define KBASE, r8 // Constants of current Lua function.
- |.define PC, r9 // Next PC.
- |.define DISPATCH, r10 // Opcode dispatch table.
--|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
--|.define ITYPE, r13 //
-+|.define ITYPE, r11 //
- |
- |// The following temporaries are not saved across C calls, except for RD.
- |.define RA, r1
-@@ -368,7 +367,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_leave_cp:
- | lg RA, SAVE_CFRAME // Restore previous C frame.
-- | stg RA, L:LREG->cframe
-+ | stg RA, L:RB->cframe
- | lghi CRET1, 0 // Ok return status for vm_pcall.
- |
- |->vm_leave_unw:
-@@ -498,7 +497,6 @@ static void build_subroutines(BuildCtx *ctx)
- | st RD, SAVE_NRES
- | stg RD, SAVE_ERRF
- | stg KBASE, L:RB->cframe
-- | lgr LREG, L:RB
- | clm RD, 1, L:RB->status
- | je >2 // Initial resume (like a call).
- |
-@@ -543,16 +541,15 @@ static void build_subroutines(BuildCtx *ctx)
- | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
- | aghi DISPATCH, GG_G2DISP
- | stg sp, L:RB->cframe
-- | lgr L:LREG, L:RB // TODO: use RB instead of LREG here?
- |
-- |2: // Entry point for vm_resume/vm_cpcall (RA = base, LREG = L, PC = ftype).
-- | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
-+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
-+ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
- | set_vmstate INTERP
-- | lg BASE, L:LREG->base // BASE = old base (used in vmeta_call).
-+ | lg BASE, L:RB->base // BASE = old base (used in vmeta_call).
- | agr PC, RA
- | sgr PC, BASE // PC = frame delta + frame type
- |
-- | lg RD, L:LREG->top
-+ | lg RD, L:RB->top
- | sgr RD, RA
- | srlg NARGS:RD, NARGS:RD, 3
- | aghi NARGS:RD, 1 // RD = nargs+1
-@@ -569,23 +566,23 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_cpcall: // Setup protected C frame, call C.
- | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
- | saveregs
-- | lgr LREG, CARG1
-- | stg LREG, SAVE_L
-- | stg LREG, SAVE_PC // Any value outside of bytecode is ok.
-+ | lgr L:RB, CARG1
-+ | stg L:RB, SAVE_L
-+ | stg L:RB, SAVE_PC // Any value outside of bytecode is ok.
- |
-- | lg KBASE, L:LREG->stack // Compute -savestack(L, L->top).
-- | sg KBASE, L:LREG->top
-- | lg DISPATCH, L:LREG->glref // Setup pointer to dispatch table.
-+ | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
-+ | sg KBASE, L:RB->top
-+ | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
- | lghi RA, 0
- | stg RA, SAVE_ERRF // No error function.
- | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
- | aghi DISPATCH, GG_G2DISP
- | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
- |
-- | lg KBASE, L:LREG->cframe // Add our C frame to cframe chain.
-+ | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
- | stg KBASE, SAVE_CFRAME
-- | stg sp, L:LREG->cframe
-- | stg L:LREG, DISPATCH_GL(cur_L)(DISPATCH)
-+ | stg sp, L:RB->cframe
-+ | stg L:RB, DISPATCH_GL(cur_L)(DISPATCH)
- |
- | basr r14, CARG4 // (lua_State *L, lua_CFunction func, void *ud)
- | // TValue * (new base) or NULL returned in r2 (CRET1/).
---
-2.20.1
-
-
-From 6923d756df36343200211200ccdda564ac5cdb00 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 11 Jan 2017 16:16:51 -0500
-Subject: [PATCH 218/247] Clean up register allocations and comments slightly.
-
-Move RB from r12 to r13 so that it no longer overlaps with the
-GOT pointer (to avoid potential problems with PIC compilation).
----
- src/vm_s390x.dasc | 18 +++++++++---------
- 1 file changed, 9 insertions(+), 9 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 0e9709f..404c4b3 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -8,8 +8,8 @@
- |// r3-r5 | parameter | volatile |
- |// r6 | parameter | saved |
- |// r7-r11 | | saved |
--|// r12 | GOT pointer (needed?) | saved |
--|// r13 | literal pool (needed?) | saved |
-+|// r12 | GOT pointer (needed?) | saved |
-+|// r13 | literal pool (not needed) | saved |
- |// r14 | return address | volatile |
- |// r15 | stack pointer | saved |
- |// f0,f2,f4,f6 | parameter and return value | volatile |
-@@ -41,13 +41,13 @@
- |.define KBASE, r8 // Constants of current Lua function.
- |.define PC, r9 // Next PC.
- |.define DISPATCH, r10 // Opcode dispatch table.
--|.define ITYPE, r11 //
-+|.define ITYPE, r11 // Temporary used for type information.
- |
--|// The following temporaries are not saved across C calls, except for RD.
-+|// The following temporaries are not saved across C calls, except for RB.
- |.define RA, r1
--|.define RB, r12
-+|.define RB, r13 // Must be callee-save.
- |.define RC, r5 // Overlaps CARG4.
--|.define RD, r6 // Overlaps CARG5. Callee-saved.
-+|.define RD, r6 // Overlaps CARG5.
- |
- |// Calling conventions. Also used as temporaries.
- |.define CARG1, r2
-@@ -484,7 +484,7 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_resume: // Setup C frame and resume thread.
- | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
- | saveregs
-- | lgr L:RB, CARG1 // Caveat: CARG1 may be RA.
-+ | lgr L:RB, CARG1
- | stg CARG1, SAVE_L
- | lgr RA, CARG2
- | lghi PC, FRAME_CP
-@@ -752,7 +752,7 @@ static void build_subroutines(BuildCtx *ctx)
- | la RB, 0(RB, BASE)
- |2:
- | lg L:CARG1, SAVE_L
-- | stg BASE, L:CARG1->base // Caveat: CARG2/CARG3 may be BASE.
-+ | stg BASE, L:CARG1->base
- | lgr CARG2, RB
- | lgr CARG3, RC
- | lgr L:RB, L:CARG1
-@@ -4071,7 +4071,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lay RA, (8*LUA_MINSTACK)(RD)
- | clg RA, L:RB->maxstack
- | stg RD, L:RB->top
-- | lgr CARG1, L:RB // Caveat: CARG1 may be RA.
-+ | lgr CARG1, L:RB
- if (op != BC_FUNCC) {
- | lgr CARG2, KBASE
- }
---
-2.20.1
-
-
-From a512c07da58f4c1ab0e0a7c7ee854dd034291768 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 11 Jan 2017 16:38:35 -0500
-Subject: [PATCH 219/247] Add and use branch on count instructions where
- possible.
-
----
- dynasm/dasm_s390x.lua | 2 ++
- src/vm_s390x.dasc | 24 ++++++------------------
- 2 files changed, 8 insertions(+), 18 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index bff135b..8bf7084 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1240,6 +1240,8 @@ map_op = {
- mghi_2 = "0000a70d0000RI-a",
- -- RI-b mode instructions
- bras_2 = "0000a7050000RI-b",
-+ brct_2 = "0000a7060000RI-b",
-+ brctg_2 = "0000a7070000RI-b",
- -- RI-c mode instructions
- brc_2 = "0000a7040000RI-c",
- -- RIL-c
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 404c4b3..918a52f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1085,9 +1085,7 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, 8(RA)
- | lg RB, 0(RA)
- | stg RB, -16(RA)
-- | ahi RD, -1
-- | jne <1
-- | // TODO: replace with branch on count (brctg).
-+ | brct RD, <1
- |2:
- | llgf RD, SAVE_MULTRES
- | j ->fff_res_
-@@ -3392,9 +3390,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | la RA, 8(RA)
- | stg RB, 0(TMPR1)
- | la TMPR1, 8(TMPR1)
-- | aghi RD, -1
-- | jne <3
-- | // TODO: replace decrement/branch with branch on count.
-+ | brctg RD, <3
- |4:
- | ins_next
- |
-@@ -3458,9 +3454,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | la RA, 8(RA)
- | stg RB, 0(KBASE)
- | la KBASE, 8(KBASE)
-- | // TODO: replace decrement/branch with brctg
-- | aghi NARGS:RD, -1
-- | jne <2
-+ | brctg NARGS:RD, <2
- |
- | lg LFUNC:RB, -16(BASE)
- |3:
-@@ -3698,9 +3692,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg RB, 0(KBASE, RA)
- | stg RB, -16(KBASE)
- | la KBASE, 8(KBASE)
-- | // TODO: replace with brctg RD, <2 once supported.
-- | aghi RD, -1
-- | jne <2
-+ | brctg RD, <2
- |3:
- | llgf RD, SAVE_MULTRES // Note: MULTRES may be >256.
- | llgc RB, PC_RB
-@@ -4037,9 +4029,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg KBASE, 0(RD)
- | la RD, 8(RD)
- | stg TMPR1, -16(RA) // Clear old fixarg slot (help the GC).
-- | aghi RB, -1
-- | jne <1
-- | // TODO: brctg instead of decrement/branch
-+ | brctg RB, <1
- |2:
- if (op == BC_JFUNCV) {
- | llgh RD, PC_RD
-@@ -4052,9 +4042,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |3: // Clear missing parameters.
- | stg TMPR1, 0(RD) // TMPR1=LJ_TNIL (-1) here.
- | la RD, 8(RD)
-- | aghi RB, -1
-- | jne <3
-- | // TODO: brctg instead of decrement/branch
-+ | brctg RB, <3
- | j <2
- break;
-
---
-2.20.1
-
-
-From 2958e17c95a9a6b81b3bd328e34c17751968587b Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 11 Jan 2017 16:55:50 -0500
-Subject: [PATCH 220/247] Replace lay with la where possible.
-
----
- src/vm_s390x.dasc | 29 +++++++++++++++--------------
- 1 file changed, 15 insertions(+), 14 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 918a52f..dee93c6 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -22,6 +22,7 @@
- |// clfi (compare logical immediate) [requires z9-109]
- |// ldgr (load FPR from GPR) [requires z9-109 GA3]
- |// lgdr (load GPR from FPR) [requires z9-109 GA3]
-+|// lay (load address) [requires z900 GA2]
- |// ldy (load (long bfp)) [requires z900 GA2]
- |// stdy (store (long bfp)) [requires z900 GA2]
- |// TODO: alternative instructions?
-@@ -390,7 +391,7 @@ static void build_subroutines(BuildCtx *ctx)
- | je <5 // But check for LUA_MULTRET+1.
- | sgr RA, RD // Negative result!
- | sllg TMPR1, RA, 3
-- | lay BASE, 0(TMPR1, BASE) // Correct top.
-+ | la BASE, 0(TMPR1, BASE) // Correct top.
- | j <5
- |
- |8: // Corner case: need to grow stack for filling up results.
-@@ -430,7 +431,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lghi RD, 1+1 // Really 1+2 results, incr. later.
- | lg BASE, L:RB->base
- | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-- | lay DISPATCH, GG_G2DISP(DISPATCH)
-+ | la DISPATCH, GG_G2DISP(DISPATCH)
- | lg PC, -8(BASE) // Fetch PC of previous frame.
- | load_false RA
- | lg RB, 0(BASE)
-@@ -489,7 +490,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr RA, CARG2
- | lghi PC, FRAME_CP
- | lghi RD, 0
-- | lay KBASE, CFRAME_RESUME(sp)
-+ | la KBASE, CFRAME_RESUME(sp)
- | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
- | aghi DISPATCH, GG_G2DISP
- | stg RD, SAVE_PC // Any value outside of bytecode is ok.
-@@ -701,7 +702,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // BASE = base, L->top = new base, stack = cont/func/t/k
- | lg RA, L:RB->top
- | stg PC, -24(RA) // [cont|PC]
-- | lay PC, FRAME_CONT(RA)
-+ | la PC, FRAME_CONT(RA)
- | sgr PC, BASE
- | lg LFUNC:RB, -16(RA) // Guaranteed to be a function here.
- | lghi NARGS:RD, 2+1 // 2 args for func(t, k).
-@@ -892,8 +893,8 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_arith_vn:
- | sllg RB, RB, 3
- | sllg RC, RC, 3
-- | lay RB, 0(RB, BASE)
-- | lay RC, 0(RC, KBASE)
-+ | la RB, 0(RB, BASE)
-+ | la RC, 0(RC, KBASE)
- | j >1
- |
- |->vmeta_arith_nvo:
-@@ -902,8 +903,8 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_arith_nv:
- | sllg RC, RC, 3
- | sllg RB, RB, 3
-- | lay TMPR1, 0(RC, KBASE)
-- | lay RC, 0(RB, BASE)
-+ | la TMPR1, 0(RC, KBASE)
-+ | la RC, 0(RB, BASE)
- | lgr RB, TMPR1
- | j >1
- |
-@@ -920,12 +921,12 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_arith_vv:
- | sllg RC, RC, 3
- | sllg RB, RB, 3
-- | lay RB, 0(RB, BASE)
-- | lay RC, 0(RC, BASE)
-+ | la RB, 0(RB, BASE)
-+ | la RC, 0(RC, BASE)
- |1:
- | llgc RA, PC_RA
- | sllg RA, RA, 3
-- | lay RA, 0(RA, BASE)
-+ | la RA, 0(RA, BASE)
- | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
- | lgr CARG2, RA
- | lgr CARG3, RB
-@@ -2167,8 +2168,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |2:
- | sll r1, 3
-- | lay r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
-- | lay r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
-+ | la r10, (offsetof(CCallState, stack))(CCSTATE) // Source.
-+ | la r11, (CCALL_SPS_EXTRA*8)(sp) // Destination.
- |3:
- | chi r1, 256
- | jl >4
-@@ -4056,7 +4057,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | sllg RD, NARGS:RD, 3
- | lay RD, -8(RD,BASE)
- | stg BASE, L:RB->base
-- | lay RA, (8*LUA_MINSTACK)(RD)
-+ | la RA, (8*LUA_MINSTACK)(RD)
- | clg RA, L:RB->maxstack
- | stg RD, L:RB->top
- | lgr CARG1, L:RB
---
-2.20.1
-
-
-From 95da05418ed02a05f2768d1f84c8e7febb4d2e58 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Thu, 12 Jan 2017 14:29:55 -0500
-Subject: [PATCH 221/247] Implement debug.sethook().
-
----
- src/lj_frame.h | 7 +++-
- src/vm_s390x.dasc | 81 +++++++++++++++++++++++++++++++++++++++--------
- 2 files changed, 74 insertions(+), 14 deletions(-)
-
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index a30618e..9b2081d 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -272,7 +272,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #define CFRAME_OFS_PC 168
- #define CFRAME_OFS_MULTRES 160
- #define CFRAME_SIZE 240
--#define CFRAME_SHIFT_MULTRES 3
-+/*
-+** TODO: it would be good if we always decoded param*8 like
-+** the RISC architectures do. If so then SHIFT_MULTRES will
-+** need to change to 3.
-+*/
-+#define CFRAME_SHIFT_MULTRES 0
- #else
- #error "Missing CFRAME_* definitions for this architecture"
- #endif
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index dee93c6..b049a6c 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -188,12 +188,12 @@
- |.macro ins_callt
- | // BASE = new base, RB = LFUNC, RD = nargs+1, -8(BASE) = PC
- | lg PC, LFUNC:RB->pc
--| llgf RA, 0(PC) // TODO: combine loads?
--| llgcr OP, RA
--| sllg TMPR1, OP, 3
-+| llgc OP, 3(PC)
-+| llgc RA, 2(PC)
-+| sllg TMPR1, OP, 3
- | la PC, 4(PC)
--| lg TMPR1, 0(TMPR1, DISPATCH)
--| br TMPR1
-+| lg TMPR1, 0(TMPR1, DISPATCH)
-+| br TMPR1
- |.endmacro
- |
- |.macro ins_call
-@@ -2044,8 +2044,35 @@ static void build_subroutines(BuildCtx *ctx)
- | stg r0, 0
- |
- |->vm_inshook: // Dispatch target for instr/line hooks.
-- | stg r0, 0
-- | stg r0, 0
-+ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
-+ | tmll RD, HOOK_ACTIVE // Hook already active?
-+ | jne >5
-+ |
-+ | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT
-+ | je >5
-+ | ly TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
-+ | ahi TMPR2, -1
-+ | sty TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
-+ | je >1
-+ | tmll RD, LUA_MASKLINE
-+ | je >5
-+ |1:
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | lgr CARG2, PC
-+ | lgr CARG1, L:RB
-+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
-+ | brasl r14, extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
-+ |3:
-+ | lg BASE, L:RB->base
-+ |4:
-+ | llgc RA, PC_RA
-+ |5:
-+ | llgc OP, PC_OP
-+ | sllg TMPR1, OP, 3
-+ | llgh RD, PC_RD
-+ | lg TMPR1, GG_DISP2STATIC(TMPR1, DISPATCH)
-+ | br TMPR1
- |
- |->cont_hook: // Continue from hook yield.
- | stg r0, 0
-@@ -2056,12 +2083,40 @@ static void build_subroutines(BuildCtx *ctx)
- | stg r0, 0
- |
- |->vm_callhook: // Dispatch target for call hooks.
-- | stg r0, 0
-- | stg r0, 0
-+ | stg PC, SAVE_PC
-+ |.if JIT
-+ | j >1
-+ |.endif
- |
- |->vm_hotcall: // Hot call counter underflow.
-- | stg r0, 0
-- | stg r0, 0
-+ |.if JIT
-+ | stg PC, SAVE_PC
-+ | oill PC, 1 // Marker for hot call.
-+ |1:
-+ |.endif
-+ | sllg RD, NARGS:RD, 3
-+ | lay RD, -8(RD, BASE)
-+ | lg L:RB, SAVE_L
-+ | stg BASE, L:RB->base
-+ | stg RD, L:RB->top
-+ | lgr CARG2, PC
-+ | lgr CARG1, L:RB
-+ | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
-+ | // ASMFunction returned in r2 (CRET1).
-+ | lghi TMPR2, 0
-+ | stg TMPR2, SAVE_PC // Invalidate for subsequent line hook.
-+ |.if JIT
-+ | nill PC, -2
-+ |.endif
-+ | lg BASE, L:RB->base
-+ | lg RD, L:RB->top
-+ | sgr RD, BASE
-+ | lgr RB, CRET1
-+ | llgc RA, PC_RA
-+ | srl RD, 3
-+ | ahi NARGS:RD, 1
-+ | llgfr RD, RD
-+ | br RB
- |
- |->cont_stitch: // Trace stitching.
- | stg r0, 0
-@@ -3422,7 +3477,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | agf NARGS:RD, SAVE_MULTRES
- }
- | sllg RA, RA, 3
-- | lg LFUNC:RB, 0(BASE, RA)
-+ | lg LFUNC:RB, 0(RA, BASE)
- | checkfunc LFUNC:RB, ->vmeta_call_ra
- | la BASE, 16(RA, BASE)
- | ins_call
-@@ -3659,7 +3714,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lgr CARG1, L:RB
- | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
- | lg BASE, L:RB->base
-- | llgf TMPR1, TMP_STACK_HI
-+ | lgf TMPR1, TMP_STACK_HI
- | lg RA, L:RB->top
- | agr TMPR1, BASE
- | j <6
---
-2.20.1
-
-
-From 4ab2d049e9514737c8560228ed8467ca5ab31b0d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 16 Jan 2017 16:11:24 -0500
-Subject: [PATCH 222/247] Make TMPR1 r1 and RA r4.
-
-This avoids using the link register as a temporary.
----
- src/vm_s390x.dasc | 24 ++++++++++++------------
- 1 file changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index b049a6c..3ab8904 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -45,7 +45,7 @@
- |.define ITYPE, r11 // Temporary used for type information.
- |
- |// The following temporaries are not saved across C calls, except for RB.
--|.define RA, r1
-+|.define RA, r4 // Overlaps CARG3.
- |.define RB, r13 // Must be callee-save.
- |.define RC, r5 // Overlaps CARG4.
- |.define RD, r6 // Overlaps CARG5.
-@@ -65,7 +65,7 @@
- |.define CRET1, r2
- |
- |.define OP, r2
--|.define TMPR1, r14
-+|.define TMPR1, r1
- |.define TMPR2, r0
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
-@@ -534,7 +534,7 @@ static void build_subroutines(BuildCtx *ctx)
- | st CARG3, SAVE_NRES
- | lgr L:RB, CARG1
- | stg CARG1, SAVE_L
-- | lgr RA, CARG2
-+ | lgr RA, CARG2 // Caveat: RA = CARG3.
- |
- | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
- | lg KBASE, L:RB->cframe // Add our C frame to cframe chain.
-@@ -574,8 +574,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
- | sg KBASE, L:RB->top
- | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-- | lghi RA, 0
-- | stg RA, SAVE_ERRF // No error function.
-+ | lghi TMPR2, 0
-+ | stg TMPR2, SAVE_ERRF // No error function.
- | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
- | aghi DISPATCH, GG_G2DISP
- | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
-@@ -644,9 +644,9 @@ static void build_subroutines(BuildCtx *ctx)
- | srlg RA, RA, 3
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
-- | lgfr CARG3, RA
-- | lg RA, 0(RC)
-- | stg RA, 0(RB)
-+ | lgfr CARG3, RA // Caveat: RA == CARG3.
-+ | lg TMPR2, 0(RC)
-+ | stg TMPR2, 0(RB)
- | lgr CARG2, RB
- | j ->BC_CAT_Z
- |
-@@ -811,7 +811,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg L:RB, SAVE_L
- | stg BASE, L:RB->base
- | la CARG2, 0(RA, BASE)
-- | la CARG3, 0(RD, BASE)
-+ | la CARG3, 0(RD, BASE) // Caveat: RA == CARG3
- | lgr CARG1, L:RB
- | llgc CARG4, PC_OP
- | stg PC, SAVE_PC
-@@ -878,7 +878,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg L:RB, SAVE_L
- | stg BASE, L:RB->base
- | llgfr CARG2, RA
-- | llgfr CARG3, RD
-+ | llgfr CARG3, RD // Caveat: CARG3 == RA.
- | lgr L:CARG1, L:RB
- | stg PC, SAVE_PC
- | brasl r14, extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
-@@ -929,7 +929,7 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, 0(RA, BASE)
- | llgc CARG5, PC_OP // Caveat: CARG5 == RD.
- | lgr CARG2, RA
-- | lgr CARG3, RB
-+ | lgr CARG3, RB // Caveat: CARG3 == RA.
- | // lgr CARG4, RC // Caveat: CARG4 == RC (nop, so commented out).
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
-@@ -988,7 +988,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg BASE, L:CARG1->base
- | lay CARG2, -16(RA)
- | sllg RD, RD, 3
-- | lay CARG3, -8(RA, RD)
-+ | lay CARG3, -8(RA, RD) // Caveat: CARG3 == RA.
- | stg PC, SAVE_PC
- | brasl r14, extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
- | lgr RA, RB
---
-2.20.1
-
-
-From 2340fd2cf3e1f006f4dc773167cefeecf8d07586 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 16 Jan 2017 16:14:51 -0500
-Subject: [PATCH 223/247] Rename TMPR2 as TMPR0.
-
-TMPR2 was r0 and so cannot be used in address calculations (or
-shift values). Renaming it TMPR0 makes this more obvious.
----
- src/vm_s390x.dasc | 234 +++++++++++++++++++++++-----------------------
- 1 file changed, 117 insertions(+), 117 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 3ab8904..e43e774 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -64,9 +64,9 @@
- |
- |.define CRET1, r2
- |
--|.define OP, r2
-+|.define TMPR0, r0
- |.define TMPR1, r1
--|.define TMPR2, r0
-+|.define OP, r2
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |.define CFRAME_SPACE, 240 // Delta for sp, 8 byte aligned.
-@@ -272,10 +272,10 @@
- |.macro branchPC, reg
- | // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
- | // Can't clobber TMPR1 or condition code.
--| lgr TMPR2, TMPR1 // Workaround because TMPR2 == r0 and can't be used in lay.
-+| lgr TMPR0, TMPR1 // Workaround because TMPR0 == r0 and can't be used in lay.
- | sllg TMPR1, reg, 2
- | lay PC, (-BCBIAS_J*4)(TMPR1, PC)
--| lgr TMPR1, TMPR2
-+| lgr TMPR1, TMPR0
- |.endmacro
- |
- |// Set current VM state.
-@@ -574,8 +574,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lg KBASE, L:RB->stack // Compute -savestack(L, L->top).
- | sg KBASE, L:RB->top
- | lg DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-- | lghi TMPR2, 0
-- | stg TMPR2, SAVE_ERRF // No error function.
-+ | lghi TMPR0, 0
-+ | stg TMPR0, SAVE_ERRF // No error function.
- | st KBASE, SAVE_NRES // Neg. delta means cframe w/o frame.
- | aghi DISPATCH, GG_G2DISP
- | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
-@@ -606,8 +606,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr RB, BASE
- | sgr BASE, PC // Restore caller BASE.
- | sllg TMPR1, RD, 3
-- | lghi TMPR2, LJ_TNIL
-- | stg TMPR2, -8(RA, TMPR1) // Ensure one valid arg.
-+ | lghi TMPR0, LJ_TNIL
-+ | stg TMPR0, -8(RA, TMPR1) // Ensure one valid arg.
- | lgr RC, RA // ... in [RC]
- | lg PC, -24(RB) // Restore PC from [cont|PC].
- | lg RA, -32(RB)
-@@ -645,8 +645,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
- | lgfr CARG3, RA // Caveat: RA == CARG3.
-- | lg TMPR2, 0(RC)
-- | stg TMPR2, 0(RB)
-+ | lg TMPR0, 0(RC)
-+ | stg TMPR0, 0(RB)
- | lgr CARG2, RB
- | j ->BC_CAT_Z
- |
-@@ -728,8 +728,8 @@ static void build_subroutines(BuildCtx *ctx)
- | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
- | stg STR:RC, TMP_STACK
- | la RC, TMP_STACK
-- | llgc TMPR2, PC_OP
-- | cghi TMPR2, BC_GSET
-+ | llgc TMPR0, PC_OP
-+ | cghi TMPR0, BC_GSET
- | jne >1
- | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
- | lay RB, (DISPATCH_GL(tmptv))(DISPATCH) // Store fn->l.env in g->tmptv.
-@@ -835,16 +835,16 @@ static void build_subroutines(BuildCtx *ctx)
- | la PC, 4(PC)
- | lg ITYPE, 0(RC)
- | srag ITYPE, ITYPE, 47
-- | lghi TMPR2, LJ_TISTRUECOND
-- | clr ITYPE, TMPR2 // Branch if result is true.
-+ | lghi TMPR0, LJ_TISTRUECOND
-+ | clr ITYPE, TMPR0 // Branch if result is true.
- | jl <5
- | j <6
- |
- |->cont_condf: // BASE = base, RC = result
- | lg ITYPE, 0(RC)
- | srag ITYPE, ITYPE, 47
-- | lghi TMPR2, LJ_TISTRUECOND
-- | clr ITYPE, TMPR2 // Branch if result is false.
-+ | lghi TMPR0, LJ_TISTRUECOND
-+ | clr ITYPE, TMPR0 // Branch if result is false.
- | j <4
- |
- |->vmeta_equal:
-@@ -1041,8 +1041,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_n, name, op
- | .ffunc_1 name
-- | lg TMPR2, 0(BASE)
-- | checknumtp TMPR2, ->fff_fallback
-+ | lg TMPR0, 0(BASE)
-+ | checknumtp TMPR0, ->fff_fallback
- | op f0, 0(BASE) // TODO: might be better to unconditionally load into f1.
- |.endmacro
- |
-@@ -1053,11 +1053,11 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_nn, name
- | .ffunc_2 name
- | lg TMPR1, 0(BASE)
-- | lg TMPR2, 8(BASE)
-+ | lg TMPR0, 8(BASE)
- | ld FARG1, 0(BASE)
- | ld FARG2, 8(BASE)
- | checknumtp TMPR1, ->fff_fallback
-- | checknumtp TMPR2, ->fff_fallback
-+ | checknumtp TMPR0, ->fff_fallback
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses label 1.
-@@ -1099,8 +1099,8 @@ static void build_subroutines(BuildCtx *ctx)
- | jnl >1
- | lgr RC, RB
- |1:
-- | lghi TMPR2, -1
-- | xgr RC, TMPR2
-+ | lghi TMPR0, -1
-+ | xgr RC, TMPR0
- |2:
- | lg CFUNC:RB, -16(BASE)
- | cleartp CFUNC:RB
-@@ -1120,8 +1120,8 @@ static void build_subroutines(BuildCtx *ctx)
- |1: // Field metatable must be at same offset for GCtab and GCudata!
- | lg TAB:RB, TAB:RB->metatable
- |2:
-- | lghi TMPR2, LJ_TNIL
-- | stg TMPR2, -16(BASE)
-+ | lghi TMPR0, LJ_TNIL
-+ | stg TMPR0, -16(BASE)
- | cghi TAB:RB, 0
- | je ->fff_res1
- | settp TAB:RC, TAB:RB, LJ_TTAB
-@@ -1150,8 +1150,8 @@ static void build_subroutines(BuildCtx *ctx)
- | clfi ITYPE, LJ_TISNUM; jh >7
- | lhi ITYPE, LJ_TISNUM
- |7:
-- | lhi TMPR2, -1
-- | xr ITYPE, TMPR2 // not ITYPE
-+ | lhi TMPR0, -1
-+ | xr ITYPE, TMPR0 // not ITYPE
- | llgfr ITYPE, ITYPE
- | sllg ITYPE, ITYPE, 3
- | lg TAB:RB, (DISPATCH_GL(gcroot[GCROOT_BASEMT]))(ITYPE, DISPATCH)
-@@ -1162,8 +1162,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr TAB:TMPR1, TAB:RB
- | checktab TAB:RB, ->fff_fallback
- | // Fast path: no mt for table yet and not clearing the mt.
-- | lghi TMPR2, 0
-- | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
-+ | lghi TMPR0, 0
-+ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
- | lg TAB:RA, 8(BASE)
- | checktab TAB:RA, ->fff_fallback
- | stg TAB:RA, TAB:RB->metatable
-@@ -1213,8 +1213,8 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_res1
- |3: // Handle numbers inline, unless a number base metatable is present.
- | clfi ITYPE, LJ_TISNUM; jh ->fff_fallback_1
-- | lghi TMPR2, 0
-- | cg TMPR2, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
-+ | lghi TMPR0, 0
-+ | cg TMPR0, (DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]))(DISPATCH)
- | jne ->fff_fallback
- | ffgccheck // Caveat: uses label 1.
- | lg L:RB, SAVE_L
-@@ -1256,12 +1256,12 @@ static void build_subroutines(BuildCtx *ctx)
- | lghi RD, 1+2
- | j ->fff_res
- |2: // Set missing 2nd arg to nil.
-- | lghi TMPR2, LJ_TNIL
-- | stg TMPR2, 8(BASE)
-+ | lghi TMPR0, LJ_TNIL
-+ | stg TMPR0, 8(BASE)
- | j <1
- |3: // End of traversal: return nil.
-- | lghi TMPR2, LJ_TNIL
-- | stg TMPR2, -16(BASE)
-+ | lghi TMPR0, LJ_TNIL
-+ | stg TMPR0, -16(BASE)
- | j ->fff_res1
- |
- |.ffunc_1 pairs
-@@ -1269,7 +1269,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr TMPR1, TAB:RB
- | checktab TAB:RB, ->fff_fallback
- #if LJ_52
-- | ltg TMPR2, TAB:RB->metatable; jne ->fff_fallback
-+ | ltg TMPR0, TAB:RB->metatable; jne ->fff_fallback
- #endif
- | lg CFUNC:RD, -16(BASE)
- | cleartp CFUNC:RD
-@@ -1278,8 +1278,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lg PC, -8(BASE)
- | stg CFUNC:RD, -16(BASE)
- | stg TMPR1, -8(BASE)
-- | lghi TMPR2, LJ_TNIL
-- | stg TMPR2, 0(BASE)
-+ | lghi TMPR0, LJ_TNIL
-+ | stg TMPR0, 0(BASE)
- | lghi RD, 1+3
- | j ->fff_res
- |
-@@ -1298,13 +1298,13 @@ static void build_subroutines(BuildCtx *ctx)
- | sllg TMPR1, TMPR1, 3
- | la RD, 0(TMPR1, RD)
- |1:
-- | lg TMPR2, 0(RD)
-- | cghi TMPR2, LJ_TNIL; je ->fff_res0
-+ | lg TMPR0, 0(RD)
-+ | cghi TMPR0, LJ_TNIL; je ->fff_res0
- | // Copy array slot.
-- | stg TMPR2, -8(BASE)
-+ | stg TMPR0, -8(BASE)
- | j ->fff_res2
- |2: // Check for empty hash part first. Otherwise call C function.
-- | lt TMPR2, TAB:RB->hmask; je ->fff_res0
-+ | lt TMPR0, TAB:RB->hmask; je ->fff_res0
- | lgr CARG1, TAB:RB
- | lgr RB, BASE // Save BASE. // TODO: needed?
- | lgfr CARG2, RA
-@@ -1322,8 +1322,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr TMPR1, TAB:RB
- | checktab TAB:RB, ->fff_fallback
- #if LJ_52
-- | lghi TMPR2, 0
-- | cg TMPR2, TAB:RB->metatable; jne ->fff_fallback
-+ | lghi TMPR0, 0
-+ | cg TMPR0, TAB:RB->metatable; jne ->fff_fallback
- #endif
- | lg CFUNC:RD, -16(BASE)
- | cleartp CFUNC:RD
-@@ -1375,7 +1375,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.if resume
- |.ffunc_1 coroutine_resume
- | lg L:RB, 0(BASE)
-- | lgr L:TMPR2, L:RB // Save type for checktptp.
-+ | lgr L:TMPR0, L:RB // Save type for checktptp.
- | cleartp L:RB
- |.else
- |.ffunc coroutine_wrap_aux
-@@ -1388,9 +1388,9 @@ static void build_subroutines(BuildCtx *ctx)
- | stg PC, SAVE_PC
- | stg L:RB, TMP_STACK
- |.if resume
-- | checktptp L:TMPR2, LJ_TTHREAD, ->fff_fallback
-+ | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback
- |.endif
-- | ltg TMPR2, L:RB->cframe; jne ->fff_fallback
-+ | ltg TMPR0, L:RB->cframe; jne ->fff_fallback
- | cli L:RB->status, LUA_YIELD; jh ->fff_fallback
- | lg RA, L:RB->top
- | je >1 // Status != LUA_YIELD (i.e. 0)?
-@@ -1523,8 +1523,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc coroutine_yield
- | lg L:RB, SAVE_L
-- | lg TMPR2, L:RB->cframe
-- | tmll TMPR2, CFRAME_RESUME
-+ | lg TMPR0, L:RB->cframe
-+ | tmll TMPR0, CFRAME_RESUME
- | je ->fff_fallback
- | stg BASE, L:RB->base
- | sllg RD, NARGS:RD, 3
-@@ -1585,8 +1585,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |6: // Fill up results with nil.
- | sllg TMPR1, RD, 3
-- | lghi TMPR2, LJ_TNIL
-- | stg TMPR2, -24(TMPR1, BASE)
-+ | lghi TMPR0, LJ_TNIL
-+ | stg TMPR0, -24(TMPR1, BASE)
- | la RD, 1(RD)
- | j <5
- |
-@@ -1612,9 +1612,9 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc math_log
- | chi NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-- | lg TMPR2, 0(BASE)
-+ | lg TMPR0, 0(BASE)
- | ld FARG1, 0(BASE)
-- | checknumtp TMPR2, ->fff_fallback
-+ | checknumtp TMPR0, ->fff_fallback
- | lgr RB, BASE
- | brasl r14, extern log
- | lgr BASE, RB
-@@ -1652,10 +1652,10 @@ static void build_subroutines(BuildCtx *ctx)
- | math_extern2 fmod
- |
- |.ffunc_2 math_ldexp
-- | lg TMPR2, 0(BASE)
-+ | lg TMPR0, 0(BASE)
- | ld FARG1, 0(BASE)
- | lg CARG1, 8(BASE)
-- | checknumtp TMPR2, ->fff_fallback
-+ | checknumtp TMPR0, ->fff_fallback
- | checkinttp CARG1, ->fff_fallback
- | lgfr CARG1, CARG1
- | lgr RB, BASE
-@@ -1695,11 +1695,11 @@ static void build_subroutines(BuildCtx *ctx)
- | checkint RB, >4
- |1: // Handle integers.
- | clgr RA, TMPR1; jhe ->fff_resRB
-- | lg TMPR2, -8(RA, BASE)
-- | checkint TMPR2, >3
-- | cr RB, TMPR2
-+ | lg TMPR0, -8(RA, BASE)
-+ | checkint TMPR0, >3
-+ | cr RB, TMPR0
- | cjmp >2
-- | lgr RB, TMPR2
-+ | lgr RB, TMPR0
- |2:
- | aghi RA, 8
- | j <1
-@@ -1707,7 +1707,7 @@ static void build_subroutines(BuildCtx *ctx)
- | jh ->fff_fallback
- | // Convert intermediate result to number and continue below.
- | cdfbr f0, RB
-- | ldgr f1, TMPR2
-+ | ldgr f1, TMPR0
- | j >6
- |4:
- | jh ->fff_fallback
-@@ -1737,7 +1737,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg STR:RB, 0(BASE)
- | checkstr STR:RB, ->fff_fallback
- | lg PC, -8(BASE)
-- | ltg TMPR2, STR:RB->len
-+ | ltg TMPR0, STR:RB->len
- | je ->fff_res0 // Return no results for empty string.
- | llgc RB, STR:RB[1]
- | j ->fff_resi
-@@ -1907,8 +1907,8 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_resbit
- |
- |.ffunc_bit bit_bnot, 1
-- | lhi TMPR2, -1
-- | xr RB, TMPR2 // TODO: use xilf on newer models?
-+ | lhi TMPR0, -1
-+ | xr RB, TMPR0 // TODO: use xilf on newer models?
- | j ->fff_resbit
- |
- |->fff_fallback_bit_op:
-@@ -1943,9 +1943,9 @@ static void build_subroutines(BuildCtx *ctx)
- | checkint RA, ->fff_fallback
- | // TODO: shorter sequence of instructions to convert right rotate into left rotate.
- | nill RA, 0x1f
-- | lghi TMPR2, 32
-- | sr TMPR2, RA
-- | lr RA, TMPR2
-+ | lghi TMPR0, 32
-+ | sr TMPR0, RA
-+ | lr RA, TMPR0
- | rll RB, RB, 0(RA)
- | j ->fff_resbit
- |
-@@ -2050,9 +2050,9 @@ static void build_subroutines(BuildCtx *ctx)
- |
- | tmll RD, LUA_MASKLINE|LUA_MASKCOUNT
- | je >5
-- | ly TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
-- | ahi TMPR2, -1
-- | sty TMPR2, (DISPATCH_GL(hookcount))(DISPATCH)
-+ | ly TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
-+ | ahi TMPR0, -1
-+ | sty TMPR0, (DISPATCH_GL(hookcount))(DISPATCH)
- | je >1
- | tmll RD, LUA_MASKLINE
- | je >5
-@@ -2103,8 +2103,8 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr CARG1, L:RB
- | brasl r14, extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
- | // ASMFunction returned in r2 (CRET1).
-- | lghi TMPR2, 0
-- | stg TMPR2, SAVE_PC // Invalidate for subsequent line hook.
-+ | lghi TMPR0, 0
-+ | stg TMPR0, SAVE_PC // Invalidate for subsequent line hook.
- |.if JIT
- | nill PC, -2
- |.endif
-@@ -2604,8 +2604,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |3:
- | jh ->vmeta_unm
- | // Toggle sign bit.
-- | llihh TMPR2, 0x8000
-- | xgr RB, TMPR2
-+ | llihh TMPR0, 0x8000
-+ | xgr RB, TMPR0
- | j <1
- break;
- case BC_LEN:
-@@ -2793,10 +2793,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | sllg RC, RC, 3
- | ld FARG1, 0(RB, BASE)
- | ld FARG2, 0(RC, BASE)
-- | lg TMPR2, 0(RB, BASE)
-- | checknumtp TMPR2, ->vmeta_arith_vvo
-- | lg TMPR2, 0(RC, BASE)
-- | checknumtp TMPR2, ->vmeta_arith_vvo
-+ | lg TMPR0, 0(RB, BASE)
-+ | checknumtp TMPR0, ->vmeta_arith_vvo
-+ | lg TMPR0, 0(RC, BASE)
-+ | checknumtp TMPR0, ->vmeta_arith_vvo
- | lgr RB, BASE // TODO: redundant, BASE is currently callee-saved.
- | brasl r14, extern pow // double pow(double x, double y), result in f0.
- | llgc RA, PC_RA
-@@ -2874,8 +2874,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_AD // RA = dst, RD = primitive type (~)
- | sllg RA, RA, 3
- | sllg RD, RD, 47
-- | lghi TMPR2, -1
-- | xgr RD, TMPR2 // not
-+ | lghi TMPR0, -1
-+ | xgr RD, TMPR0 // not
- | stg RD, 0(RA, BASE)
- | ins_next
- break;
-@@ -2994,8 +2994,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cleartp LFUNC:RB
- | lg UPVAL:RB, (offsetof(GCfuncL, uvptr))(RA, LFUNC:RB)
- | sllg RD, RD, 47
-- | lghi TMPR2, -1
-- | xgr RD, TMPR2
-+ | lghi TMPR0, -1
-+ | xgr RD, TMPR0
- | lg RA, UPVAL:RB->v
- | stg RD, 0(RA)
- | ins_next
-@@ -3004,7 +3004,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_AD // RA = level, RD = target
- | branchPC RD // Do this first to free RD.
- | lg L:RB, SAVE_L
-- | ltg TMPR2, L:RB->openupval
-+ | ltg TMPR0, L:RB->openupval
- | je >1
- | stg BASE, L:RB->base
- | sllg RA, RA, 3
-@@ -3046,9 +3046,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | jhe >5
- |1:
- | srlg CARG3, RD, 11
-- | llill TMPR2, 0x7ff
-- | nr RD, TMPR2
-- | cr RD, TMPR2
-+ | llill TMPR0, 0x7ff
-+ | nr RD, TMPR0
-+ | cr RD, TMPR0
- | je >3
- |2:
- | lgr L:CARG1, L:RB
-@@ -3094,8 +3094,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lgr L:CARG1, L:RB
- | brasl r14, extern lj_gc_step_fixtop // (lua_State *L)
- | llgh RD, PC_RD // Need to reload RD.
-- | lghi TMPR2, -1
-- | xgr RD, TMPR2 // not RD
-+ | lghi TMPR0, -1
-+ | xgr RD, TMPR0 // not RD
- | j <2
- break;
-
-@@ -3259,8 +3259,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | llgfr RC, RC
- | sllg RC, RC, 3
- | ag RC, TAB:RB->array
-- | lghi TMPR2, LJ_TNIL
-- | cg TMPR2, 0(RC)
-+ | lghi TMPR0, LJ_TNIL
-+ | cg TMPR0, 0(RC)
- | je >3 // Previous value is nil?
- |1:
- | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-@@ -3292,8 +3292,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_ABC // RA = src, RB = table, RC = str const (~)
- | sllg RB, RB, 3
- | lg TAB:RB, 0(RB, BASE)
-- | lghi TMPR2, -1
-- | xgr RC, TMPR2 // ~RC
-+ | lghi TMPR0, -1
-+ | xgr RC, TMPR0 // ~RC
- | sllg RC, RC, 3
- | lg STR:RC, 0(RC, KBASE)
- | checktab TAB:RB, ->vmeta_tsets
-@@ -3309,8 +3309,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cg ITYPE, NODE:TMPR1->key
- | jne >5
- | // Ok, key found. Assumes: offsetof(Node, val) == 0
-- | lghi TMPR2, LJ_TNIL
-- | cg TMPR2, 0(TMPR1)
-+ | lghi TMPR0, LJ_TNIL
-+ | cg TMPR0, 0(TMPR1)
- | je >4 // Previous value is nil?
- |2:
- | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-@@ -3369,8 +3369,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | jhe ->vmeta_tsetb
- | sllg RC, RC, 3
- | ag RC, TAB:RB->array
-- | lghi TMPR2, LJ_TNIL
-- | cg TMPR2, 0(RC)
-+ | lghi TMPR0, LJ_TNIL
-+ | cg TMPR0, 0(RC)
- | je >3 // Previous value is nil?
- |1:
- | tm TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-@@ -3578,10 +3578,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |1: // Traverse array part.
- | clr RC, TMPR1; jhe >5 // Index points after array part?
- | sllg RD, RC, 3 // Warning: won't work if RD==RC!
-- | lg TMPR2, 0(RD, ITYPE)
-- | cghi TMPR2, LJ_TNIL; je >4
-+ | lg TMPR0, 0(RD, ITYPE)
-+ | cghi TMPR0, LJ_TNIL; je >4
- | // Copy array slot to returned value.
-- | lgr RB, TMPR2
-+ | lgr RB, TMPR0
- | stg RB, 8(RA, BASE)
- | // Return array index as a numeric key.
- | setint ITYPE, RC
-@@ -3605,8 +3605,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | llgfr ITYPE, RC
- | mghi ITYPE, #NODE
- | ag NODE:ITYPE, TAB:RB->node
-- | lghi TMPR2, LJ_TNIL
-- | cg TMPR2, NODE:ITYPE->val; je >7
-+ | lghi TMPR0, LJ_TNIL
-+ | cg TMPR0, NODE:ITYPE->val; je >7
- | ar TMPR1, RC
- | ahi TMPR1, 1
- | // Copy key and value from hash slot.
-@@ -3629,8 +3629,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | checkfunc CFUNC:RB, >5
- | lg TMPR1, -16(RA, BASE)
- | checktptp TMPR1, LJ_TTAB, >5
-- | lghi TMPR2, LJ_TNIL
-- | cg TMPR2, -8(RA, BASE); jne >5
-+ | lghi TMPR0, LJ_TNIL
-+ | cg TMPR0, -8(RA, BASE); jne >5
- | llgc TMPR1, CFUNC:RB->ffid
- | clfi TMPR1, (uint8_t)FF_next_N; jne >5
- | branchPC RD
-@@ -3640,8 +3640,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |1:
- | ins_next
- |5: // Despecialize bytecode if any of the checks fail.
-- | lghi TMPR2, BC_JMP
-- | stcy TMPR2, PC_OP
-+ | lghi TMPR0, BC_JMP
-+ | stcy TMPR0, PC_OP
- | branchPC RD
- | mvi 3(PC), BC_ITERC
- | j <1
-@@ -3672,8 +3672,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | clgr TMPR1, BASE // No more vararg slots?
- | jl <1
- |2: // Fill up remainder with nil.
-- | lghi TMPR2, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
-- | stg TMPR2, 0(RA)
-+ | lghi TMPR0, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
-+ | stg TMPR0, 0(RA)
- | la RA, 8(RA)
- | clgr RA, RB
- | jl <2
-@@ -3681,8 +3681,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next
- |
- |5: // Copy all varargs.
-- | lghi TMPR2, 1
-- | st TMPR2, SAVE_MULTRES // MULTRES = 0+1
-+ | lghi TMPR0, 1
-+ | st TMPR0, SAVE_MULTRES // MULTRES = 0+1
- | lgr RC, BASE
- | slgr RC, TMPR1
- | jno <3 // No vararg slots? (borrow or zero)
-@@ -3840,8 +3840,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- #ifdef LUA_USE_ASSERT
- | // lg TMPR1, FOR_STOP
- | checkinttp TMPR1, ->assert_bad_for_arg_type
-- | lg TMPR2, FOR_STEP
-- | checkinttp TMPR2, ->assert_bad_for_arg_type
-+ | lg TMPR0, FOR_STEP
-+ | checkinttp TMPR0, ->assert_bad_for_arg_type
- #endif
- | lg ITYPE, FOR_STEP
- | chi ITYPE, 0; jl >5
-@@ -3903,14 +3903,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | jhe ->vmeta_for
- }
- if (!vk) {
-- | lg TMPR2, FOR_STOP
-- | checknumtp TMPR2, ->vmeta_for
-+ | lg TMPR0, FOR_STOP
-+ | checknumtp TMPR0, ->vmeta_for
- } else {
- #ifdef LUA_USE_ASSERT
-- | lg TMPR2, FOR_STOP
-- | checknumtp TMPR2, ->assert_bad_for_arg_type
-- | lg TMPR2, FOR_STEP
-- | checknumtp TMPR2, ->assert_bad_for_arg_type
-+ | lg TMPR0, FOR_STOP
-+ | checknumtp TMPR0, ->assert_bad_for_arg_type
-+ | lg TMPR0, FOR_STEP
-+ | checknumtp TMPR0, ->assert_bad_for_arg_type
- #endif
- }
- | lg RB, FOR_STEP
-@@ -4042,8 +4042,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |3: // Clear missing parameters.
- | // TODO: optimize this. Some of this can be hoisted.
- | sllg TMPR1, NARGS:RD, 3
-- | lghi TMPR2, LJ_TNIL
-- | stg TMPR2, -8(TMPR1, BASE)
-+ | lghi TMPR0, LJ_TNIL
-+ | stg TMPR0, -8(TMPR1, BASE)
- | la RD, 1(RD)
- | clgr RD, RA
- | jle <3
---
-2.20.1
-
-
-From af09a721be79f674b296263d7a7918ab99da484d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 16 Jan 2017 16:28:21 -0500
-Subject: [PATCH 224/247] Implement return hooks.
-
----
- src/vm_s390x.dasc | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index e43e774..6b86c03 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2040,8 +2040,10 @@ static void build_subroutines(BuildCtx *ctx)
- | stg r0, 0
- |
- |->vm_rethook: // Dispatch target for return hooks.
-- | stg r0, 0
-- | stg r0, 0
-+ | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
-+ | tmll RD, HOOK_ACTIVE
-+ | jne >5
-+ | j >1
- |
- |->vm_inshook: // Dispatch target for instr/line hooks.
- | llgc RD, (DISPATCH_GL(hookmask))(DISPATCH)
---
-2.20.1
-
-
-From 74b72dc05d4f1a53164146e4473c7b0149fb6915 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 17 Jan 2017 09:46:02 -0500
-Subject: [PATCH 225/247] Swap register assignments for BASE and RB.
-
-Feels more natural this way round. Puts all parameters in the range
-[r4,r7] and BASE is now the register used as the literal pool, which
-seems appropriate.
----
- src/vm_s390x.dasc | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 6b86c03..b5d5db6 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -38,15 +38,15 @@
- |//-----------------------------------------------------------------------
- |
- |// Fixed register assignments for the interpreter, callee-saved.
--|.define BASE, r7 // Base of current Lua stack frame.
- |.define KBASE, r8 // Constants of current Lua function.
- |.define PC, r9 // Next PC.
- |.define DISPATCH, r10 // Opcode dispatch table.
- |.define ITYPE, r11 // Temporary used for type information.
-+|.define BASE, r13 // Base of current Lua stack frame.
- |
- |// The following temporaries are not saved across C calls, except for RB.
- |.define RA, r4 // Overlaps CARG3.
--|.define RB, r13 // Must be callee-save.
-+|.define RB, r7 // Must be callee-save.
- |.define RC, r5 // Overlaps CARG4.
- |.define RD, r6 // Overlaps CARG5.
- |
---
-2.20.1
-
-
-From cad31012d8aa64eccc15c88c12cdc4ce9987eb59 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 17 Jan 2017 14:05:31 -0500
-Subject: [PATCH 226/247] Avoid saving/restoring floating point registers when
- entering the interpreter.
-
-We only need to worry about doing this if we actually use those
-floating point registers.
----
- src/vm_s390x.dasc | 18 +-----------------
- 1 file changed, 1 insertion(+), 17 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index b5d5db6..cb58cb9 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -103,26 +103,10 @@
- |.macro saveregs
- | stmg r6, r15, SAVE_GPRS_P
- | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
--| // TODO: save backchain?
--| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
--| std f9, SAVE_FPR9
--| std f10, SAVE_FPR10
--| std f11, SAVE_FPR11
--| std f12, SAVE_FPR12
--| std f13, SAVE_FPR13
--| std f14, SAVE_FPR14
--| std f15, SAVE_FPR15
-+| // f8-f15 are also callee-save but are not currently used in the interpreter.
- |.endmacro
- |
- |.macro restoreregs
--| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
--| ld f9, SAVE_FPR9
--| ld f10, SAVE_FPR10
--| ld f11, SAVE_FPR11
--| ld f12, SAVE_FPR12
--| ld f13, SAVE_FPR13
--| ld f14, SAVE_FPR14
--| ld f15, SAVE_FPR15
- | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
- |.endmacro
- |
---
-2.20.1
-
-
-From 83ee1f95244ae4a7939e045004c7c752368e7cc6 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 17 Jan 2017 14:14:25 -0500
-Subject: [PATCH 227/247] Don't bother saving BASE before a call unless it will
- be modified.
-
-BASE is callee-saved anyway, so we don't need to save it in RB.
----
- src/vm_s390x.dasc | 30 +-----------------------------
- 1 file changed, 1 insertion(+), 29 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index cb58cb9..dbcd08b 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -22,7 +22,7 @@
- |// clfi (compare logical immediate) [requires z9-109]
- |// ldgr (load FPR from GPR) [requires z9-109 GA3]
- |// lgdr (load GPR from FPR) [requires z9-109 GA3]
--|// lay (load address) [requires z900 GA2]
-+|// lay (load address) [requires z900 GA2]
- |// ldy (load (long bfp)) [requires z900 GA2]
- |// stdy (store (long bfp)) [requires z900 GA2]
- |// TODO: alternative instructions?
-@@ -695,12 +695,10 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_tgetr:
- | lgr CARG1, TAB:RB
-- | lgr RB, BASE // Save BASE.
- | lgfr CARG2, RC
- | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
- | // cTValue * or NULL returned in r2 (CRET1).
- | llgc RA, PC_RA
-- | lgr BASE, RB // Restore BASE.
- | ltgr RC, CRET1
- | jne ->BC_TGETR_Z
- | lghi ITYPE, LJ_TNIL
-@@ -775,14 +773,12 @@ static void build_subroutines(BuildCtx *ctx)
- | lg L:CARG1, SAVE_L
- | lgr CARG2, TAB:RB
- | stg BASE, L:CARG1->base
-- | lgr RB, BASE // Save BASE (TODO: BASE is callee-saved anyway on s390x).
- | lgfr CARG3, RC
- | stg PC, SAVE_PC
- | brasl r14, extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
- | // TValue * returned in r2 (CRET1).
- | lgr RC, CRET1
- | llgc RA, PC_RA
-- | lgr BASE, RB // Restore BASE.
- | j ->BC_TSETR_Z
- |
- |//-- Comparison metamethods ---------------------------------------------
-@@ -1163,12 +1159,10 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc_2 rawget
- | lg TAB:CARG2, 0(BASE)
- | checktab TAB:CARG2, ->fff_fallback
-- | lgr RB, BASE // Save BASE.
- | la CARG3, 8(BASE)
- | lg CARG1, SAVE_L
- | brasl r14, extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
- | // cTValue * returned in r2 (CRET1).
-- | lgr BASE, RB // Restore BASE.
- | // Copy table slot.
- | lg RB, 0(CRET1)
- | lg PC, -8(BASE)
-@@ -1290,11 +1284,9 @@ static void build_subroutines(BuildCtx *ctx)
- |2: // Check for empty hash part first. Otherwise call C function.
- | lt TMPR0, TAB:RB->hmask; je ->fff_res0
- | lgr CARG1, TAB:RB
-- | lgr RB, BASE // Save BASE. // TODO: needed?
- | lgfr CARG2, RA
- | brasl r14, extern lj_tab_getinth // (GCtab *t, int32_t key)
- | // cTValue * or NULL returned in r2 (CRET1).
-- | lgr BASE, RB
- | ltgr RD, CRET1
- | jne <1
- |->fff_res0:
-@@ -1599,24 +1591,18 @@ static void build_subroutines(BuildCtx *ctx)
- | lg TMPR0, 0(BASE)
- | ld FARG1, 0(BASE)
- | checknumtp TMPR0, ->fff_fallback
-- | lgr RB, BASE
- | brasl r14, extern log
-- | lgr BASE, RB
- | j ->fff_resf0
- |
- |.macro math_extern, func
- | .ffunc_n math_ .. func
-- | lgr RB, BASE
- | brasl r14, extern func
-- | lgr BASE, RB
- | j ->fff_resf0
- |.endmacro
- |
- |.macro math_extern2, func
- | .ffunc_nn math_ .. func
-- | lgr RB, BASE
- | brasl r14, extern func
-- | lgr BASE, RB
- | j ->fff_resf0
- |.endmacro
- |
-@@ -1642,16 +1628,12 @@ static void build_subroutines(BuildCtx *ctx)
- | checknumtp TMPR0, ->fff_fallback
- | checkinttp CARG1, ->fff_fallback
- | lgfr CARG1, CARG1
-- | lgr RB, BASE
- | brasl r14, extern ldexp // (double, int)
-- | lgr BASE, RB
- | j ->fff_resf0
- |
- |.ffunc_n math_frexp
-- | lgr RB, BASE
- | la CARG1, TMP_STACK
- | brasl r14, extern frexp
-- | lgr BASE, RB
- | llgf RB, TMP_STACK
- | lg PC, -8(BASE)
- | stdy f0, -16(BASE)
-@@ -1661,10 +1643,8 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_res
- |
- |.ffunc_n math_modf
-- | lgr RB, BASE
- | lay CARG1, -16(BASE)
- | brasl r14, extern modf // (double, double*)
-- | lgr BASE, RB
- | lg PC, -8(BASE)
- | stdy f0, -8(BASE)
- | lghi RD, 1+2
-@@ -2615,11 +2595,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |3:
- #endif
- |->BC_LEN_Z:
-- | lgr RB, BASE // Save BASE.
- | brasl r14, extern lj_tab_len // (GCtab *t)
- | // Length of table returned in r2 (CRET1).
- | lgr RD, CRET1
-- | lgr BASE, RB // Restore BASE.
- | llgc RA, PC_RA
- | j <1
- #if LJ_52
-@@ -2783,10 +2761,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | checknumtp TMPR0, ->vmeta_arith_vvo
- | lg TMPR0, 0(RC, BASE)
- | checknumtp TMPR0, ->vmeta_arith_vvo
-- | lgr RB, BASE // TODO: redundant, BASE is currently callee-saved.
- | brasl r14, extern pow // double pow(double x, double y), result in f0.
- | llgc RA, PC_RA
-- | lgr BASE, RB
- | sllg RA, RA, 3
- | std f0, 0(RA, BASE)
- | ins_next
-@@ -2925,10 +2901,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | je <1
- | // Crossed a write barrier. Move the barrier forward.
- | lgr CARG2, RB
-- | lgr RB, BASE // Save BASE.
- | lay GL:CARG1, GG_DISP2G(DISPATCH)
- | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
-- | lgr BASE, RB // Restore BASE.
- | j <1
- break;
- #undef TV2MARKOFS
-@@ -2954,11 +2928,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | tm UPVAL:RB->closed, 0xff
- | je <1
- | // Crossed a write barrier. Move the barrier forward.
-- | lgr RB, BASE
- | lgr CARG2, RD
- | lay GL:CARG1, GG_DISP2G(DISPATCH)
- | brasl r14, extern lj_gc_barrieruv // (global_State *g, TValue *tv)
-- | lgr BASE, RB // Restore BASE.
- | j <1
- break;
- case BC_USETN:
---
-2.20.1
-
-
-From 14ece27b3a5e5f61a4a7a2fd169276486780da13 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 17 Jan 2017 17:26:00 -0500
-Subject: [PATCH 228/247] Add debug frame information to the interpreter.
-
-Allows gdb to backtrace from inside the interpreter. Still need to
-add FFI support.
----
- src/vm_s390x.dasc | 43 +++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 43 insertions(+)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index dbcd08b..358fb76 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -4123,4 +4123,47 @@ static int build_backend(BuildCtx *ctx)
- /* Emit pseudo frame-info for all assembler functions. */
- static void emit_asm_debug(BuildCtx *ctx)
- {
-+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-+ switch (ctx->mode) {
-+ case BUILD_elfasm:
-+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
-+ fprintf(ctx->fp,
-+ ".Lframe0:\n"
-+ "\t.long .LECIE0-.LSCIE0\n"
-+ ".LSCIE0:\n"
-+ "\t.long 0xffffffff\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"\"\n"
-+ "\t.uleb128 1\n"
-+ "\t.sleb128 -8\n"
-+ "\t.byte 0xe\n"
-+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"
-+ "\t.align 8\n"
-+ ".LECIE0:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE0:\n"
-+ "\t.long .LEFDE0-.LASFDE0\n"
-+ ".LASFDE0:\n"
-+ "\t.long .Lframe0\n"
-+ "\t.quad .Lbegin\n"
-+ "\t.quad %d\n"
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
-+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
-+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
-+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
-+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
-+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
-+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
-+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
-+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
-+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
-+ "\t.align 8\n"
-+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
-+ /* TODO: FFI, UNWIND */
-+ break;
-+ default: /* No other modes. */
-+ break;
-+ }
-+
- }
---
-2.20.1
-
-
-From 927ee9593562d622fd72f3161843ff0a37197aa4 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 18 Jan 2017 12:38:53 -0500
-Subject: [PATCH 229/247] Implement the LUAJIT_UNWIND_EXTERNAL option.
-
-Still need to add support for FFI.
----
- src/lj_err.c | 3 +++
- src/vm_s390x.dasc | 43 ++++++++++++++++++++++++++++++++++++++++++-
- 2 files changed, 45 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_err.c b/src/lj_err.c
-index abf176e..aa23265 100644
---- a/src/lj_err.c
-+++ b/src/lj_err.c
-@@ -235,6 +235,9 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
- return _URC_FATAL_PHASE1_ERROR;
- UNUSED(uexclass);
- cf = (void *)_Unwind_GetCFA(ctx);
-+#ifdef LJ_TARGET_S390X
-+ cf -= 160; /* CFA points 160 bytes above r15. */
-+#endif
- L = cframe_L(cf);
- if ((actions & _UA_SEARCH_PHASE)) {
- #if LJ_UNWIND_EXT
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 358fb76..f2dd30b 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -4160,7 +4160,48 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
- "\t.align 8\n"
- ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
-- /* TODO: FFI, UNWIND */
-+#if !LJ_NO_UNWIND
-+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
-+ fprintf(ctx->fp,
-+ ".Lframe1:\n"
-+ "\t.long .LECIE1-.LSCIE1\n"
-+ ".LSCIE1:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"zPR\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -8\n"
-+ "\t.byte 0xe\n"
-+ "\t.uleb128 6\n" /* augmentation length */
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.long lj_err_unwind_dwarf-.\n"
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"
-+ "\t.align 8\n"
-+ ".LECIE1:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE2:\n"
-+ "\t.long .LEFDE2-.LASFDE2\n"
-+ ".LASFDE2:\n"
-+ "\t.long .LASFDE2-.Lframe1\n"
-+ "\t.long .Lbegin-.\n"
-+ "\t.long %d\n"
-+ "\t.uleb128 0\n" /* augmentation length */
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
-+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
-+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
-+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
-+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
-+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
-+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
-+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
-+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
-+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
-+ "\t.align 8\n"
-+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
-+#endif
-+ /* TODO: FFI */
- break;
- default: /* No other modes. */
- break;
---
-2.20.1
-
-
-From 4380c472a203676de04483d54d3c1d6e9ba1aa12 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 18 Jan 2017 14:18:40 -0500
-Subject: [PATCH 230/247] Add file change missing from previous commit (needed
- for EXT unwinding).
-
----
- src/lj_arch.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 32d8bb3..0da6420 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -393,7 +393,7 @@
- #define LJ_ARCH_BITS 64
- #define LJ_ARCH_ENDIAN LUAJIT_BE
- #define LJ_TARGET_S390X 1
--#define LJ_TARGET_EHRETREG 0
-+#define LJ_TARGET_EHRETREG 0xe
- #define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
- #define LJ_TARGET_MASKSHIFT 1
- #define LJ_TARGET_MASKROT 1
---
-2.20.1
-
-
-From 4abe79be62fc2fbb146dbcc4a12daafbc5507f2f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 18 Jan 2017 15:08:58 -0500
-Subject: [PATCH 231/247] Implement support for unwinding through FFI stack
- frames.
-
-The DWARF here is a little incomplete, unwinding won't work while
-executing the first two instructions in vm_ffi_call.
----
- src/vm_s390x.dasc | 68 ++++++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 65 insertions(+), 3 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f2dd30b..f58d369 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2161,8 +2161,9 @@ static void build_subroutines(BuildCtx *ctx)
- |.if FFI
- | .type CCSTATE, CCallState, r8
- | stmg r6, r15, 48(sp)
-+ | lgr r13, sp // Use r13 as frame pointer.
- | lgr CCSTATE, CARG1
-- | lg r7, CCSTATE->func // TODO: move further up?
-+ | lg r7, CCSTATE->func
- |
- | // Readjust stack.
- | sgf sp, CCSTATE->spadj
-@@ -2183,7 +2184,7 @@ static void build_subroutines(BuildCtx *ctx)
- | stg CRET1, CCSTATE->gpr[0]
- | std f0, CCSTATE->fpr[0]
- |
-- | agf sp, CCSTATE->spadj
-+ | lgr sp, r13
- | lmg r6, r15, 48(sp)
- | br r14
- |
-@@ -4160,6 +4161,29 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
- "\t.align 8\n"
- ".LEFDE0:\n\n", fcofs, CFRAME_SIZE+160);
-+#if LJ_HASFFI
-+ fprintf(ctx->fp,
-+ ".LSFDE1:\n"
-+ "\t.long .LEFDE1-.LASFDE1\n"
-+ ".LASFDE1:\n"
-+ "\t.long .Lframe0\n"
-+ "\t.quad lj_vm_ffi_call\n"
-+ "\t.quad %d\n"
-+ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
-+ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
-+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
-+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
-+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
-+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
-+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
-+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
-+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
-+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
-+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
-+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
-+ "\t.align 8\n"
-+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
-+#endif
- #if !LJ_NO_UNWIND
- fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
- fprintf(ctx->fp,
-@@ -4200,11 +4224,49 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
- "\t.align 8\n"
- ".LEFDE2:\n\n", fcofs, CFRAME_SIZE+160);
-+#if LJ_HASFFI
-+ fprintf(ctx->fp,
-+ ".Lframe2:\n"
-+ "\t.long .LECIE2-.LSCIE2\n"
-+ ".LSCIE2:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"zR\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -8\n"
-+ "\t.byte 0xe\n"
-+ "\t.uleb128 1\n" /* augmentation length */
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.uleb128 0xf\n\t.uleb128 160\n"
-+ "\t.align 8\n"
-+ ".LECIE2:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE3:\n"
-+ "\t.long .LEFDE3-.LASFDE3\n"
-+ ".LASFDE3:\n"
-+ "\t.long .LASFDE3-.Lframe2\n"
-+ "\t.long lj_vm_ffi_call-.\n"
-+ "\t.long %d\n"
-+ "\t.uleb128 0\n" /* augmentation length */
-+ "\t.byte 0xe\n\t.uleb128 160\n" /* def_cfa_offset */
-+ "\t.byte 0xd\n\t.uleb128 0xd\n" /* def_cfa_register r13 (FP) */
-+ "\t.byte 0x86\n\t.uleb128 0xe\n" /* offset r6 */
-+ "\t.byte 0x87\n\t.uleb128 0xd\n" /* offset r7 */
-+ "\t.byte 0x88\n\t.uleb128 0xc\n" /* offset r8 */
-+ "\t.byte 0x89\n\t.uleb128 0xb\n" /* offset r9 */
-+ "\t.byte 0x8a\n\t.uleb128 0xa\n" /* offset r10 */
-+ "\t.byte 0x8b\n\t.uleb128 0x9\n" /* offset r11 */
-+ "\t.byte 0x8c\n\t.uleb128 0x8\n" /* offset r12 */
-+ "\t.byte 0x8d\n\t.uleb128 0x7\n" /* offset r13 */
-+ "\t.byte 0x8e\n\t.uleb128 0x6\n" /* offset r14 */
-+ "\t.byte 0x8f\n\t.uleb128 0x5\n" /* offset r15 */
-+ "\t.align 8\n"
-+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
-+#endif
- #endif
- /* TODO: FFI */
- break;
- default: /* No other modes. */
- break;
- }
--
- }
---
-2.20.1
-
-
-From 57e718bac6ab8ee1ca75565ad0cbc637f979efdb Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 18 Jan 2017 15:39:39 -0500
-Subject: [PATCH 232/247] Revert "Avoid saving/restoring floating point
- registers when entering the interpreter."
-
-This reverts commit e151edea531cf9341cebb1763136529ca8a39bb1.
----
- src/vm_s390x.dasc | 18 +++++++++++++++++-
- 1 file changed, 17 insertions(+), 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f58d369..d1cf952 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -103,10 +103,26 @@
- |.macro saveregs
- | stmg r6, r15, SAVE_GPRS_P
- | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
--| // f8-f15 are also callee-save but are not currently used in the interpreter.
-+| // TODO: save backchain?
-+| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+| std f9, SAVE_FPR9
-+| std f10, SAVE_FPR10
-+| std f11, SAVE_FPR11
-+| std f12, SAVE_FPR12
-+| std f13, SAVE_FPR13
-+| std f14, SAVE_FPR14
-+| std f15, SAVE_FPR15
- |.endmacro
- |
- |.macro restoreregs
-+| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+| ld f9, SAVE_FPR9
-+| ld f10, SAVE_FPR10
-+| ld f11, SAVE_FPR11
-+| ld f12, SAVE_FPR12
-+| ld f13, SAVE_FPR13
-+| ld f14, SAVE_FPR14
-+| ld f15, SAVE_FPR15
- | lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
- |.endmacro
- |
---
-2.20.1
-
-
-From 440a8ec59937da9b682410f267f947cccae6d75b Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 18 Jan 2017 15:43:24 -0500
-Subject: [PATCH 233/247] Add some TODOs to the saveregs and restoreregs
- macros.
-
-When unwinding the stack using the internal unwinder we may need to
-restore floating point registers clobbered by C calls. Since I'm
-not sure yet I'm going to be conservative and save/restore them
-for now. Most probably we want to, at the very least, avoid restoring
-them when cleanly exiting the interpreter.
----
- src/vm_s390x.dasc | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index d1cf952..637a174 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -104,6 +104,7 @@
- | stmg r6, r15, SAVE_GPRS_P
- | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
- | // TODO: save backchain?
-+| // TODO: is it necessary to save all float registers?
- | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
- | std f9, SAVE_FPR9
- | std f10, SAVE_FPR10
-@@ -115,6 +116,7 @@
- |.endmacro
- |
- |.macro restoreregs
-+| // TODO: restore float registers only when unwinding?
- | ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
- | ld f9, SAVE_FPR9
- | ld f10, SAVE_FPR10
---
-2.20.1
-
-
-From f3f5f7a36da8fb5144874589ca23394b4ec6bf1d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 18 Jan 2017 16:18:56 -0500
-Subject: [PATCH 234/247] Remove unnecessary register moves.
-
----
- src/vm_s390x.dasc | 5 +----
- 1 file changed, 1 insertion(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 637a174..41b11cc 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -272,12 +272,9 @@
- |.define PC_RD, -4(PC)
- |
- |.macro branchPC, reg
--| // TODO: optimize this, was just lea PC, [PC+reg*4-BCBIAS_J*4].
--| // Can't clobber TMPR1 or condition code.
--| lgr TMPR0, TMPR1 // Workaround because TMPR0 == r0 and can't be used in lay.
-+| // Must not clobber condition code.
- | sllg TMPR1, reg, 2
- | lay PC, (-BCBIAS_J*4)(TMPR1, PC)
--| lgr TMPR1, TMPR0
- |.endmacro
- |
- |// Set current VM state.
---
-2.20.1
-
-
-From b5fa01effebf48018fc1216ba81997618987294a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Wed, 18 Jan 2017 16:36:32 -0500
-Subject: [PATCH 235/247] Remove TODOs for branch on index.
-
-It is probably not suitable (relies on even-odd register numbering).
----
- src/vm_s390x.dasc | 2 --
- 1 file changed, 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 41b11cc..f1664de 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1418,7 +1418,6 @@ static void build_subroutines(BuildCtx *ctx)
- |2: // Move args to coroutine.
- | lg RC, 0(RB, PC)
- | stg RC, -8(PC)
-- | // TODO: replace with branch on count/index?
- | lay PC, -8(PC)
- | cgr PC, RA
- | jne <2
-@@ -1455,7 +1454,6 @@ static void build_subroutines(BuildCtx *ctx)
- |5: // Move results from coroutine.
- | lg RD, 0(RA)
- | stg RD, 0(RA, RB)
-- | // TODO: branch on count/index?
- | la RA, 8(RA)
- | cgr RA, KBASE
- | jne <5
---
-2.20.1
-
-
-From 90a94de9bf949ce5b9f307d782f0d53b4fe6fa9f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 30 Jan 2017 15:54:47 -0500
-Subject: [PATCH 236/247] Use z10 instructions more frequently.
-
-It would be nice to support the base z/Architecture but it has
-quite a big impact on performance to do so. z10 gives us most
-of the desirable instructions. It should be possible to emulate the
-instructions if earlier machines were ever targeted.
----
- src/vm_s390x.dasc | 29 +++++++++++------------------
- 1 file changed, 11 insertions(+), 18 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f1664de..73966f8 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2,6 +2,9 @@
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
-+|// This assembly targets the instruction set available on z10 (and newer)
-+|// machines.
-+|
- |// ELF ABI registers:
- |// r0,r1 | | volatile |
- |// r2 | parameter and return value | volatile |
-@@ -18,15 +21,6 @@
- |// ar0,ar1 | TLS | volatile |
- |// ar2-ar15 | | volatile |
- |
--|// Instructions used that are not in base z/Architecture:
--|// clfi (compare logical immediate) [requires z9-109]
--|// ldgr (load FPR from GPR) [requires z9-109 GA3]
--|// lgdr (load GPR from FPR) [requires z9-109 GA3]
--|// lay (load address) [requires z900 GA2]
--|// ldy (load (long bfp)) [requires z900 GA2]
--|// stdy (store (long bfp)) [requires z900 GA2]
--|// TODO: alternative instructions?
--|
- |.arch s390x
- |.section code_op, code_sub
- |
-@@ -213,14 +207,14 @@
- |//-----------------------------------------------------------------------
- |
- |// Macros to clear or set tags.
--|.macro cleartp, reg; sllg reg, reg, 17; srlg reg, reg, 17; .endmacro // TODO: use nihf instead? would introduce dependence on z9-109.
-+|.macro cleartp, reg
-+| nihf reg, 0x7fff
-+|.endmacro
- |.macro settp, reg, tp
--| oihh reg, ((tp>>1) &0xffff)
--| oihl reg, ((tp<<15)&0x8000)
-+| oihf reg, tp<<15
- |.endmacro
- |.macro settp, dst, reg, tp
--| llihh dst, ((tp>>1) &0xffff)
--| iihl dst, ((tp<<15)&0x8000)
-+| llihf dst, tp<<15
- | ogr dst, reg
- |.endmacro
- |.macro setint, reg
-@@ -1238,7 +1232,6 @@ static void build_subroutines(BuildCtx *ctx)
- | brasl r14, extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
- | // Flag returned in r2 (CRET1).
- | lg BASE, L:RB->base
-- | lgr RD, CRET1 // TODO: high bits needed? low bits load/test (ltr) enough?
- | ltr RD, CRET1; je >3 // End of traversal?
- | // Copy key and value to results.
- | lg RB, 8(BASE)
-@@ -1323,7 +1316,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg PC, -8(BASE)
- | stg CFUNC:RD, -16(BASE)
- | stg TMPR1, -8(BASE)
-- | llihh RD, ((int)LJ_TISNUM)>>1 // mov64 RD, ((int64_t)LJ_TISNUM<<47) // TODO: write mov64-macro, use all of TISNUM (currently this is very fragile).
-+ | llihf RD, LJ_TISNUM<<15
- | stg RD, 0(BASE)
- | lghi RD, 1+3
- | j ->fff_res
-@@ -1768,7 +1761,7 @@ static void build_subroutines(BuildCtx *ctx)
- | jle >7
- |3:
- | sr TMPR1, RA // start > end?
-- | jnhe ->fff_emptystr // TODO: not sure about this, was jl in x64.
-+ | jnhe ->fff_emptystr
- | la RD, (#STR-1)(RA, STR:RB)
- | ahi TMPR1, 1
- |4:
-@@ -3444,11 +3437,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_CALL: case BC_CALLM:
- | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
-+ | sllg RA, RA, 3
- | lgr RD, RC
- if (op == BC_CALLM) {
- | agf NARGS:RD, SAVE_MULTRES
- }
-- | sllg RA, RA, 3
- | lg LFUNC:RB, 0(RA, BASE)
- | checkfunc LFUNC:RB, ->vmeta_call_ra
- | la BASE, 16(RA, BASE)
---
-2.20.1
-
-
-From 412a8b9100af44d35fddfe6dd731ad6f3737b524 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 30 Jan 2017 16:12:50 -0500
-Subject: [PATCH 237/247] Remove various TODOs.
-
----
- src/vm_s390x.dasc | 36 ++++++++++++++----------------------
- 1 file changed, 14 insertions(+), 22 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 73966f8..ce5e14c 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -97,8 +97,6 @@
- |.macro saveregs
- | stmg r6, r15, SAVE_GPRS_P
- | lay sp, -CFRAME_SPACE(sp) // Allocate stack frame.
--| // TODO: save backchain?
--| // TODO: is it necessary to save all float registers?
- | std f8, SAVE_FPR8 // f8-f15 are callee-saved.
- | std f9, SAVE_FPR9
- | std f10, SAVE_FPR10
-@@ -110,7 +108,6 @@
- |.endmacro
- |
- |.macro restoreregs
--| // TODO: restore float registers only when unwinding?
- | ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
- | ld f9, SAVE_FPR9
- | ld f10, SAVE_FPR10
-@@ -1032,7 +1029,7 @@ static void build_subroutines(BuildCtx *ctx)
- | .ffunc_1 name
- | lg TMPR0, 0(BASE)
- | checknumtp TMPR0, ->fff_fallback
-- | op f0, 0(BASE) // TODO: might be better to unconditionally load into f1.
-+ | op f0, 0(BASE)
- |.endmacro
- |
- |.macro .ffunc_n, name
-@@ -1877,8 +1874,7 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_resbit
- |
- |.ffunc_bit bit_bnot, 1
-- | lhi TMPR0, -1
-- | xr RB, TMPR0 // TODO: use xilf on newer models?
-+ | xilf RB, -1
- | j ->fff_resbit
- |
- |->fff_fallback_bit_op:
-@@ -1891,7 +1887,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lg RA, 8(BASE)
- | checkint RA, ->fff_fallback
- | nill RA, 0x1f // Limit shift to 5-bits.
-- | ins RB, 0(RA) // TODO: fix shift args in DynASM.
-+ | ins RB, 0(RA)
- | j ->fff_resbit
- |.endmacro
- |
-@@ -2119,7 +2115,6 @@ static void build_subroutines(BuildCtx *ctx)
- |// Value to round is in f0. May clobber f0-f7 and r0. Return address is r14.
- |.macro vm_round, name, mask
- |->name:
-- | // TODO: handle edge cases?
- | lghi r0, 1
- | cdfbr f1, r0
- | didbr f0, f2, f1, mask // f0=remainder, f2=quotient.
-@@ -2211,7 +2206,7 @@ static void build_subroutines(BuildCtx *ctx)
- | ahi r1, -1
- | jl <1
- | larl r9, >5
-- | ex r1, 0(r9) // TODO: exrl is faster but needs z10.
-+ | ex r1, 0(r9)
- | j <1
- |
- |5:
-@@ -2394,9 +2389,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | tm TAB:RB->nomm, 1<<MM_eq
- | jne <2 // Or 'no __eq' flag set?
- if (vk) {
-- | lghi RB, 0 // ne = 0 // TODO: should be 32-bit?
-+ | lghi RB, 0 // ne = 0
- } else {
-- | lghi RB, 1 // ne = 1 // TODO: should be 32-bit?
-+ | lghi RB, 1 // ne = 1
- }
- | j ->vmeta_equal // Handle __eq metamethod.
- } else {
-@@ -2524,7 +2519,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTYPE:
- | ins_AD // RA = src, RD = -type
-- | lghr RD, RD // TODO: always sign extend RD?
-+ | lghr RD, RD
- | sllg RA, RA, 3
- | lg RB, 0(RA, BASE)
- | srag RB, RB, 47
-@@ -2555,7 +2550,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | srag RB, RB, 47
- | load_false RC
- | cghi RB, LJ_TTRUE
-- | je >1 // TODO: Maybe do something fancy to avoid the jump?
-+ | je >1
- | load_true RC
- |1:
- | stg RC, 0(RA, BASE)
-@@ -3133,7 +3128,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | l TMPR1, TAB:RB->hmask
- | n TMPR1, STR:RC->hash
- | lgfr TMPR1, TMPR1
-- | mghi TMPR1, #NODE // TODO: not sure about this one, original: imul TMPRd, #NODE
-+ | mghi TMPR1, #NODE
- | ag NODE:TMPR1, TAB:RB->node
- | settp ITYPE, STR:RC, LJ_TSTR
- |1:
-@@ -3310,7 +3305,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg ITYPE, TMP_STACK
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
-- | la CARG3, TMP_STACK // TODO: lea CARG3, ITYPE... not sure.
-+ | la CARG3, TMP_STACK
- | lgr CARG2, TAB:RB
- | stg PC, SAVE_PC
- | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
-@@ -3470,7 +3465,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | aghi NARGS:RD, -1
- | je >3
- |2: // Move args down.
-- | // TODO: mvc or something here?
- | lg RB, 0(RA)
- | la RA, 8(RA)
- | stg RB, 0(KBASE)
-@@ -3536,7 +3530,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | sllg RA, RA, 3
- | lg TAB:RB, -16(RA, BASE)
- | cleartp TAB:RB
-- | llgf RC, -4(RA, BASE) // Get index from control var. // TODO: ENDIANNESS DRAGONS.
-+ | llgf RC, -4(RA, BASE) // Get index from control var.
- | llgf TMPR1, TAB:RB->asize
- | la PC, 4(PC)
- | lg ITYPE, TAB:RB->array
-@@ -3552,7 +3546,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | setint ITYPE, RC
- | stg ITYPE, 0(RA, BASE)
- | ahi RC, 1
-- | sty RC, -4(RA, BASE) // Update control var. // TODO: ENDIANNESS DRAGONS
-+ | sty RC, -4(RA, BASE) // Update control var.
- |2:
- | llgh RD, PC_RD // Get target from ITERL.
- | branchPC RD
-@@ -3579,7 +3573,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lg RC, NODE:ITYPE->val
- | stg RB, 0(RA, BASE)
- | stg RC, 8(RA, BASE)
-- | sty TMPR1, -4(RA, BASE) // TODO: ENDIANNESS DRAGONS
-+ | sty TMPR1, -4(RA, BASE)
- | j <2
- |
- |7: // Skip holes in hash part.
-@@ -3613,7 +3607,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
-
- case BC_VARG:
-- | // TODO: some opportunities for branch on index in here.
- | ins_ABC // RA = base, RB = nresults+1, RC = numparams
- | sllg RA, RA, 3
- | sllg RB, RB, 3
-@@ -4091,7 +4084,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- } else {
- | // (lua_State *L, lua_CFunction f)
- | lg TMPR1, (DISPATCH_GL(wrapf))(DISPATCH)
-- | basr r14, TMPR1 // TODO: TMPR1==r14, is this ok?
-+ | basr r14, TMPR1
- }
- | // nresults returned in r2 (CRET1).
- | lgr RD, CRET1
-@@ -4270,7 +4263,6 @@ static void emit_asm_debug(BuildCtx *ctx)
- ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
- #endif
- #endif
-- /* TODO: FFI */
- break;
- default: /* No other modes. */
- break;
---
-2.20.1
-
-
-From 9b6dcfca2e300b6ff0799d2310e1652e0d7b6a40 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 30 Jan 2017 16:24:06 -0500
-Subject: [PATCH 238/247] Simplify right rotations.
-
----
- src/vm_s390x.dasc | 7 +------
- 1 file changed, 1 insertion(+), 6 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index ce5e14c..5ebd20c 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1899,7 +1899,6 @@ static void build_subroutines(BuildCtx *ctx)
- | // Note: no inline conversion from number for 2nd argument!
- | lg RA, 8(BASE)
- | checkint RA, ->fff_fallback
-- | // Note: no need to limit rotate to 5-bits (wraps).
- | rll RB, RB, 0(RA)
- | j ->fff_resbit
- |
-@@ -1907,11 +1906,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // Note: no inline conversion from number for 2nd argument!
- | lg RA, 8(BASE)
- | checkint RA, ->fff_fallback
-- | // TODO: shorter sequence of instructions to convert right rotate into left rotate.
-- | nill RA, 0x1f
-- | lghi TMPR0, 32
-- | sr TMPR0, RA
-- | lr RA, TMPR0
-+ | lcr RA, RA // Right rotate equivalent to negative left rotate.
- | rll RB, RB, 0(RA)
- | j ->fff_resbit
- |
---
-2.20.1
-
-
-From fab87db540bcbd01e9007b6f5a0b8aa743c28181 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 30 Jan 2017 16:40:32 -0500
-Subject: [PATCH 239/247] Hoist some loop invariants.
-
----
- src/vm_s390x.dasc | 7 ++++---
- 1 file changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 5ebd20c..939aea3 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -3614,6 +3614,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | je >5 // Copy all varargs?
- | lay RB, -8(RA, RB)
- | clgr TMPR1, BASE // No vararg slots?
-+ | lghi TMPR0, LJ_TNIL
- | jnl >2
- |1: // Copy vararg slots to destination slots.
- | lg RC, -16(TMPR1)
-@@ -3625,7 +3626,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | clgr TMPR1, BASE // No more vararg slots?
- | jl <1
- |2: // Fill up remainder with nil.
-- | lghi TMPR0, LJ_TNIL // TODO: move out of loop. Add NIL range macro?
- | stg TMPR0, 0(RA)
- | la RA, 8(RA)
- | clgr RA, RB
-@@ -3993,13 +3993,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- |
- |3: // Clear missing parameters.
-- | // TODO: optimize this. Some of this can be hoisted.
- | sllg TMPR1, NARGS:RD, 3
- | lghi TMPR0, LJ_TNIL
-+ |4:
- | stg TMPR0, -8(TMPR1, BASE)
-+ | la TMPR1, 8(TMPR1)
- | la RD, 1(RD)
- | clgr RD, RA
-- | jle <3
-+ | jle <4
- | j <2
- break;
-
---
-2.20.1
-
-
-From a9b655eede71f14c07db2d9a7bd2f61a47cd93a9 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Mon, 30 Jan 2017 16:44:15 -0500
-Subject: [PATCH 240/247] Delete old BUG comment.
-
----
- src/vm_s390x.dasc | 1 -
- 1 file changed, 1 deletion(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 939aea3..8238ae5 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -4092,7 +4092,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lcgr RA, RA
- | ag RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
- | lg PC, -8(BASE) // Fetch PC of caller.
-- | // BUG: PC seems to be -1 here sometimes. Not yet sure why.
- | j ->vm_returnc
- break;
-
---
-2.20.1
-
-
-From 7e6a16b1c8366867d038fe312bb72e9f85a23a22 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 31 Jan 2017 13:58:25 -0500
-Subject: [PATCH 241/247] s/TMP_STACK/SAVE_TMP/
-
-More in line with the naming of the other stack variables.
----
- src/vm_s390x.dasc | 54 +++++++++++++++++++++++------------------------
- 1 file changed, 27 insertions(+), 27 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 8238ae5..b3753a3 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -88,8 +88,8 @@
- |.define SAVE_FPR8, 176(sp)
- |.define SAVE_PC, 168(sp)
- |.define SAVE_MULTRES, 160(sp)
--|.define TMP_STACK, 160(sp) // Overlaps SAVE_MULTRES
--|.define TMP_STACK_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES).
-+|.define SAVE_TMP, 160(sp) // Overlaps SAVE_MULTRES
-+|.define SAVE_TMP_HI, 164(sp) // High 32-bits (to avoid SAVE_MULTRES).
- |
- |// Callee save area (allocated by interpreter).
- |.define CALLEESAVE, 000(sp) // <- sp in interpreter.
-@@ -644,8 +644,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_tgets:
- | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
-- | stg STR:RC, TMP_STACK
-- | la RC, TMP_STACK
-+ | stg STR:RC, SAVE_TMP
-+ | la RC, SAVE_TMP
- | llgc TMPR1, PC_OP
- | cghi TMPR1, BC_GGET
- | jne >1
-@@ -657,8 +657,8 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_tgetb:
- | llgc RC, PC_RC
- | setint RC
-- | stg RC, TMP_STACK
-- | la RC, TMP_STACK
-+ | stg RC, SAVE_TMP
-+ | la RC, SAVE_TMP
- | j >1
- |
- |->vmeta_tgetv:
-@@ -714,8 +714,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vmeta_tsets:
- | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
-- | stg STR:RC, TMP_STACK
-- | la RC, TMP_STACK
-+ | stg STR:RC, SAVE_TMP
-+ | la RC, SAVE_TMP
- | llgc TMPR0, PC_OP
- | cghi TMPR0, BC_GSET
- | jne >1
-@@ -727,8 +727,8 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_tsetb:
- | llgc RC, PC_RC
- | setint RC
-- | stg RC, TMP_STACK
-- | la RC, TMP_STACK
-+ | stg RC, SAVE_TMP
-+ | la RC, SAVE_TMP
- | j >1
- |
- |->vmeta_tsetv:
-@@ -968,7 +968,7 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, 16(RA, BASE) // RA previously set to RA*8.
- |->vmeta_call: // Resolve and call __call metamethod.
- | // BASE = old base, RA = new base, RC = nargs+1, PC = return
-- | stg NARGS:RD, TMP_STACK // Save RA, RC for us (not sure about this).
-+ | stg NARGS:RD, SAVE_TMP // Save RA, RC for us (not sure about this).
- | lgr RB, RA
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
-@@ -980,7 +980,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lgr RA, RB
- | lg L:RB, SAVE_L
- | lg BASE, L:RB->base
-- | lg NARGS:RD, TMP_STACK
-+ | lg NARGS:RD, SAVE_TMP
- | lg LFUNC:RB, -16(RA)
- | aghi NARGS:RD, 1 // 32-bit on x64.
- | // This is fragile. L->base must not move, KBASE must always be defined.
-@@ -1367,7 +1367,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | lg PC, -8(BASE)
- | stg PC, SAVE_PC
-- | stg L:RB, TMP_STACK
-+ | stg L:RB, SAVE_TMP
- |.if resume
- | checktptp L:TMPR0, LJ_TTHREAD, ->fff_fallback
- |.endif
-@@ -1413,13 +1413,13 @@ static void build_subroutines(BuildCtx *ctx)
- | jne <2
- |3:
- | lgr CARG2, RA
-- | lg L:CARG1, TMP_STACK
-+ | lg L:CARG1, SAVE_TMP
- | lghi CARG3, 0
- | lghi CARG4, 0
- | brasl r14, ->vm_resume // (lua_State *L, TValue *base, 0, 0)
- |
- | lg L:RB, SAVE_L
-- | lg L:PC, TMP_STACK
-+ | lg L:PC, SAVE_TMP
- | lg BASE, L:RB->base
- | stg L:RB, (DISPATCH_GL(cur_L))(DISPATCH)
- | set_vmstate INTERP
-@@ -1487,12 +1487,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- |
- |9: // Handle stack expansion on return from yield.
-- | lg L:RA, TMP_STACK
-+ | lg L:RA, SAVE_TMP
- | stg KBASE, L:RA->top // Undo coroutine stack clearing.
- | lgr CARG2, PC
- | lgr CARG1, L:RB
- | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
-- | lg L:PC, TMP_STACK
-+ | lg L:PC, SAVE_TMP
- | lg BASE, L:RB->base
- | j <4 // Retry the stack move.
- |.endmacro
-@@ -1635,9 +1635,9 @@ static void build_subroutines(BuildCtx *ctx)
- | j ->fff_resf0
- |
- |.ffunc_n math_frexp
-- | la CARG1, TMP_STACK
-+ | la CARG1, SAVE_TMP
- | brasl r14, extern frexp
-- | llgf RB, TMP_STACK
-+ | llgf RB, SAVE_TMP
- | lg PC, -8(BASE)
- | stdy f0, -16(BASE)
- | setint RB
-@@ -1715,9 +1715,9 @@ static void build_subroutines(BuildCtx *ctx)
- | lg RB, 0(BASE)
- | checkint RB, ->fff_fallback
- | clfi RB, 255; jh ->fff_fallback
-- | strvh RB, TMP_STACK // Store [c,0].
-+ | strvh RB, SAVE_TMP // Store [c,0].
- | lghi TMPR1, 1
-- | la RD, TMP_STACK // Points to stack. Little-endian.
-+ | la RD, SAVE_TMP // Points to stack. Little-endian.
- |->fff_newstr:
- | lg L:RB, SAVE_L
- | stg BASE, L:RB->base
-@@ -1975,7 +1975,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->fff_gcstep: // Call GC step function.
- | // BASE = new base, RD = nargs+1
-- | stg r14, TMP_STACK // Save return address
-+ | stg r14, SAVE_TMP // Save return address
- | lg L:RB, SAVE_L
- | stg PC, SAVE_PC // Redundant (but a defined value).
- | stg BASE, L:RB->base
-@@ -1989,7 +1989,7 @@ static void build_subroutines(BuildCtx *ctx)
- | sgr RD, BASE
- | srlg RD, RD, 3
- | aghi NARGS:RD, 1
-- | lg r14, TMP_STACK // Restore return address.
-+ | lg r14, SAVE_TMP // Restore return address.
- | br r14
- |
- |//-----------------------------------------------------------------------
-@@ -3297,10 +3297,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | tm TAB:TMPR1->nomm, 1<<MM_newindex
- | je ->vmeta_tsets // 'no __newindex' flag NOT set: check.
- |6:
-- | stg ITYPE, TMP_STACK
-+ | stg ITYPE, SAVE_TMP
- | lg L:CARG1, SAVE_L
- | stg BASE, L:CARG1->base
-- | la CARG3, TMP_STACK
-+ | la CARG3, SAVE_TMP
- | lgr CARG2, TAB:RB
- | stg PC, SAVE_PC
- | brasl r14, extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
-@@ -3661,13 +3661,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stg RA, L:RB->top
- | stg PC, SAVE_PC
- | sgr TMPR1, BASE // Need delta, because BASE may change.
-- | st TMPR1, TMP_STACK_HI
-+ | st TMPR1, SAVE_TMP_HI
- | llgf CARG2, SAVE_MULTRES
- | aghi CARG2, -1
- | lgr CARG1, L:RB
- | brasl r14, extern lj_state_growstack // (lua_State *L, int n)
- | lg BASE, L:RB->base
-- | lgf TMPR1, TMP_STACK_HI
-+ | lgf TMPR1, SAVE_TMP_HI
- | lg RA, L:RB->top
- | agr TMPR1, BASE
- | j <6
---
-2.20.1
-
-
-From b834760dd3931663143d60251d52ee264f453a3c Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 31 Jan 2017 14:02:38 -0500
-Subject: [PATCH 242/247] Remove out of date comments.
-
----
- src/vm_s390x.dasc | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index b3753a3..21a1ff3 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -147,11 +147,7 @@
- |.macro ins_AND; lghi TMPR1, -1; xgr RD, TMPR1; .endmacro // RD = ~RD
- |
- |// Instruction decode+dispatch.
--| // TODO: tune this, right now we always decode RA-D even if they aren't used.
- |.macro ins_NEXT
--| // 32 63
--| // [ B | C | A | OP ]
--| // [ D | A | OP ]
- | llgc OP, 3(PC)
- | llgh RD, 0(PC)
- | llgc RA, 2(PC)
---
-2.20.1
-
-
-From c269d39bceb01fdc82ea301b724ee02ec9ebd9cd Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 31 Jan 2017 15:00:15 -0500
-Subject: [PATCH 243/247] Sort instructions in alphabetical order.
-
----
- dynasm/dasm_s390x.lua | 856 ++++++++++++++++++++----------------------
- 1 file changed, 417 insertions(+), 439 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 8bf7084..ced4f9f 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -587,245 +587,256 @@ end
- -- Template strings for s390x instructions.
- map_op = {
- a_2 = "00005a000000RX-a",
-- ar_2 = "000000001a00RR",
-- ay_2 = "e3000000005aRXY-a",
-+ ad_2 = "00006a000000RX-a",
-+ adb_2 = "ed000000001aRXE",
-+ adbr_2 = "0000b31a0000RRE",
-+ adr_2 = "000000002a00RR",
-+ ae_2 = "00007a000000RX-a",
-+ aeb_2 = "ed000000000aRXE",
-+ aebr_2 = "0000b30a0000RRE",
-+ aer_2 = "000000003a00RR",
-+ afi_2 = "c20900000000RIL-a",
- ag_2 = "e30000000008RXY-a",
-- agr_2 = "0000b9080000RRE",
- agf_2 = "e30000000018RXY-a",
-+ agfi_2 = "c20800000000RIL-a",
- agfr_2 = "0000b9180000RRE",
-- axbr_2 = "0000b34a0000RRE",
-- adbr_2 = "0000b31a0000RRE",
-- aebr_2 = "0000b30a0000RRE",
- aghi_2 = "0000a70b0000RI-a",
-+ agr_2 = "0000b9080000RRE",
- ah_2 = "00004a000000RX-a",
- ahi_2 = "0000a70a0000RI-a",
- ahy_2 = "e3000000007aRXY-a",
-- afi_2 = "c20900000000RIL-a",
-- agfi_2 = "c20800000000RIL-a",
- aih_2 = "cc0800000000RIL-a",
- al_2 = "00005e000000RX-a",
-- alr_2 = "000000001e00RR",
-- aly_2 = "e3000000005eRXY-a",
-- alg_2 = "e3000000000aRXY-a",
-- algr_2 = "0000b90a0000RRE",
-- algf_2 = "e3000000001aRXY-a",
-- algfr_2 = "0000b91a0000RRE",
-- alfi_2 = "c20b00000000RIL-a",
-- algfi_2 = "c20a00000000RIL-a",
- alc_2 = "e30000000098RXY-a",
-- alcr_2 = "0000b9980000RRE",
- alcg_2 = "e30000000088RXY-a",
- alcgr_2 = "0000b9880000RRE",
-+ alcr_2 = "0000b9980000RRE",
-+ alfi_2 = "c20b00000000RIL-a",
-+ alg_2 = "e3000000000aRXY-a",
-+ algf_2 = "e3000000001aRXY-a",
-+ algfi_2 = "c20a00000000RIL-a",
-+ algfr_2 = "0000b91a0000RRE",
-+ algr_2 = "0000b90a0000RRE",
-+ alr_2 = "000000001e00RR",
- alsih_2 = "cc0a00000000RIL-a",
- alsihn_2 = "cc0b00000000RIL-a",
-- axr_2 = "000000003600RR",
-- ad_2 = "00006a000000RX-a",
-- adr_2 = "000000002a00RR",
-- ae_2 = "00007a000000RX-a",
-- aer_2 = "000000003a00RR",
-- aw_2 = "00006e000000RX-a",
-- awr_2 = "000000002e00RR",
-+ aly_2 = "e3000000005eRXY-a",
-+ ap_2 = "fa0000000000SS-b",
-+ ar_2 = "000000001a00RR",
- au_2 = "00007e000000RX-a",
- aur_2 = "000000003e00RR",
-- n_2 = "000054000000RX-a",
-- nr_2 = "000000001400RR",
-- ny_2 = "e30000000054RXY-a",
-- ng_2 = "e30000000080RXY-a",
-- ngr_2 = "0000b9800000RRE",
-- nihf_2 = "c00a00000000RIL-a",
-- nihh_2 = "0000a5040000RI-a",
-- nihl_2 = "0000a5050000RI-a",
-- nilf_2 = "c00b00000000RIL-a",
-- nilh_2 = "0000a5060000RI-a",
-- nill_2 = "0000a5070000RI-a",
-+ aw_2 = "00006e000000RX-a",
-+ awr_2 = "000000002e00RR",
-+ axbr_2 = "0000b34a0000RRE",
-+ axr_2 = "000000003600RR",
-+ ay_2 = "e3000000005aRXY-a",
-+ bakr_2 = "0000b2400000RRE",
- bal_2 = "000045000000RX-a",
- balr_2 = "000000000500RR",
- bas_2 = "00004d000000RX-a",
- basr_2 = "000000000d00RR",
- bassm_2 = "000000000c00RR",
-- bsa_2 = "0000b25a0000RRE",
-- bsm_2 = "000000000b00RR",
-- bakr_2 = "0000b2400000RRE",
-- bsg_2 = "0000b2580000RRE",
-+ bc_2 = "000047000000RX-b",
- bc_2 = "000047000000RX-b",
- bcr_2 = "000000000700RR",
- bct_2 = "000046000000RX-a",
-- bctr_2 = "000000000600RR",
- bctg_2 = "e30000000046RXY-a",
- bctgr_2 = "0000b9460000RRE",
-+ bctr_2 = "000000000600RR",
-+ bras_2 = "0000a7050000RI-b",
-+ brasl_2 = "c00500000000RIL-b",
-+ brc_2 = "0000a7040000RI-c",
-+ brcl_2 = "c00400000000RIL-c",
-+ brcl_2 = "c00400000000RIL-c",
-+ brct_2 = "0000a7060000RI-b",
-+ brctg_2 = "0000a7070000RI-b",
-+ brcth_2 = "cc0600000000RIL-b",
-+ brxh_3 = "000084000000RSI",
-+ brxhg_3 = "ec0000000044RIE-e",
-+ bsa_2 = "0000b25a0000RRE",
-+ bsg_2 = "0000b2580000RRE",
-+ bsm_2 = "000000000b00RR",
- bxh_3 = "000086000000RS-a",
- bxhg_3 = "eb0000000044RSY-a",
- bxle_3 = "000087000000RS-a",
- bxleg_3 = "eb0000000045RSY-a",
-- brasl_2 = "c00500000000RIL-b",
-- brcl_2 = "c00400000000RIL-c",
-- brcth_2 = "cc0600000000RIL-b",
-- cksm_2 = "0000b2410000RRE",
-- km_2 = "0000b92e0000RRE",
-- kmf_2 = "0000b92a0000RRE",
-- kmc_2 = "0000b92f0000RRE",
-- kmo_2 = "0000b92b0000RRE",
- c_2 = "000059000000RX-a",
-- cr_2 = "000000001900RR",
-- cy_2 = "e30000000059RXY-a",
-- cg_2 = "e30000000020RXY-a",
-- cgr_2 = "0000b9200000RRE",
-- cgf_2 = "e30000000030RXY-a",
-- cgfr_2 = "0000b9300000RRE",
-- cxbr_2 = "0000b3490000RRE",
-- cxtr_2 = "0000b3ec0000RRE",
-- cxr_2 = "0000b3690000RRE",
-- cdbr_2 = "0000b3190000RRE",
-- cdtr_2 = "0000b3e40000RRE",
- cd_2 = "000069000000RX-a",
-+ cdb_2 = "ed0000000019RXE",
-+ cdbr_2 = "0000b3190000RRE",
-+ cdfbr_2 = "0000b3950000RRE",
-+ cdfbra_4 = "0000b3950000RRF-e",
-+ cdfr_2 = "0000b3b50000RRE",
-+ cdftr_2 = "0000b9510000RRE",
-+ cdgbr_2 = "0000b3a50000RRE",
-+ cdgbra_4 = "0000b3a50000RRF-e",
-+ cdgr_2 = "0000b3c50000RRE",
-+ cdgtr_2 = "0000b3f10000RRE",
- cdr_2 = "000000002900RR",
-- cebr_2 = "0000b3090000RRE",
-+ cds_3 = "0000bb000000RS-a",
-+ cdsg_3 = "eb000000003eRSY-a",
-+ cdstr_2 = "0000b3f30000RRE",
-+ cdsy_3 = "eb0000000031RSY-a",
-+ cdtr_2 = "0000b3e40000RRE",
-+ cdutr_2 = "0000b3f20000RRE",
- ce_2 = "000079000000RX-a",
-+ ceb_2 = "ed0000000009RXE",
-+ cebr_2 = "0000b3090000RRE",
-+ cedtr_2 = "0000b3f40000RRE",
-+ cefbr_2 = "0000b3940000RRE",
-+ cefbra_4 = "0000b3940000RRF-e",
-+ cefr_2 = "0000b3b40000RRE",
-+ cegbr_2 = "0000b3a40000RRE",
-+ cegbra_4 = "0000b3a40000RRF-e",
-+ cegr_2 = "0000b3c40000RRE",
- cer_2 = "000000003900RR",
-- kxbr_2 = "0000b3480000RRE",
-- kxtr_2 = "0000b3e80000RRE",
-- kdbr_2 = "0000b3180000RRE",
-- kdtr_2 = "0000b3e00000RRE",
-- kebr_2 = "0000b3080000RRE",
-- cs_3 = "0000ba000000RS-a",
-- csy_3 = "eb0000000014RSY-a",
-- csg_3 = "eb0000000030RSY-a",
-- csp_2 = "0000b2500000RRE",
-- cspg_2 = "0000b98a0000RRE",
- cextr_2 = "0000b3fc0000RRE",
-- cedtr_2 = "0000b3f40000RRE",
-- cds_3 = "0000bb000000RS-a",
-- cdsy_3 = "eb0000000031RSY-a",
-- cdsg_3 = "eb000000003eRSY-a",
-- ch_2 = "000049000000RX-a",
-- chy_2 = "e30000000079RXY-a",
-+ cfdbr_3 = "0000b3990000RRF-e",
-+ cfdbra_4 = "0000b3990000RRF-e",
-+ cfebr_3 = "0000b3980000RRF-e",
-+ cfebra_4 = "0000b3980000RRF-e",
-+ cfi_2 = "c20d00000000RIL-a",
-+ cfxbr_3 = "0000b39a0000RRF-e",
-+ cfxbra_4 = "0000b39a0000RRF-e",
-+ cg_2 = "e30000000020RXY-a",
-+ cgdbr_3 = "0000b3a90000RRF-e",
-+ cgdbra_4 = "0000b3a90000RRF-e",
-+ cgebr_3 = "0000b3a80000RRF-e",
-+ cgebra_4 = "0000b3a80000RRF-e",
-+ cgf_2 = "e30000000030RXY-a",
-+ cgfi_2 = "c20c00000000RIL-a",
-+ cgfr_2 = "0000b9300000RRE",
-+ cgfrl_2 = "c60c00000000RIL-b",
- cgh_2 = "e30000000034RXY-a",
-- chrl_2 = "c60500000000RIL-b",
-+ cghi_2 = "0000a70f0000RI-a",
- cghrl_2 = "c60400000000RIL-b",
-+ cgr_2 = "0000b9200000RRE",
-+ cgrl_2 = "c60800000000RIL-b",
-+ cgxbr_3 = "0000b3aa0000RRF-e",
-+ cgxbra_4 = "0000b3aa0000RRF-e",
-+ ch_2 = "000049000000RX-a",
- chf_2 = "e300000000cdRXY-a",
- chhr_2 = "0000b9cd0000RRE",
-+ chi_2 = "0000a70e0000RI-a",
- chlr_2 = "0000b9dd0000RRE",
-- cfi_2 = "c20d00000000RIL-a",
-- cgfi_2 = "c20c00000000RIL-a",
-+ chrl_2 = "c60500000000RIL-b",
-+ chy_2 = "e30000000079RXY-a",
- cih_2 = "cc0d00000000RIL-a",
-+ cksm_2 = "0000b2410000RRE",
- cl_2 = "000055000000RX-a",
-- clr_2 = "000000001500RR",
-- cly_2 = "e30000000055RXY-a",
-+ clc_2 = "d50000000000SS-a",
-+ clcl_2 = "000000000f00RR",
-+ clcle_3 = "0000a9000000RS-a",
-+ clclu_3 = "eb000000008fRSY-a",
-+ clfi_2 = "c20f00000000RIL-a",
- clg_2 = "e30000000021RXY-a",
-- clgr_2 = "0000b9210000RRE",
- clgf_2 = "e30000000031RXY-a",
-+ clgfi_2 = "c20e00000000RIL-a",
- clgfr_2 = "0000b9310000RRE",
-- clmh_3 = "eb0000000020RSY-b",
-- clm_3 = "0000bd000000RS-b",
-- clmy_3 = "eb0000000021RSY-b",
-+ clgfrl_2 = "c60e00000000RIL-b",
-+ clghrl_2 = "c60600000000RIL-b",
-+ clgr_2 = "0000b9210000RRE",
-+ clgrl_2 = "c60a00000000RIL-b",
- clhf_2 = "e300000000cfRXY-a",
- clhhr_2 = "0000b9cf0000RRE",
- clhlr_2 = "0000b9df0000RRE",
-- clfi_2 = "c20f00000000RIL-a",
-- clgfi_2 = "c20e00000000RIL-a",
-+ clhrl_2 = "c60700000000RIL-b",
-+ cli_2 = "000095000000SI",
- clih_2 = "cc0f00000000RIL-a",
-- clcl_2 = "000000000f00RR",
-- clcle_3 = "0000a9000000RS-a",
-- clclu_3 = "eb000000008fRSY-a",
-+ clm_3 = "0000bd000000RS-b",
-+ clmh_3 = "eb0000000020RSY-b",
-+ clmy_3 = "eb0000000021RSY-b",
-+ clr_2 = "000000001500RR",
- clrl_2 = "c60f00000000RIL-b",
-- clhrl_2 = "c60700000000RIL-b",
-- clgrl_2 = "c60a00000000RIL-b",
-- clghrl_2 = "c60600000000RIL-b",
-- clgfrl_2 = "c60e00000000RIL-b",
- clst_2 = "0000b25d0000RRE",
-+ cly_2 = "e30000000055RXY-a",
-+ cmpsc_2 = "0000b2630000RRE",
-+ cpya_2 = "0000b24d0000RRE",
-+ cr_2 = "000000001900RR",
- crl_2 = "c60d00000000RIL-b",
-- cgrl_2 = "c60800000000RIL-b",
-- cgfrl_2 = "c60c00000000RIL-b",
-+ cs_3 = "0000ba000000RS-a",
-+ csg_3 = "eb0000000030RSY-a",
-+ csp_2 = "0000b2500000RRE",
-+ cspg_2 = "0000b98a0000RRE",
-+ csy_3 = "eb0000000014RSY-a",
-+ cu41_2 = "0000b9b20000RRE",
-+ cu42_2 = "0000b9b30000RRE",
-+ cudtr_2 = "0000b3e20000RRE",
- cuse_2 = "0000b2570000RRE",
-- cmpsc_2 = "0000b2630000RRE",
-- kimd_2 = "0000b93e0000RRE",
-- klmd_2 = "0000b93f0000RRE",
-- kmac_2 = "0000b91e0000RRE",
-- thdr_2 = "0000b3590000RRE",
-- thder_2 = "0000b3580000RRE",
-+ cuxtr_2 = "0000b3ea0000RRE",
-+ cvb_2 = "00004f000000RX-a",
-+ cvbg_2 = "e3000000000eRXY-a",
-+ cvby_2 = "e30000000006RXY-a",
-+ cvd_2 = "00004e000000RX-a",
-+ cvdg_2 = "e3000000002eRXY-a",
-+ cvdy_2 = "e30000000026RXY-a",
-+ cxbr_2 = "0000b3490000RRE",
- cxfbr_2 = "0000b3960000RRE",
-- cxftr_2 = "0000b9590000RRE",
-+ cxfbra_4 = "0000b3960000RRF-e",
- cxfr_2 = "0000b3b60000RRE",
-- cdfbr_2 = "0000b3950000RRE",
-- cdftr_2 = "0000b9510000RRE",
-- cdfr_2 = "0000b3b50000RRE",
-- cefbr_2 = "0000b3940000RRE",
-- cefr_2 = "0000b3b40000RRE",
-+ cxftr_2 = "0000b9590000RRE",
- cxgbr_2 = "0000b3a60000RRE",
-- cxgtr_2 = "0000b3f90000RRE",
-+ cxgbra_4 = "0000b3a60000RRF-e",
- cxgr_2 = "0000b3c60000RRE",
-- cdgbr_2 = "0000b3a50000RRE",
-- cdgtr_2 = "0000b3f10000RRE",
-- cdgr_2 = "0000b3c50000RRE",
-- cegbr_2 = "0000b3a40000RRE",
-- cegr_2 = "0000b3c40000RRE",
-+ cxgtr_2 = "0000b3f90000RRE",
-+ cxr_2 = "0000b3690000RRE",
- cxstr_2 = "0000b3fb0000RRE",
-- cdstr_2 = "0000b3f30000RRE",
-+ cxtr_2 = "0000b3ec0000RRE",
- cxutr_2 = "0000b3fa0000RRE",
-- cdutr_2 = "0000b3f20000RRE",
-- cvb_2 = "00004f000000RX-a",
-- cvby_2 = "e30000000006RXY-a",
-- cvbg_2 = "e3000000000eRXY-a",
-- cvd_2 = "00004e000000RX-a",
-- cvdy_2 = "e30000000026RXY-a",
-- cvdg_2 = "e3000000002eRXY-a",
-- cuxtr_2 = "0000b3ea0000RRE",
-- cudtr_2 = "0000b3e20000RRE",
-- cu42_2 = "0000b9b30000RRE",
-- cu41_2 = "0000b9b20000RRE",
-- cpya_2 = "0000b24d0000RRE",
-+ cy_2 = "e30000000059RXY-a",
- d_2 = "00005d000000RX-a",
-- dr_2 = "000000001d00RR",
-- dxbr_2 = "0000b34d0000RRE",
-- dxr_2 = "0000b22d0000RRE",
-- ddbr_2 = "0000b31d0000RRE",
- dd_2 = "00006d000000RX-a",
-+ ddb_2 = "ed000000001dRXE",
-+ ddbr_2 = "0000b31d0000RRE",
- ddr_2 = "000000002d00RR",
-- debr_2 = "0000b30d0000RRE",
- de_2 = "00007d000000RX-a",
-+ deb_2 = "ed000000000dRXE",
-+ debr_2 = "0000b30d0000RRE",
- der_2 = "000000003d00RR",
-+ didbr_4 = "0000b35b0000RRF-b",
- dl_2 = "e30000000097RXY-a",
-- dlr_2 = "0000b9970000RRE",
- dlg_2 = "e30000000087RXY-a",
- dlgr_2 = "0000b9870000RRE",
-+ dlr_2 = "0000b9970000RRE",
-+ dr_2 = "000000001d00RR",
- dsg_2 = "e3000000000dRXY-a",
-- dsgr_2 = "0000b90d0000RRE",
- dsgf_2 = "e3000000001dRXY-a",
- dsgfr_2 = "0000b91d0000RRE",
-- x_2 = "000057000000RX-a",
-- xr_2 = "000000001700RR",
-- xy_2 = "e30000000057RXY-a",
-- xg_2 = "e30000000082RXY-a",
-- xgr_2 = "0000b9820000RRE",
-- xihf_2 = "c00600000000RIL-a",
-- xilf_2 = "c00700000000RIL-a",
-- ex_2 = "000044000000RX-a",
-- exrl_2 = "c60000000000RIL-b",
-+ dsgr_2 = "0000b90d0000RRE",
-+ dxbr_2 = "0000b34d0000RRE",
-+ dxr_2 = "0000b22d0000RRE",
- ear_2 = "0000b24f0000RRE",
-- esea_2 = "0000b99d0000RRE",
-- eextr_2 = "0000b3ed0000RRE",
-- eedtr_2 = "0000b3e50000RRE",
- ecag_3 = "eb000000004cRSY-a",
-+ ed_2 = "de0000000000SS-a",
-+ edmk_2 = "df0000000000SS-a",
-+ eedtr_2 = "0000b3e50000RRE",
-+ eextr_2 = "0000b3ed0000RRE",
- efpc_2 = "0000b38c0000RRE",
-- epar_2 = "0000b2260000RRE",
- epair_2 = "0000b99a0000RRE",
-+ epar_2 = "0000b2260000RRE",
- epsw_2 = "0000b98d0000RRE",
-- esar_2 = "0000b2270000RRE",
-- esair_2 = "0000b99b0000RRE",
-- esxtr_2 = "0000b3ef0000RRE",
-- esdtr_2 = "0000b3e70000RRE",
- ereg_2 = "0000b2490000RRE",
- eregg_2 = "0000b90e0000RRE",
-+ esair_2 = "0000b99b0000RRE",
-+ esar_2 = "0000b2270000RRE",
-+ esdtr_2 = "0000b3e70000RRE",
-+ esea_2 = "0000b99d0000RRE",
- esta_2 = "0000b24a0000RRE",
-+ esxtr_2 = "0000b3ef0000RRE",
-+ ex_2 = "000044000000RX-a",
-+ exrl_2 = "c60000000000RIL-b",
-+ fidr_2 = "0000b37f0000RRE",
-+ fier_2 = "0000b3770000RRE",
-+ fixr_2 = "0000b3670000RRE",
- flogr_2 = "0000b9830000RRE",
- hdr_2 = "000000002400RR",
- her_2 = "000000003400RR",
- iac_2 = "0000b2240000RRE",
- ic_2 = "000043000000RX-a",
-- icy_2 = "e30000000073RXY-a",
-- icmh_3 = "eb0000000080RSY-b",
- icm_3 = "0000bf000000RS-b",
-+ icmh_3 = "eb0000000080RSY-b",
- icmy_3 = "eb0000000081RSY-b",
-+ icy_2 = "e30000000073RXY-a",
- iihf_2 = "c00800000000RIL-a",
- iihh_2 = "0000a5000000RI-a",
- iihl_2 = "0000a5010000RI-a",
-@@ -835,439 +846,406 @@ map_op = {
- ipm_2 = "0000b2220000RRE",
- iske_2 = "0000b2290000RRE",
- ivsk_2 = "0000b2230000RRE",
-+ kdbr_2 = "0000b3180000RRE",
-+ kdtr_2 = "0000b3e00000RRE",
-+ kebr_2 = "0000b3080000RRE",
-+ kimd_2 = "0000b93e0000RRE",
-+ klmd_2 = "0000b93f0000RRE",
-+ km_2 = "0000b92e0000RRE",
-+ kmac_2 = "0000b91e0000RRE",
-+ kmc_2 = "0000b92f0000RRE",
-+ kmf_2 = "0000b92a0000RRE",
-+ kmo_2 = "0000b92b0000RRE",
-+ kxbr_2 = "0000b3480000RRE",
-+ kxtr_2 = "0000b3e80000RRE",
- l_2 = "000058000000RX-a",
-- lr_2 = "000000001800RR",
-- ly_2 = "e30000000058RXY-a",
-- lg_2 = "e30000000004RXY-a",
-- lgr_2 = "0000b9040000RRE",
-- lgf_2 = "e30000000014RXY-a",
-- lgfr_2 = "0000b9140000RRE",
-- lghi_2 = "0000a7090000RI-a",
-- lxr_2 = "0000b3650000RRE",
-- ld_2 = "000068000000RX-a",
-- ldr_2 = "000000002800RR",
-- ldy_2 = "ed0000000065RXY-a",
-- le_2 = "000078000000RX-a",
-- ler_2 = "000000003800RR",
-- ley_2 = "ed0000000064RXY-a",
-- lam_3 = "00009a000000RS-a",
-- lamy_3 = "eb000000009aRSY-a",
- la_2 = "000041000000RX-a",
-- lay_2 = "e30000000071RXY-a",
-- lae_2 = "000051000000RX-a",
-- laey_2 = "e30000000075RXY-a",
-- larl_2 = "c00000000000RIL-b",
- laa_3 = "eb00000000f8RSY-a",
- laag_3 = "eb00000000e8RSY-a",
- laal_3 = "eb00000000faRSY-a",
- laalg_3 = "eb00000000eaRSY-a",
-+ lae_2 = "000051000000RX-a",
-+ laey_2 = "e30000000075RXY-a",
-+ lam_3 = "00009a000000RS-a",
-+ lamy_3 = "eb000000009aRSY-a",
- lan_3 = "eb00000000f4RSY-a",
- lang_3 = "eb00000000e4RSY-a",
-- lax_3 = "eb00000000f7RSY-a",
-- laxg_3 = "eb00000000e7RSY-a",
- lao_3 = "eb00000000f6RSY-a",
- laog_3 = "eb00000000e6RSY-a",
-- lt_2 = "e30000000012RXY-a",
-- ltr_2 = "000000001200RR",
-- ltg_2 = "e30000000002RXY-a",
-- ltgr_2 = "0000b9020000RRE",
-- ltgf_2 = "e30000000032RXY-a",
-- ltgfr_2 = "0000b9120000RRE",
-- ltxbr_2 = "0000b3420000RRE",
-- ltxtr_2 = "0000b3de0000RRE",
-- ltxr_2 = "0000b3620000RRE",
-- ltdbr_2 = "0000b3120000RRE",
-- ltdtr_2 = "0000b3d60000RRE",
-- ltdr_2 = "000000002200RR",
-- ltebr_2 = "0000b3020000RRE",
-- lter_2 = "000000003200RR",
-+ larl_2 = "c00000000000RIL-b",
-+ lax_3 = "eb00000000f7RSY-a",
-+ laxg_3 = "eb00000000e7RSY-a",
-+ lay_2 = "e30000000071RXY-a",
- lb_2 = "e30000000076RXY-a",
-- lbr_2 = "0000b9260000RRE",
-- lgb_2 = "e30000000077RXY-a",
-- lgbr_2 = "0000b9060000RRE",
- lbh_2 = "e300000000c0RXY-a",
-- lcr_2 = "000000001300RR",
-- lcgr_2 = "0000b9030000RRE",
-- lcgfr_2 = "0000b9130000RRE",
-- lcxbr_2 = "0000b3430000RRE",
-- lcxr_2 = "0000b3630000RRE",
-+ lbr_2 = "0000b9260000RRE",
- lcdbr_2 = "0000b3130000RRE",
-- lcdr_2 = "000000002300RR",
- lcdfr_2 = "0000b3730000RRE",
-+ lcdr_2 = "000000002300RR",
- lcebr_2 = "0000b3030000RRE",
- lcer_2 = "000000003300RR",
-+ lcgfr_2 = "0000b9130000RRE",
-+ lcgr_2 = "0000b9030000RRE",
-+ lcr_2 = "000000001300RR",
- lctl_3 = "0000b7000000RS-a",
- lctlg_3 = "eb000000002fRSY-a",
-- fixr_2 = "0000b3670000RRE",
-- fidr_2 = "0000b37f0000RRE",
-- fier_2 = "0000b3770000RRE",
-+ lcxbr_2 = "0000b3430000RRE",
-+ lcxr_2 = "0000b3630000RRE",
-+ ld_2 = "000068000000RX-a",
-+ ldebr_2 = "0000b3040000RRE",
-+ lder_2 = "0000b3240000RRE",
- ldgr_2 = "0000b3c10000RRE",
-+ ldr_2 = "000000002800RR",
-+ ldxbr_2 = "0000b3450000RRE",
-+ ldxr_2 = "000000002500RR",
-+ ldy_2 = "ed0000000065RXY-a",
-+ le_2 = "000078000000RX-a",
-+ ledbr_2 = "0000b3440000RRE",
-+ ledr_2 = "000000003500RR",
-+ ler_2 = "000000003800RR",
-+ lexbr_2 = "0000b3460000RRE",
-+ lexr_2 = "0000b3660000RRE",
-+ ley_2 = "ed0000000064RXY-a",
-+ lfh_2 = "e300000000caRXY-a",
-+ lg_2 = "e30000000004RXY-a",
-+ lgb_2 = "e30000000077RXY-a",
-+ lgbr_2 = "0000b9060000RRE",
- lgdr_2 = "0000b3cd0000RRE",
-- lh_2 = "000048000000RX-a",
-- lhr_2 = "0000b9270000RRE",
-- lhy_2 = "e30000000078RXY-a",
-+ lgf_2 = "e30000000014RXY-a",
-+ lgfi_2 = "c00100000000RIL-a",
-+ lgfr_2 = "0000b9140000RRE",
-+ lgfrl_2 = "c40c00000000RIL-b",
- lgh_2 = "e30000000015RXY-a",
-+ lghi_2 = "0000a7090000RI-a",
- lghr_2 = "0000b9070000RRE",
-+ lghrl_2 = "c40400000000RIL-b",
-+ lgr_2 = "0000b9040000RRE",
-+ lgrl_2 = "c40800000000RIL-b",
-+ lh_2 = "000048000000RX-a",
- lhh_2 = "e300000000c4RXY-a",
- lhi_2 = "0000a7080000RI-a",
-+ lhr_2 = "0000b9270000RRE",
- lhrl_2 = "c40500000000RIL-b",
-- lghrl_2 = "c40400000000RIL-b",
-- lfh_2 = "e300000000caRXY-a",
-- lgfi_2 = "c00100000000RIL-a",
-- lxdbr_2 = "0000b3050000RRE",
-- lxdr_2 = "0000b3250000RRE",
-- lxebr_2 = "0000b3060000RRE",
-- lxer_2 = "0000b3260000RRE",
-- ldebr_2 = "0000b3040000RRE",
-- lder_2 = "0000b3240000RRE",
-- llgf_2 = "e30000000016RXY-a",
-- llgfr_2 = "0000b9160000RRE",
-+ lhy_2 = "e30000000078RXY-a",
- llc_2 = "e30000000094RXY-a",
-+ llch_2 = "e300000000c2RXY-a",
- llcr_2 = "0000b9940000RRE",
- llgc_2 = "e30000000090RXY-a",
- llgcr_2 = "0000b9840000RRE",
-- llch_2 = "e300000000c2RXY-a",
-- llh_2 = "e30000000095RXY-a",
-- llhr_2 = "0000b9950000RRE",
-+ llgf_2 = "e30000000016RXY-a",
-+ llgfr_2 = "0000b9160000RRE",
-+ llgfrl_2 = "c40e00000000RIL-b",
- llgh_2 = "e30000000091RXY-a",
- llghr_2 = "0000b9850000RRE",
-+ llghrl_2 = "c40600000000RIL-b",
-+ llgt_2 = "e30000000017RXY-a",
-+ llgtr_2 = "0000b9170000RRE",
-+ llh_2 = "e30000000095RXY-a",
- llhh_2 = "e300000000c6RXY-a",
-+ llhr_2 = "0000b9950000RRE",
- llhrl_2 = "c40200000000RIL-b",
-- llghrl_2 = "c40600000000RIL-b",
- llihf_2 = "c00e00000000RIL-a",
- llihh_2 = "0000a50c0000RI-a",
- llihl_2 = "0000a50d0000RI-a",
- llilf_2 = "c00f00000000RIL-a",
- llilh_2 = "0000a50e0000RI-a",
- llill_2 = "0000a50f0000RI-a",
-- llgfrl_2 = "c40e00000000RIL-b",
-- llgt_2 = "e30000000017RXY-a",
-- llgtr_2 = "0000b9170000RRE",
- lm_3 = "000098000000RS-a",
-- lmy_3 = "eb0000000098RSY-a",
- lmg_3 = "eb0000000004RSY-a",
- lmh_3 = "eb0000000096RSY-a",
-- lnr_2 = "000000001100RR",
-- lngr_2 = "0000b9010000RRE",
-- lngfr_2 = "0000b9110000RRE",
-- lnxbr_2 = "0000b3410000RRE",
-- lnxr_2 = "0000b3610000RRE",
-+ lmy_3 = "eb0000000098RSY-a",
- lndbr_2 = "0000b3110000RRE",
-- lndr_2 = "000000002100RR",
- lndfr_2 = "0000b3710000RRE",
-+ lndr_2 = "000000002100RR",
- lnebr_2 = "0000b3010000RRE",
- lner_2 = "000000003100RR",
-+ lngfr_2 = "0000b9110000RRE",
-+ lngr_2 = "0000b9010000RRE",
-+ lnr_2 = "000000001100RR",
-+ lnxbr_2 = "0000b3410000RRE",
-+ lnxr_2 = "0000b3610000RRE",
- loc_3 = "eb00000000f2RSY-b",
- locg_3 = "eb00000000e2RSY-b",
-- lpq_2 = "e3000000008fRXY-a",
-- lpr_2 = "000000001000RR",
-- lpgr_2 = "0000b9000000RRE",
-- lpgfr_2 = "0000b9100000RRE",
-- lpxbr_2 = "0000b3400000RRE",
-- lpxr_2 = "0000b3600000RRE",
- lpdbr_2 = "0000b3100000RRE",
-- lpdr_2 = "000000002000RR",
- lpdfr_2 = "0000b3700000RRE",
-+ lpdr_2 = "000000002000RR",
- lpebr_2 = "0000b3000000RRE",
- lper_2 = "000000003000RR",
-+ lpgfr_2 = "0000b9100000RRE",
-+ lpgr_2 = "0000b9000000RRE",
-+ lpq_2 = "e3000000008fRXY-a",
-+ lpr_2 = "000000001000RR",
-+ lpxbr_2 = "0000b3400000RRE",
-+ lpxr_2 = "0000b3600000RRE",
-+ lr_2 = "000000001800RR",
- lra_2 = "0000b1000000RX-a",
-- lray_2 = "e30000000013RXY-a",
- lrag_2 = "e30000000003RXY-a",
-+ lray_2 = "e30000000013RXY-a",
-+ lrdr_2 = "000000002500RR",
-+ lrer_2 = "000000003500RR",
- lrl_2 = "c40d00000000RIL-b",
-- lgrl_2 = "c40800000000RIL-b",
-- lgfrl_2 = "c40c00000000RIL-b",
-- lrvh_2 = "e3000000001fRXY-a",
- lrv_2 = "e3000000001eRXY-a",
-- lrvr_2 = "0000b91f0000RRE",
- lrvg_2 = "e3000000000fRXY-a",
- lrvgr_2 = "0000b90f0000RRE",
-- ldxbr_2 = "0000b3450000RRE",
-- ldxr_2 = "000000002500RR",
-- lrdr_2 = "000000002500RR",
-- lexbr_2 = "0000b3460000RRE",
-- lexr_2 = "0000b3660000RRE",
-- ledbr_2 = "0000b3440000RRE",
-- ledr_2 = "000000003500RR",
-- lrer_2 = "000000003500RR",
-+ lrvh_2 = "e3000000001fRXY-a",
-+ lrvr_2 = "0000b91f0000RRE",
-+ lt_2 = "e30000000012RXY-a",
-+ ltdbr_2 = "0000b3120000RRE",
-+ ltdr_2 = "000000002200RR",
-+ ltdtr_2 = "0000b3d60000RRE",
-+ ltebr_2 = "0000b3020000RRE",
-+ lter_2 = "000000003200RR",
-+ ltg_2 = "e30000000002RXY-a",
-+ ltgf_2 = "e30000000032RXY-a",
-+ ltgfr_2 = "0000b9120000RRE",
-+ ltgr_2 = "0000b9020000RRE",
-+ ltr_2 = "000000001200RR",
-+ ltxbr_2 = "0000b3420000RRE",
-+ ltxr_2 = "0000b3620000RRE",
-+ ltxtr_2 = "0000b3de0000RRE",
- lura_2 = "0000b24b0000RRE",
- lurag_2 = "0000b9050000RRE",
-- lzxr_2 = "0000b3760000RRE",
-+ lxdbr_2 = "0000b3050000RRE",
-+ lxdr_2 = "0000b3250000RRE",
-+ lxebr_2 = "0000b3060000RRE",
-+ lxer_2 = "0000b3260000RRE",
-+ lxr_2 = "0000b3650000RRE",
-+ ly_2 = "e30000000058RXY-a",
- lzdr_2 = "0000b3750000RRE",
- lzer_2 = "0000b3740000RRE",
-- msta_2 = "0000b2470000RRE",
-- mvcl_2 = "000000000e00RR",
-- mvcle_3 = "0000a8000000RS-a",
-- mvclu_3 = "eb000000008eRSY-a",
-- mvpg_2 = "0000b2540000RRE",
-- mvst_2 = "0000b2550000RRE",
-+ lzxr_2 = "0000b3760000RRE",
- m_2 = "00005c000000RX-a",
-- mfy_2 = "e3000000005cRXY-a",
-- mr_2 = "000000001c00RR",
-- mxbr_2 = "0000b34c0000RRE",
-- mxr_2 = "000000002600RR",
-- mdbr_2 = "0000b31c0000RRE",
-+ madb_3 = "ed000000001eRXF",
-+ maeb_3 = "ed000000000eRXF",
-+ maebr_3 = "0000b30e0000RRD",
-+ maer_3 = "0000b32e0000RRD",
- md_2 = "00006c000000RX-a",
-- mdr_2 = "000000002c00RR",
-- mxdbr_2 = "0000b3070000RRE",
-- mxd_2 = "000067000000RX-a",
-- mxdr_2 = "000000002700RR",
-- meebr_2 = "0000b3170000RRE",
-- meer_2 = "0000b3370000RRE",
-- mdebr_2 = "0000b30c0000RRE",
-+ mdb_2 = "ed000000001cRXE",
-+ mdbr_2 = "0000b31c0000RRE",
- mde_2 = "00007c000000RX-a",
-+ mdeb_2 = "ed000000000cRXE",
-+ mdebr_2 = "0000b30c0000RRE",
- mder_2 = "000000003c00RR",
-+ mdr_2 = "000000002c00RR",
- me_2 = "00007c000000RX-a",
-+ meeb_2 = "ed0000000017RXE",
-+ meebr_2 = "0000b3170000RRE",
-+ meer_2 = "0000b3370000RRE",
- mer_2 = "000000003c00RR",
-+ mfy_2 = "e3000000005cRXY-a",
-+ mghi_2 = "0000a70d0000RI-a",
- mh_2 = "00004c000000RX-a",
-+ mhi_2 = "0000a70c0000RI-a",
- mhy_2 = "e3000000007cRXY-a",
-+ ml_2 = "e30000000096RXY-a",
- mlg_2 = "e30000000086RXY-a",
- mlgr_2 = "0000b9860000RRE",
-- ml_2 = "e30000000096RXY-a",
- mlr_2 = "0000b9960000RRE",
-+ mr_2 = "000000001c00RR",
- ms_2 = "000071000000RX-a",
-- msr_2 = "0000b2520000RRE",
-- msy_2 = "e30000000051RXY-a",
-+ msfi_2 = "c20100000000RIL-a",
- msg_2 = "e3000000000cRXY-a",
-- msgr_2 = "0000b90c0000RRE",
- msgf_2 = "e3000000001cRXY-a",
-- msgfr_2 = "0000b91c0000RRE",
-- msfi_2 = "c20100000000RIL-a",
- msgfi_2 = "c20000000000RIL-a",
-- maer_3 = "0000b32e0000RRD",
-+ msgfr_2 = "0000b91c0000RRE",
-+ msgr_2 = "0000b90c0000RRE",
-+ msr_2 = "0000b2520000RRE",
-+ msta_2 = "0000b2470000RRE",
-+ msy_2 = "e30000000051RXY-a",
-+ mvc_2 = "d20000000000SS-a",
-+ mvcin_2 = "e80000000000SS-a",
-+ mvcl_2 = "000000000e00RR",
-+ mvcle_3 = "0000a8000000RS-a",
-+ mvclu_3 = "eb000000008eRSY-a",
-+ mvghi_2 = "e54800000000SIL",
- mvhhi_2 = "e54400000000SIL",
- mvhi_2 = "e54c00000000SIL",
-- mvghi_2 = "e54800000000SIL",
-+ mvi_2 = "000092000000SI",
-+ mvn_2 = "d10000000000SS-a",
-+ mvpg_2 = "0000b2540000RRE",
-+ mvst_2 = "0000b2550000RRE",
-+ mvz_2 = "d30000000000SS-a",
-+ mxbr_2 = "0000b34c0000RRE",
-+ mxd_2 = "000067000000RX-a",
-+ mxdb_2 = "ed0000000007RXE",
-+ mxdbr_2 = "0000b3070000RRE",
-+ mxdr_2 = "000000002700RR",
-+ mxr_2 = "000000002600RR",
-+ n_2 = "000054000000RX-a",
-+ nc_2 = "d40000000000SS-a",
-+ ng_2 = "e30000000080RXY-a",
-+ ngr_2 = "0000b9800000RRE",
-+ ni_2 = "000094000000SI",
-+ nihf_2 = "c00a00000000RIL-a",
-+ nihh_2 = "0000a5040000RI-a",
-+ nihl_2 = "0000a5050000RI-a",
-+ nilf_2 = "c00b00000000RIL-a",
-+ nilh_2 = "0000a5060000RI-a",
-+ nill_2 = "0000a5070000RI-a",
-+ nr_2 = "000000001400RR",
-+ ny_2 = "e30000000054RXY-a",
- o_2 = "000056000000RX-a",
-- or_2 = "000000001600RR",
-- oy_2 = "e30000000056RXY-a",
-+ oc_2 = "d60000000000SS-a",
- og_2 = "e30000000081RXY-a",
- ogr_2 = "0000b9810000RRE",
-+ oi_2 = "000096000000SI",
- oihf_2 = "c00c00000000RIL-a",
- oihh_2 = "0000a5080000RI-a",
- oihl_2 = "0000a5090000RI-a",
- oilf_2 = "c00d00000000RIL-a",
- oilh_2 = "0000a50a0000RI-a",
- oill_2 = "0000a50b0000RI-a",
-- pgin_2 = "0000b22e0000RRE",
-- pgout_2 = "0000b22f0000RRE",
-+ or_2 = "000000001600RR",
-+ oy_2 = "e30000000056RXY-a",
-+ palb_2 = "0000b2480000RRE",
- pcc_2 = "0000b92c0000RRE",
- pckmo_2 = "0000b9280000RRE",
-- pfmf_2 = "0000b9af0000RRE",
-- ptf_2 = "0000b9a20000RRE",
-- popcnt_2 = "0000b9e10000RRE",
- pfd_2 = "e30000000036m",
- pfdrl_2 = "c60200000000RIL-c",
-+ pfmf_2 = "0000b9af0000RRE",
-+ pgin_2 = "0000b22e0000RRE",
-+ pgout_2 = "0000b22f0000RRE",
-+ popcnt_2 = "0000b9e10000RRE",
- pt_2 = "0000b2280000RRE",
-+ ptf_2 = "0000b9a20000RRE",
- pti_2 = "0000b99e0000RRE",
-- palb_2 = "0000b2480000RRE",
-- rrbe_2 = "0000b22a0000RRE",
-- rrbm_2 = "0000b9ae0000RRE",
- rll_3 = "eb000000001dRSY-a",
- rllg_3 = "eb000000001cRSY-a",
-- srst_2 = "0000b25e0000RRE",
-- srstu_2 = "0000b9be0000RRE",
-+ rrbe_2 = "0000b22a0000RRE",
-+ rrbm_2 = "0000b9ae0000RRE",
-+ s_2 = "00005b000000RX-a",
- sar_2 = "0000b24e0000RRE",
-- sfpc_2 = "0000b3840000RRE",
-+ sd_2 = "00006b000000RX-a",
-+ sdb_2 = "ed000000001bRXE",
-+ sdbr_2 = "0000b31b0000RRE",
-+ sdr_2 = "000000002b00RR",
-+ se_2 = "00007b000000RX-a",
-+ seb_2 = "ed000000000bRXE",
-+ sebr_2 = "0000b30b0000RRE",
-+ ser_2 = "000000003b00RR",
- sfasr_2 = "0000b3850000RRE",
-- spm_2 = "000000000400RR",
-- ssar_2 = "0000b2250000RRE",
-- ssair_2 = "0000b99f0000RRE",
-- slda_2 = "00008f000000RS-a",
-- sldl_2 = "00008d000000RS-a",
-+ sfpc_2 = "0000b3840000RRE",
-+ sg_2 = "e30000000009RXY-a",
-+ sgf_2 = "e30000000019RXY-a",
-+ sgfr_2 = "0000b9190000RRE",
-+ sgr_2 = "0000b9090000RRE",
-+ sh_2 = "00004b000000RX-a",
-+ shy_2 = "e3000000007bRXY-a",
-+ sl_2 = "00005f000000RX-a",
- sla_2 = "00008b000000RS-a",
-- slak_3 = "eb00000000ddRSY-a",
- slag_3 = "eb000000000bRSY-a",
-+ slak_3 = "eb00000000ddRSY-a",
-+ slb_2 = "e30000000099RXY-a",
-+ slbg_2 = "e30000000089RXY-a",
-+ slbgr_2 = "0000b9890000RRE",
-+ slbr_2 = "0000b9990000RRE",
-+ slda_2 = "00008f000000RS-a",
-+ sldl_2 = "00008d000000RS-a",
-+ slfi_2 = "c20500000000RIL-a",
-+ slg_2 = "e3000000000bRXY-a",
-+ slgf_2 = "e3000000001bRXY-a",
-+ slgfi_2 = "c20400000000RIL-a",
-+ slgfr_2 = "0000b91b0000RRE",
-+ slgr_2 = "0000b90b0000RRE",
- sll_2 = "000089000000RS-a",
-- sllk_3 = "eb00000000dfRSY-a",
- sllg_3 = "eb000000000dRSY-a",
-- srda_2 = "00008e000000RS-a",
-- srdl_2 = "00008c000000RS-a",
-- sra_2 = "00008a000000RS-a",
-- srak_3 = "eb00000000dcRSY-a",
-- srag_3 = "eb000000000aRSY-a",
-- srl_2 = "000088000000RS-a",
-- srlk_3 = "eb00000000deRSY-a",
-- srlg_3 = "eb000000000cRSY-a",
-- sqxbr_2 = "0000b3160000RRE",
-- sqxr_2 = "0000b3360000RRE",
-+ sllk_3 = "eb00000000dfRSY-a",
-+ slr_2 = "000000001f00RR",
-+ sly_2 = "e3000000005fRXY-a",
-+ spm_2 = "000000000400RR",
-+ sqdb_2 = "ed0000000015RXE",
- sqdbr_2 = "0000b3150000RRE",
- sqdr_2 = "0000b2440000RRE",
-+ sqeb_2 = "ed0000000014RXE",
- sqebr_2 = "0000b3140000RRE",
- sqer_2 = "0000b2450000RRE",
-+ sqxbr_2 = "0000b3160000RRE",
-+ sqxr_2 = "0000b3360000RRE",
-+ sr_2 = "000000001b00RR",
-+ sra_2 = "00008a000000RS-a",
-+ srag_3 = "eb000000000aRSY-a",
-+ srak_3 = "eb00000000dcRSY-a",
-+ srda_2 = "00008e000000RS-a",
-+ srdl_2 = "00008c000000RS-a",
-+ srl_2 = "000088000000RS-a",
-+ srlg_3 = "eb000000000cRSY-a",
-+ srlk_3 = "eb00000000deRSY-a",
-+ srst_2 = "0000b25e0000RRE",
-+ srstu_2 = "0000b9be0000RRE",
-+ ssair_2 = "0000b99f0000RRE",
-+ ssar_2 = "0000b2250000RRE",
- st_2 = "000050000000RX-a",
-- sty_2 = "e30000000050RXY-a",
-- stg_2 = "e30000000024RXY-a",
-- std_2 = "000060000000RX-a",
-- stdy_2 = "ed0000000067RXY-a",
-- ste_2 = "000070000000RX-a",
-- stey_2 = "ed0000000066RXY-a",
- stam_3 = "00009b000000RS-a",
- stamy_3 = "eb000000009bRSY-a",
- stc_2 = "000042000000RX-a",
-- stcy_2 = "e30000000072RXY-a",
- stch_2 = "e300000000c3RXY-a",
-- stcmh_3 = "eb000000002cRSY-b",
- stcm_3 = "0000be000000RS-b",
-+ stcmh_3 = "eb000000002cRSY-b",
- stcmy_3 = "eb000000002dRSY-b",
-- stctl_3 = "0000b6000000RS-a",
- stctg_3 = "eb0000000025RSY-a",
-+ stctl_3 = "0000b6000000RS-a",
-+ stcy_2 = "e30000000072RXY-a",
-+ std_2 = "000060000000RX-a",
-+ stdy_2 = "ed0000000067RXY-a",
-+ ste_2 = "000070000000RX-a",
-+ stey_2 = "ed0000000066RXY-a",
-+ stfh_2 = "e300000000cbRXY-a",
-+ stfl_1 = "0000b2b10000S",
-+ stg_2 = "e30000000024RXY-a",
-+ stgrl_2 = "c40b00000000RIL-b",
- sth_2 = "000040000000RX-a",
-- sthy_2 = "e30000000070RXY-a",
- sthh_2 = "e300000000c7RXY-a",
- sthrl_2 = "c40700000000RIL-b",
-- stfh_2 = "e300000000cbRXY-a",
-+ sthy_2 = "e30000000070RXY-a",
- stm_3 = "000090000000RS-a",
-- stmy_3 = "eb0000000090RSY-a",
- stmg_3 = "eb0000000024RSY-a",
- stmh_3 = "eb0000000026RSY-a",
-+ stmy_3 = "eb0000000090RSY-a",
- stoc_3 = "eb00000000f3RSY-b",
- stocg_3 = "eb00000000e3RSY-b",
- stpq_2 = "e3000000008eRXY-a",
- strl_2 = "c40f00000000RIL-b",
-- stgrl_2 = "c40b00000000RIL-b",
-- strvh_2 = "e3000000003fRXY-a",
- strv_2 = "e3000000003eRXY-a",
- strvg_2 = "e3000000002fRXY-a",
-+ strvh_2 = "e3000000003fRXY-a",
- stura_2 = "0000b2460000RRE",
- sturg_2 = "0000b9250000RRE",
-- s_2 = "00005b000000RX-a",
-- sr_2 = "000000001b00RR",
-- sy_2 = "e3000000005bRXY-a",
-- sg_2 = "e30000000009RXY-a",
-- sgr_2 = "0000b9090000RRE",
-- sgf_2 = "e30000000019RXY-a",
-- sgfr_2 = "0000b9190000RRE",
-- sxbr_2 = "0000b34b0000RRE",
-- sdbr_2 = "0000b31b0000RRE",
-- sebr_2 = "0000b30b0000RRE",
-- sh_2 = "00004b000000RX-a",
-- shy_2 = "e3000000007bRXY-a",
-- sl_2 = "00005f000000RX-a",
-- slr_2 = "000000001f00RR",
-- sly_2 = "e3000000005fRXY-a",
-- slg_2 = "e3000000000bRXY-a",
-- slgr_2 = "0000b90b0000RRE",
-- slgf_2 = "e3000000001bRXY-a",
-- slgfr_2 = "0000b91b0000RRE",
-- slfi_2 = "c20500000000RIL-a",
-- slgfi_2 = "c20400000000RIL-a",
-- slb_2 = "e30000000099RXY-a",
-- slbr_2 = "0000b9990000RRE",
-- slbg_2 = "e30000000089RXY-a",
-- slbgr_2 = "0000b9890000RRE",
-- sxr_2 = "000000003700RR",
-- sd_2 = "00006b000000RX-a",
-- sdr_2 = "000000002b00RR",
-- se_2 = "00007b000000RX-a",
-- ser_2 = "000000003b00RR",
-+ sty_2 = "e30000000050RXY-a",
- su_2 = "00007f000000RX-a",
- sur_2 = "000000003f00RR",
-+ svc_1 = "000000000a00I",
- sw_2 = "00006f000000RX-a",
- swr_2 = "000000002f00RR",
-+ sxbr_2 = "0000b34b0000RRE",
-+ sxr_2 = "000000003700RR",
-+ sy_2 = "e3000000005bRXY-a",
- tar_2 = "0000b24c0000RRE",
- tb_2 = "0000b22c0000RRE",
-+ thder_2 = "0000b3580000RRE",
-+ thdr_2 = "0000b3590000RRE",
-+ tm_2 = "000091000000SI",
- tmhh_2 = "0000a7020000RI-a",
- tmhl_2 = "0000a7030000RI-a",
- tmlh_2 = "0000a7000000RI-a",
- tmll_2 = "0000a7010000RI-a",
-+ tmy_2 = "eb0000000051SIY",
-+ tr_2 = "dc0000000000SS-a",
- trace_3 = "000099000000RS-a",
- tracg_3 = "eb000000000fRSY-a",
- tre_2 = "0000b2a50000RRE",
--
--
-- -- SS-a instructions
-- clc_2 = "d50000000000SS-a",
-- ed_2 = "de0000000000SS-a",
-- edmk_2 = "df0000000000SS-a",
-- mvc_2 = "d20000000000SS-a",
-- mvcin_2 = "e80000000000SS-a",
-- mvn_2 = "d10000000000SS-a",
-- mvz_2 = "d30000000000SS-a",
-- nc_2 = "d40000000000SS-a",
-- oc_2 = "d60000000000SS-a",
-- tr_2 = "dc0000000000SS-a",
- trt_2 = "dd0000000000SS-a",
- trtr_2 = "d00000000000SS-a",
- unpka_2 = "ea0000000000SS-a",
- unpku_2 = "e20000000000SS-a",
-+ x_2 = "000057000000RX-a",
- xc_2 = "d70000000000SS-a",
-- ap_2 = "fa0000000000SS-b",
-- -- RRF-e instructions
-- cfebr_3 = "0000b3980000RRF-e",
-- cfebra_4 = "0000b3980000RRF-e",
-- cfdbr_3 = "0000b3990000RRF-e",
-- cfdbra_4 = "0000b3990000RRF-e",
-- cfxbr_3 = "0000b39a0000RRF-e",
-- cfxbra_4 = "0000b39a0000RRF-e",
-- cgebr_3 = "0000b3a80000RRF-e",
-- cgebra_4 = "0000b3a80000RRF-e",
-- cgdbr_3 = "0000b3a90000RRF-e",
-- cgdbra_4 = "0000b3a90000RRF-e",
-- cgxbr_3 = "0000b3aa0000RRF-e",
-- cgxbra_4 = "0000b3aa0000RRF-e",
-- cefbra_4 = "0000b3940000RRF-e",
-- cdfbra_4 = "0000b3950000RRF-e",
-- cxfbra_4 = "0000b3960000RRF-e",
-- cegbra_4 = "0000b3a40000RRF-e",
-- cdgbra_4 = "0000b3a50000RRF-e",
-- cxgbra_4 = "0000b3a60000RRF-e",
-- -- RXE instructions
-- adb_2 = "ed000000001aRXE",
-- aeb_2 = "ed000000000aRXE",
-- cdb_2 = "ed0000000019RXE",
-- ceb_2 = "ed0000000009RXE",
-- ddb_2 = "ed000000001dRXE",
-- deb_2 = "ed000000000dRXE",
-- mdb_2 = "ed000000001cRXE",
-- mdeb_2 = "ed000000000cRXE",
-- meeb_2 = "ed0000000017RXE",
-- mxdb_2 = "ed0000000007RXE",
-- sqdb_2 = "ed0000000015RXE",
-- sqeb_2 = "ed0000000014RXE",
-- sdb_2 = "ed000000001bRXE",
-- seb_2 = "ed000000000bRXE",
-- -- RRF-b instructions
-- didbr_4 = "0000b35b0000RRF-b",
-- -- S mode instructions
-- stfl_1 = "0000b2b10000S",
-- -- I- mode instructions
-- svc_1 = "000000000a00I",
-- -- RI-a mode instructions
-- -- TODO: change "i" to "RI-a"
-- chi_2 = "0000a70e0000RI-a",
-- cghi_2 = "0000a70f0000RI-a",
-- mhi_2 = "0000a70c0000RI-a",
-- mghi_2 = "0000a70d0000RI-a",
-- -- RI-b mode instructions
-- bras_2 = "0000a7050000RI-b",
-- brct_2 = "0000a7060000RI-b",
-- brctg_2 = "0000a7070000RI-b",
-- -- RI-c mode instructions
-- brc_2 = "0000a7040000RI-c",
-- -- RIL-c
-- brcl_2 = "c00400000000RIL-c",
-- -- RX-b mode instructions
-- bc_2 = "000047000000RX-b",
-- -- RSI
-- brxh_3 = "000084000000RSI",
-- -- RIE-e
-- brxhg_3 = "ec0000000044RIE-e",
-- -- SI
-- cli_2 = "000095000000SI",
-- mvi_2 = "000092000000SI",
-- ni_2 = "000094000000SI",
-- tm_2 = "000091000000SI",
-+ xg_2 = "e30000000082RXY-a",
-+ xgr_2 = "0000b9820000RRE",
- xi_2 = "000097000000SI",
-- oi_2 = "000096000000SI",
-- -- SIY
-- tmy_2 = "eb0000000051SIY",
-- -- RXF
-- madb_3 = "ed000000001eRXF",
-- maeb_3 = "ed000000000eRXF",
-- -- RRD
-- maebr_3 = "0000b30e0000RRD",
-- -- RS-b
-- clm_3 = "0000bd000000RS-b"
-+ xihf_2 = "c00600000000RIL-a",
-+ xilf_2 = "c00700000000RIL-a",
-+ xr_2 = "000000001700RR",
-+ xy_2 = "e30000000057RXY-a",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
---
-2.20.1
-
-
-From ba2755e6bc5bc11f7b404b6136d51f64c4e78b88 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 31 Jan 2017 15:30:54 -0500
-Subject: [PATCH 244/247] Use real encoding names for extended mnemonics.
-
----
- dynasm/dasm_s390x.lua | 54 ++++++++++++++++++-------------------------
- 1 file changed, 22 insertions(+), 32 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index ced4f9f..62aa7bc 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1251,13 +1251,13 @@ for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
- -- TODO: replace 'B' with correct encoding.
- -- brc
-- map_op["j"..cond.."_1"] =
"0000"..tohex(0xa7040000+shl(c, 20)).."w"
-+ map_op["j"..cond.."_1"] =
"0000"..tohex(0xa7040000+shl(c, 20)).."RI-c"
- -- brcl
-- map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c,
20)).."0000".."x"
-+ map_op["jg"..cond.."_1"] = tohex(0xc0040000+shl(c,
20)).."0000".."RIL-c"
- -- bc
-- map_op["b"..cond.."_1"] =
"0000"..tohex(0x47000000+shl(c, 20)).."y"
-+ map_op["b"..cond.."_1"] =
"0000"..tohex(0x47000000+shl(c, 20)).."RX-b"
- -- bcr
-- map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c,
4)).."z"
-+ map_op["b"..cond.."r_1"] = "0000"..tohex(0x0700+shl(c,
4)).."RR"
- end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
-@@ -1271,7 +1271,10 @@ local function parse_template(params, template, nparams, pos)
- -- Process each character.
- local p = sub(template, 13)
- if p == "RR" then
-- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ if #params > 1 then
-+ op2 = op2 + shl(parse_reg(params[1]),4)
-+ end
-+ op2 = op2 + parse_reg(params[#params])
- wputhw(op2)
- elseif p == "RRE" then
- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-@@ -1286,7 +1289,6 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
- if a then a() end
-- elseif p == "RX-b" then
- elseif p == "RXY-a" then
- local d, x, b, a = parse_mem_bxy(params[2])
- op0 = op0 + shl(parse_reg(params[1]), 4) + x
-@@ -1294,8 +1296,6 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
- if a then a() end
-- elseif p == "m" then
--
- elseif p == "RIL-a" then
- op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
-@@ -1395,18 +1395,25 @@ local function parse_template(params, template, nparams, pos)
- local mode, n, s = parse_label(params[2])
- waction("REL_"..mode, n, s)
- elseif p == "RI-c" then
-- op1 = op1 + shl(parse_num(params[1]),4)
-+ if #params > 1 then
-+ op1 = op1 + shl(parse_num(params[1]), 4)
-+ end
- wputhw(op1)
-- local mode, n, s = parse_label(params[2])
-+ local mode, n, s = parse_label(params[#params])
- waction("REL_"..mode, n, s)
- elseif p == "RIL-c" then
-- op0 = op0 + shl(parse_num(params[1]),4)
-- wputhhw(op0)
-- local mode, n, s = parse_label(params[2])
-+ if #params > 1 then
-+ op0 = op0 + shl(parse_num(params[1]), 4)
-+ end
-+ wputhw(op0)
-+ local mode, n, s = parse_label(params[#params])
- waction("REL_"..mode, n, s)
- elseif p == "RX-b" then
-- local d, x, b, a = parse_mem_bx(params[2])
-- op1 = op1 + shl(parse_num(params[1]), 4) + x
-+ local d, x, b, a = parse_mem_bx(params[#params])
-+ if #params > 1 then
-+ op1 = op1 + shl(parse_num(params[1]), 4)
-+ end
-+ op1 = op1 + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1);wputhw(op2);
- if a then a() end
-@@ -1442,23 +1449,6 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1)
- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) +
parse_reg(params[3])
- wputhw(op2)
-- elseif p == "w" then
-- local mode, n, s = parse_label(params[1])
-- wputhw(op1)
-- waction("REL_"..mode, n, s)
-- elseif p == "x" then
-- local mode, n, s = parse_label(params[1])
-- wputhw(op0)
-- waction("REL_"..mode, n, s)
-- elseif p == "y" then
-- local d, x, b, a = parse_mem_bx(params[1])
-- op1 = op1 + x
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2);
-- if a then a() end -- a() emits action.
-- elseif p == "z" then
-- op2 = op2 + parse_reg(params[1])
-- wputhw(op2)
- elseif p == "RS-b" then
- local m = parse_mask(params[2])
- local d, b, a = parse_mem_b(params[3])
---
-2.20.1
-
-
-From d35e70098503ded96e918e521bf8d2f8b16fdc25 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 31 Jan 2017 15:42:48 -0500
-Subject: [PATCH 245/247] Re-arrange instruction encodings so they are in
- alphabetical order.
-
----
- dynasm/dasm_s390x.lua | 251 +++++++++++++++++++++---------------------
- 1 file changed, 125 insertions(+), 126 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 62aa7bc..96ab88f 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1270,32 +1270,33 @@ local function parse_template(params, template, nparams, pos)
-
- -- Process each character.
- local p = sub(template, 13)
-- if p == "RR" then
-- if #params > 1 then
-- op2 = op2 + shl(parse_reg(params[1]),4)
-- end
-- op2 = op2 + parse_reg(params[#params])
-- wputhw(op2)
-- elseif p == "RRE" then
-- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-- wputhw(op1); wputhw(op2)
-+ if p == "I" then
-+ local imm_val, a = parse_imm8(params[1])
-+ op2 = op2 + imm_val;
-+ wputhw(op2);
-+ if a then a() end
- elseif p == "RI-a" then
- op1 = op1 + shl(parse_reg(params[1]),4)
- wputhw(op1);
- parse_imm16(params[2])
-- elseif p == "RX-a" then
-- local d, x, b, a = parse_mem_bx(params[2])
-- op1 = op1 + shl(parse_reg(params[1]), 4) + x
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2);
-- if a then a() end
-- elseif p == "RXY-a" then
-- local d, x, b, a = parse_mem_bxy(params[2])
-- op0 = op0 + shl(parse_reg(params[1]), 4) + x
-- op1 = op1 + shl(b, 12) + band(d, 0xfff)
-- op2 = op2 + band(shr(d, 4), 0xff00)
-- wputhw(op0); wputhw(op1); wputhw(op2)
-- if a then a() end
-+ elseif p == "RI-b" then
-+ op1 = op1 + shl(parse_reg(params[1]),4)
-+ wputhw(op1)
-+ local mode, n, s = parse_label(params[2])
-+ waction("REL_"..mode, n, s)
-+ elseif p == "RI-c" then
-+ if #params > 1 then
-+ op1 = op1 + shl(parse_num(params[1]), 4)
-+ end
-+ wputhw(op1)
-+ local mode, n, s = parse_label(params[#params])
-+ waction("REL_"..mode, n, s)
-+ elseif p == "RIE-e" then
-+ op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ wputhw1(op0)
-+ local mode, n, s = parse_label(params[3])
-+ waction("REL_"..mode, n, s)
-+ wputhw(op2)
- elseif p == "RIL-a" then
- op0 = op0 + shl(parse_reg(params[1]), 4)
- wputhw(op0);
-@@ -1305,6 +1306,37 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op0);
- local mode, n, s = parse_label(params[2])
- waction("REL_"..mode, n, s)
-+ elseif p == "RIL-c" then
-+ if #params > 1 then
-+ op0 = op0 + shl(parse_num(params[1]), 4)
-+ end
-+ wputhw(op0)
-+ local mode, n, s = parse_label(params[#params])
-+ waction("REL_"..mode, n, s)
-+ elseif p == "RR" then
-+ if #params > 1 then
-+ op2 = op2 + shl(parse_reg(params[1]),4)
-+ end
-+ op2 = op2 + parse_reg(params[#params])
-+ wputhw(op2)
-+ elseif p == "RRD" then
-+ wputhw(op1)
-+ op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
-+ wputhw(op2)
-+ elseif p == "RRE" then
-+ op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ wputhw(op1); wputhw(op2)
-+ elseif p == "RRF-b" then
-+ wputhw(op1);
-+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
-+ wputhw(op2)
-+ elseif p == "RRF-e" then
-+ wputhw(op1)
-+ op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) + parse_reg(params[3])
-+ if params[4] then
-+ op2 = op2 + shl(parse_mask2(params[4]),8)
-+ end
-+ wputhw(op2)
- elseif p == "RS-a" then
- if (params[3]) then
- local d, b, a = parse_mem_b(params[3])
-@@ -1317,6 +1349,18 @@ local function parse_template(params, template, nparams, pos)
- end
- wputhw(op1); wputhw(op2)
- if a then a() end
-+ elseif p == "RS-b" then
-+ local m = parse_mask(params[2])
-+ local d, b, a = parse_mem_b(params[3])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + m
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2)
-+ if a then a() end
-+ elseif p == "RSI" then
-+ op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ wputhw(op1)
-+ local mode, n, s = parse_label(params[3])
-+ waction("REL_"..mode, n, s)
- elseif p == "RSY-a" then
- local d, b, a = parse_mem_by(params[3])
- op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
-@@ -1324,110 +1368,50 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
- if a then a() end -- a() emits action.
-- elseif p == "SS-a" then
-- local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
-- local d2, b2, d2a = parse_mem_b(params[2])
-- op0 = op0 + l1
-- op1 = op1 + shl(b1, 12) + d1
-- op2 = op2 + shl(b2, 12) + d2
-- wputhw(op0)
-- if l1a then l1a() end
-- wputhw(op1)
-- if d1a then d1a() end
-- wputhw(op2)
-- if d2a then d2a() end
-- elseif p == "SS-b" then
-- local high_l=true;
-- local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
-- high_l=false;
-- local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
-- op0 = op0 + shl(l1,4) + l2
-- op1 = op1 + shl(b1, 12) + d1
-- op2 = op2 + shl(b2, 12) + d2
-- wputhw(op0)
-- if l1a then l1a() end
-- if l2a then l2a() end
-- wputhw(op1)
-- if d1a then d1a() end
-- wputhw(op2)
-- if d2a then d2a() end
-- elseif p == "SIL" then
-- wputhw(op0)
-- local d, b, a = parse_mem_b(params[1])
-- op1 = op1 + shl(b, 12) + d
-- wputhw(op1)
-+ elseif p == "RX-a" then
-+ local d, x, b, a = parse_mem_bx(params[2])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
- if a then a() end
-- parse_imm16(params[2])
-- elseif p == "RRF-e" then
-- wputhw(op1)
-- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) + parse_reg(params[3])
-- if params[4] then
-- op2 = op2 + shl(parse_mask2(params[4]),8)
-+ elseif p == "RX-b" then
-+ local d, x, b, a = parse_mem_bx(params[#params])
-+ if #params > 1 then
-+ op1 = op1 + shl(parse_num(params[1]), 4)
- end
-- wputhw(op2)
-+ op1 = op1 + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1);wputhw(op2);
-+ if a then a() end
- elseif p == "RXE" then
- local d, x, b, a = parse_mem_bx(params[2])
- op0 = op0 + shl(parse_reg(params[1]), 4) + x
- op1 = op1 + shl(b, 12) + d
-- -- m3 is not present, so assumed its not part of the instruction since its not passed as a prameter
- wputhw(op0);
- wputhw(op1);
- if a then a() end
- wputhw(op2);
-- elseif p == "RRF-b" then
-- wputhw(op1);
-- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
-+ elseif p == "RXF" then
-+ local d, x, b, a = parse_mem_bx(params[3])
-+ op0 = op0 + shl(parse_reg(params[2]),4) + x
-+ op1 = op1 + shl(b, 12) + d
-+ wputhw(op0); wputhw(op1);
-+ if a then a() end
-+ op2 = op2 + shl(parse_reg(params[1]),12)
- wputhw(op2)
-- elseif p =="S" then
-+ elseif p == "RXY-a" then
-+ local d, x, b, a = parse_mem_bxy(params[2])
-+ op0 = op0 + shl(parse_reg(params[1]), 4) + x
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op0); wputhw(op1); wputhw(op2)
-+ if a then a() end
-+ elseif p == "S" then
- wputhw(op1);
- local d, b, a = parse_mem_b(params[1])
- op2 = op2 + shl(b,12) + d;
- wputhw(op2)
- if a then a() end
-- elseif p =="I" then
-- local imm_val, a = parse_imm8(params[1])
-- op2 = op2 + imm_val;
-- wputhw(op2);
-- if a then a() end
-- elseif p == "RI-b" then
-- op1 = op1 + shl(parse_reg(params[1]),4)
-- wputhw(op1)
-- local mode, n, s = parse_label(params[2])
-- waction("REL_"..mode, n, s)
-- elseif p == "RI-c" then
-- if #params > 1 then
-- op1 = op1 + shl(parse_num(params[1]), 4)
-- end
-- wputhw(op1)
-- local mode, n, s = parse_label(params[#params])
-- waction("REL_"..mode, n, s)
-- elseif p == "RIL-c" then
-- if #params > 1 then
-- op0 = op0 + shl(parse_num(params[1]), 4)
-- end
-- wputhw(op0)
-- local mode, n, s = parse_label(params[#params])
-- waction("REL_"..mode, n, s)
-- elseif p == "RX-b" then
-- local d, x, b, a = parse_mem_bx(params[#params])
-- if #params > 1 then
-- op1 = op1 + shl(parse_num(params[1]), 4)
-- end
-- op1 = op1 + x
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1);wputhw(op2);
-- if a then a() end
-- elseif p == "RSI" then
-- op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-- wputhw(op1)
-- local mode, n, s = parse_label(params[3])
-- waction("REL_"..mode, n, s)
-- elseif p == "RIE-e" then
-- op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-- wputhw1(op0)
-- local mode, n, s = parse_label(params[3])
-- waction("REL_"..mode, n, s)
-- wputhw(op2)
- elseif p == "SI" then
- local imm_val, a = parse_imm8(params[2])
- op1 = op1 + imm_val
-@@ -1437,25 +1421,13 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b,12) + d
- wputhw(op2)
- if a then a() end
-- elseif p == "RXF" then
-- local d, x, b, a = parse_mem_bx(params[3])
-- op0 = op0 + shl(parse_reg(params[2]),4) + x
-+ elseif p == "SIL" then
-+ wputhw(op0)
-+ local d, b, a = parse_mem_b(params[1])
- op1 = op1 + shl(b, 12) + d
-- wputhw(op0); wputhw(op1);
-- if a then a() end
-- op2 = op2 + shl(parse_reg(params[1]),12)
-- wputhw(op2)
-- elseif p == "RRD" then
- wputhw(op1)
-- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
-- wputhw(op2)
-- elseif p == "RS-b" then
-- local m = parse_mask(params[2])
-- local d, b, a = parse_mem_b(params[3])
-- op1 = op1 + shl(parse_reg(params[1]), 4) + m
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2)
- if a then a() end
-+ parse_imm16(params[2])
- elseif p == "SIY" then
- local imm8,iact = parse_imm8(params[2])
- op0 = op0 + shl(imm8, 8)
-@@ -1466,6 +1438,33 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op1); wputhw(op2)
- if a then a() end
-+ elseif p == "SS-a" then
-+ local d1, l1, b1, d1a, l1a = parse_mem_lb(params[1])
-+ local d2, b2, d2a = parse_mem_b(params[2])
-+ op0 = op0 + l1
-+ op1 = op1 + shl(b1, 12) + d1
-+ op2 = op2 + shl(b2, 12) + d2
-+ wputhw(op0)
-+ if l1a then l1a() end
-+ wputhw(op1)
-+ if d1a then d1a() end
-+ wputhw(op2)
-+ if d2a then d2a() end
-+ elseif p == "SS-b" then
-+ local high_l=true;
-+ local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
-+ high_l=false;
-+ local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
-+ op0 = op0 + shl(l1,4) + l2
-+ op1 = op1 + shl(b1, 12) + d1
-+ op2 = op2 + shl(b2, 12) + d2
-+ wputhw(op0)
-+ if l1a then l1a() end
-+ if l2a then l2a() end
-+ wputhw(op1)
-+ if d1a then d1a() end
-+ wputhw(op2)
-+ if d2a then d2a() end
- else
- werror("unrecognized encoding")
- end
---
-2.20.1
-
-
-From afb2bac1dabfc8f7d566deb79c1c23d1766f7bcf Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday(a)ca.ibm.com>
-Date: Tue, 31 Jan 2017 16:13:18 -0500
-Subject: [PATCH 246/247] Various minor style changes.
-
----
- dynasm/dasm_s390x.lua | 91 +++++++++++++++++++++----------------------
- 1 file changed, 45 insertions(+), 46 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 96ab88f..222eb74 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -49,7 +49,7 @@ local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
- -- Action name -> action number.
- local map_action = {}
- local max_action = 0
--for n,name in ipairs(action_names) do
-+for n, name in ipairs(action_names) do
- map_action[name] = n-1
- max_action = n
- end
-@@ -68,7 +68,7 @@ local secpos = 1
- -- Dump action names and numbers.
- local function dumpactions(out)
- out:write("DynASM encoding engine action codes:\n")
-- for n,name in ipairs(action_names) do
-+ for n, name in ipairs(action_names) do
- local num = map_action[name]
- out:write(format(" %-10s %02X %d\n", name, num, num))
- end
-@@ -89,7 +89,7 @@ local function writeactions(out, name)
- if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
- out:write("static const unsigned short ", name, "[", nn, "] = {")
- local esc = false -- also need to escape for action arguments
-- for i = 1,nn do
-+ for i = 1, nn do
- assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8)))
- if i ~= nn then assert(out:write(",")) end
- local name = action_names[actlist[i]+1]
-@@ -160,7 +160,7 @@ local function dumpglobals(out, lvl)
- local t = {}
- for name, n in pairs(map_global) do t[n] = name end
- out:write("Global labels:\n")
-- for i=20,next_global-1 do
-+ for i=20, next_global-1 do
- out:write(format(" %s\n", t[i]))
- end
- out:write("\n")
-@@ -171,7 +171,7 @@ local function writeglobals(out, prefix)
- local t = {}
- for name, n in pairs(map_global) do t[n] = name end
- out:write("enum {\n")
-- for i=20,next_global-1 do
-+ for i=20, next_global-1 do
- out:write(" ", prefix, t[i], ",\n")
- end
- out:write(" ", prefix, "_MAX\n};\n")
-@@ -182,7 +182,7 @@ local function writeglobalnames(out, name)
- local t = {}
- for name, n in pairs(map_global) do t[n] = name end
- out:write("static const char *const ", name, "[] = {\n")
-- for i=20,next_global-1 do
-+ for i=20, next_global-1 do
- out:write(" \"", t[i], "\",\n")
- end
- out:write(" (const char *)0\n};\n")
-@@ -206,7 +206,7 @@ end})
- -- Dump extern labels.
- local function dumpexterns(out, lvl)
- out:write("Extern labels:\n")
-- for i=0,next_extern-1 do
-+ for i=0, next_extern-1 do
- out:write(format(" %s\n", map_extern_[i]))
- end
- out:write("\n")
-@@ -215,7 +215,7 @@ end
- -- Write extern label names.
- local function writeexternnames(out, name)
- out:write("static const char *const ", name, "[] = {\n")
-- for i=0,next_extern-1 do
-+ for i=0, next_extern-1 do
- out:write(" \"", map_extern_[i], "\",\n")
- end
- out:write(" (const char *)0\n};\n")
-@@ -443,7 +443,7 @@ local function parse_mem_lb(arg)
- return dval, lval, parse_reg(b), dact, lact
- end
-
--local function parse_mem_l2b(arg,high_l)
-+local function parse_mem_l2b(arg, high_l)
- local reg = "r1?[0-9]"
- local d, l, b = match(arg, "^(.*)%s*%(%s*(.*)%s*,%s*("..reg..")%s*%)$")
- if not d then
-@@ -474,7 +474,7 @@ local function parse_mem_l2b(arg,high_l)
- if high_l then
- lact = function() waction("LEN4HR", nil, l) end
- else
-- lact = function() waction("LEN4LR",nil,l) end
-+ lact = function() waction("LEN4LR", nil, l) end
- end
- end
- return dval, lval, parse_reg(b), dact, lact
-@@ -519,7 +519,7 @@ local function parse_imm8(imm)
- end
- return imm_val, nil
- end
-- return 0, function() waction("IMM8",nil,imm) end
-+ return 0, function() waction("IMM8", nil, imm) end
- end
-
- local function parse_mask(mask)
-@@ -1247,7 +1247,7 @@ map_op = {
- xr_2 = "000000001700RR",
- xy_2 = "e30000000057RXY-a",
- }
--for cond,c in pairs(map_cond) do
-+for cond, c in pairs(map_cond) do
- -- Extended mnemonics for branches.
- -- TODO: replace 'B' with correct encoding.
- -- brc
-@@ -1272,15 +1272,15 @@ local function parse_template(params, template, nparams, pos)
- local p = sub(template, 13)
- if p == "I" then
- local imm_val, a = parse_imm8(params[1])
-- op2 = op2 + imm_val;
-- wputhw(op2);
-+ op2 = op2 + imm_val
-+ wputhw(op2)
- if a then a() end
- elseif p == "RI-a" then
-- op1 = op1 + shl(parse_reg(params[1]),4)
-- wputhw(op1);
-+ op1 = op1 + shl(parse_reg(params[1]), 4)
-+ wputhw(op1)
- parse_imm16(params[2])
- elseif p == "RI-b" then
-- op1 = op1 + shl(parse_reg(params[1]),4)
-+ op1 = op1 + shl(parse_reg(params[1]), 4)
- wputhw(op1)
- local mode, n, s = parse_label(params[2])
- waction("REL_"..mode, n, s)
-@@ -1292,7 +1292,7 @@ local function parse_template(params, template, nparams, pos)
- local mode, n, s = parse_label(params[#params])
- waction("REL_"..mode, n, s)
- elseif p == "RIE-e" then
-- op0 = op0 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ op0 = op0 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
- wputhw1(op0)
- local mode, n, s = parse_label(params[3])
- waction("REL_"..mode, n, s)
-@@ -1303,7 +1303,7 @@ local function parse_template(params, template, nparams, pos)
- parse_imm32(params[2])
- elseif p == "RIL-b" then
- op0 = op0 + shl(parse_reg(params[1]), 4)
-- wputhw(op0);
-+ wputhw(op0)
- local mode, n, s = parse_label(params[2])
- waction("REL_"..mode, n, s)
- elseif p == "RIL-c" then
-@@ -1315,26 +1315,26 @@ local function parse_template(params, template, nparams, pos)
- waction("REL_"..mode, n, s)
- elseif p == "RR" then
- if #params > 1 then
-- op2 = op2 + shl(parse_reg(params[1]),4)
-+ op2 = op2 + shl(parse_reg(params[1]), 4)
- end
- op2 = op2 + parse_reg(params[#params])
- wputhw(op2)
- elseif p == "RRD" then
- wputhw(op1)
-- op2 = op2 + shl(parse_reg(params[1]),12) + shl(parse_reg(params[2]),4) + parse_reg(params[3])
-+ op2 = op2 + shl(parse_reg(params[1]), 12) + shl(parse_reg(params[2]), 4) + parse_reg(params[3])
- wputhw(op2)
- elseif p == "RRE" then
-- op2 = op2 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ op2 = op2 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
- wputhw(op1); wputhw(op2)
- elseif p == "RRF-b" then
-- wputhw(op1);
-- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_reg(params[2]),12) + parse_reg(params[3]) + shl(parse_mask(params[4]),8)
-+ wputhw(op1)
-+ op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_reg(params[2]), 12) + parse_reg(params[3]) + shl(parse_mask(params[4]), 8)
- wputhw(op2)
- elseif p == "RRF-e" then
- wputhw(op1)
-- op2 = op2 + shl(parse_reg(params[1]),4) + shl(parse_mask(params[2]),12) + parse_reg(params[3])
-+ op2 = op2 + shl(parse_reg(params[1]), 4) + shl(parse_mask(params[2]), 12) + parse_reg(params[3])
- if params[4] then
-- op2 = op2 + shl(parse_mask2(params[4]),8)
-+ op2 = op2 + shl(parse_mask2(params[4]), 8)
- end
- wputhw(op2)
- elseif p == "RS-a" then
-@@ -1357,7 +1357,7 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1); wputhw(op2)
- if a then a() end
- elseif p == "RSI" then
-- op1 = op1 + shl(parse_reg(params[1]),4) + parse_reg(params[2])
-+ op1 = op1 + shl(parse_reg(params[1]), 4) + parse_reg(params[2])
- wputhw(op1)
- local mode, n, s = parse_label(params[3])
- waction("REL_"..mode, n, s)
-@@ -1372,7 +1372,7 @@ local function parse_template(params, template, nparams, pos)
- local d, x, b, a = parse_mem_bx(params[2])
- op1 = op1 + shl(parse_reg(params[1]), 4) + x
- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2);
-+ wputhw(op1); wputhw(op2)
- if a then a() end
- elseif p == "RX-b" then
- local d, x, b, a = parse_mem_bx(params[#params])
-@@ -1381,23 +1381,22 @@ local function parse_template(params, template, nparams, pos)
- end
- op1 = op1 + x
- op2 = op2 + shl(b, 12) + d
-- wputhw(op1);wputhw(op2);
-+ wputhw(op1); wputhw(op2)
- if a then a() end
- elseif p == "RXE" then
- local d, x, b, a = parse_mem_bx(params[2])
- op0 = op0 + shl(parse_reg(params[1]), 4) + x
- op1 = op1 + shl(b, 12) + d
-- wputhw(op0);
-- wputhw(op1);
-+ wputhw(op0); wputhw(op1)
- if a then a() end
- wputhw(op2);
- elseif p == "RXF" then
- local d, x, b, a = parse_mem_bx(params[3])
-- op0 = op0 + shl(parse_reg(params[2]),4) + x
-+ op0 = op0 + shl(parse_reg(params[2]), 4) + x
- op1 = op1 + shl(b, 12) + d
-- wputhw(op0); wputhw(op1);
-+ wputhw(op0); wputhw(op1)
- if a then a() end
-- op2 = op2 + shl(parse_reg(params[1]),12)
-+ op2 = op2 + shl(parse_reg(params[1]), 12)
- wputhw(op2)
- elseif p == "RXY-a" then
- local d, x, b, a = parse_mem_bxy(params[2])
-@@ -1409,7 +1408,7 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "S" then
- wputhw(op1);
- local d, b, a = parse_mem_b(params[1])
-- op2 = op2 + shl(b,12) + d;
-+ op2 = op2 + shl(b, 12) + d
- wputhw(op2)
- if a then a() end
- elseif p == "SI" then
-@@ -1418,7 +1417,7 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op1)
- if a then a() end
- local d, b, a = parse_mem_b(params[1])
-- op2 = op2 + shl(b,12) + d
-+ op2 = op2 + shl(b, 12) + d
- wputhw(op2)
- if a then a() end
- elseif p == "SIL" then
-@@ -1429,9 +1428,9 @@ local function parse_template(params, template, nparams, pos)
- if a then a() end
- parse_imm16(params[2])
- elseif p == "SIY" then
-- local imm8,iact = parse_imm8(params[2])
-+ local imm8, iact = parse_imm8(params[2])
- op0 = op0 + shl(imm8, 8)
-- wputhw(op0);
-+ wputhw(op0)
- if iact then iact() end
- local d, b, a = parse_mem_by(params[1])
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
-@@ -1451,11 +1450,11 @@ local function parse_template(params, template, nparams, pos)
- wputhw(op2)
- if d2a then d2a() end
- elseif p == "SS-b" then
-- local high_l=true;
-- local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1],high_l)
-- high_l=false;
-- local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2],high_l)
-- op0 = op0 + shl(l1,4) + l2
-+ local high_l = true
-+ local d1, l1, b1, d1a, l1a = parse_mem_l2b(params[1], high_l)
-+ high_l = false
-+ local d2, l2, b2, d2a, l2a = parse_mem_l2b(params[2], high_l)
-+ op0 = op0 + shl(l1, 4) + l2
- op1 = op1 + shl(b1, 12) + d1
- op2 = op2 + shl(b2, 12) + d2
- wputhw(op0)
-@@ -1529,7 +1528,7 @@ end
- -- Pseudo-opcodes for data storage.
- map_op[".long_*"] = function(params)
- if not params then return "imm..." end
-- for _,p in ipairs(params) do
-+ for _, p in ipairs(params) do
- local n = tonumber(p)
- if not n then werror("bad immediate `"..p.."'") end
- if n < 0 then n = n + 2^32 end
-@@ -1545,7 +1544,7 @@ map_op[".align_1"] = function(params)
- if align then
- local x = align
- -- Must be a power of 2 in the range (2 ... 256).
-- for i=1,8 do
-+ for i=1, 8 do
- x = x / 2
- if x == 1 then
- waction("ALIGN", align-1, nil, 1) -- Action halfword is 2**n-1.
-@@ -1588,7 +1587,7 @@ local function dumptypes(out, lvl)
- for name in pairs(map_type) do t[#t+1] = name end
- sort(t)
- out:write("Type definitions:\n")
-- for _,name in ipairs(t) do
-+ for _, name in ipairs(t) do
- local tp = map_type[name]
- local reg = tp.reg or ""
- out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
---
-2.20.1
-
-
-From bed8a74b53230bd9cc1e4b6b064ee30516fa2f3d Mon Sep 17 00:00:00 2001
-From: preetikhorjuvenkar <preetikhorjuvenkar29(a)gmail.com>
-Date: Wed, 14 Feb 2018 10:49:53 +0000
-Subject: [PATCH 247/247] Removing reference to dis_s390x.lua
-
----
- Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/Makefile b/Makefile
-index fc8ed61..923bf72 100644
---- a/Makefile
-+++ b/Makefile
-@@ -88,7 +88,7 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
- FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
- dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
- dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
-- dis_mips64.lua dis_mips64el.lua dis_s390x.lua vmdef.lua
-+ dis_mips64.lua dis_mips64el.lua vmdef.lua
-
- ifeq (,$(findstring Windows,$(OS)))
- HOST_SYS:= $(shell uname -s)
---
-2.20.1
-
diff --git a/luajit-update-20190925.patch b/luajit-update-20190925.patch
deleted file mode 100644
index f96d11d..0000000
--- a/luajit-update-20190925.patch
+++ /dev/null
@@ -1,4897 +0,0 @@
-From d84f3bbd94f2f74c8d9e95f9df7bbc62616725e1 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Tue, 30 Apr 2019 15:48:46 +0530
-Subject: [PATCH] Update README to reflect reality for this repository
-
----
- README | 40 ++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 40 insertions(+)
-
-diff --git a/README b/README
-index 2b9ae9d2..c46794f8 100644
---- a/README
-+++ b/README
-@@ -1,3 +1,43 @@
-+LuaJIT
-+======
-+
-+This is an integrtion fork of the original
-+[LuaJIT project](https://repo.or.cz/w/luajit-2.0.git) authored by Mike Pall
-+with the aim of providing a quicker paced and more inclusive development
-+workflow for LuaJIT. The original README is preserved below for posterity.
-+This repo will attempt to remain in sync with developments in the original
-+LuaJIT but will allow space to innovate and fix bugs so as to provide
-+continuity for people interested in the project.
-+
-+LuaJIT is Copyright (c) 2005-2019 Mike Pall and various contributors. The list
-+of contributors may be found via the git log and from the CONTRIBUTORS file in
-+the top level of these sources, which is updated on a regular basis.
-+
-+Branches
-+--------
-+
-+The original LuaJIT project had two supported versions in v2.0 and v2.1. These
-+were tracked using three branches, master, v2.0 and v2.1, where master is used
-+for v2.0 bug fixes and v2.0 simply tracks master. This is confusing and does
-+not allow space for new development, so this project has a slightly different
-+branch layout that helps unblock development and also track the original
-+LuaJIT.
-+
-+This repo has the following main branches:
-+
-+ * master: This is where new features and language support will land. This may
-+ diverge from the original LuaJIT, although as the goal of this project
-+ suggests, attempts will be made to remain as compatible as possible.
-+ * v2.1: This remains a bug fix branch for this repository and will be regularly
-+ merged with the upstream v2.1 branch.
-+ * v2.0: This remains a bug fix branch that tracks the v2.0 branch upstream.
-+
-+The upstream master branch currently only tracks the v2.0 branch (or vice
-+versa, we may never know!) so it is ignored.
-+
-+Original README
-+===============
-+
- README for LuaJIT 2.1.0-beta3
- -----------------------------
-
---
-2.21.0
-
-From 624eec51ffdcc6dca0d620de0bc8a00a460da1d3 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Sun, 24 Dec 2017 13:10:31 -0800
-Subject: [PATCH 02/34] feature: implemented new API function jit.prngstate()
- for reading or setting the current PRNG state number used in the JIT
- compiler.
-
----
- src/lib_jit.c | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index 6972550b..c6cdda7a 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -141,6 +141,17 @@ LJLIB_CF(jit_attach)
- return 0;
- }
-
-+LJLIB_CF(jit_prngstate)
-+{
-+ jit_State *J = L2J(L);
-+ int32_t cur = (int32_t)J->prngstate;
-+ if (L->base < L->top && !tvisnil(L->base)) {
-+ J->prngstate = (uint32_t)lj_lib_checkint(L, 1);
-+ }
-+ setintV(L->top++, cur);
-+ return 1;
-+}
-+
- LJLIB_PUSH(top-5) LJLIB_SET(os)
- LJLIB_PUSH(top-4) LJLIB_SET(arch)
- LJLIB_PUSH(top-3) LJLIB_SET(version_num)
---
-2.21.0
-
-
-From 92c12849f85710f40bbef8391c89f1f452ddd52f Mon Sep 17 00:00:00 2001
-From: abhay1722 <abhays(a)us.ibm.com>
-Date: Tue, 30 Apr 2019 06:28:08 +0000
-Subject: [PATCH 03/34] bugfix: guarded the jit_prngstate builtin with the
- LJ_HAS_JIT macro.
-
----
- src/lib_jit.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index c6cdda7a..ef444d7e 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -141,6 +141,7 @@ LJLIB_CF(jit_attach)
- return 0;
- }
-
-+#if LJ_HASJIT
- LJLIB_CF(jit_prngstate)
- {
- jit_State *J = L2J(L);
-@@ -151,6 +152,7 @@ LJLIB_CF(jit_prngstate)
- setintV(L->top++, cur);
- return 1;
- }
-+#endif
-
- LJLIB_PUSH(top-5) LJLIB_SET(os)
- LJLIB_PUSH(top-4) LJLIB_SET(arch)
---
-2.21.0
-
-
-From f2d82d08ae5d4b1eea35914e25f01389aa0bd21b Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Wed, 3 Jul 2019 21:09:29 +0530
-Subject: [PATCH 04/34] jit.prngstate: Return a sane value (0) for
- LUAJIT_DISABLE_JIT
-
-Have jit.prngstate() return 0 when JIT is disabled during build
-instead of throwing an error like so:
-
-src/luajit: foo.lua:1: attempt to call field 'prngstate' (a nil value)
-stack traceback:
- foo.lua:1: in main chunk
- [C]: at 0x00405130
----
- src/lib_jit.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index ef444d7e..b84efa13 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -141,18 +141,20 @@ LJLIB_CF(jit_attach)
- return 0;
- }
-
--#if LJ_HASJIT
- LJLIB_CF(jit_prngstate)
- {
-+#if LJ_HASJIT
- jit_State *J = L2J(L);
- int32_t cur = (int32_t)J->prngstate;
- if (L->base < L->top && !tvisnil(L->base)) {
- J->prngstate = (uint32_t)lj_lib_checkint(L, 1);
- }
-+#else
-+ int32_t cur = 0;
-+#endif
- setintV(L->top++, cur);
- return 1;
- }
--#endif
-
- LJLIB_PUSH(top-5) LJLIB_SET(os)
- LJLIB_PUSH(top-4) LJLIB_SET(arch)
---
-2.21.0
-
-
-From 4abab76427696afc062f8c031e0958beec7f927a Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Thu, 7 Jun 2018 22:27:29 -0700
-Subject: [PATCH 05/34] feature: implemented the new Lua and C API functions
- for thread exdata.
-
-The Lua API can be used like below:
-
- local exdata = require "thread.exdata"
- exdata(0xdeadbeefLL) -- set the exdata of the current Lua thread
- local ptr = exdata() -- fetch the exdata of the current Lua thread
-
-The exdata value on the Lua land is represented as a cdata object of the
-ctype "void*".
-
-Right now the reading API, i.e., `exdata()` calls without any arguments,
-can be JIT compiled.
-
-Also exposed the following public C API functions for manipulating
-exdata on the C land:
-
- void lua_setexdata(lua_State *L, void *exdata);
- void *lua_getexdata(lua_State *L);
-
-The exdata pointer is initialized to NULL when the main thread is
-created. Any child Lua threads created will inherit the parent's exdata
-but still have their own exdata storage. So child Lua threads can always
-override the inherited parent exdata pointer values.
-
-This API is used internally by the OpenResty core so never ever mess
-with it yourself in the context of OpenResty.
-
-Thanks Zexuan Luo for preparing the final version of the patch.
-
-Signed-off-by: Yichun Zhang (agentzh) <agentzh(a)gmail.com>
----
- README | 35 ++++++++
- src/lib_base.c | 37 ++++++++
- src/lj_api.c | 9 ++
- src/lj_errmsg.h | 1 +
- src/lj_ffrecord.c | 16 ++++
- src/lj_ir.h | 1 +
- src/lj_lib.c | 7 ++
- src/lj_lib.h | 1 +
- src/lj_obj.h | 1 +
- src/lj_state.c | 2 +
- src/lua.h | 2 +
- src/lualib.h | 1 +
- t/TestLJ.pm | 91 +++++++++++++++++++
- t/exdata.t | 221 ++++++++++++++++++++++++++++++++++++++++++++++
- 14 files changed, 425 insertions(+)
- create mode 100644 t/TestLJ.pm
- create mode 100644 t/exdata.t
-
-diff --git a/README b/README
-index c46794f8..073ae647 100644
---- a/README
-+++ b/README
-@@ -35,6 +35,41 @@ This repo has the following main branches:
- The upstream master branch currently only tracks the v2.0 branch (or vice
- versa, we may never know!) so it is ignored.
-
-+
-+Additional Features
-+-------------------
-+
-+* New API function thread.exdata to embed user data in LuaJIT threads. This
-+ API needs FFI and hence is not available when built with
-+ `-DLUAJIT_DISABLE_FFI`.
-+
-+ The Lua API can be used like below:
-+
-+ local exdata = require "thread.exdata"
-+ exdata(0xdeadbeefLL) -- set the exdata of the current Lua thread
-+ local ptr = exdata() -- fetch the exdata of the current Lua thread
-+
-+ The exdata value on the Lua land is represented as a cdata object of the
-+ ctype "void*".
-+
-+ Right now the reading API, i.e., `exdata()` calls without any arguments,
-+ can be JIT compiled.
-+
-+ Also exposed the following public C API functions for manipulating
-+ exdata on the C land:
-+
-+ void lua_setexdata(lua_State *L, void *exdata);
-+ void *lua_getexdata(lua_State *L);
-+
-+ The exdata pointer is initialized to NULL when the main thread is
-+ created. Any child Lua threads created will inherit the parent's exdata
-+ but still have their own exdata storage. So child Lua threads can always
-+ override the inherited parent exdata pointer values.
-+
-+ This API is used internally by the OpenResty core so never ever mess
-+ with it yourself in the context of OpenResty.
-+
-+
- Original README
- ===============
-
-diff --git a/src/lib_base.c b/src/lib_base.c
-index 1cd83058..e341a366 100644
---- a/src/lib_base.c
-+++ b/src/lib_base.c
-@@ -35,6 +35,7 @@
- #include "lj_strscan.h"
- #include "lj_strfmt.h"
- #include "lj_lib.h"
-+#include "lj_cdata.h"
-
- /* -- Base library: checks ------------------------------------------------ */
-
-@@ -652,6 +653,30 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn)
- setmref(fn->c.pc, &L2GG(L)->bcff[lj_lib_init_coroutine[1]+2]);
- }
-
-+#if LJ_HASFFI
-+LJLIB_NOREG LJLIB_CF(thread_exdata) LJLIB_REC(.)
-+{
-+ ptrdiff_t nargs = L->top - L->base;
-+ GCcdata *cd;
-+
-+ if (nargs == 0) {
-+ CTState *cts = ctype_ctsG(G(L));
-+ if (cts == NULL)
-+ lj_err_caller(L, LJ_ERR_FFI_NOTLOAD);
-+ cts->L = L; /* Save L for errors and allocations. */
-+
-+ cd = lj_cdata_new(cts, CTID_P_VOID, CTSIZE_PTR);
-+ cdata_setptr(cdataptr(cd), CTSIZE_PTR, L->exdata);
-+ setcdataV(L, L->top++, cd);
-+ return 1;
-+ }
-+
-+ cd = lj_lib_checkcdata(L, 1);
-+ L->exdata = cdata_getptr(cdataptr(cd), CTSIZE_PTR);
-+ return 0;
-+}
-+#endif
-+
- /* ------------------------------------------------------------------------ */
-
- static void newproxy_weaktable(lua_State *L)
-@@ -665,6 +690,13 @@ static void newproxy_weaktable(lua_State *L)
- t->nomm = (uint8_t)(~(1u<<MM_mode));
- }
-
-+#if LJ_HASFFI
-+static int luaopen_thread_exdata(lua_State *L)
-+{
-+ return lj_lib_postreg(L, lj_cf_thread_exdata, FF_thread_exdata, "exdata");
-+}
-+#endif
-+
- LUALIB_API int luaopen_base(lua_State *L)
- {
- /* NOBARRIER: Table and value are the same. */
-@@ -674,6 +706,11 @@ LUALIB_API int luaopen_base(lua_State *L)
- newproxy_weaktable(L); /* top-2. */
- LJ_LIB_REG(L, "_G", base);
- LJ_LIB_REG(L, LUA_COLIBNAME, coroutine);
-+
-+#if LJ_HASFFI
-+ lj_lib_prereg(L, LUA_THRLIBNAME ".exdata", luaopen_thread_exdata, env);
-+#endif
-+
- return 2;
- }
-
-diff --git a/src/lj_api.c b/src/lj_api.c
-index d17a5754..9c4864d7 100644
---- a/src/lj_api.c
-+++ b/src/lj_api.c
-@@ -1290,3 +1290,12 @@ LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud)
- g->allocf = f;
- }
-
-+LUA_API void lua_setexdata(lua_State *L, void *exdata)
-+{
-+ L->exdata = exdata;
-+}
-+
-+LUA_API void *lua_getexdata(lua_State *L)
-+{
-+ return L->exdata;
-+}
-diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
-index 060a9f89..a3ecd016 100644
---- a/src/lj_errmsg.h
-+++ b/src/lj_errmsg.h
-@@ -181,6 +181,7 @@ ERRDEF(FFI_CBACKOV, "too many callbacks")
- #endif
- ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields")
- ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
-+ERRDEF(FFI_NOTLOAD, "ffi module not loaded (yet)")
- #endif
-
- #undef ERRDEF
-diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
-index 849d7a27..242d5d51 100644
---- a/src/lj_ffrecord.c
-+++ b/src/lj_ffrecord.c
-@@ -28,6 +28,7 @@
- #include "lj_vm.h"
- #include "lj_strscan.h"
- #include "lj_strfmt.h"
-+#include "lj_cdata.h"
-
- /* Some local macros to save typing. Undef'd at the end. */
- #define IR(ref) (&J->cur.ir[(ref)])
-@@ -1105,6 +1106,21 @@ static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
- } /* else: Interpreter will throw. */
- }
-
-+/* -- thread library fast functions ------------------------------------------ */
-+
-+void LJ_FASTCALL recff_thread_exdata(jit_State *J, RecordFFData *rd)
-+{
-+ TRef tr = J->base[0];
-+ if (!tr) {
-+ TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
-+ TRef trp = emitir(IRT(IR_FLOAD, IRT_PTR), trl, IRFL_THREAD_EXDATA);
-+ TRef trid = lj_ir_kint(J, CTID_P_VOID);
-+ J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, trp);
-+ return;
-+ }
-+ recff_nyiu(J, rd); /* this case is too rare to be interesting */
-+}
-+
- /* -- I/O library fast functions ------------------------------------------ */
-
- /* Get FILE* for I/O function. Any I/O error aborts recording, so there's
-diff --git a/src/lj_ir.h b/src/lj_ir.h
-index a46b561f..0961f665 100644
---- a/src/lj_ir.h
-+++ b/src/lj_ir.h
-@@ -196,6 +196,7 @@ IRFPMDEF(FPMENUM)
- _(FUNC_PC, offsetof(GCfunc, l.pc)) \
- _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
- _(THREAD_ENV, offsetof(lua_State, env)) \
-+ _(THREAD_EXDATA, offsetof(lua_State, exdata)) \
- _(TAB_META, offsetof(GCtab, metatable)) \
- _(TAB_ARRAY, offsetof(GCtab, array)) \
- _(TAB_NODE, offsetof(GCtab, node)) \
-diff --git a/src/lj_lib.c b/src/lj_lib.c
-index b8638de6..63dfca6c 100644
---- a/src/lj_lib.c
-+++ b/src/lj_lib.c
-@@ -301,3 +301,10 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
- return def;
- }
-
-+GCcdata *lj_lib_checkcdata(lua_State *L, int narg)
-+{
-+ TValue *o = L->base + narg-1;
-+ if (!(o < L->top && tviscdata(o)))
-+ lj_err_argt(L, narg, LUA_TCDATA);
-+ return cdataV(o);
-+}
-diff --git a/src/lj_lib.h b/src/lj_lib.h
-index 37ec9d78..8cb675a1 100644
---- a/src/lj_lib.h
-+++ b/src/lj_lib.h
-@@ -45,6 +45,7 @@ LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
- LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
- LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
- LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
-+LJ_FUNC GCcdata *lj_lib_checkcdata(lua_State *L, int narg);
-
- /* Avoid including lj_frame.h. */
- #if LJ_GC64
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index 4ff59441..a63f8d7c 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -660,6 +660,7 @@ struct lua_State {
- GCRef env; /* Thread environment (table of globals). */
- void *cframe; /* End of C stack frame chain. */
- MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */
-+ void *exdata; /* user extra data pointer. added by OpenResty */
- };
-
- #define G(L) (mref(L->glref, global_State))
-diff --git a/src/lj_state.c b/src/lj_state.c
-index 632dd07e..a0fba2ac 100644
---- a/src/lj_state.c
-+++ b/src/lj_state.c
-@@ -225,6 +225,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
- return NULL;
- }
- L->status = LUA_OK;
-+ L->exdata = NULL;
- return L;
- }
-
-@@ -284,6 +285,7 @@ lua_State *lj_state_new(lua_State *L)
- setgcrefr(L1->env, L->env);
- stack_init(L1, L); /* init stack */
- lua_assert(iswhite(obj2gco(L1)));
-+ L1->exdata = L->exdata;
- return L1;
- }
-
-diff --git a/src/lua.h b/src/lua.h
-index 850bd796..9dcafd69 100644
---- a/src/lua.h
-+++ b/src/lua.h
-@@ -245,6 +245,8 @@ LUA_API void (lua_concat) (lua_State *L, int n);
- LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud);
- LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud);
-
-+LUA_API void lua_setexdata(lua_State *L, void *exdata);
-+LUA_API void *lua_getexdata(lua_State *L);
-
-
- /*
-diff --git a/src/lualib.h b/src/lualib.h
-index bfc130a1..6aceabe5 100644
---- a/src/lualib.h
-+++ b/src/lualib.h
-@@ -21,6 +21,7 @@
- #define LUA_BITLIBNAME "bit"
- #define LUA_JITLIBNAME "jit"
- #define LUA_FFILIBNAME "ffi"
-+#define LUA_THRLIBNAME "thread"
-
- LUALIB_API int luaopen_base(lua_State *L);
- LUALIB_API int luaopen_math(lua_State *L);
-diff --git a/t/TestLJ.pm b/t/TestLJ.pm
-new file mode 100644
-index 00000000..cdc02a8e
---- /dev/null
-+++ b/t/TestLJ.pm
-@@ -0,0 +1,91 @@
-+package t::TestLJ;
-+
-+use v5.10.1;
-+use Test::Base -Base;
-+use IPC::Run3;
-+use Cwd qw( cwd );
-+use Test::LongString;
-+use File::Temp qw( tempdir );
-+
-+our @EXPORT = qw( run_tests );
-+
-+$ENV{LUA_CPATH} = "../?.so;;";
-+$ENV{LUA_PATH} = "../lua/?.lua;;";
-+#$ENV{LUA_PATH} = ($ENV{LUA_PATH} || "" ) . ';' . getcwd . "/runtime/?.lua" . ';;';
-+
-+my $cwd = cwd;
-+
-+sub run_test ($) {
-+ my $block = shift;
-+ #print $json_xs->pretty->encode(\@new_rows);
-+ #my $res = #print $json_xs->pretty->encode($res);
-+ my $name = $block->name;
-+
-+ my $lua = $block->lua or
-+ die "No --- lua specified for test $name\n";
-+
-+ my $luafile = "test.lua";
-+
-+ {
-+ my $dir = tempdir "testlj_XXXXXXX", CLEANUP => 1;
-+ chdir $dir or die "$name - Cannot chdir to $dir: $!";
-+ open my $fh, ">$luafile"
-+ or die "$name - Cannot open $luafile in $dir for writing: $!\n";
-+ print $fh $lua;
-+ close $fh;
-+ }
-+
-+ my ($res, $err);
-+
-+ my @cmd;
-+
-+ if ($ENV{TEST_LJ_USE_VALGRIND}) {
-+ warn "$name\n";
-+ @cmd = ('valgrind', '-q', '--leak-check=full', 'luajit',
-+ defined($block->jv) ? '-jv' : (),
-+ defined($block->jdump) ? '-jdump' : (),
-+ $luafile);
-+ } else {
-+ @cmd = ('luajit',
-+ defined($block->jv) ? '-jv' : (),
-+ defined($block->jdump) ? '-jdump' : (),
-+ $luafile);
-+ }
-+
-+ run3 \@cmd, undef, \$res, \$err;
-+ my $rc = $?;
-+
-+ #warn "res:$res\nerr:$err\n";
-+
-+ my $exp_rc = $block->exit // 0;
-+
-+ is $exp_rc, $rc >> 8, "$name - exit code okay";
-+
-+ if (defined $block->err) {
-+ if ($err =~ /.*:.*:.*: (.*\s)?/) {
-+ $err = $1;
-+ }
-+ is $err, $block->err, "$name - err expected";
-+
-+ } elsif (defined $err && $err ne '') {
-+ warn "$name - STDERR:\n$err";
-+ }
-+
-+ if (defined $block->out) {
-+ #is $res, $block->out, "$name - output ok";
-+ is $res, $block->out, "$name - output ok";
-+
-+ } elsif (defined $res && $res ne '') {
-+ warn "$name - STDOUT:\n$res";
-+ }
-+
-+ chdir $cwd or die $!;
-+}
-+
-+sub run_tests () {
-+ for my $block (blocks()) {
-+ run_test($block);
-+ }
-+}
-+
-+1;
-diff --git a/t/exdata.t b/t/exdata.t
-new file mode 100644
-index 00000000..239bb86c
---- /dev/null
-+++ b/t/exdata.t
-@@ -0,0 +1,221 @@
-+# vim: set ss=4 ft= sw=4 et sts=4 ts=4:
-+
-+use lib '.';
-+use t::TestLJ;
-+
-+plan tests => 3 * blocks();
-+
-+run_tests();
-+
-+__DATA__
-+
-+=== TEST 1: interpreted (sanity)
-+--- lua
-+jit.off()
-+local assert = assert
-+local exdata = require "thread.exdata"
-+local ffi = require "ffi"
-+local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+local ptr = ffi.cast("void *", u64)
-+local saved_q
-+for i = 1, 5 do
-+ exdata(u64)
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+end
-+print(tostring(ptr))
-+print(tostring(saved_q))
-+--- jv
-+--- out
-+cdata<void *>: 0xefdeaddeadbeef
-+cdata<void *>: 0xefdeaddeadbeef
-+--- err
-+
-+
-+
-+=== TEST 2: newly created coroutines should inherit the exdata
-+--- lua
-+jit.off()
-+local exdata = require "thread.exdata"
-+local ffi = require "ffi"
-+local u64 = ffi.new("uintptr_t", 0xefdeadbeefLL)
-+local ptr = ffi.cast("void *", u64)
-+local ptr2 = ffi.cast("void *", u64 + 1)
-+local ptr3 = ffi.cast("void *", u64 - 2)
-+local saved_q
-+local function f()
-+ coroutine.yield(exdata())
-+ exdata(ptr2)
-+ coroutine.yield(exdata())
-+ coroutine.yield(exdata())
-+end
-+
-+exdata(u64)
-+
-+local co = coroutine.create(f)
-+
-+local ok, data = coroutine.resume(co)
-+assert(ok)
-+print(tostring(data))
-+
-+ok, data = coroutine.resume(co)
-+assert(ok)
-+print(tostring(data))
-+
-+exdata(ptr3)
-+
-+ok, data = coroutine.resume(co)
-+assert(ok)
-+print(tostring(data))
-+
-+print(tostring(exdata()))
-+--- jv
-+--- out
-+cdata<void *>: 0xefdeadbeef
-+cdata<void *>: 0xefdeadbef0
-+cdata<void *>: 0xefdeadbef0
-+cdata<void *>: 0xefdeadbeed
-+--- err
-+
-+
-+
-+=== TEST 3: JIT mode (reading)
-+--- lua
-+jit.opt.start("minstitch=100000", "hotloop=2")
-+local assert = assert
-+local exdata = require "thread.exdata"
-+local ffi = require "ffi"
-+local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+local ptr = ffi.cast("void *", u64)
-+local saved_q
-+exdata(u64)
-+for i = 1, 10 do
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+end
-+print(tostring(ptr))
-+print(tostring(saved_q))
-+
-+--- jv
-+--- out
-+cdata<void *>: 0xefdeaddeadbeef
-+cdata<void *>: 0xefdeaddeadbeef
-+--- err
-+[TRACE 1 test.lua:9 loop]
-+
-+
-+
-+=== TEST 4: JIT mode (writing)
-+--- lua
-+jit.opt.start("minstitch=100000", "hotloop=2")
-+local assert = assert
-+local exdata = require "thread.exdata"
-+local ffi = require "ffi"
-+local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+local ptr = ffi.cast("void *", u64)
-+local saved_q
-+for i = 1, 10 do
-+ exdata(u64)
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+end
-+print(tostring(ptr))
-+print(tostring(saved_q))
-+
-+--- jv
-+--- out
-+cdata<void *>: 0xefdeaddeadbeef
-+cdata<void *>: 0xefdeaddeadbeef
-+--- err
-+[TRACE --- test.lua:8 -- trace too short at test.lua:9]
-+
-+
-+
-+=== TEST 5: interpreted - check the number of arguments
-+--- lua
-+jit.off()
-+local assert = assert
-+local select = select
-+local exdata = require "thread.exdata"
-+local ffi = require "ffi"
-+local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+local ptr = ffi.cast("void *", u64)
-+
-+local function nargs(...)
-+ return select('#', ...)
-+end
-+print(nargs(exdata(ptr)))
-+print(nargs(exdata()))
-+--- jv
-+--- out
-+0
-+1
-+--- err
-+
-+
-+
-+=== TEST 6: JIT mode - check the number of arguments
-+--- lua
-+jit.opt.start("minstitch=100000", "hotloop=2")
-+local assert = assert
-+local select = select
-+local exdata = require "thread.exdata"
-+local ffi = require "ffi"
-+local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+local ptr = ffi.cast("void *", u64)
-+
-+local function nargs(...)
-+ return select('#', ...)
-+end
-+
-+local total = 0
-+for i = 1, 10 do
-+ total = total + nargs(exdata(ptr))
-+end
-+
-+print("set: " .. total)
-+
-+total = 0
-+for i = 1, 10 do
-+ total = total + nargs(exdata())
-+end
-+
-+print("get: " .. total)
-+--- jv
-+--- out
-+set: 0
-+get: 10
-+--- err
-+[TRACE --- test.lua:14 -- trace too short at test.lua:15]
-+[TRACE 1 test.lua:21 loop]
-+
-+
-+
-+=== TEST 7: interpreted (no ffi initialized)
-+--- lua
-+jit.off()
-+local assert = assert
-+local exdata = require "thread.exdata"
-+local saved_q
-+for i = 1, 5 do
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+end
-+print(tostring(saved_q))
-+--- jv
-+--- out
-+--- err
-+ffi module not loaded (yet)
-+--- exit: 1
---
-2.21.0
-
-
-From dd4dfee59a2e68cd04b1e1b4c8d4f21743bfe3d0 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <yichun(a)openresty.com>
-Date: Wed, 30 Jan 2019 15:00:07 -0800
-Subject: [PATCH 06/34] bugfix: we broke the arm build in the commit c844a613.
- thanks Alec Muffett for the report in #37.
-
-Fix #37.
----
- src/lj_obj.h | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index a63f8d7c..3f674db2 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -661,6 +661,10 @@ struct lua_State {
- void *cframe; /* End of C stack frame chain. */
- MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */
- void *exdata; /* user extra data pointer. added by OpenResty */
-+#if LJ_TARGET_ARM
-+ uint32_t unused1;
-+ uint32_t unused2;
-+#endif
- };
-
- #define G(L) (mref(L->glref, global_State))
---
-2.21.0
-
-
-From 13f03a44f0b571397a6b52c6ea4c00356a2189c9 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Mon, 8 Jul 2019 22:59:51 +0530
-Subject: [PATCH 07/34] thread.exdata: Port openresty test and fix fallout
-
-Fix up contents.lua to expect thread.exdata and add two more tests
-from c58fe79b870f1934479bf14fe8035fc3d9fdfde2 in openresty/luajit2.
----
- t/TestLJ.pm | 91 -----------------
- t/exdata.t | 221 ----------------------------------------
- test/lib/contents.lua | 4 +-
- test/lib/ffi/exdata.lua | 152 +++++++++++++++++++++++++++
- test/lib/ffi/index | 1 +
- 5 files changed, 155 insertions(+), 314 deletions(-)
- delete mode 100644 t/TestLJ.pm
- delete mode 100644 t/exdata.t
- create mode 100644 test/lib/ffi/exdata.lua
-
-diff --git a/t/TestLJ.pm b/t/TestLJ.pm
-deleted file mode 100644
-index cdc02a8e..00000000
---- a/t/TestLJ.pm
-+++ /dev/null
-@@ -1,91 +0,0 @@
--package t::TestLJ;
--
--use v5.10.1;
--use Test::Base -Base;
--use IPC::Run3;
--use Cwd qw( cwd );
--use Test::LongString;
--use File::Temp qw( tempdir );
--
--our @EXPORT = qw( run_tests );
--
--$ENV{LUA_CPATH} = "../?.so;;";
--$ENV{LUA_PATH} = "../lua/?.lua;;";
--#$ENV{LUA_PATH} = ($ENV{LUA_PATH} || "" ) . ';' . getcwd . "/runtime/?.lua" . ';;';
--
--my $cwd = cwd;
--
--sub run_test ($) {
-- my $block = shift;
-- #print $json_xs->pretty->encode(\@new_rows);
-- #my $res = #print $json_xs->pretty->encode($res);
-- my $name = $block->name;
--
-- my $lua = $block->lua or
-- die "No --- lua specified for test $name\n";
--
-- my $luafile = "test.lua";
--
-- {
-- my $dir = tempdir "testlj_XXXXXXX", CLEANUP => 1;
-- chdir $dir or die "$name - Cannot chdir to $dir: $!";
-- open my $fh, ">$luafile"
-- or die "$name - Cannot open $luafile in $dir for writing: $!\n";
-- print $fh $lua;
-- close $fh;
-- }
--
-- my ($res, $err);
--
-- my @cmd;
--
-- if ($ENV{TEST_LJ_USE_VALGRIND}) {
-- warn "$name\n";
-- @cmd = ('valgrind', '-q', '--leak-check=full', 'luajit',
-- defined($block->jv) ? '-jv' : (),
-- defined($block->jdump) ? '-jdump' : (),
-- $luafile);
-- } else {
-- @cmd = ('luajit',
-- defined($block->jv) ? '-jv' : (),
-- defined($block->jdump) ? '-jdump' : (),
-- $luafile);
-- }
--
-- run3 \@cmd, undef, \$res, \$err;
-- my $rc = $?;
--
-- #warn "res:$res\nerr:$err\n";
--
-- my $exp_rc = $block->exit // 0;
--
-- is $exp_rc, $rc >> 8, "$name - exit code okay";
--
-- if (defined $block->err) {
-- if ($err =~ /.*:.*:.*: (.*\s)?/) {
-- $err = $1;
-- }
-- is $err, $block->err, "$name - err expected";
--
-- } elsif (defined $err && $err ne '') {
-- warn "$name - STDERR:\n$err";
-- }
--
-- if (defined $block->out) {
-- #is $res, $block->out, "$name - output ok";
-- is $res, $block->out, "$name - output ok";
--
-- } elsif (defined $res && $res ne '') {
-- warn "$name - STDOUT:\n$res";
-- }
--
-- chdir $cwd or die $!;
--}
--
--sub run_tests () {
-- for my $block (blocks()) {
-- run_test($block);
-- }
--}
--
--1;
-diff --git a/t/exdata.t b/t/exdata.t
-deleted file mode 100644
-index 239bb86c..00000000
---- a/t/exdata.t
-+++ /dev/null
-@@ -1,221 +0,0 @@
--# vim: set ss=4 ft= sw=4 et sts=4 ts=4:
--
--use lib '.';
--use t::TestLJ;
--
--plan tests => 3 * blocks();
--
--run_tests();
--
--__DATA__
--
--=== TEST 1: interpreted (sanity)
----- lua
--jit.off()
--local assert = assert
--local exdata = require "thread.exdata"
--local ffi = require "ffi"
--local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
--local ptr = ffi.cast("void *", u64)
--local saved_q
--for i = 1, 5 do
-- exdata(u64)
-- local q = exdata()
-- if saved_q then
-- assert(q == saved_q)
-- end
-- saved_q = q
--end
--print(tostring(ptr))
--print(tostring(saved_q))
----- jv
----- out
--cdata<void *>: 0xefdeaddeadbeef
--cdata<void *>: 0xefdeaddeadbeef
----- err
--
--
--
--=== TEST 2: newly created coroutines should inherit the exdata
----- lua
--jit.off()
--local exdata = require "thread.exdata"
--local ffi = require "ffi"
--local u64 = ffi.new("uintptr_t", 0xefdeadbeefLL)
--local ptr = ffi.cast("void *", u64)
--local ptr2 = ffi.cast("void *", u64 + 1)
--local ptr3 = ffi.cast("void *", u64 - 2)
--local saved_q
--local function f()
-- coroutine.yield(exdata())
-- exdata(ptr2)
-- coroutine.yield(exdata())
-- coroutine.yield(exdata())
--end
--
--exdata(u64)
--
--local co = coroutine.create(f)
--
--local ok, data = coroutine.resume(co)
--assert(ok)
--print(tostring(data))
--
--ok, data = coroutine.resume(co)
--assert(ok)
--print(tostring(data))
--
--exdata(ptr3)
--
--ok, data = coroutine.resume(co)
--assert(ok)
--print(tostring(data))
--
--print(tostring(exdata()))
----- jv
----- out
--cdata<void *>: 0xefdeadbeef
--cdata<void *>: 0xefdeadbef0
--cdata<void *>: 0xefdeadbef0
--cdata<void *>: 0xefdeadbeed
----- err
--
--
--
--=== TEST 3: JIT mode (reading)
----- lua
--jit.opt.start("minstitch=100000", "hotloop=2")
--local assert = assert
--local exdata = require "thread.exdata"
--local ffi = require "ffi"
--local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
--local ptr = ffi.cast("void *", u64)
--local saved_q
--exdata(u64)
--for i = 1, 10 do
-- local q = exdata()
-- if saved_q then
-- assert(q == saved_q)
-- end
-- saved_q = q
--end
--print(tostring(ptr))
--print(tostring(saved_q))
--
----- jv
----- out
--cdata<void *>: 0xefdeaddeadbeef
--cdata<void *>: 0xefdeaddeadbeef
----- err
--[TRACE 1 test.lua:9 loop]
--
--
--
--=== TEST 4: JIT mode (writing)
----- lua
--jit.opt.start("minstitch=100000", "hotloop=2")
--local assert = assert
--local exdata = require "thread.exdata"
--local ffi = require "ffi"
--local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
--local ptr = ffi.cast("void *", u64)
--local saved_q
--for i = 1, 10 do
-- exdata(u64)
-- local q = exdata()
-- if saved_q then
-- assert(q == saved_q)
-- end
-- saved_q = q
--end
--print(tostring(ptr))
--print(tostring(saved_q))
--
----- jv
----- out
--cdata<void *>: 0xefdeaddeadbeef
--cdata<void *>: 0xefdeaddeadbeef
----- err
--[TRACE --- test.lua:8 -- trace too short at test.lua:9]
--
--
--
--=== TEST 5: interpreted - check the number of arguments
----- lua
--jit.off()
--local assert = assert
--local select = select
--local exdata = require "thread.exdata"
--local ffi = require "ffi"
--local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
--local ptr = ffi.cast("void *", u64)
--
--local function nargs(...)
-- return select('#', ...)
--end
--print(nargs(exdata(ptr)))
--print(nargs(exdata()))
----- jv
----- out
--0
--1
----- err
--
--
--
--=== TEST 6: JIT mode - check the number of arguments
----- lua
--jit.opt.start("minstitch=100000", "hotloop=2")
--local assert = assert
--local select = select
--local exdata = require "thread.exdata"
--local ffi = require "ffi"
--local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
--local ptr = ffi.cast("void *", u64)
--
--local function nargs(...)
-- return select('#', ...)
--end
--
--local total = 0
--for i = 1, 10 do
-- total = total + nargs(exdata(ptr))
--end
--
--print("set: " .. total)
--
--total = 0
--for i = 1, 10 do
-- total = total + nargs(exdata())
--end
--
--print("get: " .. total)
----- jv
----- out
--set: 0
--get: 10
----- err
--[TRACE --- test.lua:14 -- trace too short at test.lua:15]
--[TRACE 1 test.lua:21 loop]
--
--
--
--=== TEST 7: interpreted (no ffi initialized)
----- lua
--jit.off()
--local assert = assert
--local exdata = require "thread.exdata"
--local saved_q
--for i = 1, 5 do
-- local q = exdata()
-- if saved_q then
-- assert(q == saved_q)
-- end
-- saved_q = q
--end
--print(tostring(saved_q))
----- jv
----- out
----- err
--ffi module not loaded (yet)
----- exit: 1
-diff --git a/test/lib/contents.lua b/test/lib/contents.lua
-index 09866f6f..1d393d96 100644
---- a/test/lib/contents.lua
-+++ b/test/lib/contents.lua
-@@ -19,7 +19,7 @@ local function check(m, expected, exclude)
- end
-
- do --- base
-- check(_G, "_G:_VERSION:arg:assert:collectgarbage:coroutine:debug:dofile:error:getmetatable:io:ipairs:load:loadfile:math:next:os:package:pairs:pcall:print:rawequal:rawget:rawset:require:select:setmetatable:string:table:tonumber:tostring:type:xpcall", "rawlen:bit:bit32:jit:gcinfo:setfenv:getfenv:loadstring:unpack:module:newproxy")
-+ check(_G, "_G:_VERSION:arg:assert:collectgarbage:coroutine:debug:dofile:error:exdata:getmetatable:io:ipairs:load:loadfile:math:next:os:package:pairs:pcall:print:rawequal:rawget:rawset:require:select:setmetatable:string:table:tonumber:tostring:type:xpcall", "rawlen:bit:bit32:jit:gcinfo:setfenv:getfenv:loadstring:unpack:module:newproxy")
- end
-
- do --- pre-5.2 base +lua<5.2
-@@ -145,7 +145,7 @@ do --- package.loaded
- loaded[k] = v
- end
- end
-- check(loaded, "_G:coroutine:debug:io:math:os:package:string:table", "bit:bit32:common:ffi:jit:table.new")
-+ check(loaded, "_G:coroutine:debug:io:math:os:package:string:table:thread.exdata", "bit:bit32:common:ffi:jit:table.new")
- end
-
- do --- bit +bit
-diff --git a/test/lib/ffi/exdata.lua b/test/lib/ffi/exdata.lua
-new file mode 100644
-index 00000000..32a39ebe
---- /dev/null
-+++ b/test/lib/ffi/exdata.lua
-@@ -0,0 +1,152 @@
-+local exdata = require "thread.exdata"
-+local ffi = require "ffi"
-+
-+local function nargs(...)
-+ return select('#', ...)
-+end
-+
-+--[[ These tests need to be first so that they read the default value and not
-+ the value updated by the tests that follow. --]]
-+do --- default value: JIT off
-+ jit.off()
-+ local saved_q
-+ for i = 1, 5 do
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+ end
-+ print(saved_q)
-+ assert(saved_q == nil)
-+end
-+
-+do --- default value: JIT on
-+ jit.opt.start("minstitch=100000", "hotloop=2")
-+ jit.on()
-+ local saved_q
-+ for i = 1, 5 do
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+ end
-+ print(saved_q)
-+ assert(saved_q == nil)
-+end
-+
-+do --- sanity: JIT off
-+ jit.off()
-+ local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+ local ptr = ffi.cast("void *", u64)
-+ local saved_q
-+ for i = 1, 5 do
-+ exdata(u64)
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+ end
-+ print(ptr)
-+ assert(tostring(ptr) == "cdata<void *>: 0xefdeaddeadbeef")
-+ assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
-+end
-+
-+do --- coroutines: JIT off
-+ jit.off()
-+ local u64 = ffi.new("uintptr_t", 0xefdeadbeefLL)
-+ local ptr = ffi.cast("void *", u64)
-+ local ptr2 = ffi.cast("void *", u64 + 1)
-+ local ptr3 = ffi.cast("void *", u64 - 2)
-+ local saved_q
-+ local function f()
-+ coroutine.yield(exdata())
-+ exdata(ptr2)
-+ coroutine.yield(exdata())
-+ coroutine.yield(exdata())
-+ end
-+
-+ exdata(u64)
-+
-+ local co = coroutine.create(f)
-+
-+ local ok, data = coroutine.resume(co)
-+ assert(ok)
-+ assert(tostring(data) == "cdata<void *>: 0xefdeadbeef")
-+
-+ ok, data = coroutine.resume(co)
-+ assert(ok)
-+ assert(tostring(data) == "cdata<void *>: 0xefdeadbef0")
-+
-+ exdata(ptr3)
-+
-+ ok, data = coroutine.resume(co)
-+ assert(ok)
-+ assert(tostring(data) == "cdata<void *>: 0xefdeadbef0")
-+ assert(tostring(exdata()) == "cdata<void *>: 0xefdeadbeed")
-+end
-+
-+do --- reading: JIT on
-+ jit.opt.start("minstitch=100000", "hotloop=2")
-+ local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+ local ptr = ffi.cast("void *", u64)
-+ local saved_q
-+ exdata(u64)
-+ for i = 1, 10 do
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+ end
-+ assert(tostring(ptr) == "cdata<void *>: 0xefdeaddeadbeef")
-+ assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
-+end
-+
-+do --- writing: JIT on
-+ jit.opt.start("minstitch=100000", "hotloop=2")
-+ local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+ local ptr = ffi.cast("void *", u64)
-+ local saved_q
-+ for i = 1, 10 do
-+ exdata(u64)
-+ local q = exdata()
-+ if saved_q then
-+ assert(q == saved_q)
-+ end
-+ saved_q = q
-+ end
-+ assert(tostring(ptr) == "cdata<void *>: 0xefdeaddeadbeef")
-+ assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
-+end
-+
-+do --- Check number of arguments: JIT off
-+ jit.off()
-+ local select = select
-+ local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+ local ptr = ffi.cast("void *", u64)
-+
-+ assert(nargs(exdata(ptr)) == 0)
-+ assert(nargs(exdata()) == 1)
-+end
-+
-+do --- Check number of arguments: JIT on
-+ jit.opt.start("minstitch=100000", "hotloop=2")
-+ local select = select
-+ local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
-+ local ptr = ffi.cast("void *", u64)
-+
-+ local total = 0
-+ for i = 1, 10 do
-+ total = total + nargs(exdata(ptr))
-+ end
-+ assert(total == 0)
-+
-+ for i = 1, 10 do
-+ total = total + nargs(exdata())
-+ end
-+ assert(total == 10)
-+end
-+
-diff --git a/test/lib/ffi/index b/test/lib/ffi/index
-index 7933c5a7..45464ff8 100644
---- a/test/lib/ffi/index
-+++ b/test/lib/ffi/index
-@@ -2,6 +2,7 @@ bit64.lua +luajit>=2.1
- cdata_var.lua
- copy_fill.lua
- err.lua
-+exdata.lua
- istype.lua
- jit_array.lua
- jit_complex.lua
---
-2.21.0
-
-
-From 5c461aa215646e3dabb183c318d902f3180debbd Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Wed, 31 Jul 2019 19:54:57 +0530
-Subject: [PATCH 08/34] [aarch64] Fix crash with side traces under register
- pressure
-
-IRRefs that get into the side trace from the parent trace may restore
-REF_BASE under register pressure and get to head_side holding on to
-it. Restore such references so that REF_BASE gets RID_BASE back in
-head_side.
----
- src/lj_asm_arm64.h | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index c72144a3..d2176e0b 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -1949,6 +1949,17 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
- IRIns *ir;
- asm_head_lreg(as);
- ir = IR(REF_BASE);
-+
-+ /* IRRefs that get into the side trace from the parent trace may restore
-+ * REF_BASE under severe register pressure and thus reach here holding on to
-+ * the register. Restore such references so that REF_BASE gets RID_BASE back
-+ * when it tries to allocate below. */
-+ if (!ra_hasreg(ir->r)) {
-+ Reg r = ra_gethint(ir->r);
-+ if (!rset_test(as->freeset, r))
-+ ra_restore(as, regcost_ref(as->cost[r]));
-+ }
-+
- if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
- ra_spill(as, ir);
- if (ra_hasspill(irp->s)) {
---
-2.21.0
-
-
-From 4be079d219cdb44de3912f823407c7661f610d65 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Wed, 31 Jul 2019 19:59:11 +0530
-Subject: [PATCH 09/34] [aarch64] Allocate LJ_TISNUM early
-
-LJ_TISNUM is allocated too late in the cycle and it ends up reusing a
-different register, resulting in a crash under register pressure.
-Hoist the allocation to the top so that it is done early enough and
-the allowed register set no longer contains that register. This also
-has the nice side effect of being slightly faster since it hoists a
-constant allocation out of the generated loop.
----
- src/lj_asm_arm64.h | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index d2176e0b..45661c6f 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -769,6 +769,14 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- uint32_t khash;
- MCLabel l_end, l_loop, l_next;
- rset_clear(allow, tab);
-+ Reg tisnum = RID_TMP;
-+
-+ /* Allocate register early and clear it from the allowed set since it gets
-+ * used multiple times during the loop. */
-+ if (irt_isnum(kt) && !isk) {
-+ tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
-+ rset_clear(allow, tisnum);
-+ }
-
- if (!isk) {
- key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
-@@ -819,9 +827,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_nm(as, A64I_CMPx, key, tmp);
- emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
- } else {
-- Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
- Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
-- rset_clear(allow, tisnum);
- emit_nm(as, A64I_FCMPd, key, ftmp);
- emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
- emit_cond_branch(as, CC_LO, l_next);
---
-2.21.0
-
-
-From 74a5510fb8910ebaa08d14aaaa66a69ac3f16cf5 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Wed, 7 Aug 2019 21:11:25 +0530
-Subject: [PATCH 10/34] [thread.exdata] Clean up test cases and add +jit
- wherever applicable
-
-Run JIT tests only on configurations where JIT is on.
----
- test/lib/ffi/exdata.lua | 11 ++++-------
- 1 file changed, 4 insertions(+), 7 deletions(-)
-
-diff --git a/test/lib/ffi/exdata.lua b/test/lib/ffi/exdata.lua
-index 32a39ebe..e048b740 100644
---- a/test/lib/ffi/exdata.lua
-+++ b/test/lib/ffi/exdata.lua
-@@ -17,11 +17,10 @@ do --- default value: JIT off
- end
- saved_q = q
- end
-- print(saved_q)
- assert(saved_q == nil)
- end
-
--do --- default value: JIT on
-+do --- default value: JIT on +jit
- jit.opt.start("minstitch=100000", "hotloop=2")
- jit.on()
- local saved_q
-@@ -32,7 +31,6 @@ do --- default value: JIT on
- end
- saved_q = q
- end
-- print(saved_q)
- assert(saved_q == nil)
- end
-
-@@ -49,7 +47,6 @@ do --- sanity: JIT off
- end
- saved_q = q
- end
-- print(ptr)
- assert(tostring(ptr) == "cdata<void *>: 0xefdeaddeadbeef")
- assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
- end
-@@ -88,7 +85,7 @@ do --- coroutines: JIT off
- assert(tostring(exdata()) == "cdata<void *>: 0xefdeadbeed")
- end
-
--do --- reading: JIT on
-+do --- reading: JIT on +jit
- jit.opt.start("minstitch=100000", "hotloop=2")
- local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
- local ptr = ffi.cast("void *", u64)
-@@ -105,7 +102,7 @@ do --- reading: JIT on
- assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
- end
-
--do --- writing: JIT on
-+do --- writing: JIT on +jit
- jit.opt.start("minstitch=100000", "hotloop=2")
- local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
- local ptr = ffi.cast("void *", u64)
-@@ -132,7 +129,7 @@ do --- Check number of arguments: JIT off
- assert(nargs(exdata()) == 1)
- end
-
--do --- Check number of arguments: JIT on
-+do --- Check number of arguments: JIT on +jit
- jit.opt.start("minstitch=100000", "hotloop=2")
- local select = select
- local u64 = ffi.new("uintptr_t", 0xefdeaddeadbeefLL)
---
-2.21.0
-
-
-From da70b450c65d4f3789852d5bf2675d16c2a5de35 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Thu, 8 Aug 2019 14:59:16 +0530
-Subject: [PATCH 11/34] [thread.exdata] Drop 64-bit-isms from test
-
-The test case assumes that the target is 64-bit because of which it
-fails on armv7. Drop the string comparisons and instead just do
-numerical comparisons to ensure that the overflowed values match on
-32-bit.
----
- test/lib/ffi/exdata.lua | 17 +++++++----------
- 1 file changed, 7 insertions(+), 10 deletions(-)
-
-diff --git a/test/lib/ffi/exdata.lua b/test/lib/ffi/exdata.lua
-index e048b740..0b8dfddc 100644
---- a/test/lib/ffi/exdata.lua
-+++ b/test/lib/ffi/exdata.lua
-@@ -47,8 +47,7 @@ do --- sanity: JIT off
- end
- saved_q = q
- end
-- assert(tostring(ptr) == "cdata<void *>: 0xefdeaddeadbeef")
-- assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
-+ assert(ptr == saved_q)
- end
-
- do --- coroutines: JIT off
-@@ -71,18 +70,18 @@ do --- coroutines: JIT off
-
- local ok, data = coroutine.resume(co)
- assert(ok)
-- assert(tostring(data) == "cdata<void *>: 0xefdeadbeef")
-+ assert(data == ptr)
-
- ok, data = coroutine.resume(co)
- assert(ok)
-- assert(tostring(data) == "cdata<void *>: 0xefdeadbef0")
-+ assert(data == ptr2)
-
- exdata(ptr3)
-
- ok, data = coroutine.resume(co)
- assert(ok)
-- assert(tostring(data) == "cdata<void *>: 0xefdeadbef0")
-- assert(tostring(exdata()) == "cdata<void *>: 0xefdeadbeed")
-+ assert(data == ptr2)
-+ assert(exdata() == ptr3)
- end
-
- do --- reading: JIT on +jit
-@@ -98,8 +97,7 @@ do --- reading: JIT on +jit
- end
- saved_q = q
- end
-- assert(tostring(ptr) == "cdata<void *>: 0xefdeaddeadbeef")
-- assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
-+ assert(ptr == saved_q)
- end
-
- do --- writing: JIT on +jit
-@@ -115,8 +113,7 @@ do --- writing: JIT on +jit
- end
- saved_q = q
- end
-- assert(tostring(ptr) == "cdata<void *>: 0xefdeaddeadbeef")
-- assert(tostring(saved_q) == "cdata<void *>: 0xefdeaddeadbeef")
-+ assert(ptr == saved_q)
- end
-
- do --- Check number of arguments: JIT off
---
-2.21.0
-
-
-From 4121ead645790370a1b5e668ba7249afe9fb92d5 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 14 Aug 2019 14:23:46 +0530
-Subject: [PATCH 12/34] [ppc] Fix access beyond list in ipairs
-
-The load into TMP2 was incorrectly put into ENDIAN_LE, which made the
-subsequent check invalid.
----
- src/vm_ppc.dasc | 2 --
- 1 file changed, 2 deletions(-)
-
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index f0b3498a..d059b8f6 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -1850,9 +1850,7 @@ static void build_subroutines(BuildCtx *ctx)
- | ble >2 // Not in array part?
- |.if FPU
- | lfdux f0, TMP1, TMP3
-- |.if ENDIAN_LE
- | lwz TMP2, WORD_HI(TMP1)
-- |.endif
- |.else
- | lwzux TMP2, TMP1, TMP3
- | lwz TMP3, WORD_HI(TMP1)
---
-2.21.0
-
-
-From 1b12bef3aa18701ceadbadad45fca993788979c5 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 14 Aug 2019 14:24:55 +0530
-Subject: [PATCH 13/34] [ppc] Fix typo
-
----
- src/vm_ppc.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index d059b8f6..8ea1963a 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -3174,7 +3174,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_exit_handler:
- |.if JIT
-- | addi sp, TMP0, sp, -(EXIT_OFFSET+32*8+32*PSIZE)
-+ | addi sp, sp, -(EXIT_OFFSET+32*8+32*PSIZE)
- | saver 3 // CARG1
- | saver 4 // CARG2
- | saver 5 // CARG3
---
-2.21.0
-
-
-From f135accb7e141abddd997023094a835ad91c853e Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 14 Aug 2019 14:49:47 +0530
-Subject: [PATCH 14/34] [ppc] Load BASEP4 as much as possible
-
-BASEP4 doesn't seem to get initialized all the time, especially when
-BASE is updated because of which programs can crash at random on
-ppc32. Err on the conservative side and set BASEP4 every time BASE_LO
-(or BASE_HI for LE) are accessed.
-
-This eventually needs to be tuned optimally.
----
- src/vm_ppc.dasc | 47 +++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 47 insertions(+)
-
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index 8ea1963a..9627950d 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -1124,6 +1124,9 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | b ->BC_TGETR_Z
- |1:
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | stwx TISNIL, BASE_HI, RA
- | b ->cont_nop
- |
-@@ -3668,6 +3671,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
-+ | addi BASEP4, BASE, 4
- |.if DUALNUM
- | lwzx CARG1, BASE_HI, RA
- | addi PC, PC, 4
-@@ -3773,6 +3777,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQV: case BC_ISNEV:
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
-+ | addi BASEP4, BASE, 4
- |.if DUALNUM
- | lwzx CARG1, BASE_HI, RA
- | addi PC, PC, 4
-@@ -3890,6 +3895,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQS: case BC_ISNES:
- vk = op == BC_ISEQS;
- | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
-+ | addi BASEP4, BASE, 4
- | lwzx TMP0, BASE_HI, RA
- | srwi RD, RD, 1
- | lwzx STR:TMP3, BASE_LO, RA
-@@ -3923,6 +3929,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQN: case BC_ISNEN:
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
-+ | addi BASEP4, BASE, 4
- |.if DUALNUM
- | lwzx CARG1, BASE_HI, RA
- | addi PC, PC, 4
-@@ -4018,6 +4025,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
- | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
-+ | addi BASEP4, BASE, 4
- | lwzx TMP0, BASE_HI, RA
- | srwi TMP1, RD, 3
- | lwz TMP2, 0(PC)
-@@ -4048,6 +4056,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
- | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
-+ | addi BASEP4, BASE, 4
- | lwzx TMP0, BASE_HI, RD
- | lwz INS, 0(PC)
- | addi PC, PC, 4
-@@ -4093,6 +4102,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTYPE:
- | // RA = src*8, RD = -type*8
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | lwzx TMP0, BASE_HI, RA
- | srwi TMP1, RD, 3
- | ins_next1
-@@ -4107,6 +4119,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_ISNUM:
- | // RA = src*8, RD = -(TISNUM-1)*8
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | lwzx TMP0, BASE_HI, RA
- | ins_next1
- | checknum TMP0
-@@ -4132,6 +4147,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_NOT:
- | // RA = dst*8, RD = src*8
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | ins_next1
- | lwzx TMP0, BASE_HI, RD
- | .gpr64 extsw TMP0, TMP0
-@@ -4142,6 +4160,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_UNM:
- | // RA = dst*8, RD = src*8
-+ | addi BASEP4, BASE, 4
- | lwzx TMP1, BASE_HI, RD
- | lwzx TMP0, BASE_LO, RD
- |.if DUALNUM and not GPR64
-@@ -4184,6 +4203,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_LEN:
- | // RA = dst*8, RD = src*8
-+ | addi BASEP4, BASE, 4
- | lwzx TMP0, BASE_HI, RD
- | lwzx CARG1, BASE_LO, RD
- | checkstr TMP0; bne >2
-@@ -4224,6 +4244,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- |.macro ins_arithpre
- | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
-+ | addi BASEP4, BASE, 4
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-@@ -4371,6 +4392,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |
- |.macro ins_arithdn, intins, fpins, fpcall
- | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
-+ | addi BASEP4, BASE, 4
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-@@ -4524,6 +4546,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_POW:
- | // NYI: (partial) integer arithmetic.
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | lwzx CARG1, BASE_HI, RB
- | lwzx CARG3, BASE, RC
- |.if FPU
-@@ -4583,6 +4608,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_KSTR:
- | // RA = dst*8, RD = str_const*8 (~)
-+ | addi BASEP4, BASE, 4
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | ins_next1
-@@ -4595,6 +4621,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_KCDATA:
- |.if FFI
- | // RA = dst*8, RD = cdata_const*8 (~)
-+ | addi BASEP4, BASE, 4
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | ins_next1
-@@ -4607,6 +4634,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_KSHORT:
- | // RA = dst*8, RD = int16_literal*8
-+ | addi BASEP4, BASE, 4
- |.if DUALNUM
- | slwi RD, RD, 13
- | srawi RD, RD, 16
-@@ -4652,6 +4680,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_KPRI:
- | // RA = dst*8, RD = primitive_type*8 (~)
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | srwi TMP1, RD, 3
- | not TMP0, TMP1
- | ins_next1
-@@ -4660,6 +4691,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_KNIL:
- | // RA = base*8, RD = end*8
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | stwx TISNIL, BASE_HI, RA
- | addi RA, RA, 8
- |1:
-@@ -4900,6 +4934,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_TGETV:
- | // RA = dst*8, RB = table*8, RC = key*8
-+ | addi BASEP4, BASE, 4
- | lwzx CARG1, BASE_HI, RB
- | lwzx CARG2, BASE_HI, RC
- | lwzx TAB:RB, BASE_LO, RB
-@@ -4974,6 +5009,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETS:
- | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
-+ | addi BASEP4, BASE, 4
- | lwzx CARG1, BASE_HI, RB
- | srwi TMP1, RC, 1
- | lwzx TAB:RB, BASE_LO, RB
-@@ -4983,6 +5019,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bne ->vmeta_tgets1
- |->BC_TGETS_Z:
- | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
-+ | addi BASEP4, BASE, 4
- | lwz TMP0, TAB:RB->hmask
- | lwz TMP1, STR:RC->hash
- | lwz NODE:TMP2, TAB:RB->node
-@@ -5022,6 +5059,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETB:
- | // RA = dst*8, RB = table*8, RC = index*8
-+ | addi BASEP4, BASE, 4
- | lwzx CARG1, BASE_HI, RB
- | srwi TMP0, RC, 3
- | lwzx TAB:RB, BASE_LO, RB
-@@ -5063,6 +5101,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETR:
- | // RA = dst*8, RB = table*8, RC = key*8
-+ | addi BASEP4, BASE, 4
- | lwzx TAB:CARG1, BASE_LO, RB
- |.if DUALNUM
- | lwz TMP0, TAB:CARG1->asize
-@@ -5096,6 +5135,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_TSETV:
- | // RA = src*8, RB = table*8, RC = key*8
-+ | addi BASEP4, BASE, 4
- | lwzx CARG1, BASE_HI, RB
- | lwzx CARG2, BASE_HI, RC
- | lwzx TAB:RB, BASE_LO, RB
-@@ -5178,6 +5218,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETS:
- | // RA = src*8, RB = table*8, RC = str_const*8 (~)
-+ | addi BASEP4, BASE, 4
- | lwzx CARG1, BASE_HI, RB
- | srwi TMP1, RC, 1
- | lwzx TAB:RB, BASE_LO, RB
-@@ -5273,6 +5314,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETB:
- | // RA = src*8, RB = table*8, RC = index*8
-+ | addi BASEP4, BASE, 4
- | lwzx CARG1, BASE_HI, RB
- | srwi TMP0, RC, 3
- | lwzx TAB:RB, BASE_LO, RB
-@@ -5323,6 +5365,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETR:
- | // RA = dst*8, RB = table*8, RC = key*8
-+ | addi BASEP4, BASE, 4
- | lwzx TAB:CARG2, BASE_LO, RB
- |.if DUALNUM
- | lbz TMP3, TAB:CARG2->marked
-@@ -6021,6 +6064,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |9: // FP loop.
- |.else
- |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
- | lwzx TMP1, RA, BASE_LO
- | add RA, RA, BASE
- |.else
-@@ -6218,6 +6262,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- |
- |3: // Clear missing parameters.
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ |.endif
- | stwx TISNIL, BASE_HI, NARGS8:RC
- | addi NARGS8:RC, NARGS8:RC, 8
- | b <2
---
-2.21.0
-
-
-From 4c83e55809602a1051c77198d7e7c3ab6c6b7227 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 14 Aug 2019 15:40:09 +0530
-Subject: [PATCH 15/34] [ppc] Revert LE code for assert
-
----
- src/vm_ppc.dasc | 12 ------------
- 1 file changed, 12 deletions(-)
-
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index 9627950d..6d8681de 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -1517,22 +1517,11 @@ static void build_subroutines(BuildCtx *ctx)
- | bge cr1, ->fff_fallback
- | stw CARG3, WORD_HI(RA)
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
-- |.if not ENDIAN_LE
- | addi TMP1, BASE, 8
- | add TMP2, RA, NARGS8:RC
-- |.endif
- | stw CARG1, WORD_LO(RA)
- | beq ->fff_res // Done if exactly 1 argument.
-- |.if ENDIAN_LE
-- | li TMP1, 8
-- | subi RC, RC, 8
-- |.endif
- |1:
-- |.if ENDIAN_LE
-- | cmplw TMP1, RC
-- | lfdx f0, BASE, TMP1
-- | stfdx f0, RA, TMP1
-- |.else
- | cmplw TMP1, TMP2
- |.if FPU
- | lfd f0, 0(TMP1)
-@@ -1543,7 +1532,6 @@ static void build_subroutines(BuildCtx *ctx)
- | stw CARG1, -8(TMP1)
- | stw CARG2, -4(TMP1)
- |.endif
-- |.endif
- | addi TMP1, TMP1, 8
- | bney <1
- | b ->fff_res
---
-2.21.0
-
-
-From 84240a602aa7f2cd055e21fbb53e8630503a56b6 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 14 Aug 2019 16:03:38 +0530
-Subject: [PATCH 16/34] [ppc] Fix off by one in assert
-
-It ended up reading the first argument twice.
----
- src/vm_ppc.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index 6d8681de..31ed39a5 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -1525,7 +1525,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cmplw TMP1, TMP2
- |.if FPU
- | lfd f0, 0(TMP1)
-- | stfd f0, 0(TMP1)
-+ | stfd f0, -8(TMP1)
- |.else
- | lwz CARG1, 0(TMP1)
- | lwz CARG2, 4(TMP1)
---
-2.21.0
-
-
-From d8a7769ef37435f3c81c19779395eb6adb95037a Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Tue, 27 Aug 2019 23:20:28 +0530
-Subject: [PATCH 17/34] Move all register allocations out of the asm_href loop
-
-Register allocations inline while emitting HREF loop code is hazardous
-because a spill would mean a load or remat generated in the loop body.
----
- src/lj_asm_arm64.h | 38 ++++++++++++++++++++++----------------
- 1 file changed, 22 insertions(+), 16 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 45661c6f..a8835588 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -769,13 +769,29 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- uint32_t khash;
- MCLabel l_end, l_loop, l_next;
- rset_clear(allow, tab);
-- Reg tisnum = RID_TMP;
-+ Reg tisnum = RID_TMP, scr = RID_NONE, type = RID_NONE, ftmp = RID_NONE;
-
-- /* Allocate register early and clear it from the allowed set since it gets
-- * used multiple times during the loop. */
-- if (irt_isnum(kt) && !isk) {
-- tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
-- rset_clear(allow, tisnum);
-+ /* Allocate registers before emitting loop code. Allocating inline will
-+ * result in spills and restores getting into the loop body. */
-+ if (irt_isnum(kt)) {
-+ if (!isk) {
-+ tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
-+ rset_clear(allow, tisnum);
-+ ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
-+ }
-+ } else if (irt_isaddr(kt)) {
-+ if (isk) {
-+ int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
-+ scr = ra_allock(as, kk, allow);
-+ } else {
-+ scr = ra_scratch(as, allow);
-+ }
-+ rset_clear(allow, scr);
-+ } else {
-+ lua_assert(irt_ispri(kt) && !irt_isnil(kt));
-+ type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
-+ scr = ra_scratch(as, rset_clear(allow, type));
-+ rset_clear(allow, scr);
- }
-
- if (!isk) {
-@@ -827,7 +843,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_nm(as, A64I_CMPx, key, tmp);
- emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
- } else {
-- Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
- emit_nm(as, A64I_FCMPd, key, ftmp);
- emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
- emit_cond_branch(as, CC_LO, l_next);
-@@ -835,24 +850,15 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
- }
- } else if (irt_isaddr(kt)) {
-- Reg scr;
- if (isk) {
-- int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
-- scr = ra_allock(as, kk, allow);
- emit_nm(as, A64I_CMPx, scr, tmp);
- emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
- } else {
-- scr = ra_scratch(as, allow);
- emit_nm(as, A64I_CMPx, tmp, scr);
- emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
- }
-- rset_clear(allow, scr);
- } else {
-- Reg type, scr;
- lua_assert(irt_ispri(kt) && !irt_isnil(kt));
-- type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
-- scr = ra_scratch(as, rset_clear(allow, type));
-- rset_clear(allow, scr);
- emit_nm(as, A64I_CMPw, scr, type);
- emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
- }
---
-2.21.0
-
-
-From c31d028cff6a02acbddac573f3a996c113ba2837 Mon Sep 17 00:00:00 2001
-From: Shuxin Yang <shuxinyang2006(a)gmail.com>
-Date: Mon, 26 Dec 2016 01:32:58 -0800
-Subject: [PATCH 18/34] optimize: lj_str_new: uses randomized hash functions
- based on crc32 when -msse4.2 is specified.
-
-security wise:
--------------
-o. crc32 up to 128 bytes, so it is difficult to attach with len <= 128.
-o. for len >= 128, random 128 bytes are crc32-ed, so it is vulnerable.
-
-performance wise:
------------------
-o. performance is measured by 'make -C src/x64/test benchmark'
-o. new hash function is realtively computationally cheaper if len < 120
- and about 1.8x as slow if len >= 120.
-o. for len in [1-3], original hash function has better distribution.
- need to understand why it is so.
-
-Signed-off-by: Yichun Zhang (agentzh) <yichun(a)openresty.com>
----
- src/lj_str.c | 44 ++++--
- src/x64/Makefile | 13 ++
- src/x64/src/lj_str_hash_x64.h | 266 +++++++++++++++++++++++++++++++
- src/x64/test/Makefile | 47 ++++++
- src/x64/test/benchmark.cxx | 278 +++++++++++++++++++++++++++++++++
- src/x64/test/test.cpp | 73 +++++++++
- src/x64/test/test_str_comp.lua | 67 ++++++++
- src/x64/test/test_util.cxx | 21 +++
- src/x64/test/test_util.hpp | 57 +++++++
- 9 files changed, 856 insertions(+), 10 deletions(-)
- create mode 100644 src/x64/Makefile
- create mode 100644 src/x64/src/lj_str_hash_x64.h
- create mode 100644 src/x64/test/Makefile
- create mode 100644 src/x64/test/benchmark.cxx
- create mode 100644 src/x64/test/test.cpp
- create mode 100644 src/x64/test/test_str_comp.lua
- create mode 100644 src/x64/test/test_util.cxx
- create mode 100644 src/x64/test/test_util.hpp
-
-diff --git a/src/lj_str.c b/src/lj_str.c
-index f1b5fb5d..5862f421 100644
---- a/src/lj_str.c
-+++ b/src/lj_str.c
-@@ -118,17 +118,16 @@ void lj_str_resize(lua_State *L, MSize newmask)
- g->strhash = newhash;
- }
-
--/* Intern a string and return string object. */
--GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
--{
-- global_State *g;
-- GCstr *s;
-- GCobj *o;
-+#include "x64/src/lj_str_hash_x64.h"
-+
-+#if defined(LJ_ARCH_STR_HASH)
-+#define LJ_STR_HASH LJ_ARCH_STR_HASH
-+#else
-+static MSize
-+lj_str_original_hash(const char *str, size_t lenx) {
- MSize len = (MSize)lenx;
- MSize a, b, h = len;
-- if (lenx >= LJ_MAX_STR)
-- lj_err_msg(L, LJ_ERR_STROV);
-- g = G(L);
-+
- /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
- if (len >= 4) { /* Caveat: unaligned access! */
- a = lj_getu32(str);
-@@ -142,11 +141,36 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
- b = *(const uint8_t *)(str+(len>>1));
- h ^= b; h -= lj_rol(b, 14);
- } else {
-- return &g->strempty;
-+ return 0;
- }
-+
- a ^= h; a -= lj_rol(h, 11);
- b ^= a; b -= lj_rol(a, 25);
- h ^= b; h -= lj_rol(b, 16);
-+
-+ return h;
-+}
-+#define LJ_STR_HASH lj_str_original_hash
-+#endif
-+
-+/* Intern a string and return string object. */
-+GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
-+{
-+ global_State *g;
-+ GCstr *s;
-+ GCobj *o;
-+ MSize len = (MSize)lenx;
-+ MSize h;
-+
-+ if (lenx >= LJ_MAX_STR)
-+ lj_err_msg(L, LJ_ERR_STROV);
-+ g = G(L);
-+ if (LJ_UNLIKELY(lenx == 0)) {
-+ return &g->strempty;
-+ }
-+
-+ h = LJ_STR_HASH(str, lenx);
-+
- /* Check if the string has already been interned. */
- o = gcref(g->strhash[h & g->strmask]);
- if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
-diff --git a/src/x64/Makefile b/src/x64/Makefile
-new file mode 100644
-index 00000000..27277140
---- /dev/null
-+++ b/src/x64/Makefile
-@@ -0,0 +1,13 @@
-+.PHONY: default test benchmark clean
-+
-+default:
-+ @echo "make target include: test bechmark clean"
-+
-+test:
-+ $(MAKE) -C test test
-+
-+benchmark:
-+ $(MAKE) -C test benchmark
-+
-+clean:
-+ $(MAKE) -C test clean
-diff --git a/src/x64/src/lj_str_hash_x64.h b/src/x64/src/lj_str_hash_x64.h
-new file mode 100644
-index 00000000..b783a394
---- /dev/null
-+++ b/src/x64/src/lj_str_hash_x64.h
-@@ -0,0 +1,266 @@
-+/*
-+ * This file defines string hash function using CRC32. It takes advantage of
-+ * Intel hardware support (crc32 instruction, SSE 4.2) to speedup the CRC32
-+ * computation. The hash functions try to compute CRC32 of length and up
-+ * to 128 bytes of given string.
-+ */
-+
-+#ifndef _LJ_STR_HASH_X64_H_
-+#define _LJ_STR_HASH_X64_H_
-+
-+#if defined(__SSE4_2__) && defined(__x86_64) && defined(__GNUC__)
-+
-+#include <stdint.h>
-+#include <sys/types.h>
-+#include <unistd.h>
-+#include <time.h>
-+#include <smmintrin.h>
-+
-+#include "../../lj_def.h"
-+
-+#undef LJ_AINLINE
-+#define LJ_AINLINE
-+
-+static const uint64_t* cast_uint64p(const char* str)
-+{
-+ return (const uint64_t*)(void*)str;
-+}
-+
-+static const uint32_t* cast_uint32p(const char* str)
-+{
-+ return (const uint32_t*)(void*)str;
-+}
-+
-+/* hash string with len in [1, 4) */
-+static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len)
-+{
-+#if 0
-+ /* TODO: The if-1 part (i.e the original algorithm) is working better when
-+ * the load-factor is high, as revealed by conflict benchmark (via
-+ * 'make benchmark' command); need to understand why it's so.
-+ */
-+ uint32_t v = str[0];
-+ v = (v << 8) | str[len >> 1];
-+ v = (v << 8) | str[len - 1];
-+ v = (v << 8) | len;
-+ return _mm_crc32_u32(0, v);
-+#else
-+ uint32_t a, b, h = len;
-+
-+ a = *(const uint8_t *)str;
-+ h ^= *(const uint8_t *)(str+len-1);
-+ b = *(const uint8_t *)(str+(len>>1));
-+ h ^= b; h -= lj_rol(b, 14);
-+
-+ a ^= h; a -= lj_rol(h, 11);
-+ b ^= a; b -= lj_rol(a, 25);
-+ h ^= b; h -= lj_rol(b, 16);
-+
-+ return h;
-+#endif
-+}
-+
-+/* hash string with len in [4, 16) */
-+static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len)
-+{
-+ uint64_t v1, v2, h;
-+
-+ if (len >= 8) {
-+ v1 = *cast_uint64p(str);
-+ v2 = *cast_uint64p(str + len - 8);
-+ } else {
-+ v1 = *cast_uint32p(str);
-+ v2 = *cast_uint32p(str + len - 4);
-+ }
-+
-+ h = _mm_crc32_u32(0, len);
-+ h = _mm_crc32_u64(h, v1);
-+ h = _mm_crc32_u64(h, v2);
-+ return h;
-+}
-+
-+/* hash string with length in [16, 128) */
-+static uint32_t lj_str_hash_16_128(const char* str, uint32_t len)
-+{
-+ uint64_t h1, h2;
-+ uint32_t i;
-+
-+ h1 = _mm_crc32_u32(0, len);
-+ h2 = 0;
-+
-+ for (i = 0; i < len - 16; i += 16) {
-+ h1 += _mm_crc32_u64(h1, *cast_uint64p(str + i));
-+ h2 += _mm_crc32_u64(h2, *cast_uint64p(str + i + 8));
-+ };
-+
-+ h1 = _mm_crc32_u64(h1, *cast_uint64p(str + len - 16));
-+ h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8));
-+
-+ return _mm_crc32_u32(h1, h2);
-+}
-+
-+/* **************************************************************************
-+ *
-+ * Following is code about hashing string with length >= 128
-+ *
-+ * **************************************************************************
-+ */
-+static uint32_t random_pos[32][2];
-+static const int8_t log2_tab[128] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,
-+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,
-+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 };
-+
-+/* return floor(log2(n)) */
-+static LJ_AINLINE uint32_t log2_floor(uint32_t n)
-+{
-+ if (n <= 127) {
-+ return log2_tab[n];
-+ }
-+
-+ if ((n >> 8) <= 127) {
-+ return log2_tab[n >> 8] + 8;
-+ }
-+
-+ if ((n >> 16) <= 127) {
-+ return log2_tab[n >> 16] + 16;
-+ }
-+
-+ if ((n >> 24) <= 127) {
-+ return log2_tab[n >> 24] + 24;
-+ }
-+
-+ return 31;
-+}
-+
-+#define POW2_MASK(n) ((1L << (n)) - 1)
-+
-+/* This function is to populate `random_pos` such that random_pos[i][*]
-+ * contains random value in the range of [2**i, 2**(i+1)).
-+ */
-+static void x64_init_random(void)
-+{
-+ int i, seed, rml;
-+
-+ /* Calculate the ceil(log2(RAND_MAX)) */
-+ rml = log2_floor(RAND_MAX);
-+ if (RAND_MAX & (RAND_MAX - 1)) {
-+ rml += 1;
-+ }
-+
-+ /* Init seed */
-+ seed = _mm_crc32_u32(0, getpid());
-+ seed = _mm_crc32_u32(seed, time(NULL));
-+ srandom(seed);
-+
-+ /* Now start to populate the random_pos[][]. */
-+ for (i = 0; i < 3; i++) {
-+ /* No need to provide random value for chunk smaller than 8 bytes */
-+ random_pos[i][0] = random_pos[i][1] = 0;
-+ }
-+
-+ for (; i < rml; i++) {
-+ random_pos[i][0] = random() & POW2_MASK(i+1);
-+ random_pos[i][1] = random() & POW2_MASK(i+1);
-+ }
-+
-+ for (; i < 31; i++) {
-+ int j;
-+ for (j = 0; j < 2; j++) {
-+ uint32_t v, scale;
-+ scale = random_pos[i - rml][0];
-+ if (scale == 0) {
-+ scale = 1;
-+ }
-+ v = (random() * scale) & POW2_MASK(i+1);
-+ random_pos[i][j] = v;
-+ }
-+ }
-+}
-+#undef POW2_MASK
-+
-+void __attribute__((constructor)) x64_init_random_constructor()
-+{
-+ x64_init_random();
-+}
-+
-+/* Return a pre-computed random number in the range of [1**chunk_sz_order,
-+ * 1**(chunk_sz_order+1)). It is "unsafe" in the sense that the return value
-+ * may be greater than chunk-size; it is up to the caller to make sure
-+ * "chunk-base + return-value-of-this-func" has valid virtual address.
-+ */
-+static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
-+ uint32_t idx)
-+{
-+ uint32_t pos = random_pos[chunk_sz_order][idx & 1];
-+ return pos;
-+}
-+
-+static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
-+ uint32_t len)
-+{
-+ uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
-+ uint64_t h1, h2, v;
-+ const char* chunk_ptr;
-+
-+ chunk_num = 16;
-+ chunk_sz = len / chunk_num;
-+ chunk_sz_log2 = log2_floor(chunk_sz);
-+
-+ pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
-+ pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);
-+
-+ h1 = _mm_crc32_u32(0, len);
-+ h2 = 0;
-+
-+ /* loop over 14 chunks, 2 chunks at a time */
-+ for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1);
-+ chunk_ptr += chunk_sz, i++) {
-+
-+ v = *cast_uint64p(chunk_ptr + pos1);
-+ h1 = _mm_crc32_u64(h1, v);
-+
-+ v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
-+ h2 = _mm_crc32_u64(h2, v);
-+ }
-+
-+ /* the last two chunks */
-+ v = *cast_uint64p(chunk_ptr + pos1);
-+ h1 = _mm_crc32_u64(h1, v);
-+
-+ v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
-+ h2 = _mm_crc32_u64(h2, v);
-+
-+ /* process the trailing part */
-+ h1 = _mm_crc32_u64(h1, *cast_uint64p(str));
-+ h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8));
-+
-+ h1 = _mm_crc32_u32(h1, h2);
-+ return h1;
-+}
-+
-+/* NOTE: the "len" should not be zero */
-+static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len)
-+{
-+ if (len < 128) {
-+ if (len >= 16) { /* [16, 128) */
-+ return lj_str_hash_16_128(str, len);
-+ }
-+
-+ if (len >= 4) { /* [4, 16) */
-+ return lj_str_hash_4_16(str, len);
-+ }
-+
-+ /* [0, 4) */
-+ return lj_str_hash_1_4(str, len);
-+ }
-+ /* [128, inf) */
-+ return lj_str_hash_128_above(str, len);
-+}
-+
-+#define LJ_ARCH_STR_HASH lj_str_hash
-+#else
-+#undef LJ_ARCH_STR_HASH
-+#endif
-+#endif /*_LJ_STR_HASH_X64_H_*/
-diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile
-new file mode 100644
-index 00000000..4326ab3d
---- /dev/null
-+++ b/src/x64/test/Makefile
-@@ -0,0 +1,47 @@
-+.PHONY: default test benchmark
-+
-+default: test benchmark
-+
-+COMMON_OBJ := test_util.o
-+
-+TEST_PROGRAM := ht_test
-+BENCHMARK_PROGRAM := ht_benchmark
-+
-+TEST_PROGRAM_OBJ := $(COMMON_OBJ) test.o
-+BENCHMARK_PROGRAM_OBJ := $(COMMON_OBJ) benchmark.o
-+
-+ifeq ($(WITH_VALGRIND), 1)
-+ VALGRIND := valgrind --leak-check=full
-+else
-+ VALGRIND :=
-+endif
-+
-+CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src
-+
-+%.o: %.cxx
-+ $(CXX) $(CXXFLAGS) -MD -c $<
-+
-+test: $(TEST_PROGRAM)
-+ @echo "some unit test"
-+ $(VALGRIND) ./$(TEST_PROGRAM)
-+
-+ @echo "smoke test"
-+ ../../luajit test_str_comp.lua
-+
-+benchmark: $(BENCHMARK_PROGRAM)
-+ # micro benchmark
-+ ./$(BENCHMARK_PROGRAM)
-+
-+$(TEST_PROGRAM) : $(TEST_PROGRAM_OBJ)
-+ cat $(TEST_PROGRAM_OBJ:.o=.d) > dep1.txt
-+ $(CXX) $+ $(CXXFLAGS) -lm -o $@
-+
-+$(BENCHMARK_PROGRAM): $(BENCHMARK_PROGRAM_OBJ)
-+ cat $(BENCHMARK_PROGRAM_OBJ:.o=.d) > dep2.txt
-+ $(CXX) $+ $(CXXFLAGS) -o $@
-+
-+-include dep1.txt
-+-include dep2.txt
-+
-+clean:
-+ -rm -f *.o *.d dep*.txt $(BENCHMARK_PROGRAM) $(TEST_PROGRAM)
-diff --git a/src/x64/test/benchmark.cxx b/src/x64/test/benchmark.cxx
-new file mode 100644
-index 00000000..e37edb03
---- /dev/null
-+++ b/src/x64/test/benchmark.cxx
-@@ -0,0 +1,278 @@
-+#include <sys/time.h> // for gettimeofday()
-+extern "C" {
-+#include "lj_str_hash_x64.h"
-+}
-+#include <string>
-+#include <vector>
-+#include <utility>
-+#include <algorithm>
-+#include "test_util.hpp"
-+#include <stdio.h>
-+#include <math.h>
-+
-+using namespace std;
-+
-+#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
-+#define lj_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
-+
-+const char* separator = "-------------------------------------------";
-+
-+static uint32_t LJ_AINLINE
-+lj_original_hash(const char *str, size_t len)
-+{
-+ uint32_t a, b, h = len;
-+ if (len >= 4) {
-+ a = lj_getu32(str); h ^= lj_getu32(str+len-4);
-+ b = lj_getu32(str+(len>>1)-2);
-+ h ^= b; h -= lj_rol(b, 14);
-+ b += lj_getu32(str+(len>>2)-1);
-+ a ^= h; a -= lj_rol(h, 11);
-+ b ^= a; b -= lj_rol(a, 25);
-+ h ^= b; h -= lj_rol(b, 16);
-+ } else {
-+ a = *(const uint8_t *)str;
-+ h ^= *(const uint8_t *)(str+len-1);
-+ b = *(const uint8_t *)(str+(len>>1));
-+ h ^= b; h -= lj_rol(b, 14);
-+ }
-+
-+ a ^= h; a -= lj_rol(h, 11);
-+ b ^= a; b -= lj_rol(a, 25);
-+ h ^= b; h -= lj_rol(b, 16);
-+
-+ return h;
-+}
-+
-+template<class T> double
-+BenchmarkHashTmpl(T func, char* buf, size_t len)
-+{
-+ TestClock timer;
-+ uint32_t h = 0;
-+
-+ timer.start();
-+ for(int i = 1; i < 1000000 * 100; i++) {
-+ // So the buf is not loop invariant, hence the F(...)
-+ buf[i % 4096] = i;
-+ h += func(buf, len) ^ i;
-+ }
-+ timer.stop();
-+
-+ // make h alive
-+ test_printf("%x", h);
-+ return timer.getElapseInSecond();
-+}
-+
-+struct TestFuncWas
-+{
-+ uint32_t operator()(const char* buf, uint32_t len) {
-+ return lj_original_hash(buf, len);
-+ }
-+};
-+
-+struct TestFuncIs
-+{
-+ uint32_t operator()(const char* buf, uint32_t len) {
-+ return lj_str_hash(buf, len);
-+ }
-+};
-+
-+static void
-+benchmarkIndividual(char* buf)
-+{
-+ fprintf(stdout,"\n\nCompare performance of particular len (in second)\n");
-+ fprintf(stdout, "%-12s%-8s%-8s%s\n", "len", "was", "is", "diff");
-+ fprintf(stdout, "-------------------------------------------\n");
-+
-+ uint32_t lens[] = {3, 4, 7, 10, 15, 16, 20, 32, 36, 63, 80, 100,
-+ 120, 127, 280, 290, 400};
-+ for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) {
-+ uint32_t len = lens[i];
-+ double e1 = BenchmarkHashTmpl(TestFuncWas(), buf, len);
-+ double e2 = BenchmarkHashTmpl(TestFuncIs(), buf, len);
-+ fprintf(stdout, "len = %4d: %-7.3lf %-7.3lf %.2f\n", len, e1, e2, (e1-e2)/e1);
-+ }
-+}
-+
-+template<class T> double
-+BenchmarkChangeLenTmpl(T func, char* buf, uint32_t* len_vect, uint32_t len_num)
-+{
-+ TestClock timer;
-+ uint32_t h = 0;
-+
-+ timer.start();
-+ for(int i = 1; i < 1000000 * 100; i++) {
-+ for (int j = 0; j < (int)len_num; j++) {
-+ // So the buf is not loop invariant, hence the F(...)
-+ buf[(i + j) % 4096] = i;
-+ h += func(buf, len_vect[j]) ^ j;
-+ }
-+ }
-+ timer.stop();
-+
-+ // make h alive
-+ test_printf("%x", h);
-+ return timer.getElapseInSecond();
-+}
-+
-+// It is to measure the performance when length is changing.
-+// The purpose is to see how balanced branches impact the performance.
-+//
-+static void
-+benchmarkToggleLens(char* buf)
-+{
-+ double e1, e2;
-+ fprintf(stdout,"\nChanging length (in second):");
-+ fprintf(stdout, "\n%-20s%-8s%-8s%s\n%s\n", "len", "was", "is", "diff",
-+ separator);
-+
-+ uint32_t lens1[] = {4, 9};
-+ e1 = BenchmarkChangeLenTmpl(TestFuncWas(), buf, lens1, 2);
-+ e2 = BenchmarkChangeLenTmpl(TestFuncIs(), buf, lens1, 2);
-+ fprintf(stdout, "%-20s%-7.3lf %-7.3lf %.2f\n", "4,9", e1, e2, (e1-e2)/e1);
-+
-+ uint32_t lens2[] = {1, 4, 9};
-+ e1 = BenchmarkChangeLenTmpl(TestFuncWas(), buf, lens2, 3);
-+ e2 = BenchmarkChangeLenTmpl(TestFuncIs(), buf, lens2, 3);
-+ fprintf(stdout, "%-20s%-7.3lf %-7.3lf %.2f\n", "1,4,9", e1, e2, (e1-e2)/e1);
-+
-+ uint32_t lens3[] = {1, 33, 4, 9};
-+ e1 = BenchmarkChangeLenTmpl(TestFuncWas(), buf, lens3, 4);
-+ e2 = BenchmarkChangeLenTmpl(TestFuncIs(), buf, lens3, 4);
-+ fprintf(stdout, "%-20s%-7.3lf %-7.3lf %.2f\n", "1,33,4,9",
-+ e1, e2, (e1-e2)/e1);
-+}
-+
-+static void
-+genRandomString(uint32_t min, uint32_t max,
-+ uint32_t num, vector<string>& result)
-+{
-+ double scale = (max - min) / (RAND_MAX + 1.0);
-+ result.clear();
-+ result.reserve(num);
-+ for (uint32_t i = 0; i < num; i++) {
-+ uint32_t len = (rand() * scale) + min;
-+
-+ char* buf = new char[len];
-+ for (uint32_t l = 0; l < len; l++) {
-+ buf[l] = rand() % 255;
-+ }
-+ result.push_back(string(buf, len));
-+ delete[] buf;
-+ }
-+}
-+
-+// Return the standard deviation of given array of number
-+static double
-+standarDeviation(const vector<uint32_t>& v)
-+{
-+ uint64_t total = 0;
-+ for (vector<uint32_t>::const_iterator i = v.begin(), e = v.end();
-+ i != e; ++i) {
-+ total += *i;
-+ }
-+
-+ double avg = total / (double)v.size();
-+ double sd = 0;
-+
-+ for (vector<uint32_t>::const_iterator i = v.begin(), e = v.end();
-+ i != e; ++i) {
-+ double t = avg - *i;
-+ sd = sd + t*t;
-+ }
-+
-+ return sqrt(sd/v.size());
-+}
-+
-+static pair<double, double>
-+benchmarkConflictHelper(uint32_t bucketNum, const vector<string>& strs)
-+{
-+ if (bucketNum & (bucketNum - 1)) {
-+ bucketNum = (1L << (log2_floor(bucketNum) + 1));
-+ }
-+ uint32_t mask = bucketNum - 1;
-+
-+ vector<uint32_t> conflictWas(bucketNum);
-+ vector<uint32_t> conflictIs(bucketNum);
-+
-+ conflictWas.resize(bucketNum);
-+ conflictIs.resize(bucketNum);
-+
-+ for (vector<string>::const_iterator i = strs.begin(), e = strs.end();
-+ i != e; ++i) {
-+ uint32_t h1 = lj_original_hash(i->c_str(), i->size());
-+ uint32_t h2 = lj_str_hash(i->c_str(), i->size());
-+
-+ conflictWas[h1 & mask]++;
-+ conflictIs[h2 & mask]++;
-+ }
-+
-+#if 0
-+ std::sort(conflictWas.begin(), conflictWas.end(), std::greater<int>());
-+ std::sort(conflictIs.begin(), conflictIs.end(), std::greater<int>());
-+
-+ fprintf(stderr, "%d %d %d %d vs %d %d %d %d\n",
-+ conflictWas[0], conflictWas[1], conflictWas[2], conflictWas[3],
-+ conflictIs[0], conflictIs[1], conflictIs[2], conflictIs[3]);
-+#endif
-+
-+ return pair<double, double>(standarDeviation(conflictWas),
-+ standarDeviation(conflictIs));
-+}
-+
-+static void
-+benchmarkConflict()
-+{
-+ srand(time(0));
-+
-+ float loadFactor[] = { 0.5f, 1.0f, 2.0f, 4.0f, 8.0f };
-+ int bucketNum[] = { 512, 1024, 2048, 4096, 8192, 16384};
-+ int lenRange[][2] = { {1,3}, {4, 15}, {16, 127}, {128, 1024}, {1, 1024}};
-+
-+ fprintf(stdout,
-+ "\nBechmarking conflict (stand deviation of conflict)\n%s\n",
-+ separator);
-+
-+ for (uint32_t k = 0; k < sizeof(lenRange)/sizeof(lenRange[0]); k++) {
-+ fprintf(stdout, "\nlen range from %d - %d\n", lenRange[k][0],
-+ lenRange[k][1]);
-+ fprintf(stdout, "%-10s %-12s %-10s %-10s diff\n%s\n",
-+ "bucket", "load-factor", "was", "is", separator);
-+ for (uint32_t i = 0; i < sizeof(bucketNum)/sizeof(bucketNum[0]); ++i) {
-+ for (uint32_t j = 0;
-+ j < sizeof(loadFactor)/sizeof(loadFactor[0]);
-+ ++j) {
-+ int strNum = bucketNum[i] * loadFactor[j];
-+ vector<string> strs(strNum);
-+ genRandomString(lenRange[k][0], lenRange[k][1], strNum, strs);
-+
-+ pair<double, double> p;
-+ p = benchmarkConflictHelper(bucketNum[i], strs);
-+ fprintf(stdout, "%-10d %-12.2f %-10.2f %-10.2f %.2f\n",
-+ bucketNum[i], loadFactor[j], p.first, p.second,
-+ p.first - p.second);
-+ }
-+ }
-+ }
-+}
-+
-+static void
-+benchmarkHashFunc()
-+{
-+ char buf[4096];
-+ char c = getpid() % 'a';
-+ for (int i = 0; i < (int)sizeof(buf); i++) {
-+ buf[i] = (c + i) % 255;
-+ }
-+
-+ benchmarkConflict();
-+ benchmarkIndividual(buf);
-+ benchmarkToggleLens(buf);
-+}
-+
-+int
-+main(int argc, char** argv)
-+{
-+ fprintf(stdout, "========================\nMicro benchmark...\n");
-+ benchmarkHashFunc();
-+ return 0;
-+}
-diff --git a/src/x64/test/test.cpp b/src/x64/test/test.cpp
-new file mode 100644
-index 00000000..bc92acbb
---- /dev/null
-+++ b/src/x64/test/test.cpp
-@@ -0,0 +1,73 @@
-+#include <stdint.h>
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <map>
-+#include "test_util.hpp"
-+#include "lj_str_hash_x64.h"
-+
-+using namespace std;
-+
-+static bool
-+smoke_test()
-+{
-+ fprintf(stdout, "running smoke tests...\n");
-+ char buf[1024];
-+ char c = getpid() % 'a';
-+
-+ for (int i = 0; i < (int)sizeof(buf); i++) {
-+ buf[i] = (c + i) % 255;
-+ }
-+
-+ uint32_t lens[] = {3, 4, 5, 7, 8, 16, 17, 24, 25, 32, 33, 127, 128,
-+ 255, 256, 257};
-+ for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) {
-+ string s(buf, lens[i]);
-+ test_printf("%d", lj_str_hash(s.c_str(), lens[i]));
-+ }
-+
-+ return true;
-+}
-+
-+static bool
-+verify_log2()
-+{
-+ fprintf(stdout, "verify log2...\n");
-+ bool err = false;
-+ std::map<uint32_t, uint32_t> lm;
-+ lm[0] =(uint32_t)-1;
-+ lm[1] = 0;
-+ lm[2] = 1;
-+ for (int i = 2; i < 31; i++) {
-+ lm[(1<<i) - 2] = i - 1;
-+ lm[(1<<i) - 1] = i - 1;
-+ lm[1<<i] = i;
-+ lm[(1<<i) + 1] = i;
-+ }
-+ lm[(uint32_t)-1] = 31;
-+
-+ for (map<uint32_t, uint32_t>::iterator iter = lm.begin(), iter_e = lm.end();
-+ iter != iter_e; ++iter) {
-+ uint32_t v = (*iter).first;
-+ uint32_t log2_expect = (*iter).second;
-+ uint32_t log2_get = log2_floor(v);
-+ if (log2_expect != log2_get) {
-+ err = true;
-+ fprintf(stderr, "log2(%u) expect %u, get %u\n", v, log2_expect, log2_get);
-+ exit(1);
-+ }
-+ }
-+ return !err;
-+}
-+
-+int
-+main(int argc, char** argv)
-+{
-+ fprintf(stdout, "=======================\nRun unit testing...\n");
-+
-+ ASSERT(smoke_test(), "smoke_test test failed");
-+ ASSERT(verify_log2(), "log2 failed");
-+
-+ fprintf(stdout, TestErrMsgMgr::noError() ? "succ\n\n" : "fail\n\n");
-+
-+ return TestErrMsgMgr::noError() ? 0 : -1;
-+}
-diff --git a/src/x64/test/test_str_comp.lua b/src/x64/test/test_str_comp.lua
-new file mode 100644
-index 00000000..3a5c3e67
---- /dev/null
-+++ b/src/x64/test/test_str_comp.lua
-@@ -0,0 +1,67 @@
-+--[[
-+ Given two content-idental string s1, s2, test if they end up to be the
-+ same string object. The purpose of this test is to make sure hash function
-+ do not accidently include extraneous bytes before and after the string in
-+ question.
-+]]
-+
-+local ffi = require("ffi")
-+local C = ffi.C
-+
-+ffi.cdef[[
-+ void free(void*);
-+ char* malloc(size_t);
-+ void *memset(void*, int, size_t);
-+ void *memcpy(void*, void*, size_t);
-+ long time(void*);
-+ void srandom(unsigned);
-+ long random(void);
-+]]
-+
-+
-+local function test_equal(len_min, len_max)
-+ -- source string is wrapped by 16-byte-junk both before and after the
-+ -- string
-+ local x = C.random()
-+ local l = len_min + x % (len_max - len_min);
-+ local buf_len = tonumber(l + 16 * 2)
-+
-+ local src_buf = C.malloc(buf_len)
-+ for i = 0, buf_len - 1 do
-+ src_buf[i] = C.random() % 255
-+ end
-+
-+ -- dest string is the clone of the source string, but it is sandwiched
-+ -- by different junk bytes
-+ local dest_buf = C.malloc(buf_len)
-+ C.memset(dest_buf, 0x5a, buf_len)
-+
-+ local ofst = 8 + (C.random() % 8)
-+ C.memcpy(dest_buf + ofst, src_buf + 16, l);
-+
-+ local str1 = ffi.string(src_buf + 16, l)
-+ local str2 = ffi.string(dest_buf + ofst, l)
-+
-+ C.free(src_buf)
-+ C.free(dest_buf)
-+
-+ if str1 ~= str2 then
-+ -- Oops, look like hash function mistakenly include extraneous bytes
-+ -- close to the string
-+ return 1 -- wtf
-+ end
-+end
-+
-+--local lens = {1, 4, 16, 128, 1024}
-+local lens = {128, 1024}
-+local iter = 1000
-+
-+for i = 1, #lens - 1 do
-+ for j = 1, iter do
-+ if test_equal(lens[i], lens[i+1]) ~= nil then
-+ os.exit(1)
-+ end
-+ end
-+end
-+
-+os.exit(0)
-diff --git a/src/x64/test/test_util.cxx b/src/x64/test/test_util.cxx
-new file mode 100644
-index 00000000..34b7d675
---- /dev/null
-+++ b/src/x64/test/test_util.cxx
-@@ -0,0 +1,21 @@
-+#include <stdarg.h>
-+#include <stdio.h>
-+#include "test_util.hpp"
-+
-+using namespace std;
-+
-+std::vector<TestErrMsg> TestErrMsgMgr::_errMsg;
-+
-+void
-+test_printf(const char* format, ...)
-+{
-+ va_list args;
-+ va_start (args, format);
-+
-+ FILE* devNull = fopen("/dev/null", "w");
-+ if (devNull != 0) {
-+ (void)vfprintf (devNull, format, args);
-+ }
-+ fclose(devNull);
-+ va_end (args);
-+}
-diff --git a/src/x64/test/test_util.hpp b/src/x64/test/test_util.hpp
-new file mode 100644
-index 00000000..6cc2ea2c
---- /dev/null
-+++ b/src/x64/test/test_util.hpp
-@@ -0,0 +1,57 @@
-+#ifndef _TEST_UTIL_HPP_
-+#define _TEST_UTIL_HPP_
-+
-+#include <sys/time.h> // gettimeofday()
-+#include <string>
-+#include <vector>
-+
-+struct TestErrMsg
-+{
-+ const char* fileName;
-+ unsigned lineNo;
-+ std::string errMsg;
-+
-+ TestErrMsg(const char* FN, unsigned LN, const char* Err):
-+ fileName(FN), lineNo(LN), errMsg(Err) {}
-+};
-+
-+class TestErrMsgMgr
-+{
-+public:
-+ static std::vector<TestErrMsg> getError();
-+ static void
-+ addError(const char* fileName, unsigned lineNo, const char* Err) {
-+ _errMsg.push_back(TestErrMsg(fileName, lineNo, Err));
-+ }
-+
-+ static bool noError() {
-+ return _errMsg.empty();
-+ }
-+
-+private:
-+ static std::vector<TestErrMsg> _errMsg;
-+};
-+
-+#define ASSERT(c, e) \
-+ if (!(c)) { TestErrMsgMgr::addError(__FILE__, __LINE__, (e)); }
-+
-+class TestClock
-+{
-+public:
-+ void start() { gettimeofday(&_start, 0); }
-+ void stop() { gettimeofday(&_end, 0); }
-+ double getElapseInSecond() {
-+ return (_end.tv_sec - _start.tv_sec)
-+ + ((long)_end.tv_usec - (long)_start.tv_usec) / 1000000.0;
-+ }
-+
-+private:
-+ struct timeval _start, _end;
-+};
-+
-+// write to /dev/null, the only purpose is to make the data fed to the
-+// function alive.
-+extern void test_printf(const char* format, ...)
-+ __attribute__ ((format (printf, 1, 2)));
-+
-+#endif //_TEST_UTIL_HPP_
---
-2.21.0
-
-
-From d9f1f081339ca387206a6b4f786f52c3a4227b95 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <agentzh(a)gmail.com>
-Date: Thu, 19 Apr 2018 23:42:33 -0700
-Subject: [PATCH 19/34] bugfix: fixed compatibility regression with MinGW gcc.
- this bug had appeared in commit 7923c63.
-
----
- src/x64/src/lj_str_hash_x64.h | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/src/x64/src/lj_str_hash_x64.h b/src/x64/src/lj_str_hash_x64.h
-index b783a394..063f631c 100644
---- a/src/x64/src/lj_str_hash_x64.h
-+++ b/src/x64/src/lj_str_hash_x64.h
-@@ -21,6 +21,11 @@
- #undef LJ_AINLINE
- #define LJ_AINLINE
-
-+#ifdef __MINGW32__
-+#define random() ((long) rand())
-+#define srandom(seed) srand(seed)
-+#endif
-+
- static const uint64_t* cast_uint64p(const char* str)
- {
- return (const uint64_t*)(void*)str;
---
-2.21.0
-
-
-From cd6f7e61915ddc365a0416de5916eb1ebc7962e3 Mon Sep 17 00:00:00 2001
-From: Shuxin Yang <shuxinyang2006(a)gmail.com>
-Date: Mon, 10 Jul 2017 19:11:50 -0700
-Subject: [PATCH 20/34] bugfix: FFI C parsers could not parse some C constructs
- like `__attribute((aligned(N)))` and `#pragma`.
-
-Decoupled hash functions used in comparison (hardcoded) and string table.
-
-This bug had first appeared in v2.1-20170405 (or OpenResty 1.11.2.3).
-
-Signed-off-by: Yichun Zhang (agentzh) <agentzh(a)gmail.com>
----
- src/lib_ffi.c | 2 +-
- src/lj_cparse.c | 14 ++++++------
- src/lj_str.c | 16 +++++++++-----
- src/lj_str.h | 2 ++
- src/x64/test/Makefile | 1 +
- src/x64/test/unit/ffi/test_abi.lua | 10 +++++++++
- src/x64/test/unit/ffi/test_line_directive.lua | 15 +++++++++++++
- .../unit/ffi/test_pragma_pack_pushpop.lua | 12 ++++++++++
- src/x64/test/unit/ffi/test_var_attribute.lua | 22 +++++++++++++++++++
- src/x64/test/unit_test.sh | 22 +++++++++++++++++++
- 10 files changed, 103 insertions(+), 13 deletions(-)
- create mode 100644 src/x64/test/unit/ffi/test_abi.lua
- create mode 100644 src/x64/test/unit/ffi/test_line_directive.lua
- create mode 100644 src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua
- create mode 100644 src/x64/test/unit/ffi/test_var_attribute.lua
- create mode 100644 src/x64/test/unit_test.sh
-
-diff --git a/src/lib_ffi.c b/src/lib_ffi.c
-index 8032411e..bddecd8a 100644
---- a/src/lib_ffi.c
-+++ b/src/lib_ffi.c
-@@ -727,7 +727,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
- {
- GCstr *s = lj_lib_checkstr(L, 1);
- int b = 0;
-- switch (s->hash) {
-+ switch (lj_str_indep_hash(s)) {
- #if LJ_64
- case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */
- #else
-diff --git a/src/lj_cparse.c b/src/lj_cparse.c
-index 19f632ff..0724d4a6 100644
---- a/src/lj_cparse.c
-+++ b/src/lj_cparse.c
-@@ -1069,7 +1069,7 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
- if (cp->tok == CTOK_IDENT) {
- GCstr *attrstr = cp->str;
- cp_next(cp);
-- switch (attrstr->hash) {
-+ switch (lj_str_indep_hash(attrstr)) {
- case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */
- cp_decl_align(cp, decl);
- break;
-@@ -1138,7 +1138,7 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl)
- while (cp->tok == CTOK_IDENT) {
- GCstr *attrstr = cp->str;
- cp_next(cp);
-- switch (attrstr->hash) {
-+ switch (lj_str_indep_hash(attrstr)) {
- case H_(bc2395fa,98f267f8): /* align */
- cp_decl_align(cp, decl);
- break;
-@@ -1728,16 +1728,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
- {
- cp_next(cp);
- if (cp->tok == CTOK_IDENT &&
-- cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */
-+ (lj_str_indep_hash(cp->str)) == H_(e79b999f,42ca3e85)) { /* pack */
- cp_next(cp);
- cp_check(cp, '(');
- if (cp->tok == CTOK_IDENT) {
-- if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */
-+ if (lj_str_indep_hash(cp->str) == H_(738e923c,a1b65954)) { /* push */
- if (cp->curpack < CPARSE_MAX_PACKSTACK) {
- cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
- cp->curpack++;
- }
-- } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */
-+ } else if (lj_str_indep_hash(cp->str) == H_(6c71cf27,6c71cf27)) { /* pop */
- if (cp->curpack > 0) cp->curpack--;
- } else {
- cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
-@@ -1787,12 +1787,12 @@ static void cp_decl_multi(CPState *cp)
- cp_line(cp, hashline);
- continue;
- } else if (tok == CTOK_IDENT &&
-- cp->str->hash == H_(187aab88,fcb60b42)) { /* line */
-+ lj_str_indep_hash(cp->str) == H_(187aab88,fcb60b42)) { /* line */
- if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok);
- cp_line(cp, hashline);
- continue;
- } else if (tok == CTOK_IDENT &&
-- cp->str->hash == H_(f5e6b4f8,1d509107)) { /* pragma */
-+ lj_str_indep_hash(cp->str) == H_(f5e6b4f8,1d509107)) { /* pragma */
- cp_pragma(cp, hashline);
- continue;
- } else {
-diff --git a/src/lj_str.c b/src/lj_str.c
-index 5862f421..fd2420c9 100644
---- a/src/lj_str.c
-+++ b/src/lj_str.c
-@@ -118,11 +118,6 @@ void lj_str_resize(lua_State *L, MSize newmask)
- g->strhash = newhash;
- }
-
--#include "x64/src/lj_str_hash_x64.h"
--
--#if defined(LJ_ARCH_STR_HASH)
--#define LJ_STR_HASH LJ_ARCH_STR_HASH
--#else
- static MSize
- lj_str_original_hash(const char *str, size_t lenx) {
- MSize len = (MSize)lenx;
-@@ -150,6 +145,17 @@ lj_str_original_hash(const char *str, size_t lenx) {
-
- return h;
- }
-+
-+MSize
-+lj_str_indep_hash(GCstr *str) {
-+ return lj_str_original_hash(strdata(str), str->len);
-+}
-+
-+#include "x64/src/lj_str_hash_x64.h"
-+
-+#if defined(LJ_ARCH_STR_HASH)
-+#define LJ_STR_HASH LJ_ARCH_STR_HASH
-+#else
- #define LJ_STR_HASH lj_str_original_hash
- #endif
-
-diff --git a/src/lj_str.h b/src/lj_str.h
-index 85c1e405..0e21432e 100644
---- a/src/lj_str.h
-+++ b/src/lj_str.h
-@@ -24,4 +24,6 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
- #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
- #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
-
-+MSize lj_str_indep_hash(GCstr *str);
-+
- #endif
-diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile
-index 4326ab3d..3ec44eae 100644
---- a/src/x64/test/Makefile
-+++ b/src/x64/test/Makefile
-@@ -24,6 +24,7 @@ CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src
- test: $(TEST_PROGRAM)
- @echo "some unit test"
- $(VALGRIND) ./$(TEST_PROGRAM)
-+ ./unit_test.sh
-
- @echo "smoke test"
- ../../luajit test_str_comp.lua
-diff --git a/src/x64/test/unit/ffi/test_abi.lua b/src/x64/test/unit/ffi/test_abi.lua
-new file mode 100644
-index 00000000..9fafcf55
---- /dev/null
-+++ b/src/x64/test/unit/ffi/test_abi.lua
-@@ -0,0 +1,10 @@
-+local ffi = require "ffi"
-+
-+-- TODO: test "gc64" and "win" parameters
-+assert((ffi.abi("32bit") or ffi.abi("64bit"))
-+ and ffi.abi("le")
-+ and not ffi.abi("be")
-+ and ffi.abi("fpu")
-+ and not ffi.abi("softfp")
-+ and ffi.abi("hardfp")
-+ and not ffi.abi("eabi"))
-diff --git a/src/x64/test/unit/ffi/test_line_directive.lua b/src/x64/test/unit/ffi/test_line_directive.lua
-new file mode 100644
-index 00000000..a8b0403c
---- /dev/null
-+++ b/src/x64/test/unit/ffi/test_line_directive.lua
-@@ -0,0 +1,15 @@
-+local x = [=[
-+local ffi = require "ffi"
-+
-+ffi.cdef [[
-+ #line 100
-+ typedef Int xxx
-+]]
-+]=]
-+
-+local function foo()
-+ loadstring(x)()
-+end
-+
-+local r, e = pcall(foo)
-+assert(string.find(e, "declaration specifier expected near 'Int' at line 100") ~= nil)
-diff --git a/src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua b/src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua
-new file mode 100644
-index 00000000..5f1bdd30
---- /dev/null
-+++ b/src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua
-@@ -0,0 +1,12 @@
-+local ffi = require "ffi"
-+
-+ffi.cdef[[
-+#pragma pack(push, 1)
-+typedef struct {
-+ char x;
-+ double y;
-+} foo;
-+#pragma pack(pop)
-+]]
-+
-+assert(ffi.sizeof("foo") == 9)
-diff --git a/src/x64/test/unit/ffi/test_var_attribute.lua b/src/x64/test/unit/ffi/test_var_attribute.lua
-new file mode 100644
-index 00000000..11252bba
---- /dev/null
-+++ b/src/x64/test/unit/ffi/test_var_attribute.lua
-@@ -0,0 +1,22 @@
-+local ffi = require "ffi"
-+
-+ffi.cdef[[
-+typedef struct { int a; char b; } __attribute__((packed)) myty1;
-+typedef struct { int a; char b; } __attribute__((__packed__)) myty1_a;
-+
-+typedef struct { int a; char b; } __attribute__((aligned(16))) myty2_a;
-+typedef struct { int a; char b; } __attribute__((__aligned__(16))) myty2;
-+
-+typedef int __attribute__ ((vector_size (32))) myty3;
-+typedef int __attribute__ ((__vector_size__ (32))) myty3_a;
-+
-+typedef int __attribute__ ((mode(DI))) myty4;
-+]]
-+
-+assert(ffi.sizeof("myty1") == 5 and
-+ ffi.sizeof("myty1_a") == 5 and
-+ ffi.alignof("myty2") == 16 and
-+ ffi.alignof("myty2_a") == 16 and
-+ ffi.sizeof("myty3") == 32 and
-+ ffi.sizeof("myty3_a") == 32 and
-+ ffi.sizeof("myty4") == 8)
-diff --git a/src/x64/test/unit_test.sh b/src/x64/test/unit_test.sh
-new file mode 100644
-index 00000000..c6633ca2
---- /dev/null
-+++ b/src/x64/test/unit_test.sh
-@@ -0,0 +1,22 @@
-+#!/bin/sh
-+DIR=$(cd $(dirname $0); pwd)
-+cd $DIR
-+
-+LUAJIT=$DIR/../../luajit
-+HASERR=0
-+
-+find $DIR/unit -name "*.lua" -print | while read x; do
-+ $LUAJIT $x >/dev/null 2>/dev/null
-+ if [ $? -eq 0 ]; then
-+ echo "$x ok"
-+ else
-+ HASERR=1
-+ echo "$x failed"
-+ fi
-+done
-+
-+if [ $HASERR -eq 0 ]; then
-+ exit 0
-+fi
-+
-+exit 1
---
-2.21.0
-
-
-From 37df5975eeb9862d3b950b6e4fa7405316b2bbd1 Mon Sep 17 00:00:00 2001
-From: "Yichun Zhang (agentzh)" <yichun(a)openresty.com>
-Date: Sun, 7 Apr 2019 10:34:06 -0700
-Subject: [PATCH 21/34] style: minor coding style fixes.
-
-This is a followup fix for commit 2d3392771.
----
- src/lj_str.c | 7 ++++---
- src/x64/src/lj_str_hash_x64.h | 2 +-
- 2 files changed, 5 insertions(+), 4 deletions(-)
-
-diff --git a/src/lj_str.c b/src/lj_str.c
-index fd2420c9..842394c5 100644
---- a/src/lj_str.c
-+++ b/src/lj_str.c
-@@ -119,7 +119,8 @@ void lj_str_resize(lua_State *L, MSize newmask)
- }
-
- static MSize
--lj_str_original_hash(const char *str, size_t lenx) {
-+lj_str_original_hash(const char *str, size_t lenx)
-+{
- MSize len = (MSize)lenx;
- MSize a, b, h = len;
-
-@@ -147,7 +148,8 @@ lj_str_original_hash(const char *str, size_t lenx) {
- }
-
- MSize
--lj_str_indep_hash(GCstr *str) {
-+lj_str_indep_hash(GCstr *str)
-+{
- return lj_str_original_hash(strdata(str), str->len);
- }
-
-@@ -224,4 +226,3 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
- g->strnum--;
- lj_mem_free(g, s, sizestring(s));
- }
--
-diff --git a/src/x64/src/lj_str_hash_x64.h b/src/x64/src/lj_str_hash_x64.h
-index 063f631c..cf37a2d2 100644
---- a/src/x64/src/lj_str_hash_x64.h
-+++ b/src/x64/src/lj_str_hash_x64.h
-@@ -203,7 +203,7 @@ static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order,
- }
-
- static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
-- uint32_t len)
-+ uint32_t len)
- {
- uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2;
- uint64_t h1, h2, v;
---
-2.21.0
-
-
-From 465ae4fcca3b3d598f412cdd13060b6403b20de0 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 4 Sep 2019 16:01:40 +0530
-Subject: [PATCH 22/34] x86: Move lj_str_hash to the main code
-
-lj_str_hash can be written in an architecture independent manner so
-move it out from the x64 directory and add some convenience macros and
-assignments to make it easier to port.
----
- src/Makefile | 2 +-
- src/lj_str.c | 41 +--------
- src/lj_str.h | 6 ++
- .../src/lj_str_hash_x64.h => lj_str_hash.c} | 88 ++++++++++++-------
- 4 files changed, 67 insertions(+), 70 deletions(-)
- rename src/{x64/src/lj_str_hash_x64.h => lj_str_hash.c} (76%)
-
-diff --git a/src/Makefile b/src/Makefile
-index 6764d32f..b22e325c 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -509,7 +509,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
- lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
- lj_carith.o lj_clib.o lj_cparse.o \
- lj_lib.o lj_alloc.o lib_aux.o \
-- $(LJLIB_O) lib_init.o
-+ $(LJLIB_O) lib_init.o lj_str_hash.o
-
- LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
- LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
-diff --git a/src/lj_str.c b/src/lj_str.c
-index 842394c5..d13477cd 100644
---- a/src/lj_str.c
-+++ b/src/lj_str.c
-@@ -118,49 +118,12 @@ void lj_str_resize(lua_State *L, MSize newmask)
- g->strhash = newhash;
- }
-
--static MSize
--lj_str_original_hash(const char *str, size_t lenx)
--{
-- MSize len = (MSize)lenx;
-- MSize a, b, h = len;
--
-- /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
-- if (len >= 4) { /* Caveat: unaligned access! */
-- a = lj_getu32(str);
-- h ^= lj_getu32(str+len-4);
-- b = lj_getu32(str+(len>>1)-2);
-- h ^= b; h -= lj_rol(b, 14);
-- b += lj_getu32(str+(len>>2)-1);
-- } else if (len > 0) {
-- a = *(const uint8_t *)str;
-- h ^= *(const uint8_t *)(str+len-1);
-- b = *(const uint8_t *)(str+(len>>1));
-- h ^= b; h -= lj_rol(b, 14);
-- } else {
-- return 0;
-- }
--
-- a ^= h; a -= lj_rol(h, 11);
-- b ^= a; b -= lj_rol(a, 25);
-- h ^= b; h -= lj_rol(b, 16);
--
-- return h;
--}
--
- MSize
- lj_str_indep_hash(GCstr *str)
- {
-- return lj_str_original_hash(strdata(str), str->len);
-+ return lj_str_hash_default(strdata(str), str->len);
- }
-
--#include "x64/src/lj_str_hash_x64.h"
--
--#if defined(LJ_ARCH_STR_HASH)
--#define LJ_STR_HASH LJ_ARCH_STR_HASH
--#else
--#define LJ_STR_HASH lj_str_original_hash
--#endif
--
- /* Intern a string and return string object. */
- GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
- {
-@@ -177,7 +140,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
- return &g->strempty;
- }
-
-- h = LJ_STR_HASH(str, lenx);
-+ h = lj_str_hash(str, lenx);
-
- /* Check if the string has already been interned. */
- o = gcref(g->strhash[h & g->strmask]);
-diff --git a/src/lj_str.h b/src/lj_str.h
-index 0e21432e..cc045e13 100644
---- a/src/lj_str.h
-+++ b/src/lj_str.h
-@@ -26,4 +26,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
-
- MSize lj_str_indep_hash(GCstr *str);
-
-+typedef MSize (*lj_str_hashfn) (const char *, size_t);
-+
-+extern lj_str_hashfn lj_str_hash;
-+
-+extern MSize lj_str_hash_default(const char *str, size_t lenx);
-+
- #endif
-diff --git a/src/x64/src/lj_str_hash_x64.h b/src/lj_str_hash.c
-similarity index 76%
-rename from src/x64/src/lj_str_hash_x64.h
-rename to src/lj_str_hash.c
-index cf37a2d2..97eb2a77 100644
---- a/src/x64/src/lj_str_hash_x64.h
-+++ b/src/lj_str_hash.c
-@@ -5,18 +5,18 @@
- * to 128 bytes of given string.
- */
-
--#ifndef _LJ_STR_HASH_X64_H_
--#define _LJ_STR_HASH_X64_H_
--
--#if defined(__SSE4_2__) && defined(__x86_64) && defined(__GNUC__)
--
- #include <stdint.h>
- #include <sys/types.h>
- #include <unistd.h>
- #include <time.h>
- #include <smmintrin.h>
-
--#include "../../lj_def.h"
-+#include "lj_def.h"
-+#include "lj_str.h"
-+
-+#if defined(__SSE4_2__) && defined(__x86_64) && defined(__GNUC__)
-+#define lj_crc32_u32 _mm_crc32_u32
-+#define lj_crc32_u64 _mm_crc32_u64
-
- #undef LJ_AINLINE
- #define LJ_AINLINE
-@@ -48,7 +48,7 @@ static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len)
- v = (v << 8) | str[len >> 1];
- v = (v << 8) | str[len - 1];
- v = (v << 8) | len;
-- return _mm_crc32_u32(0, v);
-+ return lj_crc32_u32(0, v);
- #else
- uint32_t a, b, h = len;
-
-@@ -78,9 +78,9 @@ static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len)
- v2 = *cast_uint32p(str + len - 4);
- }
-
-- h = _mm_crc32_u32(0, len);
-- h = _mm_crc32_u64(h, v1);
-- h = _mm_crc32_u64(h, v2);
-+ h = lj_crc32_u32(0, len);
-+ h = lj_crc32_u64(h, v1);
-+ h = lj_crc32_u64(h, v2);
- return h;
- }
-
-@@ -90,18 +90,18 @@ static uint32_t lj_str_hash_16_128(const char* str, uint32_t len)
- uint64_t h1, h2;
- uint32_t i;
-
-- h1 = _mm_crc32_u32(0, len);
-+ h1 = lj_crc32_u32(0, len);
- h2 = 0;
-
- for (i = 0; i < len - 16; i += 16) {
-- h1 += _mm_crc32_u64(h1, *cast_uint64p(str + i));
-- h2 += _mm_crc32_u64(h2, *cast_uint64p(str + i + 8));
-+ h1 += lj_crc32_u64(h1, *cast_uint64p(str + i));
-+ h2 += lj_crc32_u64(h2, *cast_uint64p(str + i + 8));
- };
-
-- h1 = _mm_crc32_u64(h1, *cast_uint64p(str + len - 16));
-- h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8));
-+ h1 = lj_crc32_u64(h1, *cast_uint64p(str + len - 16));
-+ h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
-
-- return _mm_crc32_u32(h1, h2);
-+ return lj_crc32_u32(h1, h2);
- }
-
- /* **************************************************************************
-@@ -155,8 +155,8 @@ static void x64_init_random(void)
- }
-
- /* Init seed */
-- seed = _mm_crc32_u32(0, getpid());
-- seed = _mm_crc32_u32(seed, time(NULL));
-+ seed = lj_crc32_u32(0, getpid());
-+ seed = lj_crc32_u32(seed, time(NULL));
- srandom(seed);
-
- /* Now start to populate the random_pos[][]. */
-@@ -216,7 +216,7 @@ static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
- pos1 = get_random_pos_unsafe(chunk_sz_log2, 0);
- pos2 = get_random_pos_unsafe(chunk_sz_log2, 1);
-
-- h1 = _mm_crc32_u32(0, len);
-+ h1 = lj_crc32_u32(0, len);
- h2 = 0;
-
- /* loop over 14 chunks, 2 chunks at a time */
-@@ -224,29 +224,29 @@ static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
- chunk_ptr += chunk_sz, i++) {
-
- v = *cast_uint64p(chunk_ptr + pos1);
-- h1 = _mm_crc32_u64(h1, v);
-+ h1 = lj_crc32_u64(h1, v);
-
- v = *cast_uint64p(chunk_ptr + chunk_sz + pos2);
-- h2 = _mm_crc32_u64(h2, v);
-+ h2 = lj_crc32_u64(h2, v);
- }
-
- /* the last two chunks */
- v = *cast_uint64p(chunk_ptr + pos1);
-- h1 = _mm_crc32_u64(h1, v);
-+ h1 = lj_crc32_u64(h1, v);
-
- v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2);
-- h2 = _mm_crc32_u64(h2, v);
-+ h2 = lj_crc32_u64(h2, v);
-
- /* process the trailing part */
-- h1 = _mm_crc32_u64(h1, *cast_uint64p(str));
-- h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8));
-+ h1 = lj_crc32_u64(h1, *cast_uint64p(str));
-+ h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8));
-
-- h1 = _mm_crc32_u32(h1, h2);
-+ h1 = lj_crc32_u32(h1, h2);
- return h1;
- }
-
- /* NOTE: the "len" should not be zero */
--static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len)
-+static LJ_AINLINE uint32_t lj_str_hash_opt(const char* str, size_t len)
- {
- if (len < 128) {
- if (len >= 16) { /* [16, 128) */
-@@ -264,8 +264,36 @@ static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len)
- return lj_str_hash_128_above(str, len);
- }
-
--#define LJ_ARCH_STR_HASH lj_str_hash
-+lj_str_hashfn lj_str_hash = lj_str_hash_opt;
- #else
--#undef LJ_ARCH_STR_HASH
-+lj_str_hashfn lj_str_hash = lj_str_hash_default;
- #endif
--#endif /*_LJ_STR_HASH_X64_H_*/
-+
-+MSize
-+lj_str_hash_default(const char *str, size_t lenx)
-+{
-+ MSize len = (MSize)lenx;
-+ MSize a, b, h = len;
-+
-+ /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
-+ if (len >= 4) { /* Caveat: unaligned access! */
-+ a = lj_getu32(str);
-+ h ^= lj_getu32(str+len-4);
-+ b = lj_getu32(str+(len>>1)-2);
-+ h ^= b; h -= lj_rol(b, 14);
-+ b += lj_getu32(str+(len>>2)-1);
-+ } else if (len > 0) {
-+ a = *(const uint8_t *)str;
-+ h ^= *(const uint8_t *)(str+len-1);
-+ b = *(const uint8_t *)(str+(len>>1));
-+ h ^= b; h -= lj_rol(b, 14);
-+ } else {
-+ return 0;
-+ }
-+
-+ a ^= h; a -= lj_rol(h, 11);
-+ b ^= a; b -= lj_rol(a, 25);
-+ h ^= b; h -= lj_rol(b, 16);
-+
-+ return h;
-+}
---
-2.21.0
-
-
-From e44776bd2162062da0fece5ca0aa68907bccf199 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Thu, 5 Sep 2019 16:13:21 +0530
-Subject: [PATCH 23/34] Detect SSE4.2 support dynamically
-
-Move the cpu detection code out into its own module and run the
-routine as a constructor. Make the flags available as a global
-LJ_CPU_FLAGS.
----
- src/Makefile | 13 ++++--
- src/lib_jit.c | 108 +++-------------------------------------------
- src/lj_arch.h | 3 ++
- src/lj_init.c | 101 +++++++++++++++++++++++++++++++++++++++++++
- src/lj_jit.h | 1 +
- src/lj_state.c | 5 +++
- src/lj_str.c | 35 +++++++++++++++
- src/lj_str.h | 2 +
- src/lj_str_hash.c | 50 ++++++---------------
- 9 files changed, 176 insertions(+), 142 deletions(-)
- create mode 100644 src/lj_init.c
-
-diff --git a/src/Makefile b/src/Makefile
-index b22e325c..a74cda7f 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -509,7 +509,14 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
- lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
- lj_carith.o lj_clib.o lj_cparse.o \
- lj_lib.o lj_alloc.o lib_aux.o \
-- $(LJLIB_O) lib_init.o lj_str_hash.o
-+ $(LJLIB_O) lib_init.o lj_init.o
-+
-+ifeq (x64,$(TARGET_LJARCH))
-+ LJCORE_O += lj_str_hash.o
-+ lj_str_hash-CFLAGS = -msse4.2
-+endif
-+
-+F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<))
-
- LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
- LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
-@@ -693,8 +700,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
-
- %.o: %.c
- $(E) "CC $@"
-- $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
-- $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
-+ $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $<
-+ $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $<
-
- %.o: %.S
- $(E) "ASM $@"
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index b84efa13..5bf44276 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -663,116 +663,20 @@ JIT_PARAMDEF(JIT_PARAMINIT)
- };
- #endif
-
--#if LJ_TARGET_ARM && LJ_TARGET_LINUX
--#include <sys/utsname.h>
--#endif
--
--/* Arch-dependent CPU detection. */
--static uint32_t jit_cpudetect(lua_State *L)
-+/* Initialize JIT compiler. */
-+static void jit_init(lua_State *L)
- {
-- uint32_t flags = 0;
--#if LJ_TARGET_X86ORX64
-- uint32_t vendor[4];
-- uint32_t features[4];
-- if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
--#if !LJ_HASJIT
--#define JIT_F_SSE2 2
--#endif
-- flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
-+ extern uint32_t LJ_CPU_FLAGS;
- #if LJ_HASJIT
-- flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
-- flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
-- if (vendor[2] == 0x6c65746e) { /* Intel. */
-- if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
-- flags |= JIT_F_LEA_AGU;
-- } else if (vendor[2] == 0x444d4163) { /* AMD. */
-- uint32_t fam = (features[0] & 0x0ff00f00);
-- if (fam >= 0x00000f00) /* K8, K10. */
-- flags |= JIT_F_PREFER_IMUL;
-- }
-- if (vendor[0] >= 7) {
-- uint32_t xfeatures[4];
-- lj_vm_cpuid(7, xfeatures);
-- flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
-- }
--#endif
-- }
-+ jit_State *J = L2J(L);
- /* Check for required instruction set support on x86 (unnecessary on x64). */
- #if LJ_TARGET_X86
-- if (!(flags & JIT_F_SSE2))
-+ if (!(LJ_CPU_FLAGS & JIT_F_SSE2))
- luaL_error(L, "CPU with SSE2 required");
- #endif
--#elif LJ_TARGET_ARM
--#if LJ_HASJIT
-- int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
--#if LJ_TARGET_LINUX
-- if (ver < 70) { /* Runtime ARM CPU detection. */
-- struct utsname ut;
-- uname(&ut);
-- if (strncmp(ut.machine, "armv", 4) == 0) {
-- if (ut.machine[4] >= '7')
-- ver = 70;
-- else if (ut.machine[4] == '6')
-- ver = 60;
-- }
-- }
--#endif
-- flags |= ver >= 70 ? JIT_F_ARMV7 :
-- ver >= 61 ? JIT_F_ARMV6T2_ :
-- ver >= 60 ? JIT_F_ARMV6_ : 0;
-- flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
--#endif
--#elif LJ_TARGET_ARM64
-- /* No optional CPU features to detect (for now). */
--#elif LJ_TARGET_PPC
--#if LJ_HASJIT
--#if LJ_ARCH_SQRT
-- flags |= JIT_F_SQRT;
--#endif
--#if LJ_ARCH_ROUND
-- flags |= JIT_F_ROUND;
--#endif
--#endif
--#elif LJ_TARGET_MIPS
--#if LJ_HASJIT
-- /* Compile-time MIPS CPU detection. */
--#if LJ_ARCH_VERSION >= 20
-- flags |= JIT_F_MIPSXXR2;
--#endif
-- /* Runtime MIPS CPU detection. */
--#if defined(__GNUC__)
-- if (!(flags & JIT_F_MIPSXXR2)) {
-- int x;
--#ifdef __mips16
-- x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
--#else
-- /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
-- __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
--#endif
-- if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
-- }
--#endif
--#endif
--#elif LJ_TARGET_S390X
-- /* No optional CPU features to detect (for now). */
--#else
--#error "Missing CPU detection for this architecture"
--#endif
-- UNUSED(L);
-- return flags;
--}
--
--/* Initialize JIT compiler. */
--static void jit_init(lua_State *L)
--{
-- uint32_t flags = jit_cpudetect(L);
--#if LJ_HASJIT
-- jit_State *J = L2J(L);
-- J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
-+ J->flags = LJ_CPU_FLAGS | JIT_F_ON | JIT_F_OPT_DEFAULT;
- memcpy(J->param, jit_param_default, sizeof(J->param));
- lj_dispatch_update(G(L));
--#else
-- UNUSED(flags);
- #endif
- }
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 19dd258f..2a61af9a 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -191,6 +191,9 @@
- #ifdef LUAJIT_ENABLE_GC64
- #define LJ_TARGET_GC64 1
- #endif
-+#ifdef __GNUC__
-+#define LJ_HAS_OPTIMISED_HASH 1
-+#endif
-
- #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
-
-diff --git a/src/lj_init.c b/src/lj_init.c
-new file mode 100644
-index 00000000..bc706a26
---- /dev/null
-+++ b/src/lj_init.c
-@@ -0,0 +1,101 @@
-+#include <stdint.h>
-+#include "lj_arch.h"
-+#include "lj_jit.h"
-+#include "lj_vm.h"
-+#include "lj_str.h"
-+
-+uint32_t LJ_CPU_FLAGS = 0;
-+
-+#if LJ_TARGET_ARM && LJ_TARGET_LINUX
-+#include <sys/utsname.h>
-+#endif
-+
-+/* Arch-dependent CPU detection. */
-+static void __attribute__((constructor)) lj_cpudetect(void)
-+{
-+ uint32_t flags = 0;
-+#if LJ_TARGET_X86ORX64
-+ uint32_t vendor[4];
-+ uint32_t features[4];
-+ if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
-+#if !LJ_HASJIT
-+#define JIT_F_SSE2 2
-+#endif
-+ flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
-+#if LJ_HASJIT
-+ flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
-+ flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
-+ flags |= ((features[2] >> 20)&1) * JIT_F_SSE4_2;
-+ if (vendor[2] == 0x6c65746e) { /* Intel. */
-+ if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
-+ flags |= JIT_F_LEA_AGU;
-+ } else if (vendor[2] == 0x444d4163) { /* AMD. */
-+ uint32_t fam = (features[0] & 0x0ff00f00);
-+ if (fam >= 0x00000f00) /* K8, K10. */
-+ flags |= JIT_F_PREFER_IMUL;
-+ }
-+ if (vendor[0] >= 7) {
-+ uint32_t xfeatures[4];
-+ lj_vm_cpuid(7, xfeatures);
-+ flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
-+ }
-+#endif
-+ }
-+#elif LJ_TARGET_ARM
-+#if LJ_HASJIT
-+ int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
-+#if LJ_TARGET_LINUX
-+ if (ver < 70) { /* Runtime ARM CPU detection. */
-+ struct utsname ut;
-+ uname(&ut);
-+ if (strncmp(ut.machine, "armv", 4) == 0) {
-+ if (ut.machine[4] >= '7')
-+ ver = 70;
-+ else if (ut.machine[4] == '6')
-+ ver = 60;
-+ }
-+ }
-+#endif
-+ flags |= ver >= 70 ? JIT_F_ARMV7 :
-+ ver >= 61 ? JIT_F_ARMV6T2_ :
-+ ver >= 60 ? JIT_F_ARMV6_ : 0;
-+ flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
-+#endif
-+#elif LJ_TARGET_ARM64
-+ /* No optional CPU features to detect (for now). */
-+#elif LJ_TARGET_PPC
-+#if LJ_HASJIT
-+#if LJ_ARCH_SQRT
-+ flags |= JIT_F_SQRT;
-+#endif
-+#if LJ_ARCH_ROUND
-+ flags |= JIT_F_ROUND;
-+#endif
-+#endif
-+#elif LJ_TARGET_MIPS
-+#if LJ_HASJIT
-+ /* Compile-time MIPS CPU detection. */
-+#if LJ_ARCH_VERSION >= 20
-+ flags |= JIT_F_MIPSXXR2;
-+#endif
-+ /* Runtime MIPS CPU detection. */
-+#if defined(__GNUC__)
-+ if (!(flags & JIT_F_MIPSXXR2)) {
-+ int x;
-+#ifdef __mips16
-+ x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
-+#else
-+ /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
-+ __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
-+#endif
-+ if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
-+ }
-+#endif
-+#endif
-+#elif LJ_TARGET_S390X
-+ /* No optional CPU features to detect (for now). */
-+#else
-+#error "Missing CPU detection for this architecture"
-+#endif
-+ LJ_CPU_FLAGS = flags;
-+}
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index 5d41ef4b..919c58ee 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -20,6 +20,7 @@
- #define JIT_F_PREFER_IMUL 0x00000080
- #define JIT_F_LEA_AGU 0x00000100
- #define JIT_F_BMI2 0x00000200
-+#define JIT_F_SSE4_2 0x00000400
-
- /* Names for the CPU-specific flags. Must match the order above. */
- #define JIT_F_CPU_FIRST JIT_F_SSE2
-diff --git a/src/lj_state.c b/src/lj_state.c
-index a0fba2ac..9be16cb3 100644
---- a/src/lj_state.c
-+++ b/src/lj_state.c
-@@ -189,6 +189,11 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
- GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
- lua_State *L = &GG->L;
- global_State *g = &GG->g;
-+ extern uint32_t LJ_CPU_FLAGS;
-+
-+#ifdef LJ_HAS_OPTIMISED_HASH
-+ lj_str_hash_init (LJ_CPU_FLAGS);
-+#endif
- if (GG == NULL || !checkptrGC(GG)) return NULL;
- memset(GG, 0, sizeof(GG_State));
- L->gct = ~LJ_TTHREAD;
-diff --git a/src/lj_str.c b/src/lj_str.c
-index d13477cd..5598a0f7 100644
---- a/src/lj_str.c
-+++ b/src/lj_str.c
-@@ -118,6 +118,41 @@ void lj_str_resize(lua_State *L, MSize newmask)
- g->strhash = newhash;
- }
-
-+#ifdef LJ_HAS_OPTIMISED_HASH
-+lj_str_hashfn lj_str_hash = lj_str_hash_default;
-+#else
-+#define lj_str_hash lj_str_hash_default
-+#endif
-+
-+MSize
-+lj_str_hash_default(const char *str, size_t lenx)
-+{
-+ MSize len = (MSize)lenx;
-+ MSize a, b, h = len;
-+
-+ /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
-+ if (len >= 4) { /* Caveat: unaligned access! */
-+ a = lj_getu32(str);
-+ h ^= lj_getu32(str+len-4);
-+ b = lj_getu32(str+(len>>1)-2);
-+ h ^= b; h -= lj_rol(b, 14);
-+ b += lj_getu32(str+(len>>2)-1);
-+ } else if (len > 0) {
-+ a = *(const uint8_t *)str;
-+ h ^= *(const uint8_t *)(str+len-1);
-+ b = *(const uint8_t *)(str+(len>>1));
-+ h ^= b; h -= lj_rol(b, 14);
-+ } else {
-+ return 0;
-+ }
-+
-+ a ^= h; a -= lj_rol(h, 11);
-+ b ^= a; b -= lj_rol(a, 25);
-+ h ^= b; h -= lj_rol(b, 16);
-+
-+ return h;
-+}
-+
- MSize
- lj_str_indep_hash(GCstr *str)
- {
-diff --git a/src/lj_str.h b/src/lj_str.h
-index cc045e13..3dcad85a 100644
---- a/src/lj_str.h
-+++ b/src/lj_str.h
-@@ -32,4 +32,6 @@ extern lj_str_hashfn lj_str_hash;
-
- extern MSize lj_str_hash_default(const char *str, size_t lenx);
-
-+extern void lj_str_hash_init(uint32_t flags);
-+
- #endif
-diff --git a/src/lj_str_hash.c b/src/lj_str_hash.c
-index 97eb2a77..87a6d66c 100644
---- a/src/lj_str_hash.c
-+++ b/src/lj_str_hash.c
-@@ -13,8 +13,14 @@
-
- #include "lj_def.h"
- #include "lj_str.h"
-+#include "lj_jit.h"
-+#include "lj_arch.h"
-+
-+#ifdef LJ_HAS_OPTIMISED_HASH
-+#if !defined(__SSE4_2__)
-+#error "This file must be built with -msse4.2"
-+#endif
-
--#if defined(__SSE4_2__) && defined(__x86_64) && defined(__GNUC__)
- #define lj_crc32_u32 _mm_crc32_u32
- #define lj_crc32_u64 _mm_crc32_u64
-
-@@ -144,7 +150,7 @@ static LJ_AINLINE uint32_t log2_floor(uint32_t n)
- /* This function is to populate `random_pos` such that random_pos[i][*]
- * contains random value in the range of [2**i, 2**(i+1)).
- */
--static void x64_init_random(void)
-+static void str_hash_init_random(void)
- {
- int i, seed, rml;
-
-@@ -185,11 +191,6 @@ static void x64_init_random(void)
- }
- #undef POW2_MASK
-
--void __attribute__((constructor)) x64_init_random_constructor()
--{
-- x64_init_random();
--}
--
- /* Return a pre-computed random number in the range of [1**chunk_sz_order,
- * 1**(chunk_sz_order+1)). It is "unsafe" in the sense that the return value
- * may be greater than chunk-size; it is up to the caller to make sure
-@@ -264,36 +265,11 @@ static LJ_AINLINE uint32_t lj_str_hash_opt(const char* str, size_t len)
- return lj_str_hash_128_above(str, len);
- }
-
--lj_str_hashfn lj_str_hash = lj_str_hash_opt;
--#else
--lj_str_hashfn lj_str_hash = lj_str_hash_default;
--#endif
--
--MSize
--lj_str_hash_default(const char *str, size_t lenx)
-+void lj_str_hash_init(uint32_t flags)
- {
-- MSize len = (MSize)lenx;
-- MSize a, b, h = len;
--
-- /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
-- if (len >= 4) { /* Caveat: unaligned access! */
-- a = lj_getu32(str);
-- h ^= lj_getu32(str+len-4);
-- b = lj_getu32(str+(len>>1)-2);
-- h ^= b; h -= lj_rol(b, 14);
-- b += lj_getu32(str+(len>>2)-1);
-- } else if (len > 0) {
-- a = *(const uint8_t *)str;
-- h ^= *(const uint8_t *)(str+len-1);
-- b = *(const uint8_t *)(str+(len>>1));
-- h ^= b; h -= lj_rol(b, 14);
-- } else {
-- return 0;
-+ if (flags & JIT_F_SSE4_2) {
-+ lj_str_hash = lj_str_hash_opt;
-+ str_hash_init_random();
- }
--
-- a ^= h; a -= lj_rol(h, 11);
-- b ^= a; b -= lj_rol(a, 25);
-- h ^= b; h -= lj_rol(b, 16);
--
-- return h;
- }
-+#endif
---
-2.21.0
-
-
-From c7e0e64eb8311c56f7757fb5ffd696b0bb79e379 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Mon, 16 Sep 2019 06:50:27 +0530
-Subject: [PATCH 24/34] Fix up lj_str_hash smoke tests
-
-Make the lj_str_hash test include and run the right sources. This
-should go into $srcdir/tests/ in my repo but keep it here for now.
----
- src/lj_str_hash.c | 4 +++-
- src/x64/test/Makefile | 2 +-
- src/x64/test/test.cpp | 7 +++++--
- 3 files changed, 9 insertions(+), 4 deletions(-)
-
-diff --git a/src/lj_str_hash.c b/src/lj_str_hash.c
-index 87a6d66c..1c16116a 100644
---- a/src/lj_str_hash.c
-+++ b/src/lj_str_hash.c
-@@ -16,7 +16,7 @@
- #include "lj_jit.h"
- #include "lj_arch.h"
-
--#ifdef LJ_HAS_OPTIMISED_HASH
-+#if defined(LJ_HAS_OPTIMISED_HASH) || defined(SMOKETEST)
- #if !defined(__SSE4_2__)
- #error "This file must be built with -msse4.2"
- #endif
-@@ -268,7 +268,9 @@ static LJ_AINLINE uint32_t lj_str_hash_opt(const char* str, size_t len)
- void lj_str_hash_init(uint32_t flags)
- {
- if (flags & JIT_F_SSE4_2) {
-+#ifndef SMOKETEST
- lj_str_hash = lj_str_hash_opt;
-+#endif
- str_hash_init_random();
- }
- }
-diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile
-index 3ec44eae..c5adfcbe 100644
---- a/src/x64/test/Makefile
-+++ b/src/x64/test/Makefile
-@@ -24,7 +24,7 @@ CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src
- test: $(TEST_PROGRAM)
- @echo "some unit test"
- $(VALGRIND) ./$(TEST_PROGRAM)
-- ./unit_test.sh
-+ bash ./unit_test.sh
-
- @echo "smoke test"
- ../../luajit test_str_comp.lua
-diff --git a/src/x64/test/test.cpp b/src/x64/test/test.cpp
-index bc92acbb..5f03bfb6 100644
---- a/src/x64/test/test.cpp
-+++ b/src/x64/test/test.cpp
-@@ -3,13 +3,16 @@
- #include <stdlib.h>
- #include <map>
- #include "test_util.hpp"
--#include "lj_str_hash_x64.h"
-+
-+#define SMOKETEST
-+#include "../../lj_str_hash.c"
-
- using namespace std;
-
- static bool
- smoke_test()
- {
-+ lj_str_hash_init(JIT_F_SSE4_2);
- fprintf(stdout, "running smoke tests...\n");
- char buf[1024];
- char c = getpid() % 'a';
-@@ -22,7 +25,7 @@ smoke_test()
- 255, 256, 257};
- for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) {
- string s(buf, lens[i]);
-- test_printf("%d", lj_str_hash(s.c_str(), lens[i]));
-+ test_printf("%d", lj_str_hash_opt(s.c_str(), lens[i]));
- }
-
- return true;
---
-2.21.0
-
-
-From a83ed45abee436a6fc7ac8fcc27a9df3c5e9f71f Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Thu, 19 Sep 2019 13:25:54 -0700
-Subject: [PATCH 25/34] Do not call ldconfig on OpenBSD
-
-OpenBSD ldconfig does not have the -n flag, so there's no point in
-calling ldconfig; the manual symlinks should be good enough.
-
-Solves LuaJIT/LuaJIT#515
----
- Makefile | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/Makefile b/Makefile
-index f4b84081..024e35c9 100644
---- a/Makefile
-+++ b/Makefile
-@@ -104,6 +104,10 @@ ifeq (Darwin,$(TARGET_SYS))
- LDCONFIG= :
- endif
-
-+ifeq (OpenBSD,$(TARGET_SYS))
-+ LDCONFIG= :
-+endif
-+
- ##############################################################################
-
- LUAJIT_BIN= src/luajit
---
-2.21.0
-
-
-From 1c9bf3c6e07d90b2ddfe38deef78b0ccbbe1afd7 Mon Sep 17 00:00:00 2001
-From: xiabin <snyh(a)snyh.org>
-Date: Mon, 1 Jul 2019 09:09:04 +0800
-Subject: [PATCH 26/34] typo: add the forgotten delay slot hint
-
----
- src/vm_mips.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
-index f3248125..952cc192 100644
---- a/src/vm_mips.dasc
-+++ b/src/vm_mips.dasc
-@@ -399,7 +399,7 @@ static void build_subroutines(BuildCtx *ctx)
- | xori AT, TMP0, FRAME_C
- | and TMP2, PC, TMP2
- | bnez AT, ->vm_returnp
-- | subu TMP2, BASE, TMP2 // TMP2 = previous base.
-+ |. subu TMP2, BASE, TMP2 // TMP2 = previous base.
- |
- | addiu TMP1, RD, -8
- | sw TMP2, L->base
---
-2.21.0
-
-
-From 7d76ffcaede1a1887efa57b51d80f524b67ff2ba Mon Sep 17 00:00:00 2001
-From: "s.ostanevich" <s.ostanevich(a)sostanevich.local>
-Date: Thu, 8 Aug 2019 15:33:17 +0300
-Subject: [PATCH 27/34] fix #505: prevent propagation through SNEW
-
-folder can propagate pointer of original string in case
-SNEW is done with literal offset of 0 - this causes pointer
-arithmetics problems later on
----
- src/lj_ffrecord.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
-index 242d5d51..d42609e6 100644
---- a/src/lj_ffrecord.c
-+++ b/src/lj_ffrecord.c
-@@ -950,8 +950,9 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
- str->len-(MSize)start, pat->len)) {
- TRef pos;
- emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
-- pos = emitir(IRTI(IR_SUB), tr, emitir(IRT(IR_STRREF, IRT_PGC), trstr, tr0));
-- J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
-+ /* Caveat: can't use STRREF trstr 0 here because that might be pointing into a wrong string due to folding. */
-+ pos = emitir(IRTI(IR_SUB), tr, trsptr);
-+ J->base[0] = emitir(IRTI(IR_ADD), pos, emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, 1)));
- J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
- rd->nres = 2;
- } else {
---
-2.21.0
-
-
-From 0f898472a2d545fa720d49abf4440134e5bdfc03 Mon Sep 17 00:00:00 2001
-From: "s.ostanevich" <s.ostanevich(a)sostanevich.local>
-Date: Thu, 5 Sep 2019 17:05:34 +0300
-Subject: [PATCH 28/34] follow-up for the LUAJit folder problem
-
-The original patch did not provide correct ending of patch from
-string:find() call. Sent to review to @mraleph as part of
-LUAJit gh-505
-
-fixes: #4476
----
- src/lj_ffrecord.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
-index d42609e6..5b1f184e 100644
---- a/src/lj_ffrecord.c
-+++ b/src/lj_ffrecord.c
-@@ -953,7 +953,7 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
- /* Caveat: can't use STRREF trstr 0 here because that might be pointing into a wrong string due to folding. */
- pos = emitir(IRTI(IR_SUB), tr, trsptr);
- J->base[0] = emitir(IRTI(IR_ADD), pos, emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, 1)));
-- J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
-+ J->base[1] = emitir(IRTI(IR_ADD), pos, emitir(IRTI(IR_ADD), trplen, trstart));
- rd->nres = 2;
- } else {
- emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
---
-2.21.0
-
-
-From 6a91b0dd5d136f7f61d172e76881f5e47cd55e27 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Tue, 24 Sep 2019 04:52:29 -0700
-Subject: [PATCH 29/34] Add test for string.find
-
-This test is for LuaJIT/LuaJIT#505
-
-Signed-off-by: s.ostanevich <s.ostanevich(a)sostanevich.local>
----
- test/lib/string/find.lua | 10 ++++++++++
- test/lib/string/index | 1 +
- 2 files changed, 11 insertions(+)
- create mode 100644 test/lib/string/find.lua
-
-diff --git a/test/lib/string/find.lua b/test/lib/string/find.lua
-new file mode 100644
-index 00000000..81fd2c32
---- /dev/null
-+++ b/test/lib/string/find.lua
-@@ -0,0 +1,10 @@
-+do --- find relative
-+ jit.opt.start("hotloop=1")
-+ for _ = 1, 20 do
-+ local value = "abc"
-+ local pos_c = string.find(value, "c", 1, true)
-+ local value2 = string.sub(value, 1, pos_c - 1)
-+ local pos_b = string.find(value2, "b", 2, true)
-+ assert(pos_b == 2, "FAIL: position of 'b' is " .. pos_b)
-+ end
-+end
-diff --git a/test/lib/string/index b/test/lib/string/index
-index c0638e9c..83796e84 100644
---- a/test/lib/string/index
-+++ b/test/lib/string/index
-@@ -2,6 +2,7 @@ metatable.lua
- byte.lua
- char.lua
- dump.lua
-+find.lua
- format
- len.lua
- lower_upper.lua
---
-2.21.0
-
-
-From 2a5ee45ecd4d40caa6ba16b24e30dc048b53755e Mon Sep 17 00:00:00 2001
-From: Julien Desgats <julien(a)cloudflare.com>
-Date: Wed, 14 Aug 2019 09:58:13 +0100
-Subject: [PATCH 30/34] Attempt to fix erratic profiler behaviour when called
- during GC
-
-It turns out that calling the hook profiler while a Lua finalizer is
-running causes some issues with VM internal hook flags and dispatch
-table. Hooks are restored but not dispatch table, which causes both to
-be out-of-sync. This patch ensures that the dispatch table stays in sync
-and that we will not call the profiling hook when a finalizer is
-running.
-
-The extra dispatch table updates are nearly no-ops most of the time. as
-the flags would not match only when the `HOOK_PROFILE` is set (which
-should be quite unusual). Experiments at 100Hz with a extremely GC
-intensive script showed an overhead of about 1%. The actual effect on a
-production workload should be lower.
-
-fixes LuaJIT/LuaJIT#512
----
- src/lj_gc.c | 2 ++
- src/lj_obj.h | 2 +-
- src/lj_profile.c | 2 +-
- 3 files changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_gc.c b/src/lj_gc.c
-index 2aaf5b2c..449db4a6 100644
---- a/src/lj_gc.c
-+++ b/src/lj_gc.c
-@@ -466,6 +466,7 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
- TValue *top;
- lj_trace_abort(g);
- hook_entergc(g); /* Disable hooks and new traces during __gc. */
-+ lj_dispatch_update(g);
- g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
- top = L->top;
- copyTV(L, top++, mo);
-@@ -474,6 +475,7 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
- L->top = top+1;
- errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
- hook_restore(g, oldh);
-+ lj_dispatch_update(g);
- g->gc.threshold = oldt; /* Restore GC threshold. */
- if (errcode)
- lj_err_throw(L, errcode); /* Propagate errors. */
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index 3f674db2..6b06b4a1 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -638,7 +638,7 @@ typedef struct global_State {
- #define HOOK_PROFILE 0x80
- #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
- #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
--#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
-+#define hook_entergc(g) ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE)
- #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
- #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
- #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
-diff --git a/src/lj_profile.c b/src/lj_profile.c
-index 3223697f..e2966e0c 100644
---- a/src/lj_profile.c
-+++ b/src/lj_profile.c
-@@ -153,7 +153,7 @@ static void profile_trigger(ProfileState *ps)
- profile_lock(ps);
- ps->samples++; /* Always increment number of samples. */
- mask = g->hookmask;
-- if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */
-+ if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */
- int st = g->vmstate;
- ps->vmstate = st >= 0 ? 'N' :
- st == ~LJ_VMST_INTERP ? 'I' :
---
-2.21.0
-
-
-From d83ef2b043f0a63903f9b44f81822b1e585e189b Mon Sep 17 00:00:00 2001
-From: Priya Seth <sethp(a)us.ibm.com>
-Date: Tue, 24 Sep 2019 05:03:18 -0700
-Subject: [PATCH 31/34] Fix BC_POW on ppc64le
-
----
- src/vm_ppc.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index 31ed39a5..c63f15c3 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -4538,7 +4538,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addi BASEP4, BASE, 4
- |.endif
- | lwzx CARG1, BASE_HI, RB
-- | lwzx CARG3, BASE, RC
-+ | lwzx CARG3, BASE_HI, RC
- |.if FPU
- | lfdx FARG1, BASE, RB
- | lfdx FARG2, BASE, RC
---
-2.21.0
-
-
-From 1022c08bbc0101eb4227191f304ce9c74150ade5 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Tue, 24 Sep 2019 05:09:17 -0700
-Subject: [PATCH 32/34] tests: Enable unportable math tests
-
-They caught a POW bug in ppc64le, so they're useful enough. We will
-figure out a fix if they're found to be broken.
----
- test/index | 1 +
- test/unportable/math_special.lua | 37 ++++++++++++++++++--------------
- 2 files changed, 22 insertions(+), 16 deletions(-)
-
-diff --git a/test/index b/test/index
-index bd4081e3..b1580c83 100644
---- a/test/index
-+++ b/test/index
-@@ -4,3 +4,4 @@ bc +luajit>=2
- computations.lua
- trace +jit
- opt +jit
-+unportable
-diff --git a/test/unportable/math_special.lua b/test/unportable/math_special.lua
-index 49161014..bdf22f01 100644
---- a/test/unportable/math_special.lua
-+++ b/test/unportable/math_special.lua
-@@ -31,24 +31,29 @@ local powcheck = {
- "+inf +inf +inf +inf +1 +1 +0 +0 nan",
- "nan nan nan nan +1 nan nan nan nan",
- }
--for j=1,#inp do
-- local y = inp[j]
-- check(function(x) return x^y end, powcheck[j])
-+
-+do --- math.pow
-+ for j=1,#inp do
-+ local y = inp[j]
-+ check(function(x) return x^y end, powcheck[j])
-+ end
- end
-
--check(math.abs, "+0 +0 +0.5 +0.5 +1 +1 +inf +inf nan")
--check(math.floor, "+0 -0 +0 -1 +1 -1 +inf -inf nan")
--check(math.ceil, "+0 -0 +1 -0 +1 -1 +inf -inf nan")
--check(math.sqrt, "+0 -0 +0.70711 nan +1 nan +inf nan nan")
--check(math.sin, "+0 -0 +0.47943 -0.47943 +0.84147 -0.84147 nan nan nan")
--check(math.cos, "+1 +1 +0.87758 +0.87758 +0.5403 +0.5403 nan nan nan")
--check(math.tan, "+0 -0 +0.5463 -0.5463 +1.5574 -1.5574 nan nan nan")
--check(math.asin, "+0 -0 +0.5236 -0.5236 +1.5708 -1.5708 nan nan nan")
--check(math.acos, "+1.5708 +1.5708 +1.0472 +2.0944 +0 +3.1416 nan nan nan")
--check(math.atan, "+0 -0 +0.46365 -0.46365 +0.7854 -0.7854 +1.5708 -1.5708 nan")
--check(math.log, "-inf -inf -0.69315 nan +0 nan +inf nan nan")
--check(math.log10, "-inf -inf -0.30103 nan +0 nan +inf nan nan")
--check(math.exp, "+1 +1 +1.6487 +0.60653 +2.7183 +0.36788 +inf +0 nan")
-+do --- math functions
-+ check(math.abs, "+0 +0 +0.5 +0.5 +1 +1 +inf +inf nan")
-+ check(math.floor, "+0 -0 +0 -1 +1 -1 +inf -inf nan")
-+ check(math.ceil, "+0 -0 +1 -0 +1 -1 +inf -inf nan")
-+ check(math.sqrt, "+0 -0 +0.70711 nan +1 nan +inf nan nan")
-+ check(math.sin, "+0 -0 +0.47943 -0.47943 +0.84147 -0.84147 nan nan nan")
-+ check(math.cos, "+1 +1 +0.87758 +0.87758 +0.5403 +0.5403 nan nan nan")
-+ check(math.tan, "+0 -0 +0.5463 -0.5463 +1.5574 -1.5574 nan nan nan")
-+ check(math.asin, "+0 -0 +0.5236 -0.5236 +1.5708 -1.5708 nan nan nan")
-+ check(math.acos, "+1.5708 +1.5708 +1.0472 +2.0944 +0 +3.1416 nan nan nan")
-+ check(math.atan, "+0 -0 +0.46365 -0.46365 +0.7854 -0.7854 +1.5708 -1.5708 nan")
-+ check(math.log, "-inf -inf -0.69315 nan +0 nan +inf nan nan")
-+ check(math.log10, "-inf -inf -0.30103 nan +0 nan +inf nan nan")
-+ check(math.exp, "+1 +1 +1.6487 +0.60653 +2.7183 +0.36788 +inf +0 nan")
-+end
-
- -- Pointless: deg, rad, min, max, pow
- -- LATER: %, fmod, frexp, ldexp, modf, sinh, cosh, tanh
---
-2.21.0
-
-
-From c5838c12bc125c3c75197e2de068c5b948515602 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 25 Sep 2019 11:33:58 -0700
-Subject: [PATCH 33/34] test: Add the index file
-
-Oops.
----
- test/unportable/index | 1 +
- 1 file changed, 1 insertion(+)
- create mode 100644 test/unportable/index
-
-diff --git a/test/unportable/index b/test/unportable/index
-new file mode 100644
-index 00000000..2549a068
---- /dev/null
-+++ b/test/unportable/index
-@@ -0,0 +1 @@
-+math_special.lua
---
-2.21.0
-
-
-From 7489a362a404421b413b3907f0521901de8818a8 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 25 Sep 2019 11:34:51 -0700
-Subject: [PATCH 34/34] Avoid build warning
-
-LJ_CPU_FLAGS is only needed when the optimised hash algorithm is being
-built.
----
- src/lj_state.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_state.c b/src/lj_state.c
-index 9be16cb3..e664959e 100644
---- a/src/lj_state.c
-+++ b/src/lj_state.c
-@@ -189,9 +189,9 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
- GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
- lua_State *L = &GG->L;
- global_State *g = &GG->g;
-- extern uint32_t LJ_CPU_FLAGS;
-
- #ifdef LJ_HAS_OPTIMISED_HASH
-+ extern uint32_t LJ_CPU_FLAGS;
- lj_str_hash_init (LJ_CPU_FLAGS);
- #endif
- if (GG == NULL || !checkptrGC(GG)) return NULL;
---
-2.21.0
-
-From 22b8b09962be279f81cd1f4afd43964e82e072e1 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 25 Sep 2019 12:08:02 -0700
-Subject: [PATCH 1/2] Fix more build warnings for non-x86
-
----
- src/lib_jit.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lib_jit.c b/src/lib_jit.c
-index 5bf44276..5754f3c6 100644
---- a/src/lib_jit.c
-+++ b/src/lib_jit.c
-@@ -666,8 +666,8 @@ JIT_PARAMDEF(JIT_PARAMINIT)
- /* Initialize JIT compiler. */
- static void jit_init(lua_State *L)
- {
-- extern uint32_t LJ_CPU_FLAGS;
- #if LJ_HASJIT
-+ extern uint32_t LJ_CPU_FLAGS;
- jit_State *J = L2J(L);
- /* Check for required instruction set support on x86 (unnecessary on x64). */
- #if LJ_TARGET_X86
---
-2.21.0
-
-
-From 968fa8e5600ec9e91e9c67bcbe65bc76e09352e3 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 25 Sep 2019 12:08:15 -0700
-Subject: [PATCH 2/2] test: Run string.find test only when JIT is enabled in
- build
-
----
- test/lib/string/find.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/test/lib/string/find.lua b/test/lib/string/find.lua
-index 81fd2c32..8e8a4ba1 100644
---- a/test/lib/string/find.lua
-+++ b/test/lib/string/find.lua
-@@ -1,4 +1,4 @@
--do --- find relative
-+do --- find relative +jit
- jit.opt.start("hotloop=1")
- for _ = 1, 20 do
- local value = "abc"
---
-2.21.0
-
-From a10d0321a30d285907d0b400b00bbc6e058aa518 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 25 Sep 2019 12:27:11 -0700
-Subject: [PATCH] fix make amalg
-
-The amalg target builds everything into a single object file with the
-hope of producing better code. Add the lj_init and lj_str_hash
-sources in there as well.
-
-Eventually we need to see if just doing LTO is better since this is
-really just a hack.
----
- src/Makefile | 2 +-
- src/ljamalg.c | 2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index a74cda7f..a1fad2fa 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -623,7 +623,7 @@ default all: $(TARGET_T)
-
- amalg:
- @grep "^[+|]" ljamalg.c
-- $(MAKE) all "LJCORE_O=ljamalg.o"
-+ $(MAKE) all "LJCORE_O=ljamalg.o lj_str_hash.o"
-
- clean:
- $(HOST_RM) $(ALL_RM)
-diff --git a/src/ljamalg.c b/src/ljamalg.c
-index f1f28623..1e1f1b9d 100644
---- a/src/ljamalg.c
-+++ b/src/ljamalg.c
-@@ -94,4 +94,4 @@
- #include "lib_jit.c"
- #include "lib_ffi.c"
- #include "lib_init.c"
--
-+#include "lj_init.c"
---
-2.21.0
-
-From 18fd03af8d9228a88b9164926558ed53700e85d8 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 25 Sep 2019 13:00:36 -0700
-Subject: [PATCH] Always build lj_str_hash
-
-Fix up Makefile and lj_str_hash.c to make it safe to always build the
-file. It will only have a meaningful implementation for -msse4.2 for
-now.
----
- src/Makefile | 3 +--
- src/lj_str_hash.c | 6 +++---
- 2 files changed, 4 insertions(+), 5 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index a1fad2fa..fe94858c 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -509,10 +509,9 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
- lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
- lj_carith.o lj_clib.o lj_cparse.o \
- lj_lib.o lj_alloc.o lib_aux.o \
-- $(LJLIB_O) lib_init.o lj_init.o
-+ $(LJLIB_O) lib_init.o lj_init.o lj_str_hash.o
-
- ifeq (x64,$(TARGET_LJARCH))
-- LJCORE_O += lj_str_hash.o
- lj_str_hash-CFLAGS = -msse4.2
- endif
-
-diff --git a/src/lj_str_hash.c b/src/lj_str_hash.c
-index 1c16116a..6612065b 100644
---- a/src/lj_str_hash.c
-+++ b/src/lj_str_hash.c
-@@ -5,6 +5,9 @@
- * to 128 bytes of given string.
- */
-
-+#include "lj_arch.h"
-+
-+#if defined(LJ_HAS_OPTIMISED_HASH) || defined(SMOKETEST)
- #include <stdint.h>
- #include <sys/types.h>
- #include <unistd.h>
-@@ -14,9 +17,6 @@
- #include "lj_def.h"
- #include "lj_str.h"
- #include "lj_jit.h"
--#include "lj_arch.h"
--
--#if defined(LJ_HAS_OPTIMISED_HASH) || defined(SMOKETEST)
- #if !defined(__SSE4_2__)
- #error "This file must be built with -msse4.2"
- #endif
---
-2.21.0
-
-From 1d9200ca48196e7792ffaa8c57375ab78675e341 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Wed, 25 Sep 2019 13:05:47 -0700
-Subject: [PATCH] Include lj_dispatch.h
-
----
- src/lj_gc.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/lj_gc.c b/src/lj_gc.c
-index 449db4a6..fe3a4bad 100644
---- a/src/lj_gc.c
-+++ b/src/lj_gc.c
-@@ -20,6 +20,7 @@
- #include "lj_meta.h"
- #include "lj_state.h"
- #include "lj_frame.h"
-+#include "lj_dispatch.h"
- #if LJ_HASFFI
- #include "lj_ctype.h"
- #include "lj_cdata.h"
---
-2.21.0
-
diff --git a/luajit.spec b/luajit.spec
index 43a8a19..48300de 100644
--- a/luajit.spec
+++ b/luajit.spec
@@ -4,110 +4,22 @@ Name: luajit
Version: 2.1.0
%global apiver %(v=%{version}; echo ${v%.${v#[0-9].[0-9].}})
%global srcver %{version}%{?rctag:-%{rctag}}
-Release: 0.20%{?rctag:%{rctag}}%{?dist}
+Release: 0.21%{?rctag:%{rctag}}%{?dist}
Summary: Just-In-Time Compiler for Lua
License: MIT
URL: http://luajit.org/
Source0: http://luajit.org/download/LuaJIT-%{srcver}.tar.gz
-# Patches from https://github.com/siddhesh/LuaJIT.git
-# Generated from v2.1 branch against the 2.1.0-beta3 tag.
-# Some patches, as indicated below, have been modified to account for merge
-# commits, so care needs to be taken when auto-generating patches so that
-# existing patches are not replaced.
-
-# Merge commit
-Patch1: 0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch
-# Merge commit
-Patch2: 0002-Add-missing-LJ_MAX_JSLOTS-check.patch
-Patch3: 0003-MIPS-Use-precise-search-for-exit-jump-patching.patch
-Patch4: 0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch
-Patch5: 0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch
-Patch6: 0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch
-Patch7: 0007-Remove-unused-define.patch
-Patch8: 0008-Modify-fix-for-warning-from-ar.patch
-Patch9: 0009-x64-LJ_GC64-Fix-emit_rma.patch
-Patch10: 0010-PPC-Add-soft-float-support-to-interpreter.patch
-Patch11: 0011-Use-https-for-freelists.org-links.patch
-Patch12: 0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch
-Patch13: 0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch
-Patch14: 0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch
-Patch15: 0015-MIPS64-Hide-internal-function.patch
-# Merge commit
-Patch16: 0016-DynASM-x86-Fix-potential-REL_A-overflow.patch
-Patch17: 0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch
-Patch18: 0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch
-Patch19: 0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch
-Patch20: 0020-ARM64-Fix-assembly-of-HREFK.patch
-Patch21: 0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch
-Patch22: 0022-ARM64-Fix-xpcall-error-case.patch
-Patch23: 0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch
-Patch24: 0024-ARM64-Fix-xpcall-error-case-really.patch
-Patch25: 0025-MIPS64-Fix-xpcall-error-case.patch
-Patch26: 0026-Fix-IR_BUFPUT-assembly.patch
-# This patch gets dropped when merged from master to v2.1.
-# Patch27: 0027-Fix-string.format-c-0.patch
-Patch28: 0028-Fix-ARMv8-32-bit-subset-detection.patch
-Patch29: 0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch
-Patch30: 0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch
-# Merge commit
-Patch31: 0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch
-# Merge commit
-Patch32: 0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch
-# Merge commit
-Patch33: 0033-Clear-stack-after-print_jit_status-in-CLI.patch
-Patch34: 0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch
-Patch35: 0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch
-Patch36: 0036-Give-expected-results-for-negative-non-base-10-numbe.patch
-Patch37: 0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch
-Patch38: 0038-Bump-copyright-date-to-2018.patch
-# Merge commit
-Patch39: 0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch
-Patch40: 0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch
-# Merge commit
-Patch41: 0041-PPC-NetBSD-Fix-endianess-check.patch
-Patch42: 0042-DynASM-x86-Add-FMA3-instructions.patch
-Patch43: 0043-x86-Disassemble-FMA3-instructions.patch
-Patch44: 0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch
-Patch45: 0045-Windows-Add-UWP-support-part-1.patch
-Patch46: 0046-ARM64-Fix-write-barrier-in-BC_USETS.patch
-Patch47: 0047-ARM64-Fix-exit-stub-patching.patch
-Patch48: 0048-DynASM-Fix-warning.patch
-Patch49: 0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch
-Patch50: 0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch
-Patch51: 0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch
-Patch52: 0052-Actually-implement-maxirconst-trace-limit.patch
-Patch53: 0053-Better-detection-of-MinGW-build.patch
-# Merge commit
-Patch54: 0054-Fix-overflow-of-snapshot-map-offset.patch
-Patch55: 0055-DynASM-PPC-Fix-shadowed-variable.patch
-Patch56: 0056-DynASM-MIPS-Fix-shadowed-variable.patch
-Patch57: 0057-Fix-MinGW-build.patch
-Patch58: 0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch
-Patch59: 0059-Improve-luaL_addlstring.patch
-Patch60: 0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch
-Patch61: 0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch
-Patch62: 0062-Remove-redundant-emit_check_ofs.patch
-Patch63: 0063-aarch64-Use-the-xzr-register-whenever-possible.patch
-Patch64: 0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch
-Patch65: 0065-Add-support-for-FNMADD-and-FNMSUB.patch
-Patch66: 0066-Fix-os.date-for-timezone-change-awareness.patch
-Patch67: 0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch
-Patch68: 0068-bench-Fix-build-warnings.patch
-Patch69: 0069-Guard-against-undefined-behaviour-when-casting-from-.patch
-Patch70: 0070-Fix-build-erro-with-fnmsub-fusing.patch
-Patch71: 0071-aarch64-better-float-to-unsigned-int-conversion.patch
-Patch72: 0072-Better-behaviour-for-float-to-uint32_t-conversions.patch
-Patch73: luajit-s390x.patch
-Patch74: arm-Fix-up-condition-codes-for-conditional-arithmeti.patch
-Patch75: bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch
-Patch76: remove-setrlimit-on-freebsd.patch
-Patch77: test-check-for-package_searchers-only-in-compat5_2.patch
-Patch78: patch-for-ppc64-support.patch
-Patch79: luajit-openresty-features.patch
-Patch80: luajit-update-20190925.patch
-
-ExclusiveArch: %{arm} %{ix86} x86_64 %{mips} aarch64 s390x ppc64le
+# Patches from https://github.com/LuaJit/LuaJIT.git
+# Generated from v2.1 branch against the 2.1.0-beta3 tag using
+# git diff v2.1.0-beta3..v2.1 > luajit-2.1-update.patch
+Patch0: luajit-2.1-update.patch
+# Patches from https://github.com/cryptomilk/LuaJIT/commits/v2.1-fedora
+# git format-patch --stdout -l1 --no-renames v2.1..v2.1-fedora > luajit-2.1-fedora.patch
+Patch1: luajit-2.1-fedora.patch
+
+# ppc64le and s390x patch sets don't apply or build anymore
+ExclusiveArch: %{arm} %{ix86} x86_64 %{mips} aarch64
BuildRequires: gcc
BuildRequires: make
@@ -148,6 +60,8 @@ make amalg Q= E=@: PREFIX=%{_prefix} TARGET_STRIP=: \
%make_install PREFIX=%{_prefix} \
MULTILIB=%{_lib}
+ln -sf luajit-2.1.0-beta3 %{buildroot}%{_bindir}/luajit
+
rm -rf _tmp_html ; mkdir _tmp_html
cp -a doc _tmp_html/html
@@ -177,6 +91,11 @@ make check || true
%{_libdir}/pkgconfig/%{name}.pc
%changelog
+* Tue Oct 12 2021 Andreas Schneider <asn(a)redhat.com> - 2.1.0-0.21beta3
+- Rebase onto https://github.com/LuaJIT/LuaJIT/tree/v2.1
+- Dropped support for ppc64le
+- Dropped support for s390x
+
* Thu Jul 22 2021 Fedora Release Engineering <releng(a)fedoraproject.org> - 2.1.0-0.20beta3
- Rebuilt for https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild
diff --git a/patch-for-ppc64-support.patch b/patch-for-ppc64-support.patch
deleted file mode 100644
index 127602e..0000000
--- a/patch-for-ppc64-support.patch
+++ /dev/null
@@ -1,3636 +0,0 @@
-From 45da59d33101bc2f6af48e7f8fd04ca70aec9b98 Mon Sep 17 00:00:00 2001
-From: Guy Menanteau <menantea(a)fr.ibm.com>
-Date: Tue, 11 Jun 2019 11:11:47 +0000
-Subject: [PATCH] Patch for PPC64 support
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
- Create a patch for PPC64 support based on
-https://github.com/LuaJIT/LuaJIT/pull/140.
-https://bugzilla.redhat.com/show_bug.cgi?id=1591701
- This patch has been rebased to match FPU support
-
-Author: Guy Menanteau <menantea(a)fr.ibm.com>
-
-Signed-Off-By: Marcin Kościelnicki <koriakin(a)0x04.net>
-Signed-Off-By: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
----
- dynasm/dasm_ppc.lua | 5 +
- src/Makefile | 11 +-
- src/host/buildvm_asm.c | 16 +-
- src/lj_arch.h | 15 +-
- src/lj_ccall.c | 166 ++++-
- src/lj_ccall.h | 13 +
- src/lj_ccallback.c | 68 +-
- src/lj_ctype.h | 2 +-
- src/lj_def.h | 4 +
- src/lj_frame.h | 9 +
- src/lj_target_ppc.h | 14 +
- src/vm_ppc.dasc | 1340 +++++++++++++++++++++++++++-------------
- 12 files changed, 1200 insertions(+), 463 deletions(-)
-
-diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
-index 216f9259..8b6cb72b 100644
---- a/dynasm/dasm_ppc.lua
-+++ b/dynasm/dasm_ppc.lua
-@@ -257,9 +257,11 @@ map_op = {
- addic_3 = "30000000RRI",
- ["addic._3"] = "34000000RRI",
- addi_3 = "38000000RR0I",
-+ addil_3 = "38000000RR0J",
- li_2 = "38000000RI",
- la_2 = "38000000RD",
- addis_3 = "3c000000RR0I",
-+ addisl_3 = "3c000000RR0J",
- lis_2 = "3c000000RI",
- lus_2 = "3c000000RU",
- bc_3 = "40000000AAK",
-@@ -842,6 +844,9 @@ map_op = {
- srdi_3 = op_alias("rldicl_4", function(p)
- p[4] = p[3]; p[3] = "64-("..p[3]..")"
- end),
-+ ["srdi._3"] = op_alias("rldicl._4", function(p)
-+ p[4] = p[3]; p[3] = "64-("..p[3]..")"
-+ end),
- clrldi_3 = op_alias("rldicl_4", function(p)
- p[4] = p[3]; p[3] = "0"
- end),
-diff --git a/src/Makefile b/src/Makefile
-index 21a67d8e..6764d32f 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -459,7 +459,16 @@ ifeq (ppc,$(TARGET_LJARCH))
- DASM_AFLAGS+= -D GPR64
- endif
- ifeq (PS3,$(TARGET_SYS))
-- DASM_AFLAGS+= -D PPE -D TOC
-+ DASM_AFLAGS+= -D PPE
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D OPD
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D OPDENV
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D ELFV2
- endif
- ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
- DASM_ARCH= ppc64
-diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
-index 6743c73c..7033e654 100644
---- a/src/host/buildvm_asm.c
-+++ b/src/host/buildvm_asm.c
-@@ -188,18 +188,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
- #else
- #define TOCPREFIX ""
- #endif
-- if ((ins >> 26) == 16) {
-+ if ((ins >> 26) == 14) {
-+ fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
-+ } else if ((ins >> 26) == 15) {
-+ fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
-+ } else if ((ins >> 26) == 16) {
- fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
- (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
- } else if ((ins >> 26) == 18) {
--#if LJ_ARCH_PPC64
-- const char *suffix = strchr(sym, '@');
-- if (suffix && suffix[1] == 'h') {
-- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
-- } else if (suffix && suffix[1] == 'l') {
-- fprintf(ctx->fp, "\tld 12, %s\n", sym);
-- } else
--#endif
- fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
- } else {
- fprintf(stderr,
-@@ -298,7 +294,7 @@ void emit_asm(BuildCtx *ctx)
- int i, rel;
-
- fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
--#if LJ_ARCH_PPC64
-+#if LJ_ARCH_PPC_ELFV2
- fprintf(ctx->fp, "\t.abiversion 2\n");
- #endif
- fprintf(ctx->fp, "\t.text\n");
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 0da64200..19dd258f 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -299,10 +299,18 @@
- #if LJ_TARGET_CONSOLE
- #define LJ_ARCH_PPC32ON64 1
- #define LJ_ARCH_NOFFI 1
-+#if LJ_TARGET_PS3
-+#define LJ_ARCH_PPC_OPD 1
-+#endif
- #elif LJ_ARCH_BITS == 64
--#define LJ_ARCH_PPC64 1
--#define LJ_TARGET_GC64 1
-+#define LJ_ARCH_PPC32ON64 1
- #define LJ_ARCH_NOJIT 1 /* NYI */
-+#if _CALL_ELF == 2
-+#define LJ_ARCH_PPC_ELFV2 1
-+#else
-+#define LJ_ARCH_PPC_OPD 1
-+#define LJ_ARCH_PPC_OPDENV 1
-+#endif
- #endif
-
- #if _ARCH_PWR7
-@@ -462,12 +470,6 @@
- #error "No support for ILP32 model on ARM64"
- #endif
- #elif LJ_TARGET_PPC
--#if !LJ_ARCH_PPC64 && (defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)))
--#error "No support for little-endian PPC32"
--#endif
--#if LJ_ARCH_PPC64
--#error "No support for PowerPC 64 bit mode (yet)"
--#endif
- #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
- #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
- #endif
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index 5e31ac00..5d9aa63c 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -369,21 +369,97 @@
- #elif LJ_TARGET_PPC
- /* -- PPC calling conventions --------------------------------------------- */
-
-+#if LJ_ARCH_BITS == 64
-+
-+#if LJ_ARCH_PPC_ELFV2
-+
-+#define CCALL_HANDLE_STRUCTRET \
-+ if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \
-+ cc->retref = 1; /* Return by reference. */ \
-+ cc->gpr[ngpr++] = (GPRArg)dp; \
-+ }
-+
-+#define CCALL_HANDLE_STRUCTRET2 \
-+ int isfp = ccall_classify_fp(cts, ctr); \
-+ int i; \
-+ if (isfp == FTYPE_FLOAT) { \
-+ for (i = 0; i < ctr->size / 4; i++) \
-+ ((float *)dp)[i] = cc->fpr[i]; \
-+ } else if (isfp == FTYPE_DOUBLE) { \
-+ for (i = 0; i < ctr->size / 8; i++) \
-+ ((double *)dp)[i] = cc->fpr[i]; \
-+ } else { \
-+ if (ctr->size < 8 && LJ_BE) { \
-+ sp += 8 - ctr->size; \
-+ } \
-+ memcpy(dp, sp, ctr->size); \
-+ }
-+
-+#else
-+
- #define CCALL_HANDLE_STRUCTRET \
- cc->retref = 1; /* Return all structs by reference. */ \
- cc->gpr[ngpr++] = (GPRArg)dp;
-
-+#endif
-+
- #define CCALL_HANDLE_COMPLEXRET \
- /* Complex values are returned in 2 or 4 GPRs. */ \
- cc->retref = 0;
-
-+#define CCALL_HANDLE_STRUCTARG
-+
- #define CCALL_HANDLE_COMPLEXRET2 \
-- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
-+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
-+ ((float *)dp)[0] = cc->fpr[0]; \
-+ ((float *)dp)[1] = cc->fpr[1]; \
-+ } else { /* Copy complex double from FPRs. */ \
-+ ((double *)dp)[0] = cc->fpr[0]; \
-+ ((double *)dp)[1] = cc->fpr[1]; \
-+ }
-+
-+#define CCALL_HANDLE_COMPLEXARG \
-+ isfp = 1; \
-+ if (d->size == sizeof(float) * 2) { \
-+ d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \
-+ isf32 = 1; \
-+ }
-+
-+#define CCALL_HANDLE_REGARG \
-+ if (isfp && d->size == sizeof(float)) { \
-+ d = ctype_get(cts, CTID_DOUBLE); \
-+ isf32 = 1; \
-+ } \
-+ if (ngpr < maxgpr) { \
-+ dp = &cc->gpr[ngpr]; \
-+ ngpr += n; \
-+ if (ngpr > maxgpr) { \
-+ nsp += ngpr - 8; \
-+ ngpr = 8; \
-+ if (nsp > CCALL_MAXSTACK) { \
-+ goto err_nyi; \
-+ } \
-+ } \
-+ goto done; \
-+ }
-+
-+#else
-+
-+#define CCALL_HANDLE_STRUCTRET \
-+ cc->retref = 1; /* Return all structs by reference. */ \
-+ cc->gpr[ngpr++] = (GPRArg)dp;
-+
-+#define CCALL_HANDLE_COMPLEXRET \
-+ /* Complex values are returned in 2 or 4 GPRs. */ \
-+ cc->retref = 0;
-
- #define CCALL_HANDLE_STRUCTARG \
- rp = cdataptr(lj_cdata_new(cts, did, sz)); \
- sz = CTSIZE_PTR; /* Pass all structs by reference. */
-
-+#define CCALL_HANDLE_COMPLEXRET2 \
-+ memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
-+
- #define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex by value in 2 or 4 GPRs. */
-
-@@ -418,6 +494,8 @@
- }
- #endif
-
-+#endif
-+
- #if !LJ_ABI_SOFTFP
- #define CCALL_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
-@@ -845,6 +923,50 @@ noth: /* Not a homogeneous float/double aggregate. */
-
- #endif
-
-+/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */
-+
-+#if LJ_ARCH_PPC_ELFV2
-+
-+#define FTYPE_FLOAT 1
-+#define FTYPE_DOUBLE 2
-+
-+static unsigned int ccall_classify_fp(CTState *cts, CType *ct) {
-+ if (ctype_isfp(ct->info)) {
-+ if (ct->size == sizeof(float))
-+ return FTYPE_FLOAT;
-+ else
-+ return FTYPE_DOUBLE;
-+ } else if (ctype_iscomplex(ct->info)) {
-+ if (ct->size == sizeof(float) * 2)
-+ return FTYPE_FLOAT;
-+ else
-+ return FTYPE_DOUBLE;
-+ } else if (ctype_isstruct(ct->info)) {
-+ int res = -1;
-+ int sz = ct->size;
-+ while (ct->sib) {
-+ ct = ctype_get(cts, ct->sib);
-+ if (ctype_isfield(ct->info)) {
-+ int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct));
-+ if (res == -1)
-+ res = sub;
-+ if (sub != -1 && sub != res)
-+ return 0;
-+ } else if (ctype_isbitfield(ct->info) ||
-+ ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
-+ return 0;
-+ }
-+ }
-+ if (res > 0 && sz > res * 4 * 8)
-+ return 0;
-+ return res;
-+ } else {
-+ return 0;
-+ }
-+}
-+
-+#endif
-+
- /* -- MIPS64 ABI struct classification ---------------------------- */
-
- #if LJ_TARGET_MIPS64
-@@ -1019,6 +1141,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- CTSize sz;
- MSize n, isfp = 0, isva = 0, onstack = 0;
- void *dp, *rp = NULL;
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ int isf32 = 0;
-+#endif
-
- if (fid) { /* Get argument type from field. */
- CType *ctf = ctype_get(cts, fid);
-@@ -1076,7 +1201,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- *(void **)dp = rp;
- dp = rp;
- }
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE
-+ if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) {
-+ dp = (char *)dp + (CTSIZE_PTR - sz);
-+ }
-+#endif
- lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ if (isfp) {
-+ int i;
-+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
-+ cc->fpr[nfpr++] = ((double *)dp)[i];
-+ }
-+ if (isf32) {
-+ int i;
-+ for (i = 0; i < d->size / 8; i++)
-+ ((float *)dp)[i*2] = ((double *)dp)[i];
-+ }
-+#endif
-+#if LJ_ARCH_PPC_ELFV2
-+ if (ctype_isstruct(d->info)) {
-+ isfp = ccall_classify_fp(cts, d);
-+ int i;
-+ if (isfp == FTYPE_FLOAT) {
-+ for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++)
-+ cc->fpr[nfpr++] = ((float *)dp)[i];
-+ } else if (isfp == FTYPE_DOUBLE) {
-+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
-+ cc->fpr[nfpr++] = ((double *)dp)[i];
-+ }
-+ }
-+#endif
- /* Extend passed integers to 32 bits at least. */
- if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
- if (d->info & CTF_UNSIGNED)
-@@ -1090,6 +1245,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- if (isfp && d->size == sizeof(float))
- ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
- #endif
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info))
-+ && d->size <= 4) {
-+ if (d->info & CTF_UNSIGNED)
-+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
-+ else
-+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
-+ }
-+#endif
- #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
- if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
- #if LJ_TARGET_MIPS64
-diff --git a/src/lj_ccall.h b/src/lj_ccall.h
-index f237eaaa..af110317 100644
---- a/src/lj_ccall.h
-+++ b/src/lj_ccall.h
-@@ -86,10 +86,23 @@ typedef union FPRArg {
- #elif LJ_TARGET_PPC
-
- #define CCALL_NARG_GPR 8
-+#if LJ_ARCH_BITS == 64
-+#define CCALL_NARG_FPR 13
-+#if LJ_ARCH_PPC_ELFV2
-+#define CCALL_NRET_GPR 2
-+#define CCALL_NRET_FPR 8
-+#define CCALL_SPS_EXTRA 14
-+#else
-+#define CCALL_NRET_GPR 1
-+#define CCALL_NRET_FPR 2
-+#define CCALL_SPS_EXTRA 16
-+#endif
-+#else
- #define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
- #define CCALL_NRET_GPR 4 /* For complex double. */
- #define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
- #define CCALL_SPS_EXTRA 4
-+#endif
- #define CCALL_SPS_FREE 0
-
- typedef intptr_t GPRArg;
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index d6174e76..1866fd16 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -61,8 +61,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
-
- #elif LJ_TARGET_PPC
-
-+#if LJ_ARCH_PPC_OPD
-+
-+#define CALLBACK_SLOT2OFS(slot) (24*(slot))
-+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24)
-+#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
-+
-+#elif LJ_ARCH_PPC_ELFV2
-+
-+#define CALLBACK_SLOT2OFS(slot) (4*(slot))
-+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4)
-+#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10)
-+
-+#else
-+
- #define CALLBACK_MCODE_HEAD 24
-
-+#endif
-+
- #elif LJ_TARGET_MIPS32
-
- #define CALLBACK_MCODE_HEAD 20
-@@ -188,24 +204,59 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
- }
- #elif LJ_TARGET_PPC
-+#if LJ_ARCH_PPC_OPD
-+register void *vm_toc __asm__("r2");
-+static void callback_mcode_init(global_State *g, uint64_t *page)
-+{
-+ uint64_t *p = page;
-+ void *target = (void *)lj_vm_ffi_callback;
-+ MSize slot;
-+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
-+ *p++ = (uint64_t)target;
-+ *p++ = (uint64_t)vm_toc;
-+ *p++ = (uint64_t)g | ((uint64_t)slot << 47);
-+ }
-+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 8);
-+}
-+#else
- static void callback_mcode_init(global_State *g, uint32_t *page)
- {
- uint32_t *p = page;
- void *target = (void *)lj_vm_ffi_callback;
- MSize slot;
-+#if LJ_ARCH_PPC_ELFV2
-+ // Needs to be in sync with lj_vm_ffi_callback.
-+ lua_assert(CALLBACK_MCODE_SIZE == 4096);
-+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
-+ *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2);
-+ p++;
-+ }
-+ *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff);
-+ *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff);
-+ *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
-+ *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
-+ *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff);
-+ *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff);
-+ *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff);
-+ *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff);
-+ *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1);
-+ *p++ = PPCI_BCTR;
-+#else
- *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
-- *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16);
-+ *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16);
- *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff);
-- *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff);
-+ *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff);
- *p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
- *p++ = PPCI_BCTR;
- for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
-- *p++ = PPCI_LI | PPCF_T(RID_R11) | slot;
-+ *p++ = PPCI_LI | PPCF_T(RID_R12) | slot;
- *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
- p++;
- }
-- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
-+#endif
-+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 4);
- }
-+#endif
- #elif LJ_TARGET_MIPS
- static void callback_mcode_init(global_State *g, uint32_t *page)
- {
-@@ -662,6 +713,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
- *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
- (int32_t)*(int16_t *)dp;
- }
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ if (ctr->size <= 4 &&
-+ (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) {
-+ if (ctr->info & CTF_UNSIGNED)
-+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
-+ else
-+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
-+ }
-+#endif
- #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
- /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
- if (ctr->size <= 4 &&
-diff --git a/src/lj_ctype.h b/src/lj_ctype.h
-index 0c220a88..105865b4 100644
---- a/src/lj_ctype.h
-+++ b/src/lj_ctype.h
-@@ -153,7 +153,7 @@ typedef struct CType {
-
- /* Simplify target-specific configuration. Checked in lj_ccall.h. */
- #define CCALL_MAX_GPR 8
--#define CCALL_MAX_FPR 8
-+#define CCALL_MAX_FPR 14
-
- typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;
-
-diff --git a/src/lj_def.h b/src/lj_def.h
-index e67bb24c..59f3dc59 100644
---- a/src/lj_def.h
-+++ b/src/lj_def.h
-@@ -71,7 +71,11 @@ typedef unsigned int uintptr_t;
- #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
- #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
-
-+#if defined(__powerpc64__) && _CALL_ELF != 2
-+#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */
-+#else
- #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
-+#endif
-
- /* Minimum table/buffer sizes. */
- #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 9b2081d1..74185324 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #define CFRAME_OFS_MULTRES 408
- #define CFRAME_SIZE 384
- #define CFRAME_SHIFT_MULTRES 3
-+#elif LJ_ARCH_PPC_ELFV2
-+#define CFRAME_OFS_ERRF 360
-+#define CFRAME_OFS_NRES 356
-+#define CFRAME_OFS_PREV 336
-+#define CFRAME_OFS_L 352
-+#define CFRAME_OFS_PC 348
-+#define CFRAME_OFS_MULTRES 344
-+#define CFRAME_SIZE 368
-+#define CFRAME_SHIFT_MULTRES 3
- #elif LJ_ARCH_PPC32ON64
- #define CFRAME_OFS_ERRF 472
- #define CFRAME_OFS_NRES 468
-diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
-index c5c991a3..f0c8c941 100644
---- a/src/lj_target_ppc.h
-+++ b/src/lj_target_ppc.h
-@@ -30,8 +30,13 @@ enum {
-
- /* Calling conventions. */
- RID_RET = RID_R3,
-+#if LJ_LE
-+ RID_RETHI = RID_R4,
-+ RID_RETLO = RID_R3,
-+#else
- RID_RETHI = RID_R3,
- RID_RETLO = RID_R4,
-+#endif
- RID_FPRET = RID_F1,
-
- /* These definitions must match with the *.dasc file(s): */
-@@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
- #define PPCF_C(r) ((r) << 6)
- #define PPCF_MB(n) ((n) << 6)
- #define PPCF_ME(n) ((n) << 1)
-+#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1)))
-+#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5)))
- #define PPCF_Y 0x00200000
- #define PPCF_DOT 0x00000001
-
-@@ -200,6 +207,13 @@ typedef enum PPCIns {
- PPCI_RLWINM = 0x54000000,
- PPCI_RLWIMI = 0x50000000,
-
-+ PPCI_RLDICL = 0x78000000,
-+ PPCI_RLDICR = 0x78000004,
-+ PPCI_RLDIC = 0x78000008,
-+ PPCI_RLDIMI = 0x7800000c,
-+ PPCI_RLDCL = 0x78000010,
-+ PPCI_RLDCR = 0x78000012,
-+
- PPCI_B = 0x48000000,
- PPCI_BL = 0x48000001,
- PPCI_BC = 0x40800000,
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index 0839668c..f0b3498a 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -22,35 +22,40 @@
- |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
- |// Affects reg saves, stack layout, carry/overflow/dot flags etc.
- |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
--|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3).
-+|// OPD Need function descriptors (64 bit or 32 bit variant, e.g. PS3).
- |// Function pointers are really a struct: code, TOC, env (optional).
--|// TOCENV Function pointers have an environment pointer, too (not on PS3).
-+|// OPDENV Function pointers have an environment pointer, too (not on PS3).
-+|// ELFV2 The 64-bit ELF V2 ABI is in use.
- |// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
- |// Must avoid (slow) micro-coded instructions.
- |
- |.if P64
--|.define TOC, 1
--|.define TOCENV, 1
- |.macro lpx, a, b, c; ldx a, b, c; .endmacro
- |.macro lp, a, b; ld a, b; .endmacro
- |.macro stp, a, b; std a, b; .endmacro
-+|.macro stpx, a, b, c; stdx a, b, c; .endmacro
- |.define decode_OPP, decode_OP8
--|.if FFI
--|// Missing: Calling conventions, 64 bit regs, TOC.
--|.error lib_ffi not yet implemented for PPC64
--|.endif
-+|.define PSIZE, 8
- |.else
- |.macro lpx, a, b, c; lwzx a, b, c; .endmacro
- |.macro lp, a, b; lwz a, b; .endmacro
- |.macro stp, a, b; stw a, b; .endmacro
-+|.macro stpx, a, b, c; stwx a, b, c; .endmacro
- |.define decode_OPP, decode_OP4
-+|.define PSIZE, 4
- |.endif
- |
- |// Convenience macros for TOC handling.
--|.if TOC
-+|.if OPD or ELFV2
- |// Linker needs a TOC patch area for every external call relocation.
--|.macro blex, target; bl extern target@plt; nop; .endmacro
-+|.macro blex, target; bl extern target; nop; .endmacro
- |.macro .toc, a, b; a, b; .endmacro
-+|.else
-+|.macro blex, target; bl extern target@plt; .endmacro
-+|.macro .toc, a, b; .endmacro
-+|.endif
-+|.if OPD
-+|.macro .opd, a, b; a, b; .endmacro
- |.if P64
- |.define TOC_OFS, 8
- |.define ENV_OFS, 16
-@@ -58,13 +63,13 @@
- |.define TOC_OFS, 4
- |.define ENV_OFS, 8
- |.endif
--|.else // No TOC.
--|.macro blex, target; bl extern target@plt; .endmacro
--|.macro .toc, a, b; .endmacro
-+|.else // No OPD.
-+|.macro .opd, a, b; .endmacro
- |.endif
--|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro
-+|.macro .opdenv, a, b; .if OPDENV; a, b; .endif; .endmacro
- |
- |.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro
-+|.macro .elfv2, a, b; .if ELFV2; a, b; .endif; .endmacro
- |
- |.macro andix., y, a, i
- |.if PPE
-@@ -75,29 +80,6 @@
- |.endif
- |.endmacro
- |
--|.macro clrso, reg
--|.if PPE
--| li reg, 0
--| mtxer reg
--|.else
--| mcrxr cr0
--|.endif
--|.endmacro
--|
--|.macro checkov, reg, noov
--|.if PPE
--| mfxer reg
--| add reg, reg, reg
--| cmpwi reg, 0
--| li reg, 0
--| mtxer reg
--| bgey noov
--|.else
--| mcrxr cr0
--| bley noov
--|.endif
--|.endmacro
--|
- |//-----------------------------------------------------------------------
- |
- |// Fixed register assignments for the interpreter.
-@@ -123,6 +105,7 @@
- |.define LREG, r18 // Register holding lua_State (also in SAVE_L).
- |.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
- |.define JGL, r31 // On-trace: global_State + 32768.
-+|.define BASEP4, r26 // Equal to BASE + 4
- |
- |// Constants for type-comparisons, stores and conversions. C callee-save.
- |.define TISNUM, r22
-@@ -159,6 +142,12 @@
- |.if FPU
- |.define FARG1, f1
- |.define FARG2, f2
-+|.define FARG3, f3
-+|.define FARG4, f4
-+|.define FARG5, f5
-+|.define FARG6, f6
-+|.define FARG7, f7
-+|.define FARG8, f8
- |.endif
- |
- |.define CRET1, r3
-@@ -166,6 +155,7 @@
- |
- |.define TOCREG, r2 // TOC register (only used by C code).
- |.define ENVREG, r11 // Environment pointer (nested C functions).
-+|.define FUNCREG, r12 // ELFv2 function pointer (overlaps RD)
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |.if GPR64
-@@ -199,6 +189,49 @@
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
- |
-+|.elif ELFV2
-+|
-+|// 392(sp) // \ 32 bit C frame info.
-+|.define SAVE_LR, 384(sp)
-+|.define SAVE_CR, 376(sp) // 64 bit CR save.
-+|.define CFRAME_SPACE, 368 // Delta for sp.
-+|// Back chain for sp: 368(sp) <-- sp entering interpreter
-+|.define SAVE_ERRF, 360(sp) // |
-+|.define SAVE_NRES, 356(sp) // |
-+|.define SAVE_L, 352(sp) // > Parameter save area.
-+|.define SAVE_PC, 348(sp) // |
-+|.define SAVE_MULTRES, 344(sp) // |
-+|.define SAVE_CFRAME, 336(sp) // / 64 bit C frame chain.
-+|.define SAVE_FPR_, 192 // .. 192+18*8: 64 bit FPR saves.
-+|.define SAVE_GPR_, 48 // .. 48+18*8: 64 bit GPR saves.
-+|.if ENDIAN_LE
-+|.define TMPD_HI, 44(sp)
-+|.define TMPD_LO, 40(sp)
-+|.define TONUM_HI, 36(sp)
-+|.define TONUM_LO, 32(sp)
-+|.else
-+|.define TMPD_LO, 44(sp)
-+|.define TMPD_HI, 40(sp)
-+|.define TONUM_LO, 36(sp)
-+|.define TONUM_HI, 32(sp)
-+|.endif
-+|.define SAVE_TOC, 24(sp) // TOC save area.
-+|// Next frame lr: 16(sp)
-+|// Next frame cr: 8(sp)
-+|// Back chain for sp: 0(sp) <-- sp while in interpreter
-+|
-+|.if ENDIAN_LE
-+|.define TMPD_BLO, 32(sp)
-+|.define TMPD, TMPD_LO
-+|.define TONUM_D, TONUM_LO
-+|.else
-+|.define TMPD_BLO, 39(sp)
-+|.define TMPD, TMPD_HI
-+|.define TONUM_D, TONUM_HI
-+|.endif
-+|
-+|.define EXIT_OFFSET, 32
-+|
- |.else
- |
- |// 508(sp) // \ 32 bit C frame info.
-@@ -209,23 +242,39 @@
- |.define SAVE_MULTRES, 456(sp) // |
- |.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain.
- |.define SAVE_LR, 416(sp)
-+|.define SAVE_CR, 408(sp) // 64 bit CR save.
- |.define CFRAME_SPACE, 400 // Delta for sp.
- |// Back chain for sp: 400(sp) <-- sp entering interpreter
- |.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves.
- |.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves.
- |// 48(sp) // Callee parameter save area (ABI mandated).
- |.define SAVE_TOC, 40(sp) // TOC save area.
-+|.if ENDIAN_LE
-+|.define TMPD_HI, 36(sp) // \ Link editor temp (ABI mandated).
-+|.define TMPD_LO, 32(sp) // /
-+|.define TONUM_HI, 28(sp) // \ Compiler temp (ABI mandated).
-+|.define TONUM_LO, 24(sp) // /
-+|.else
- |.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated).
- |.define TMPD_HI, 32(sp) // /
- |.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated).
- |.define TONUM_HI, 24(sp) // /
-+|.endif
- |// Next frame lr: 16(sp)
--|.define SAVE_CR, 8(sp) // 64 bit CR save.
-+|// Next frame cr: 8(sp)
- |// Back chain for sp: 0(sp) <-- sp while in interpreter
- |
-+|.if ENDIAN_LE
-+|.define TMPD_BLO, 32(sp)
-+|.define TMPD, TMPD_LO
-+|.define TONUM_D, TONUM_LO
-+|.else
- |.define TMPD_BLO, 39(sp)
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
-+|.endif
-+|
-+|.define EXIT_OFFSET, 112
- |
- |.endif
- |.else
-@@ -250,10 +299,17 @@
- |.define SAVE_MULTRES, 28(sp)
- |.define UNUSED1, 24(sp)
- |.if FPU
-+|.if ENDIAN_LE
-+|.define TMPD_HI, 20(sp)
-+|.define TMPD_LO, 16(sp)
-+|.define TONUM_HI, 12(sp)
-+|.define TONUM_LO, 8(sp)
-+|.else
- |.define TMPD_LO, 20(sp)
- |.define TMPD_HI, 16(sp)
- |.define TONUM_LO, 12(sp)
- |.define TONUM_HI, 8(sp)
-+|.endif
- |.else
- |.define SFSAVE_4, 20(sp)
- |.define SFSAVE_3, 16(sp)
-@@ -264,10 +320,22 @@
- |// Back chain for sp: 0(sp) <-- sp while in interpreter
- |
- |.if FPU
-+|.if ENDIAN_LE
-+|.define TMPD_BLO, 16(sp)
-+|.define TMPD, TMPD_LO
-+|.define TONUM_D, TONUM_LO
-+|.else
- |.define TMPD_BLO, 23(sp)
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
- |.endif
-+|.else
-+|.define TMPD_BLO, 23(sp)
-+|.define TMPD, TMPD_HI
-+|.define TONUM_D, TONUM_HI
-+|.endif
-+|
-+|.define EXIT_OFFSET, 16
- |
- |.endif
- |
-@@ -384,8 +452,35 @@
- |//-----------------------------------------------------------------------
- |
- |// Access to frame relative to BASE.
-+|.if ENDIAN_LE
-+|.define FRAME_PC, -4
-+|.define FRAME_FUNC, -8
-+|.define FRAME_CONTPC, -12
-+|.define FRAME_CONTRET, -16
-+|.define WORD_LO, 0
-+|.define WORD_HI, 4
-+|.define WORD_BLO, 0
-+|.define BASE_LO, BASE
-+|.define BASE_HI, BASEP4
-+|.macro lwzux2, hi, lo, base, idx
-+| lwzux lo, base, idx
-+| lwz hi, 4(base)
-+|.endmacro
-+|.else
- |.define FRAME_PC, -8
- |.define FRAME_FUNC, -4
-+|.define FRAME_CONTPC, -16
-+|.define FRAME_CONTRET, -12
-+|.define WORD_LO, 4
-+|.define WORD_HI, 0
-+|.define WORD_BLO, 7
-+|.define BASE_LO, BASEP4
-+|.define BASE_HI, BASE
-+|.macro lwzux2, hi, lo, base, idx
-+| lwzux hi, base, idx
-+| lwz lo, 4(base)
-+|.endmacro
-+|.endif
- |
- |// Instruction decode.
- |.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
-@@ -446,6 +541,7 @@
- |// Call decode and dispatch.
- |.macro ins_callt
- | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
-+| addi BASEP4, BASE, 4
- | lwz PC, LFUNC:RB->pc
- | lwz INS, 0(PC)
- | addi PC, PC, 4
-@@ -538,7 +634,12 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
- | mr BASE, TMP2 // Restore caller base.
- | // Prepending may overwrite the pcall frame, so do it at the end.
-- | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
-+ | .if ENDIAN_LE
-+ | addi RA, RA, -8
-+ | stw TMP1, WORD_HI(RA) // Prepend true to results.
-+ | .else
-+ | stwu TMP1, -8(RA) // Prepend true to results.
-+ | .endif
- |
- |->vm_returnc:
- | addi RD, RD, 8 // RD = (nresults+1)*8.
-@@ -604,7 +705,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz TMP1, L->maxstack
- | cmplw BASE, TMP1
- | bge >8
-- | stw TISNIL, 0(BASE)
-+ | stw TISNIL, WORD_HI(BASE)
- | addi RD, RD, 8
- | addi BASE, BASE, 8
- | b <2
-@@ -655,7 +756,12 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_unwind_ff_eh: // Landing pad for external unwinder.
- | lwz L, SAVE_L
- | .toc ld TOCREG, SAVE_TOC
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp BASE, L->base
- | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
-@@ -670,7 +776,7 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, -8(BASE) // Results start at BASE-8.
- | .FPU stw TMP3, TMPD
- | addi DISPATCH, DISPATCH, GG_G2DISP
-- | stw TMP1, 0(RA) // Prepend false to error message.
-+ | stw TMP1, WORD_HI(RA) // Prepend false to error message.
- | li RD, 16 // 2 results: false + error message.
- | st_vmstate
- | .FPU lfs TONUM, TMPD
-@@ -731,7 +837,12 @@ static void build_subroutines(BuildCtx *ctx)
- | stw L, DISPATCH_GL(cur_L)(DISPATCH)
- | mr RA, BASE
- | lp BASE, L->base
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp TMP1, L->top
- | lwz PC, FRAME_PC(BASE)
- | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-@@ -781,7 +892,12 @@ static void build_subroutines(BuildCtx *ctx)
- |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
- | stw L, DISPATCH_GL(cur_L)(DISPATCH)
- | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp TMP1, L->top
- | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | add PC, PC, BASE
-@@ -801,8 +917,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_call_dispatch:
- | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
-- | lwz TMP0, FRAME_PC(BASE)
-- | lwz LFUNC:RB, FRAME_FUNC(BASE)
-+ | lwz TMP0, WORD_HI-8(BASE)
-+ | lwz LFUNC:RB, WORD_LO-8(BASE)
- | checkfunc TMP0; bne ->vmeta_call
- |
- |->vm_call_dispatch_f:
-@@ -821,7 +937,9 @@ static void build_subroutines(BuildCtx *ctx)
- | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
- | lp TMP1, L->cframe
- | addi DISPATCH, DISPATCH, GG_G2DISP
-- | .toc lp CARG4, 0(CARG4)
-+ | .opd lp TOCREG, TOC_OFS(CARG4)
-+ | .opdenv lp ENVREG, ENV_OFS(CARG4)
-+ | .opd lp CARG4, 0(CARG4)
- | li TMP2, 0
- | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
- | stw TMP2, SAVE_ERRF // No error function.
-@@ -829,7 +947,9 @@ static void build_subroutines(BuildCtx *ctx)
- | stp sp, L->cframe // Add our C frame to cframe chain.
- | stw L, DISPATCH_GL(cur_L)(DISPATCH)
- | mtctr CARG4
-+ | .elfv2 mr FUNCREG, CARG4
- | bctrl // (lua_State *L, lua_CFunction func, void *ud)
-+ | .toc lp TOCREG, SAVE_TOC
- |.if PPE
- | mr BASE, CRET1
- | cmpwi CRET1, 0
-@@ -851,20 +971,27 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_dispatch:
- | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
-- | lwz TMP0, -12(BASE) // Continuation.
-+ | lwz TMP0, FRAME_CONTRET(BASE) // Continuation.
- | mr RB, BASE
- | mr BASE, TMP2 // Restore caller BASE.
- | lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
- |.if FFI
- | cmplwi TMP0, 1
- |.endif
-- | lwz PC, -16(RB) // Restore PC from [cont|PC].
-- | subi TMP2, RD, 8
-+ | lwz PC, FRAME_CONTPC(RB) // Restore PC from [cont|PC].
-+ | addi BASEP4, BASE, 4
-+ | addi TMP2, RD, WORD_HI-8
- | lwz TMP1, LFUNC:TMP1->pc
- | stwx TISNIL, RA, TMP2 // Ensure one valid arg.
-+ |.if P64
-+ | ld TMP3, 0(DISPATCH)
-+ |.endif
- |.if FFI
- | ble >1
- |.endif
-+ |.if P64
-+ | add TMP0, TMP0, TMP3
-+ |.endif
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // BASE = base, RA = resultptr, RB = meta base
- | mtctr TMP0
-@@ -915,20 +1042,20 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TSTR
- | decode_RB8 RB, INS
-- | stw STR:RC, 4(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
- | add CARG2, BASE, RB
-- | stw TMP0, 0(CARG3)
-+ | stw TMP0, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tgets:
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TTAB
-- | stw TAB:RB, 4(CARG2)
-+ | stw TAB:RB, WORD_LO(CARG2)
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
-- | stw TMP0, 0(CARG2)
-+ | stw TMP0, WORD_HI(CARG2)
- | li TMP1, LJ_TSTR
-- | stw STR:RC, 4(CARG3)
-- | stw TMP1, 0(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
-+ | stw TMP1, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tgetb: // TMP0 = index
-@@ -939,8 +1066,8 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB
- |.if DUALNUM
-- | stw TISNUM, 0(CARG3)
-- | stw TMP0, 4(CARG3)
-+ | stw TISNUM, WORD_HI(CARG3)
-+ | stw TMP0, WORD_LO(CARG3)
- |.else
- | stfd f0, 0(CARG3)
- |.endif
-@@ -978,7 +1105,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // BASE = base, L->top = new base, stack = cont/func/t/k
- | subfic TMP1, BASE, FRAME_CONT
- | lp BASE, L->top
-- | stw PC, -16(BASE) // [cont|PC]
-+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 16 // 2 args for func(t, k).
-@@ -997,7 +1124,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | b ->BC_TGETR_Z
- |1:
-- | stwx TISNIL, BASE, RA
-+ | stwx TISNIL, BASE_HI, RA
- | b ->cont_nop
- |
- |//-----------------------------------------------------------------------
-@@ -1006,20 +1133,20 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TSTR
- | decode_RB8 RB, INS
-- | stw STR:RC, 4(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
- | add CARG2, BASE, RB
-- | stw TMP0, 0(CARG3)
-+ | stw TMP0, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tsets:
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TTAB
-- | stw TAB:RB, 4(CARG2)
-+ | stw TAB:RB, WORD_LO(CARG2)
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
-- | stw TMP0, 0(CARG2)
-+ | stw TMP0, WORD_HI(CARG2)
- | li TMP1, LJ_TSTR
-- | stw STR:RC, 4(CARG3)
-- | stw TMP1, 0(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
-+ | stw TMP1, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tsetb: // TMP0 = index
-@@ -1030,8 +1157,8 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB
- |.if DUALNUM
-- | stw TISNUM, 0(CARG3)
-- | stw TMP0, 4(CARG3)
-+ | stw TISNUM, WORD_HI(CARG3)
-+ | stw TMP0, WORD_LO(CARG3)
- |.else
- | stfd f0, 0(CARG3)
- |.endif
-@@ -1070,7 +1197,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
- | subfic TMP1, BASE, FRAME_CONT
- | lp BASE, L->top
-- | stw PC, -16(BASE) // [cont|PC]
-+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 24 // 3 args for func(t, k, v)
-@@ -1100,17 +1227,9 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_comp:
- | mr CARG1, L
- | subi PC, PC, 4
-- |.if DUALNUM
-- | mr CARG2, RA
-- |.else
- | add CARG2, BASE, RA
-- |.endif
- | stw PC, SAVE_PC
-- |.if DUALNUM
-- | mr CARG3, RD
-- |.else
- | add CARG3, BASE, RD
-- |.endif
- | stp BASE, L->base
- | decode_OP1 CARG4, INS
- | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
-@@ -1147,7 +1266,7 @@ static void build_subroutines(BuildCtx *ctx)
- | b ->cont_nop
- |
- |->cont_condt: // RA = resultptr
-- | lwz TMP0, 0(RA)
-+ | lwz TMP0, WORD_HI(RA)
- | .gpr64 extsw TMP0, TMP0
- | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true.
- | subfe CRET1, CRET1, CRET1
-@@ -1155,7 +1274,7 @@ static void build_subroutines(BuildCtx *ctx)
- | b <4
- |
- |->cont_condf: // RA = resultptr
-- | lwz TMP0, 0(RA)
-+ | lwz TMP0, WORD_HI(RA)
- | .gpr64 extsw TMP0, TMP0
- | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false.
- | subfe CRET1, CRET1, CRET1
-@@ -1207,8 +1326,8 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- |
- |->vmeta_unm:
-- | mr CARG3, RD
-- | mr CARG4, RD
-+ | add CARG3, BASE, RD
-+ | add CARG4, BASE, RD
- | b >1
- |
- |->vmeta_arith_vn:
-@@ -1243,7 +1362,7 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_binop:
- | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
- | sub TMP1, CRET1, BASE
-- | stw PC, -16(CRET1) // [cont|PC]
-+ | stw PC, FRAME_CONTPC(CRET1) // [cont|PC]
- | mr TMP2, BASE
- | addi PC, TMP1, FRAME_CONT
- | mr BASE, CRET1
-@@ -1254,7 +1373,7 @@ static void build_subroutines(BuildCtx *ctx)
- #if LJ_52
- | mr SAVE0, CARG1
- #endif
-- | mr CARG2, RD
-+ | add CARG2, BASE, RD
- | stp BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
-@@ -1331,25 +1450,25 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_1, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lwz CARG1, 4(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz CARG1, WORD_LO(BASE)
- | blt ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_2, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-- | lwz CARG4, 8(BASE)
-- | lwz CARG1, 4(BASE)
-- | lwz CARG2, 12(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz CARG4, WORD_HI+8(BASE)
-+ | lwz CARG1, WORD_LO(BASE)
-+ | lwz CARG2, WORD_LO+8(BASE)
- | blt ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_n, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
- |.if FPU
- | lfd FARG1, 0(BASE)
- |.else
-@@ -1362,15 +1481,15 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_nn, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
-- | lwz CARG1, 0(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
- |.if FPU
- | lfd FARG1, 0(BASE)
-- | lwz CARG3, 8(BASE)
-+ | lwz CARG3, WORD_HI+8(BASE)
- | lfd FARG2, 8(BASE)
- |.else
-- | lwz CARG2, 4(BASE)
-- | lwz CARG3, 8(BASE)
-- | lwz CARG4, 12(BASE)
-+ | lwz CARG2, WORD_LO(BASE)
-+ | lwz CARG3, WORD_HI+8(BASE)
-+ | lwz CARG4, WORD_LO+8(BASE)
- |.endif
- | blt ->fff_fallback
- | checknum CARG1; bge ->fff_fallback
-@@ -1393,13 +1512,24 @@ static void build_subroutines(BuildCtx *ctx)
- | cmplw cr1, CARG3, TMP1
- | lwz PC, FRAME_PC(BASE)
- | bge cr1, ->fff_fallback
-- | stw CARG3, 0(RA)
-+ | stw CARG3, WORD_HI(RA)
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
-+ |.if not ENDIAN_LE
- | addi TMP1, BASE, 8
- | add TMP2, RA, NARGS8:RC
-- | stw CARG1, 4(RA)
-+ |.endif
-+ | stw CARG1, WORD_LO(RA)
- | beq ->fff_res // Done if exactly 1 argument.
-+ |.if ENDIAN_LE
-+ | li TMP1, 8
-+ | subi RC, RC, 8
-+ |.endif
- |1:
-+ |.if ENDIAN_LE
-+ | cmplw TMP1, RC
-+ | lfdx f0, BASE, TMP1
-+ | stfdx f0, RA, TMP1
-+ |.else
- | cmplw TMP1, TMP2
- |.if FPU
- | lfd f0, 0(TMP1)
-@@ -1410,20 +1540,35 @@ static void build_subroutines(BuildCtx *ctx)
- | stw CARG1, -8(TMP1)
- | stw CARG2, -4(TMP1)
- |.endif
-+ |.endif
- | addi TMP1, TMP1, 8
- | bney <1
- | b ->fff_res
- |
- |.ffunc type
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
- | blt ->fff_fallback
- | .gpr64 extsw CARG1, CARG1
-+ |.if P64
-+ | li TMP0, LJ_TNUMX
-+ | srawi TMP3, CARG1, 15
-+ | subfc TMP1, TMP0, CARG1
-+ |.else
- | subfc TMP0, TISNUM, CARG1
-- | subfe TMP2, CARG1, CARG1
-+ |.endif
-+ | subfe TMP2, CARG1, CARG1
-+ |.if P64
-+ | cmpwi TMP3, -2
-+ | orc TMP1, TMP2, TMP1
-+ | subf TMP1, TMP0, TMP1
-+ | beq >1
-+ |.else
- | orc TMP1, TMP2, TMP0
-- | addi TMP1, TMP1, ~LJ_TISNUM+1
-+ | subf TMP1, TISNUM, TMP1
-+ |.endif
- | slwi TMP1, TMP1, 3
-+ |2:
- |.if FPU
- | la TMP2, CFUNC:RB->upvalue
- | lfdx FARG1, TMP2, TMP1
-@@ -1433,6 +1578,11 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
- |.endif
- | b ->fff_resn
-+ |.if P64
-+ |1:
-+ | li TMP1, ~LJ_TLIGHTUD<<3
-+ | b <2
-+ |.endif
- |
- |//-- Base library: getters and setters ---------------------------------
- |
-@@ -1455,10 +1605,10 @@ static void build_subroutines(BuildCtx *ctx)
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |3: // Rearranged logic, because we expect _not_ to find the key.
-- | lwz CARG4, NODE:TMP2->key
-- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
-- | lwz CARG2, NODE:TMP2->val
-- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
-+ | lwz CARG4, WORD_HI+offsetof(Node, key)(NODE:TMP2)
-+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
-+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
-+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
- | checkstr CARG4; bne >4
- | cmpw TMP0, STR:RC; beq >5
- |4:
-@@ -1476,14 +1626,33 @@ static void build_subroutines(BuildCtx *ctx)
- |6:
- | cmpwi CARG3, LJ_TUDATA; beq <1
- | .gpr64 extsw CARG3, CARG3
-+ |.if P64
-+ | li TMP0, LJ_TNUMX
-+ | srawi TMP3, CARG3, 15
-+ | subfc TMP1, TMP0, CARG3
-+ |.else
- | subfc TMP0, TISNUM, CARG3
-+ |.endif
- | subfe TMP2, CARG3, CARG3
-+ |.if P64
-+ | cmpwi TMP3, -2
-+ | orc TMP1, TMP2, TMP1
-+ | subf TMP1, TMP0, TMP1
-+ | beq >7
-+ |.else
- | orc TMP1, TMP2, TMP0
-- | addi TMP1, TMP1, ~LJ_TISNUM+1
-+ | subf TMP1, TISNUM, TMP1
-+ |.endif
- | slwi TMP1, TMP1, 2
-+ |8:
- | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
- | lwzx TAB:CARG1, TMP2, TMP1
- | b <2
-+ |.if P64
-+ |7:
-+ | li TMP1, ~LJ_TLIGHTUD<<2
-+ | b <8
-+ |.endif
- |
- |.ffunc_2 setmetatable
- | // Fast path: no mt for table yet and not clearing the mt.
-@@ -1501,8 +1670,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc rawget
- | cmplwi NARGS8:RC, 16
-- | lwz CARG4, 0(BASE)
-- | lwz TAB:CARG2, 4(BASE)
-+ | lwz CARG4, WORD_HI(BASE)
-+ | lwz TAB:CARG2, WORD_LO(BASE)
- | blt ->fff_fallback
- | checktab CARG4; bne ->fff_fallback
- | la CARG3, 8(BASE)
-@@ -1522,11 +1691,11 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc tonumber
- | // Only handles the number case inline (without a base argument).
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
- |.if FPU
- | lfd FARG1, 0(BASE)
- |.else
-- | lwz CARG2, 4(BASE)
-+ | lwz CARG2, WORD_LO(BASE)
- |.endif
- | bne ->fff_fallback // Exactly one argument.
- | checknum CARG1; bgt ->fff_fallback
-@@ -1561,10 +1730,15 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc next
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-- | lwz TAB:CARG2, 4(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
-+ | lwz TAB:CARG2, WORD_LO(BASE)
- | blt ->fff_fallback
-+ |.if ENDIAN_LE
-+ | add TMP1, BASE, NARGS8:RC
-+ | stw TISNIL, WORD_HI(TMP1) // Set missing 2nd arg to nil.
-+ |.else
- | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
-+ |.endif
- | checktab CARG1
- | lwz PC, FRAME_PC(BASE)
- | bne ->fff_fallback
-@@ -1621,7 +1795,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | la RA, -8(BASE)
- #endif
-- | stw TISNIL, 8(BASE)
-+ | stw TISNIL, 8+WORD_HI(BASE)
- | li RD, (3+1)*8
- |.if FPU
- | stfd f0, 0(RA)
-@@ -1633,11 +1807,11 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc ipairs_aux
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-- | lwz TAB:CARG1, 4(BASE)
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz TAB:CARG1, WORD_LO(BASE)
-+ | lwz CARG4, 8+WORD_HI(BASE)
- |.if DUALNUM
-- | lwz TMP2, 12(BASE)
-+ | lwz TMP2, 8+WORD_LO(BASE)
- |.else
- | lfd FARG2, 8(BASE)
- |.endif
-@@ -1666,20 +1840,22 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, -8(BASE)
- | cmplw TMP0, TMP2
- |.if DUALNUM
-- | stw TISNUM, 0(RA)
-+ | stw TISNUM, WORD_HI(RA)
- | slwi TMP3, TMP2, 3
-- | stw TMP2, 4(RA)
-+ | stw TMP2, WORD_LO(RA)
- |.else
- | slwi TMP3, TMP2, 3
- | stfd FARG2, 0(RA)
- |.endif
- | ble >2 // Not in array part?
- |.if FPU
-- | lwzx TMP2, TMP1, TMP3
-- | lfdx f0, TMP1, TMP3
-+ | lfdux f0, TMP1, TMP3
-+ |.if ENDIAN_LE
-+ | lwz TMP2, WORD_HI(TMP1)
-+ |.endif
- |.else
- | lwzux TMP2, TMP1, TMP3
-- | lwz TMP3, 4(TMP1)
-+ | lwz TMP3, WORD_HI(TMP1)
- |.endif
- |1:
- | checknil TMP2
-@@ -1704,7 +1880,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cmplwi CRET1, 0
- | li RD, (0+1)*8
- | beq ->fff_res
-- | lwz TMP2, 0(CRET1)
-+ | lwz TMP2, WORD_HI(CRET1)
- |.if FPU
- | lfd f0, 0(CRET1)
- |.else
-@@ -1737,11 +1913,11 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, -8(BASE)
- #endif
- |.if DUALNUM
-- | stw TISNUM, 8(BASE)
-+ | stw TISNUM, 8+WORD_HI(BASE)
- |.else
-- | stw ZERO, 8(BASE)
-+ | stw ZERO, 8+WORD_HI(BASE)
- |.endif
-- | stw ZERO, 12(BASE)
-+ | stw ZERO, 8+WORD_LO(BASE)
- | li RD, (3+1)*8
- |.if FPU
- | stfd f0, 0(RA)
-@@ -1767,7 +1943,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc xpcall
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 8(BASE)
-+ | lwz CARG3, 8+WORD_HI(BASE)
- |.if FPU
- | lfd FARG2, 8(BASE)
- | lfd FARG1, 0(BASE)
-@@ -1898,7 +2074,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.if resume
- | li TMP1, LJ_TTRUE
- | la RA, -8(BASE)
-- | stw TMP1, -8(BASE) // Prepend true to results.
-+ | stw TMP1, WORD_HI-8(BASE) // Prepend true to results.
- | addi RD, RD, 16
- |.else
- | mr RA, BASE
-@@ -1923,7 +2099,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
- | li RD, (2+1)*8
-- | stw TMP1, -8(BASE) // Prepend false to results.
-+ | stw TMP1, WORD_HI-8(BASE) // Prepend false to results.
- | la RA, -8(BASE)
- |.if FPU
- | stfd f0, 0(BASE) // Copy error message.
-@@ -1981,8 +2157,8 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_resi:
- | lwz PC, FRAME_PC(BASE)
- | la RA, -8(BASE)
-- | stw TISNUM, -8(BASE)
-- | stw CRET1, -4(BASE)
-+ | stw TISNUM, WORD_HI-8(BASE)
-+ | stw CRET1, WORD_LO-8(BASE)
- | b ->fff_res1
- |1:
- | lus CARG3, 0x41e0 // 2^31.
-@@ -1997,9 +2173,9 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_restv:
- | // CARG3/CARG1 = TValue result.
- | lwz PC, FRAME_PC(BASE)
-- | stw CARG3, -8(BASE)
-+ | stw CARG3, WORD_HI-8(BASE)
- | la RA, -8(BASE)
-- | stw CARG1, -4(BASE)
-+ | stw CARG1, WORD_LO-8(BASE)
- |->fff_res1:
- | // RA = results, PC = return.
- | li RD, (1+1)*8
-@@ -2017,10 +2193,11 @@ static void build_subroutines(BuildCtx *ctx)
- | ins_next1
- | // Adjust BASE. KBASE is assumed to be set for the calling frame.
- | sub BASE, RA, TMP0
-+ | addi BASEP4, BASE, 4
- | ins_next2
- |
- |6: // Fill up results with nil.
-- | subi TMP1, RD, 8
-+ | addi TMP1, RD, WORD_HI-8
- | addi RD, RD, 8
- | stwx TISNIL, RA, TMP1
- | b <5
-@@ -2138,7 +2315,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc math_log
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- | checknum CARG1; bge ->fff_fallback
- |.if FPU
-@@ -2167,20 +2344,20 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- |.ffunc math_ldexp
- | cmplwi NARGS8:RC, 16
-- | lwz TMP0, 0(BASE)
-+ | lwz TMP0, WORD_HI(BASE)
- |.if FPU
- | lfd FARG1, 0(BASE)
- |.else
-- | lwz CARG1, 0(BASE)
-- | lwz CARG2, 4(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
-+ | lwz CARG2, WORD_LO(BASE)
- |.endif
-- | lwz TMP1, 8(BASE)
-+ | lwz TMP1, WORD_HI+8(BASE)
- |.if GPR64
-- | lwz CARG2, 12(BASE)
-+ | lwz CARG2, WORD_LO+8(BASE)
- |.elif FPU
-- | lwz CARG1, 12(BASE)
-+ | lwz CARG1, WORD_LO+8(BASE)
- |.else
-- | lwz CARG3, 12(BASE)
-+ | lwz CARG3, WORD_LO+8(BASE)
- |.endif
- | blt ->fff_fallback
- | checknum TMP0; bge ->fff_fallback
-@@ -2219,8 +2396,8 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | li RD, (2+1)*8
- |.if DUALNUM
-- | stw TISNUM, 8(RA)
-- | stw TMP1, 12(RA)
-+ | stw TISNUM, WORD_HI+8(RA)
-+ | stw TMP1, WORD_LO+8(RA)
- |.else
- | stfd FARG2, 8(RA)
- |.endif
-@@ -2254,9 +2431,9 @@ static void build_subroutines(BuildCtx *ctx)
- | add SAVE1, BASE, NARGS8:RC
- | bne >4
- |1: // Handle integers.
-- | lwz CARG4, 0(SAVE0)
-+ | lwz CARG4, WORD_HI(SAVE0)
- | cmplw cr1, SAVE0, SAVE1
-- | lwz CARG2, 4(SAVE0)
-+ | lwz CARG2, WORD_LO(SAVE0)
- | bge cr1, ->fff_resi
- | checknum CARG4
- | xoris TMP0, CARG1, 0x8000
-@@ -2297,7 +2474,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | bge ->fff_fallback
- |5: // Handle numbers.
-- | lwz CARG3, 0(SAVE0)
-+ | lwz CARG3, WORD_HI(SAVE0)
- | cmplw cr1, SAVE0, SAVE1
- |.if FPU
- | lfd FARG2, 0(SAVE0)
-@@ -2336,7 +2513,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | b <5
- |7: // Convert integer to number and continue above.
-- | lwz CARG3, 4(SAVE0)
-+ | lwz CARG3, WORD_LO(SAVE0)
- | bne ->fff_fallback
- |.if FPU
- | tonum_i FARG2, CARG3
-@@ -2348,7 +2525,12 @@ static void build_subroutines(BuildCtx *ctx)
- | .ffunc_n name
- | li TMP1, 8
- |1:
-+ |.if ENDIAN_LE
-+ | add CARG2, BASE, TMP1
-+ | lwz CARG2, WORD_HI(CARG2)
-+ |.else
- | lwzx CARG2, BASE, TMP1
-+ |.endif
- | lfdx FARG2, BASE, TMP1
- | cmplw cr1, TMP1, NARGS8:RC
- | checknum CARG2
-@@ -2372,8 +2554,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc string_byte // Only handle the 1-arg case here.
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lwz STR:CARG1, 4(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz STR:CARG1, WORD_LO(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- | checkstr CARG3
- | bne ->fff_fallback
-@@ -2404,12 +2586,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc string_char // Only handle the 1-arg case here.
- | ffgccheck
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
- |.if DUALNUM
-- | lwz TMP0, 4(BASE)
-+ | lwz TMP0, WORD_LO(BASE)
- | bne ->fff_fallback // Exactly 1 argument.
- | checknum CARG3; bne ->fff_fallback
-- | la CARG2, 7(BASE)
-+ | la CARG2, WORD_BLO(BASE)
- |.else
- | lfd FARG1, 0(BASE)
- | bne ->fff_fallback // Exactly 1 argument.
-@@ -2433,16 +2615,16 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc string_sub
- | ffgccheck
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 16(BASE)
-+ | lwz CARG3, WORD_HI+16(BASE)
- |.if not DUALNUM
- | lfd f0, 16(BASE)
- |.endif
-- | lwz TMP0, 0(BASE)
-- | lwz STR:CARG1, 4(BASE)
-+ | lwz TMP0, WORD_HI(BASE)
-+ | lwz STR:CARG1, WORD_LO(BASE)
- | blt ->fff_fallback
-- | lwz CARG2, 8(BASE)
-+ | lwz CARG2, WORD_HI+8(BASE)
- |.if DUALNUM
-- | lwz TMP1, 12(BASE)
-+ | lwz TMP1, WORD_LO+8(BASE)
- |.else
- | lfd f1, 8(BASE)
- |.endif
-@@ -2450,7 +2632,7 @@ static void build_subroutines(BuildCtx *ctx)
- | beq >1
- |.if DUALNUM
- | checknum CARG3
-- | lwz TMP2, 20(BASE)
-+ | lwz TMP2, WORD_LO+16(BASE)
- | bne ->fff_fallback
- |1:
- | checknum CARG2; bne ->fff_fallback
-@@ -2506,8 +2688,8 @@ static void build_subroutines(BuildCtx *ctx)
- | .ffunc string_ .. name
- | ffgccheck
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lwz STR:CARG2, 4(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz STR:CARG2, WORD_LO(BASE)
- | blt ->fff_fallback
- | checkstr CARG3
- | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
-@@ -2545,10 +2727,10 @@ static void build_subroutines(BuildCtx *ctx)
- | addi SAVE0, BASE, 8
- | add SAVE1, BASE, NARGS8:RC
- |1:
-- | lwz CARG4, 0(SAVE0)
-+ | lwz CARG4, WORD_HI(SAVE0)
- | cmplw cr1, SAVE0, SAVE1
- |.if DUALNUM
-- | lwz CARG2, 4(SAVE0)
-+ | lwz CARG2, WORD_LO(SAVE0)
- |.else
- | lfd FARG1, 0(SAVE0)
- |.endif
-@@ -2715,20 +2897,23 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->fff_fallback: // Call fast function fallback handler.
- | // BASE = new base, RB = CFUNC, RC = nargs*8
-- | lp TMP3, CFUNC:RB->f
-+ | lp FUNCREG, CFUNC:RB->f
- | add TMP1, BASE, NARGS8:RC
- | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
- | addi TMP0, TMP1, 8*LUA_MINSTACK
- | lwz TMP2, L->maxstack
- | stw PC, SAVE_PC // Redundant (but a defined value).
-- | .toc lp TMP3, 0(TMP3)
-+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
-+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
-+ | .opd lp FUNCREG, 0(FUNCREG)
- | cmplw TMP0, TMP2
- | stp BASE, L->base
- | stp TMP1, L->top
- | mr CARG1, L
- | bgt >5 // Need to grow stack.
-- | mtctr TMP3
-+ | mtctr FUNCREG
- | bctrl // (lua_State *L)
-+ | .toc lp TOCREG, SAVE_TOC
- | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
- | lp BASE, L->base
- | cmpwi CRET1, 0
-@@ -2830,6 +3015,7 @@ static void build_subroutines(BuildCtx *ctx)
- |3:
- | lp BASE, L->base
- |4: // Re-dispatch to static ins.
-+ | addi BASEP4, BASE, 4
- | lwz INS, -4(PC)
- | decode_OPP TMP1, INS
- | decode_RB8 RB, INS
-@@ -2843,7 +3029,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_hook: // Continue from hook yield.
- | addi PC, PC, 4
-- | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
-+ | lwz MULTRES, WORD_LO-24(RB) // Restore MULTRES for *M ins.
- | b <4
- |
- |->vm_hotloop: // Hot loop counter underflow.
-@@ -2885,6 +3071,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lp BASE, L->base
- | lp TMP0, L->top
- | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
-+ | addi BASEP4, BASE, 4
- | sub NARGS8:RC, TMP0, BASE
- | add RA, BASE, RA
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
-@@ -2896,7 +3083,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.if JIT
- | // RA = resultptr, RB = meta base
- | lwz INS, -4(PC)
-- | lwz TRACE:TMP2, -20(RB) // Save previous trace.
-+ | lwz TRACE:TMP2, WORD_LO-24(RB) // Save previous trace.
- | addic. TMP1, MULTRES, -8
- | decode_RA8 RC, INS // Call base.
- | beq >2
-@@ -2942,10 +3129,16 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG2, PC
- | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
- | lp BASE, L->base
-+ | addi BASEP4, BASE, 4
- | b ->cont_nop
- |
- |9:
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ | stwx TISNIL, BASEP4, RC
-+ |.else
- | stwx TISNIL, BASE, RC
-+ |.endif
- | addi RC, RC, 8
- | b <3
- |.endif
-@@ -2960,6 +3153,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
- | lp BASE, L->base
- | subi PC, PC, 4
-+ | addi BASEP4, BASE, 4
- | b ->cont_nop
- #endif
- |
-@@ -2969,40 +3163,73 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro savex_, a, b, c, d
- |.if FPU
-- | stfd f..a, 16+a*8(sp)
-- | stfd f..b, 16+b*8(sp)
-- | stfd f..c, 16+c*8(sp)
-- | stfd f..d, 16+d*8(sp)
-+ | stfd f..a, EXIT_OFFSET+a*8(sp)
-+ | stfd f..b, EXIT_OFFSET+b*8(sp)
-+ | stfd f..c, EXIT_OFFSET+c*8(sp)
-+ | stfd f..d, EXIT_OFFSET+d*8(sp)
- |.endif
- |.endmacro
- |
-+ |.macro saver, a
-+ | stp r..a, EXIT_OFFSET+32*8+a*PSIZE(sp)
-+ |.endmacro
-+ |
- |->vm_exit_handler:
- |.if JIT
-- | addi sp, sp, -(16+32*8+32*4)
-- | stmw r2, 16+32*8+2*4(sp)
-+ | addi sp, TMP0, sp, -(EXIT_OFFSET+32*8+32*PSIZE)
-+ | saver 3 // CARG1
-+ | saver 4 // CARG2
-+ | saver 5 // CARG3
-+ | saver 17 // DISPATCH
- | addi DISPATCH, JGL, -GG_DISP2G-32768
- | li CARG2, ~LJ_VMST_EXIT
-- | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain.
-+ | lp CARG1, EXIT_OFFSET+32*8+32*PSIZE(sp) // Get stack chain.
- | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH)
-+ | saver 2
-+ | saver 6
-+ | saver 7
-+ | saver 8
-+ | saver 9
-+ | saver 10
-+ | saver 11
-+ | saver 12
-+ | saver 13
- | savex_ 0,1,2,3
-- | stw CARG1, 0(sp) // Store extended stack chain.
-- | clrso TMP1
-+ | stp CARG1, 0(sp) // Store extended stack chain.
-+
- | savex_ 4,5,6,7
-- | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp.
-+ | saver 14
-+ | saver 15
-+ | saver 16
-+ | saver 18
-+ | addi CARG2, sp, EXIT_OFFSET+32*8+32*PSIZE // Recompute original value of sp.
- | savex_ 8,9,10,11
-- | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP.
-+ | stp CARG2, EXIT_OFFSET+32*8+1*PSIZE(sp) // Store sp in RID_SP.
- | savex_ 12,13,14,15
- | mflr CARG3
- | li TMP1, 0
- | savex_ 16,17,18,19
-- | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP.
-+ | stw TMP1, EXIT_OFFSET+32*8+0*PSIZE(sp) // Clear RID_TMP.
- | savex_ 20,21,22,23
- | lhz CARG4, 2(CARG3) // Load trace number.
- | savex_ 24,25,26,27
- | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
- | savex_ 28,29,30,31
-+ | saver 19
-+ | saver 20
-+ | saver 21
-+ | saver 22
-+ | saver 23
-+ | saver 24
-+ | saver 25
-+ | saver 26
-+ | saver 27
-+ | saver 28
-+ | saver 29
-+ | saver 30
-+ | saver 31
- | sub CARG3, TMP0, CARG3 // Compute exit number.
-- | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
-+ | lwz BASE, DISPATCH_GL(jit_base)(DISPATCH)
- | srwi CARG3, CARG3, 2
- | stp L, DISPATCH_J(L)(DISPATCH)
- | subi CARG3, CARG3, 2
-@@ -3011,11 +3238,11 @@ static void build_subroutines(BuildCtx *ctx)
- | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
- | addi CARG1, DISPATCH, GG_DISP2J
- | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
-- | addi CARG2, sp, 16
-+ | addi CARG2, sp, EXIT_OFFSET
- | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
- | // Returns MULTRES (unscaled) or negated error code.
- | lp TMP1, L->cframe
-- | lwz TMP2, 0(sp)
-+ | lp TMP2, 0(sp)
- | lp BASE, L->base
- |.if GPR64
- | rldicr sp, TMP1, 0, 61
-@@ -3023,7 +3250,7 @@ static void build_subroutines(BuildCtx *ctx)
- | rlwinm sp, TMP1, 0, 0, 29
- |.endif
- | lwz PC, SAVE_PC // Get SAVE_PC.
-- | stw TMP2, 0(sp)
-+ | stp TMP2, 0(sp)
- | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield).
- | b >1
- |.endif
-@@ -3044,7 +3271,12 @@ static void build_subroutines(BuildCtx *ctx)
- | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // Setup type comparison constants.
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM
-+ |.endif
- | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | .FPU stw TMP3, TMPD
- | li ZERO, 0
-@@ -3064,14 +3296,14 @@ static void build_subroutines(BuildCtx *ctx)
- | decode_RA8 RA, INS
- | lpx TMP0, DISPATCH, TMP1
- | mtctr TMP0
-- | cmplwi TMP1, BC_FUNCF*4 // Function header?
-+ | cmplwi TMP1, BC_FUNCF*PSIZE // Function header?
- | bge >2
- | decode_RB8 RB, INS
- | decode_RD8 RD, INS
- | decode_RC8 RC, INS
- | bctr
- |2:
-- | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
-+ | cmplwi TMP1, (BC_FUNCC+2)*PSIZE // Fast function?
- | blt >3
- | // Check frame below fast function.
- | lwz TMP1, FRAME_PC(BASE)
-@@ -3081,7 +3313,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz TMP2, -4(TMP1)
- | decode_RA8 TMP0, TMP2
- | sub TMP1, BASE, TMP0
-- | lwz LFUNC:TMP2, -12(TMP1)
-+ | lwz LFUNC:TMP2, WORD_LO-16(TMP1)
- | lwz TMP1, LFUNC:TMP2->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- |3:
-@@ -3130,6 +3362,8 @@ static void build_subroutines(BuildCtx *ctx)
- | sfi2d CARG3, CARG4
- |
- |->vm_modi:
-+ | li TMP1, 0
-+ | mtxer TMP1
- | divwo. TMP0, CARG1, CARG2
- | bso >1
- |.if GPR64
-@@ -3148,7 +3382,8 @@ static void build_subroutines(BuildCtx *ctx)
- | cmpwi CARG2, 0
- | li CARG1, 0
- | beqlr
-- | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0.
-+ | // Clear SO for -2147483648 % -1 and return 0.
-+ | crxor 4*cr0+so, 4*cr0+so, 4*cr0+so
- | blr
- |
- |//-----------------------------------------------------------------------
-@@ -3161,10 +3396,18 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_cachesync:
- |.if JIT or FFI
- | // Compute start of first cache line and number of cache lines.
-+ | .if GPR64
-+ | rldicr CARG1, CARG1, 0, 58
-+ | .else
- | rlwinm CARG1, CARG1, 0, 0, 26
-+ | .endif
- | sub CARG2, CARG2, CARG1
- | addi CARG2, CARG2, 31
-+ | .if GPR64
-+ | srdi. CARG2, CARG2, 5
-+ | .else
- | rlwinm. CARG2, CARG2, 27, 5, 31
-+ | .endif
- | beqlr
- | mtctr CARG2
- | mr CARG3, CARG1
-@@ -3186,39 +3429,70 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- FFI helper functions -----------------------------------------------
- |//-----------------------------------------------------------------------
- |
-- |// Handler for callback functions. Callback slot number in r11, g in r12.
-+ |// Handler for callback functions.
-+ |// 32-bit: Callback slot number in r12, g in r11.
-+ |// 64-bit v1: Callback slot number in bits 47+ of r11, g in 0-46, TOC in r2.
-+ |// 64-bit v2: Callback slot number in bits 2-11 of r12, g in r11,
-+ |// vm_ffi_callback in r2.
- |->vm_ffi_callback:
- |.if FFI
- |.type CTSTATE, CTState, PC
-+ | .if OPD
-+ | rldicl r12, r11, 17, 47
-+ | rldicl r11, r11, 0, 17
-+ | .endif
-+ | .if ELFV2
-+ | rlwinm r12, r12, 30, 22, 31
-+ | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha
-+ | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l
-+ | .endif
- | saveregs
-- | lwz CTSTATE, GL:r12->ctype_state
-- | addi DISPATCH, r12, GG_G2DISP
-- | stw r11, CTSTATE->cb.slot
-- | stw r3, CTSTATE->cb.gpr[0]
-+ | lwz CTSTATE, GL:r11->ctype_state
-+ | addi DISPATCH, r11, GG_G2DISP
-+ | stw r12, CTSTATE->cb.slot
-+ | stp r3, CTSTATE->cb.gpr[0]
- | .FPU stfd f1, CTSTATE->cb.fpr[0]
-- | stw r4, CTSTATE->cb.gpr[1]
-+ | stp r4, CTSTATE->cb.gpr[1]
- | .FPU stfd f2, CTSTATE->cb.fpr[1]
-- | stw r5, CTSTATE->cb.gpr[2]
-+ | stp r5, CTSTATE->cb.gpr[2]
- | .FPU stfd f3, CTSTATE->cb.fpr[2]
-- | stw r6, CTSTATE->cb.gpr[3]
-+ | stp r6, CTSTATE->cb.gpr[3]
- | .FPU stfd f4, CTSTATE->cb.fpr[3]
-- | stw r7, CTSTATE->cb.gpr[4]
-+ | stp r7, CTSTATE->cb.gpr[4]
- | .FPU stfd f5, CTSTATE->cb.fpr[4]
-- | stw r8, CTSTATE->cb.gpr[5]
-+ | stp r8, CTSTATE->cb.gpr[5]
- | .FPU stfd f6, CTSTATE->cb.fpr[5]
-- | stw r9, CTSTATE->cb.gpr[6]
-+ | stp r9, CTSTATE->cb.gpr[6]
- | .FPU stfd f7, CTSTATE->cb.fpr[6]
-- | stw r10, CTSTATE->cb.gpr[7]
-+ | stp r10, CTSTATE->cb.gpr[7]
- | .FPU stfd f8, CTSTATE->cb.fpr[7]
-+ | .if GPR64
-+ | stfd f9, CTSTATE->cb.fpr[8]
-+ | stfd f10, CTSTATE->cb.fpr[9]
-+ | stfd f11, CTSTATE->cb.fpr[10]
-+ | stfd f12, CTSTATE->cb.fpr[11]
-+ | stfd f13, CTSTATE->cb.fpr[12]
-+ | .endif
-+ | .if ELFV2
-+ | addi TMP0, sp, CFRAME_SPACE+96
-+ | .elif GPR64
-+ | addi TMP0, sp, CFRAME_SPACE+112
-+ | .else
- | addi TMP0, sp, CFRAME_SPACE+8
-- | stw TMP0, CTSTATE->cb.stack
-+ | .endif
-+ | stp TMP0, CTSTATE->cb.stack
- | mr CARG1, CTSTATE
- | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
- | mr CARG2, sp
- | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
- | // Returns lua_State *.
- | lp BASE, L:CRET1->base
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp RC, L:CRET1->top
- | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | li ZERO, 0
-@@ -3247,9 +3521,21 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG1, CTSTATE
- | mr CARG2, RA
- | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
-- | lwz CRET1, CTSTATE->cb.gpr[0]
-+ | lp CRET1, CTSTATE->cb.gpr[0]
- | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
-- | lwz CRET2, CTSTATE->cb.gpr[1]
-+ | lp CRET2, CTSTATE->cb.gpr[1]
-+ | .if GPR64
-+ | lfd FARG2, CTSTATE->cb.fpr[1]
-+ | .else
-+ | lp CARG3, CTSTATE->cb.gpr[2]
-+ | lp CARG4, CTSTATE->cb.gpr[3]
-+ | .endif
-+ | .elfv2 lfd f3, CTSTATE->cb.fpr[2]
-+ | .elfv2 lfd f4, CTSTATE->cb.fpr[3]
-+ | .elfv2 lfd f5, CTSTATE->cb.fpr[4]
-+ | .elfv2 lfd f6, CTSTATE->cb.fpr[5]
-+ | .elfv2 lfd f7, CTSTATE->cb.fpr[6]
-+ | .elfv2 lfd f8, CTSTATE->cb.fpr[7]
- | b ->vm_leave_unw
- |.endif
- |
-@@ -3262,23 +3548,46 @@ static void build_subroutines(BuildCtx *ctx)
- | lbz CARG2, CCSTATE->nsp
- | lbz CARG3, CCSTATE->nfpr
- | neg TMP1, TMP1
-+ | .if GPR64
-+ | std TMP0, 16(sp)
-+ | .else
- | stw TMP0, 4(sp)
-+ | .endif
- | cmpwi cr1, CARG3, 0
- | mr TMP2, sp
- | addic. CARG2, CARG2, -1
-+ | .if GPR64
-+ | stdux sp, sp, TMP1
-+ | .else
- | stwux sp, sp, TMP1
-+ | .endif
- | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
-- | stw r14, -4(TMP2)
-- | stw CCSTATE, -8(TMP2)
-+ | .if GPR64
-+ | std r14, -8(TMP2)
-+ | std CCSTATE, -16(TMP2)
-+ | .else
-+ | stw r14, -4(TMP2)
-+ | stw CCSTATE, -8(TMP2)
-+ | .endif
- | mr r14, TMP2
- | la TMP1, CCSTATE->stack
-+ | .if GPR64
-+ | sldi CARG2, CARG2, 3
-+ | .else
- | slwi CARG2, CARG2, 2
-+ | .endif
- | blty >2
-- | la TMP2, 8(sp)
-+ | .if ELFV2
-+ | la TMP2, 96(sp)
-+ | .elif GPR64
-+ | la TMP2, 112(sp)
-+ | .else
-+ | la TMP2, 8(sp)
-+ | .endif
- |1:
-- | lwzx TMP0, TMP1, CARG2
-- | stwx TMP0, TMP2, CARG2
-- | addic. CARG2, CARG2, -4
-+ | lpx TMP0, TMP1, CARG2
-+ | stpx TMP0, TMP2, CARG2
-+ | addic. CARG2, CARG2, -PSIZE
- | bge <1
- |2:
- | bney cr1, >3
-@@ -3290,28 +3599,55 @@ static void build_subroutines(BuildCtx *ctx)
- | .FPU lfd f6, CCSTATE->fpr[5]
- | .FPU lfd f7, CCSTATE->fpr[6]
- | .FPU lfd f8, CCSTATE->fpr[7]
-+ | .if GPR64
-+ | .FPU lfd f9, CCSTATE->fpr[8]
-+ | .FPU lfd f10, CCSTATE->fpr[9]
-+ | .FPU lfd f11, CCSTATE->fpr[10]
-+ | .FPU lfd f12, CCSTATE->fpr[11]
-+ | .FPU lfd f13, CCSTATE->fpr[12]
-+ | .endif
- |3:
-- | lp TMP0, CCSTATE->func
-- | lwz CARG2, CCSTATE->gpr[1]
-- | lwz CARG3, CCSTATE->gpr[2]
-- | lwz CARG4, CCSTATE->gpr[3]
-- | lwz CARG5, CCSTATE->gpr[4]
-- | mtctr TMP0
-- | lwz r8, CCSTATE->gpr[5]
-- | lwz r9, CCSTATE->gpr[6]
-- | lwz r10, CCSTATE->gpr[7]
-- | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
-+ | .toc std TOCREG, SAVE_TOC
-+ | lp FUNCREG, CCSTATE->func
-+ | lp CARG2, CCSTATE->gpr[1]
-+ | lp CARG3, CCSTATE->gpr[2]
-+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
-+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
-+ | .opd lp FUNCREG, 0(FUNCREG)
-+ | lp CARG4, CCSTATE->gpr[3]
-+ | lp CARG5, CCSTATE->gpr[4]
-+ | mtctr FUNCREG
-+ | lp r8, CCSTATE->gpr[5]
-+ | lp r9, CCSTATE->gpr[6]
-+ | lp r10, CCSTATE->gpr[7]
-+ | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
- | bctrl
-- | lwz CCSTATE:TMP1, -8(r14)
-- | lwz TMP2, -4(r14)
-+ | .toc lp TOCREG, SAVE_TOC
-+ | .if GPR64
-+ | ld CCSTATE:TMP1, -16(r14)
-+ | ld TMP2, -8(r14)
-+ | ld TMP0, 16(r14)
-+ | .else
-+ | lwz CCSTATE:TMP1, -8(r14)
-+ | lwz TMP2, -4(r14)
- | lwz TMP0, 4(r14)
-- | stw CARG1, CCSTATE:TMP1->gpr[0]
-+ | .endif
-+ | stp CARG1, CCSTATE:TMP1->gpr[0]
- | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
-- | stw CARG2, CCSTATE:TMP1->gpr[1]
-+ | stp CARG2, CCSTATE:TMP1->gpr[1]
-+ | .if GPR64
-+ | stfd FARG2, CCSTATE:TMP1->fpr[1]
-+ | .endif
-+ | .elfv2 stfd FARG3, CCSTATE:TMP1->fpr[2]
-+ | .elfv2 stfd FARG4, CCSTATE:TMP1->fpr[3]
-+ | .elfv2 stfd FARG5, CCSTATE:TMP1->fpr[4]
-+ | .elfv2 stfd FARG6, CCSTATE:TMP1->fpr[5]
-+ | .elfv2 stfd FARG7, CCSTATE:TMP1->fpr[6]
-+ | .elfv2 stfd FARG8, CCSTATE:TMP1->fpr[7]
- | mtlr TMP0
-- | stw CARG3, CCSTATE:TMP1->gpr[2]
-+ | stp CARG3, CCSTATE:TMP1->gpr[2]
- | mr sp, r14
-- | stw CARG4, CCSTATE:TMP1->gpr[3]
-+ | stp CARG4, CCSTATE:TMP1->gpr[3]
- | mr r14, TMP2
- | blr
- |.endif
-@@ -3335,13 +3671,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux CARG1, RA, BASE
-+ | lwzx CARG1, BASE_HI, RA
- | addi PC, PC, 4
-- | lwz CARG2, 4(RA)
-- | lwzux CARG3, RD, BASE
-+ | lwzx CARG2, BASE_LO, RA
-+ | lwzx CARG3, BASE_HI, RD
- | lwz TMP2, -4(PC)
- | checknum cr0, CARG1
-- | lwz CARG4, 4(RD)
-+ | lwzx CARG4, BASE_LO, RD
- | decode_RD4 TMP2, TMP2
- | checknum cr1, CARG3
- | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
-@@ -3365,7 +3701,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |7: // RA is not an integer.
- | bgt cr0, ->vmeta_comp
- | // RA is a number.
-- | .FPU lfd f0, 0(RA)
-+ | .FPU lfdx f0, BASE, RA
- | bgt cr1, ->vmeta_comp
- | blt cr1, >4
- | // RA is a number, RD is an integer.
-@@ -3385,7 +3721,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bl ->vm_sfi2d_1
- |.endif
- |4:
-- | .FPU lfd f1, 0(RD)
-+ | .FPU lfdx f1, BASE, RD
- |5:
- |.if FPU
- | fcmpu cr0, f0, f1
-@@ -3406,10 +3742,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | b <1
- |.else
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | addi PC, PC, 4
- | lfdx f0, BASE, RA
-- | lwzx TMP1, BASE, RD
-+ | lwzx TMP1, BASE_HI, RD
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | lfdx f1, BASE, RD
-@@ -3440,15 +3776,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux CARG1, RA, BASE
-+ | lwzx CARG1, BASE_HI, RA
- | addi PC, PC, 4
-- | lwz CARG2, 4(RA)
-- | lwzux CARG3, RD, BASE
-+ | lwzx CARG2, BASE_LO, RA
-+ | .if ENDIAN_LE
-+ | lwzx CARG3, BASE_HI, RD
-+ | .else
-+ | lwzux CARG3, RD, BASE_HI
-+ | .endif
- | checknum cr0, CARG1
- | lwz SAVE0, -4(PC)
- | checknum cr1, CARG3
- | decode_RD4 SAVE0, SAVE0
-- | lwz CARG4, 4(RD)
-+ | .if ENDIAN_LE
-+ | lwzux CARG4, RD, BASE_LO
-+ | .else
-+ | lwz CARG4, WORD_LO(RD)
-+ | .endif
- | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
- | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
-@@ -3457,11 +3801,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ble cr7, ->BC_ISNEN_Z
- }
- |.else
-- | lwzux CARG1, RA, BASE
-+ | lwzx CARG1, BASE_HI, RA
- | lwz SAVE0, 0(PC)
-- | lfd f0, 0(RA)
-+ | lfdx f0, BASE, RA
- | addi PC, PC, 4
-- | lwzux CARG3, RD, BASE
-+ | lwzx CARG3, BASE_HI, RD
- | checknum cr0, CARG1
- | decode_RD4 SAVE0, SAVE0
- | lfd f1, 0(RD)
-@@ -3482,8 +3826,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- |5: // Either or both types are not numbers.
- |.if not DUALNUM
-- | lwz CARG2, 4(RA)
-- | lwz CARG4, 4(RD)
-+ | lwzx CARG2, BASE_LO, RA
-+ | lwzx CARG4, BASE_LO, RD
- |.endif
- |.if FFI
- | cmpwi cr7, CARG1, LJ_TCDATA
-@@ -3499,10 +3843,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.if FFI
- | beq cr7, ->vmeta_equal_cd
- |.endif
-+ |.if P64
-+ | cmplwi cr7, TMP2, ~LJ_TUDATA // Avoid 64 bit lightuserdata.
-+ |.endif
- | cmplw cr5, CARG2, CARG4
- | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
- | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
-+ |.if P64
-+ | cror 4*cr6+lt, 4*cr6+lt, 4*cr7+gt
-+ |.endif
- | mr SAVE1, PC
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
-@@ -3542,9 +3892,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQS: case BC_ISNES:
- vk = op == BC_ISEQS;
- | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
-- | lwzux TMP0, RA, BASE
-+ | lwzx TMP0, BASE_HI, RA
- | srwi RD, RD, 1
-- | lwz STR:TMP3, 4(RA)
-+ | lwzx STR:TMP3, BASE_LO, RA
- | lwz TMP2, 0(PC)
- | subfic RD, RD, -4
- | addi PC, PC, 4
-@@ -3576,15 +3926,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux CARG1, RA, BASE
-+ | lwzx CARG1, BASE_HI, RA
- | addi PC, PC, 4
-- | lwz CARG2, 4(RA)
-- | lwzux CARG3, RD, KBASE
-+ | lwzx CARG2, BASE_LO, RA
-+ | lwzux2 CARG3, CARG4, RD, KBASE
- | checknum cr0, CARG1
- | lwz SAVE0, -4(PC)
- | checknum cr1, CARG3
- | decode_RD4 SAVE0, SAVE0
-- | lwz CARG4, 4(RD)
- | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
- |->BC_ISEQN_Z:
-@@ -3601,7 +3950,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- } else {
- |->BC_ISNEN_Z: // Dummy label.
- }
-- | lwzx CARG1, BASE, RA
-+ | lwzx CARG1, BASE_HI, RA
- | addi PC, PC, 4
- | lfdx f0, BASE, RA
- | lwz SAVE0, -4(PC)
-@@ -3639,7 +3988,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |7: // RA is not an integer.
- | bge cr0, <3
- | // RA is a number.
-- | .FPU lfd f0, 0(RA)
-+ | .FPU lfdx f0, BASE, RA
- | blt cr1, >1
- | // RA is a number, RD is an integer.
- |.if FPU
-@@ -3671,7 +4020,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
- | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | srwi TMP1, RD, 3
- | lwz TMP2, 0(PC)
- | not TMP1, TMP1
-@@ -3701,7 +4050,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
- | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
-- | lwzx TMP0, BASE, RD
-+ | lwzx TMP0, BASE_HI, RD
- | lwz INS, 0(PC)
- | addi PC, PC, 4
- if (op == BC_IST || op == BC_ISF) {
-@@ -3746,7 +4095,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTYPE:
- | // RA = src*8, RD = -type*8
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | srwi TMP1, RD, 3
- | ins_next1
- |.if not PPE and not GPR64
-@@ -3760,7 +4109,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_ISNUM:
- | // RA = src*8, RD = -(TISNUM-1)*8
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | ins_next1
- | checknum TMP0
- | bge ->vmeta_istype
-@@ -3786,17 +4135,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_NOT:
- | // RA = dst*8, RD = src*8
- | ins_next1
-- | lwzx TMP0, BASE, RD
-+ | lwzx TMP0, BASE_HI, RD
- | .gpr64 extsw TMP0, TMP0
- | subfic TMP1, TMP0, LJ_TTRUE
- | adde TMP0, TMP0, TMP1
-- | stwx TMP0, BASE, RA
-+ | stwx TMP0, BASE_HI, RA
- | ins_next2
- break;
- case BC_UNM:
- | // RA = dst*8, RD = src*8
-- | lwzux TMP1, RD, BASE
-- | lwz TMP0, 4(RD)
-+ | lwzx TMP1, BASE_HI, RD
-+ | lwzx TMP0, BASE_LO, RD
-+ |.if DUALNUM and not GPR64
-+ | mtxer ZERO
-+ |.endif
- | checknum TMP1
- |.if DUALNUM
- | bne >5
-@@ -3808,18 +4160,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.else
- | nego. TMP0, TMP0
- | bso >4
-- |1:
- |.endif
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- |3:
- | ins_next2
- |4:
-- |.if not GPR64
-- | // Potential overflow.
-- | checkov TMP1, <1 // Ignore unrelated overflow.
-- |.endif
- | lus TMP1, 0x41e0 // 2^31.
- | li TMP0, 0
- | b >7
-@@ -3829,8 +4176,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | xoris TMP1, TMP1, 0x8000
- |7:
- | ins_next1
-- | stwux TMP1, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TMP1, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- |.if DUALNUM
- | b <3
- |.else
-@@ -3839,15 +4186,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_LEN:
- | // RA = dst*8, RD = src*8
-- | lwzux TMP0, RD, BASE
-- | lwz CARG1, 4(RD)
-+ | lwzx TMP0, BASE_HI, RD
-+ | lwzx CARG1, BASE_LO, RD
- | checkstr TMP0; bne >2
- | lwz CRET1, STR:CARG1->len
- |1:
- |.if DUALNUM
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw CRET1, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx CRET1, BASE_LO, RA
- |.else
- | tonum_u f0, CRET1 // Result is a non-negative integer.
- | ins_next1
-@@ -3882,9 +4229,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzx CARG1, BASE, RB
-+ | .if ENDIAN_LE and DUALNUM
-+ | addi CARG3, RC, 4
-+ | .endif
-+ | lwzx CARG1, BASE_HI, RB
- | .if DUALNUM
-- | lwzx CARG3, KBASE, RC
-+ | .if ENDIAN_LE
-+ | lwzx CARG3, KBASE, CARG3
-+ | .else
-+ | lwzx CARG3, KBASE, RC
-+ | .endif
- | .endif
- | .if FPU
- | lfdx f14, BASE, RB
-@@ -3905,9 +4259,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | .endif
- || break;
- ||case 1:
-- | lwzx CARG1, BASE, RB
-+ | .if ENDIAN_LE and DUALNUM
-+ | addi CARG3, RC, 4
-+ | .endif
-+ | lwzx CARG1, BASE_HI, RB
- | .if DUALNUM
-- | lwzx CARG3, KBASE, RC
-+ | .if ENDIAN_LE
-+ | lwzx CARG3, KBASE, CARG3
-+ | .else
-+ | lwzx CARG3, KBASE, RC
-+ | .endif
- | .endif
- | .if FPU
- | lfdx f15, BASE, RB
-@@ -3928,8 +4289,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | .endif
- || break;
- ||default:
-- | lwzx CARG1, BASE, RB
-- | lwzx CARG3, BASE, RC
-+ | lwzx CARG1, BASE_HI, RB
-+ | lwzx CARG3, BASE_HI, RC
- | .if FPU
- | lfdx f14, BASE, RB
- | lfdx f15, BASE, RC
-@@ -4015,47 +4376,74 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzux CARG1, RB, BASE
-- | lwzux CARG3, RC, KBASE
-+ | .if ENDIAN_LE
-+ | lwzx TMP1, RB, BASE_HI
-+ | lwzux CARG2, RC, KBASE
-+ | lwz TMP2, 4(RC)
-+ | checknum cr0, TMP1
-+ | lwzux CARG1, RB, BASE
-+ | checknum cr1, TMP2
-+ | .else
-+ | lwzux CARG1, RB, BASE
-+ | lwzux CARG3, RC, KBASE
- | lwz CARG2, 4(RB)
-- | checknum cr0, CARG1
-- | lwz CARG4, 4(RC)
-- | checknum cr1, CARG3
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RC)
-+ | checknum cr1, CARG3
-+ | .endif
- || break;
- ||case 1:
-- | lwzux CARG3, RB, BASE
-- | lwzux CARG1, RC, KBASE
-- | lwz CARG4, 4(RB)
-- | checknum cr0, CARG3
-- | lwz CARG2, 4(RC)
-- | checknum cr1, CARG1
-+ | .if ENDIAN_LE
-+ | lwzux CARG1, RC, KBASE
-+ | lwzx TMP1, RB, BASE_HI
-+ | lwz TMP2, 4(RC)
-+ | checknum cr0, TMP1
-+ | lwzux CARG2, RB, BASE
-+ | checknum cr1, TMP2
-+ | .else
-+ | lwzux CARG3, RB, BASE
-+ | lwzux CARG1, RC, KBASE
-+ | lwz CARG4, 4(RB)
-+ | checknum cr0, CARG3
-+ | lwz CARG2, 4(RC)
-+ | checknum cr1, CARG1
-+ | .endif
- || break;
- ||default:
-- | lwzux CARG1, RB, BASE
-- | lwzux CARG3, RC, BASE
-- | lwz CARG2, 4(RB)
-- | checknum cr0, CARG1
-- | lwz CARG4, 4(RC)
-- | checknum cr1, CARG3
-+ | .if ENDIAN_LE
-+ | lwzx TMP1, RB, BASE_HI
-+ | lwzx TMP2, RC, BASE_HI
-+ | lwzux CARG1, RB, BASE
-+ | checknum cr0, TMP1
-+ | lwzux CARG2, RC, BASE
-+ | checknum cr1, TMP2
-+ | .else
-+ | lwzux CARG1, RB, BASE
-+ | lwzux CARG3, RC, BASE
-+ | lwz CARG2, 4(RB)
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RC)
-+ | checknum cr1, CARG3
-+ | .endif
- || break;
- ||}
- | bne >5
- | bne cr1, >5
-- |.if "intins" == "intmod"
-- | mr CARG1, CARG2
-- | mr CARG2, CARG4
-- |.endif
-+ |.if ENDIAN_LE
-+ | intins CARG1, CARG1, CARG2
-+ |.else
-+ | .if "intins" == "intmod"
-+ | mr CARG1, CARG2
-+ | mr CARG2, CARG4
-+ | .endif
- | intins CARG1, CARG2, CARG4
-- | bso >4
-- |1:
-+ |.endif
-+ | ins_arithfallback bso
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw CARG1, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx CARG1, BASE_LO, RA
- |2:
- | ins_next2
-- |4: // Overflow.
-- | checkov TMP0, <1 // Ignore unrelated overflow.
-- | ins_arithfallback b
- |5: // FP variant.
- |.if FPU
- ||if (vk == 1) {
-@@ -4138,7 +4526,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_POW:
- | // NYI: (partial) integer arithmetic.
-- | lwzx CARG1, BASE, RB
-+ | lwzx CARG1, BASE_HI, RB
- | lwzx CARG3, BASE, RC
- |.if FPU
- | lfdx FARG1, BASE, RB
-@@ -4178,6 +4566,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Returns NULL (finished) or TValue * (metamethod).
- | cmplwi CRET1, 0
- | lp BASE, L->base
-+ | addi BASEP4, BASE, 4
- | bne ->vmeta_binop
- | ins_next1
- |.if FPU
-@@ -4201,8 +4590,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next1
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
- | li TMP2, LJ_TSTR
-- | stwux TMP2, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TMP2, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- | ins_next2
- break;
- case BC_KCDATA:
-@@ -4213,8 +4602,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next1
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
- | li TMP2, LJ_TCDATA
-- | stwux TMP2, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TMP2, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- | ins_next2
- |.endif
- break;
-@@ -4224,14 +4613,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi RD, RD, 13
- | srawi RD, RD, 16
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw RD, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx RD, BASE_LO, RA
- | ins_next2
- |.else
- | // The soft-float approach is faster.
- | slwi RD, RD, 13
- | srawi TMP1, RD, 31
- | xor TMP2, TMP1, RD
-+ | .gpr64 extsw RD, RD
- | sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
- | cntlzw TMP3, TMP2
- | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
-@@ -4243,8 +4633,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add RD, RD, TMP1 // hi = hi + exponent-1
- | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi
- | ins_next1
-- | stwux RD, RA, BASE
-- | stw ZERO, 4(RA)
-+ | stwx RD, BASE_HI, RA
-+ | stwx ZERO, BASE_LO, RA
- | ins_next2
- |.endif
- break;
-@@ -4267,15 +4657,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | srwi TMP1, RD, 3
- | not TMP0, TMP1
- | ins_next1
-- | stwx TMP0, BASE, RA
-+ | stwx TMP0, BASE_HI, RA
- | ins_next2
- break;
- case BC_KNIL:
- | // RA = base*8, RD = end*8
-- | stwx TISNIL, BASE, RA
-+ | stwx TISNIL, BASE_HI, RA
- | addi RA, RA, 8
- |1:
-- | stwx TISNIL, BASE, RA
-+ | stwx TISNIL, BASE_HI, RA
- | cmpw RA, RD
- | addi RA, RA, 8
- | blt <1
-@@ -4319,7 +4709,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz CARG2, UPVAL:RB->v
- | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP0, UPVAL:RB->closed
-- | lwz TMP2, 0(RD)
-+ | lwz TMP2, WORD_HI(RD)
- |.if FPU
- | stfd f0, 0(CARG2)
- |.else
-@@ -4327,7 +4717,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stw CARG3, 4(CARG2)
- |.endif
- | cmplwi cr1, TMP0, 0
-- | lwz TMP1, 4(RD)
-+ | lwz TMP1, WORD_LO(RD)
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- | subi TMP2, TMP2, (LJ_TNUMX+1)
- | bne >2 // Upvalue is closed and black?
-@@ -4360,8 +4750,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lbz TMP3, STR:TMP1->marked
- | lbz TMP2, UPVAL:RB->closed
- | li TMP0, LJ_TSTR
-- | stw STR:TMP1, 4(CARG2)
-- | stw TMP0, 0(CARG2)
-+ | stw STR:TMP1, WORD_LO(CARG2)
-+ | stw TMP0, WORD_HI(CARG2)
- | bne >2
- |1:
- | ins_next
-@@ -4408,7 +4798,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
-- | stw TMP0, 0(TMP1)
-+ | stw TMP0, WORD_HI(TMP1)
- | ins_next2
- break;
-
-@@ -4423,6 +4813,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add CARG2, BASE, RA
- | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
- | lp BASE, L->base
-+ | addi BASEP4, BASE, 4
- |1:
- | ins_next
- break;
-@@ -4441,8 +4832,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Returns GCfuncL *.
- | lp BASE, L->base
- | li TMP0, LJ_TFUNC
-- | stwux TMP0, RA, BASE
-- | stw LFUNC:CRET1, 4(RA)
-+ | addi BASEP4, BASE, 4
-+ | stwx TMP0, BASE_HI, RA
-+ | stwx LFUNC:CRET1, BASE_LO, RA
- | ins_next
- break;
-
-@@ -4475,8 +4867,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | lp BASE, L->base
- | li TMP0, LJ_TTAB
-- | stwux TMP0, RA, BASE
-- | stw TAB:CRET1, 4(RA)
-+ | addi BASEP4, BASE, 4
-+ | stwx TMP0, BASE_HI, RA
-+ | stwx TAB:CRET1, BASE_LO, RA
- | ins_next
- if (op == BC_TNEW) {
- |3:
-@@ -4509,13 +4902,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_TGETV:
- | // RA = dst*8, RB = table*8, RC = key*8
-- | lwzux CARG1, RB, BASE
-- | lwzux CARG2, RC, BASE
-- | lwz TAB:RB, 4(RB)
-+ | lwzx CARG1, BASE_HI, RB
-+ | lwzx CARG2, BASE_HI, RC
-+ | lwzx TAB:RB, BASE_LO, RB
- |.if DUALNUM
-- | lwz RC, 4(RC)
-+ | lwzx RC, BASE_LO, RC
- |.else
-- | lfd f0, 0(RC)
-+ | lfdx f0, BASE, RC
- |.endif
- | checktab CARG1
- | checknum cr1, CARG2
-@@ -4542,9 +4935,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi TMP2, TMP2, 3
- |.endif
- | ble ->vmeta_tgetv // Integer key and in array part?
-- | lwzx TMP0, TMP1, TMP2
- |.if FPU
-- | lfdx f14, TMP1, TMP2
-+ | .if ENDIAN_LE
-+ | lfdux f14, TMP1, TMP2
-+ | lwz TMP0, WORD_HI(TMP1)
-+ | .else
-+ | lwzx TMP0, TMP1, TMP2
-+ | lfdx f14, TMP1, TMP2
-+ | .endif
- |.else
- | lwzux SAVE0, TMP1, TMP2
- | lwz SAVE1, 4(TMP1)
-@@ -4572,15 +4970,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5:
- | checkstr CARG2; bne ->vmeta_tgetv
- |.if not DUALNUM
-- | lwz STR:RC, 4(RC)
-+ | lwzx STR:RC, BASE_LO, RC
- |.endif
- | b ->BC_TGETS_Z // String key?
- break;
- case BC_TGETS:
- | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP1, RC, 1
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | subfic TMP1, TMP1, -4
- | checktab CARG1
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
-@@ -4596,16 +4994,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
-- | lwz CARG1, NODE:TMP2->key
-- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
-- | lwz CARG2, NODE:TMP2->val
-- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
-+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
-+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
-+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
-+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
- | checkstr CARG1; bne >4
- | cmpw TMP0, STR:RC; bne >4
- | checknil CARG2; beq >5 // Key found, but nil value?
- |3:
-- | stwux CARG2, RA, BASE
-- | stw TMP1, 4(RA)
-+ | stwx CARG2, BASE_HI, RA
-+ | stwx TMP1, BASE_LO, RA
- | ins_next
- |
- |4: // Follow hash chain.
-@@ -4626,16 +5024,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETB:
- | // RA = dst*8, RB = table*8, RC = index*8
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP0, RC, 3
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | checktab CARG1; bne ->vmeta_tgetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | cmplw TMP0, TMP1; bge ->vmeta_tgetb
- |.if FPU
-- | lwzx TMP1, TMP2, RC
-- | lfdx f0, TMP2, RC
-+ | .if ENDIAN_LE
-+ | lfdux f0, TMP2, RC
-+ | lwz TMP1, WORD_HI(TMP2)
-+ | .else
-+ | lwzx TMP1, TMP2, RC
-+ | lfdx f0, TMP2, RC
-+ | .endif
- |.else
- | lwzux TMP1, TMP2, RC
- | lwz TMP3, 4(TMP2)
-@@ -4662,12 +5065,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETR:
- | // RA = dst*8, RB = table*8, RC = key*8
-- | add RB, BASE, RB
-- | lwz TAB:CARG1, 4(RB)
-+ | lwzx TAB:CARG1, BASE_LO, RB
- |.if DUALNUM
-- | add RC, BASE, RC
- | lwz TMP0, TAB:CARG1->asize
-- | lwz CARG2, 4(RC)
-+ | lwzx CARG2, BASE_LO, RC
- | lwz TMP1, TAB:CARG1->array
- |.else
- | lfdx f0, BASE, RC
-@@ -4697,13 +5098,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_TSETV:
- | // RA = src*8, RB = table*8, RC = key*8
-- | lwzux CARG1, RB, BASE
-- | lwzux CARG2, RC, BASE
-- | lwz TAB:RB, 4(RB)
-+ | lwzx CARG1, BASE_HI, RB
-+ | lwzx CARG2, BASE_HI, RC
-+ | lwzx TAB:RB, BASE_LO, RB
- |.if DUALNUM
-- | lwz RC, 4(RC)
-+ | lwzx RC, BASE_LO, RC
- |.else
-- | lfd f0, 0(RC)
-+ | lfdx f0, BASE, RC
- |.endif
- | checktab CARG1
- | checknum cr1, CARG2
-@@ -4730,7 +5131,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi TMP0, TMP2, 3
- |.endif
- | ble ->vmeta_tsetv // Integer key and in array part?
-+ | .if ENDIAN_LE
-+ | addi TMP2, TMP1, 4
-+ | lwzx TMP2, TMP2, TMP0
-+ | .else
- | lwzx TMP2, TMP1, TMP0
-+ | .endif
- | lbz TMP3, TAB:RB->marked
- |.if FPU
- | lfdx f14, BASE, RA
-@@ -4764,7 +5170,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5:
- | checkstr CARG2; bne ->vmeta_tsetv
- |.if not DUALNUM
-- | lwz STR:RC, 4(RC)
-+ | lwzx STR:RC, BASE_LO, RC
- |.endif
- | b ->BC_TSETS_Z // String key?
- |
-@@ -4774,9 +5180,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETS:
- | // RA = src*8, RB = table*8, RC = str_const*8 (~)
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP1, RC, 1
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | subfic TMP1, TMP1, -4
- | checktab CARG1
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
-@@ -4801,9 +5207,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lbz TMP3, TAB:RB->marked
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
-- | lwz CARG1, NODE:TMP2->key
-- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
-- | lwz CARG2, NODE:TMP2->val
-+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
-+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
-+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
- | lwz NODE:TMP1, NODE:TMP2->next
- | checkstr CARG1; bne >5
- | cmpw TMP0, STR:RC; bne >5
-@@ -4848,9 +5254,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
- |6:
- | li TMP0, LJ_TSTR
-- | stw STR:RC, 4(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
- | mr CARG2, TAB:RB
-- | stw TMP0, 0(CARG3)
-+ | stw TMP0, WORD_HI(CARG3)
- | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
- | // Returns TValue *.
- | lp BASE, L->base
-@@ -4860,6 +5266,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stw SAVE0, 0(CRET1)
- | stw SAVE1, 4(CRET1)
- |.endif
-+ | addi BASEP4, BASE, 4
- | b <3 // No 2nd write barrier needed.
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-@@ -4868,9 +5275,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETB:
- | // RA = src*8, RB = table*8, RC = index*8
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP0, RC, 3
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | checktab CARG1; bne ->vmeta_tsetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
-@@ -4884,7 +5291,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz SAVE1, 4(CARG2)
- |.endif
- | bge ->vmeta_tsetb
-- | lwzx TMP1, TMP2, RC
-+ | .if ENDIAN_LE
-+ | addi TMP1, TMP2, 4
-+ | lwzx TMP1, TMP1, RC
-+ | .else
-+ | lwzx TMP1, TMP2, RC
-+ | .endif
- | checknil TMP1; beq >5
- |1:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
-@@ -4913,13 +5325,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETR:
- | // RA = dst*8, RB = table*8, RC = key*8
-- | add RB, BASE, RB
-- | lwz TAB:CARG2, 4(RB)
-+ | lwzx TAB:CARG2, BASE_LO, RB
- |.if DUALNUM
-- | add RC, BASE, RC
- | lbz TMP3, TAB:CARG2->marked
- | lwz TMP0, TAB:CARG2->asize
-- | lwz CARG3, 4(RC)
-+ | lwzx CARG3, BASE_LO, RC
- | lwz TMP1, TAB:CARG2->array
- |.else
- | lfdx f0, BASE, RC
-@@ -4960,9 +5370,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add RA, BASE, RA
- |1:
- | add TMP3, KBASE, RD
-- | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
-+ | lwz TAB:CARG2, WORD_LO-8(RA) // Guaranteed to be a table.
- | addic. TMP0, MULTRES, -8
-- | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
-+ | lwz TMP3, WORD_LO(TMP3) // Integer constant is in lo-word.
- | srwi CARG3, TMP0, 3
- | beq >4 // Nothing to copy?
- | add CARG3, CARG3, TMP3
-@@ -5021,8 +5431,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_CALL:
- | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
- | mr TMP2, BASE
-- | lwzux TMP0, BASE, RA
-- | lwz LFUNC:RB, 4(BASE)
-+ | lwzux2 TMP0, LFUNC:RB, BASE, RA
- | subi NARGS8:RC, NARGS8:RC, 8
- | addi BASE, BASE, 8
- | checkfunc TMP0; bne ->vmeta_call
-@@ -5036,8 +5445,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_CALLT:
- | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
-- | lwzux TMP0, RA, BASE
-- | lwz LFUNC:RB, 4(RA)
-+ | lwzux2 TMP0, LFUNC:RB, RA, BASE
- | subi NARGS8:RC, NARGS8:RC, 8
- | lwz TMP1, FRAME_PC(BASE)
- | checkfunc TMP0
-@@ -5100,8 +5508,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
- | mr TMP2, BASE
- | add BASE, BASE, RA
-- | lwz TMP1, -24(BASE)
-- | lwz LFUNC:RB, -20(BASE)
-+ | lwz TMP1, WORD_HI-24(BASE)
-+ | lwz LFUNC:RB, WORD_LO-24(BASE)
- |.if FPU
- | lfd f1, -8(BASE)
- | lfd f0, -16(BASE)
-@@ -5111,8 +5519,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz CARG3, -16(BASE)
- | lwz CARG4, -12(BASE)
- |.endif
-- | stw TMP1, 0(BASE) // Copy callable.
-- | stw LFUNC:RB, 4(BASE)
-+ | stw TMP1, WORD_HI(BASE) // Copy callable.
-+ | stw LFUNC:RB, WORD_LO(BASE)
- | checkfunc TMP1
- | li NARGS8:RC, 16 // Iterators get 2 arguments.
- |.if FPU
-@@ -5134,8 +5542,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // NYI: add hotloop, record BC_ITERN.
- |.endif
- | add RA, BASE, RA
-- | lwz TAB:RB, -12(RA)
-- | lwz RC, -4(RA) // Get index from control var.
-+ | lwz TAB:RB, WORD_LO-16(RA)
-+ | lwz RC, WORD_LO-8(RA) // Get index from control var.
- | lwz TMP0, TAB:RB->asize
- | lwz TMP1, TAB:RB->array
- | addi PC, PC, 4
-@@ -5143,10 +5551,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cmplw RC, TMP0
- | slwi TMP3, RC, 3
- | bge >5 // Index points after array part?
-- | lwzx TMP2, TMP1, TMP3
- |.if FPU
-- | lfdx f0, TMP1, TMP3
-+ | lfdux f0, TMP3, TMP1
-+ | lwz TMP2, WORD_HI(TMP3)
- |.else
-+ | lwzx TMP2, TMP1, TMP3
- | lwzux CARG1, TMP3, TMP1
- | lwz CARG2, 4(TMP3)
- |.endif
-@@ -5154,8 +5563,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz INS, -4(PC)
- | beq >4
- |.if DUALNUM
-- | stw RC, 4(RA)
-- | stw TISNUM, 0(RA)
-+ | stw RC, WORD_LO(RA)
-+ | stw TISNUM, WORD_HI(RA)
- |.else
- | tonum_u f1, RC
- |.endif
-@@ -5168,7 +5577,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stw CARG2, 12(RA)
- |.endif
- | decode_RD4 TMP1, INS
-- | stw RC, -4(RA) // Update control var.
-+ | stw RC, WORD_LO-8(RA) // Update control var.
- | add PC, TMP1, TMP3
- |.if not DUALNUM
- | stfd f1, 0(RA)
-@@ -5190,15 +5599,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bgty <3
- | slwi RB, RC, 3
- | sub TMP3, TMP3, RB
-- | lwzx RB, TMP2, TMP3
- |.if FPU
-- | lfdx f0, TMP2, TMP3
-+ | lfdux f0, TMP3, TMP2
-+ | lwz RB, WORD_HI(TMP3)
- |.else
- | add CARG3, TMP2, TMP3
- | lwz CARG1, 0(CARG3)
- | lwz CARG2, 4(CARG3)
-- |.endif
- | add NODE:TMP3, TMP2, TMP3
-+ |.endif
- | checknil RB
- | lwz INS, -4(PC)
- | beq >7
-@@ -5225,7 +5634,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- | addi RC, RC, 1
- | add PC, TMP1, TMP2
-- | stw RC, -4(RA) // Update control var.
-+ | stw RC, WORD_LO-8(RA) // Update control var.
- | b <3
- |
- |7: // Skip holes in hash part.
-@@ -5236,10 +5645,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISNEXT:
- | // RA = base*8, RD = target (points to ITERN)
- | add RA, BASE, RA
-- | lwz TMP0, -24(RA)
-- | lwz CFUNC:TMP1, -20(RA)
-- | lwz TMP2, -16(RA)
-- | lwz TMP3, -8(RA)
-+ | lwz TMP0, WORD_HI-24(RA)
-+ | lwz CFUNC:TMP1, WORD_LO-24(RA)
-+ | lwz TMP2, WORD_HI-16(RA)
-+ | lwz TMP3, WORD_HI-8(RA)
- | cmpwi cr0, TMP2, LJ_TTAB
- | cmpwi cr1, TMP0, LJ_TFUNC
- | cmpwi cr6, TMP3, LJ_TNIL
-@@ -5253,17 +5662,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bne cr0, >5
- | lus TMP1, 0xfffe
- | ori TMP1, TMP1, 0x7fff
-- | stw ZERO, -4(RA) // Initialize control var.
-- | stw TMP1, -8(RA)
-+ | stw ZERO, WORD_LO-8(RA) // Initialize control var.
-+ | stw TMP1, WORD_HI-8(RA)
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
- |1:
- | ins_next
- |5: // Despecialize bytecode if any of the checks fail.
- | li TMP0, BC_JMP
- | li TMP1, BC_ITERC
-+ | .if ENDIAN_LE
-+ | stb TMP0, -4(PC)
-+ | .else
- | stb TMP0, -1(PC)
-+ | .endif
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
-+ | .if ENDIAN_LE
-+ | stb TMP1, 0(PC)
-+ | .else
- | stb TMP1, 3(PC)
-+ | .endif
- | b <1
- break;
-
-@@ -5307,7 +5724,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addi RA, RA, 8
- | blt cr1, <1 // More vararg slots?
- |2: // Fill up remainder with nil.
-- | stw TISNIL, 0(RA)
-+ | stw TISNIL, WORD_HI(RA)
- | cmplw RA, TMP2
- | addi RA, RA, 8
- | blt <2
-@@ -5354,6 +5771,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add RA, BASE, RA
- | add RC, BASE, SAVE0
- | subi TMP3, BASE, 8
-+ | addi BASEP4, BASE, 4
- | b <6
- break;
-
-@@ -5426,13 +5844,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
-+ | addi BASEP4, BASE, 4
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
-- | subi TMP1, RD, 8
-+ | addi TMP1, RD, WORD_HI-8
- | addi RD, RD, 8
- | stwx TISNIL, TMP2, TMP1
- | b <5
-@@ -5475,13 +5894,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
-+ | addi BASEP4, BASE, 4
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
-- | subi TMP1, RD, 8
-+ | addi TMP1, RD, WORD_HI-8
- | addi RD, RD, 8
- | stwx TISNIL, TMP2, TMP1
- | b <5
-@@ -5507,11 +5927,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = (op == BC_IFORL || op == BC_JFORL);
- |.if DUALNUM
- | // Integer loop.
-- | lwzux TMP1, RA, BASE
-- | lwz CARG1, FORL_IDX*8+4(RA)
-+ | lwzux2 TMP1, CARG1, RA, BASE
-+ if (vk) {
-+ | mtxer ZERO
-+ }
- | cmplw cr0, TMP1, TISNUM
- if (vk) {
-- | lwz CARG3, FORL_STEP*8+4(RA)
-+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
- | bne >9
- |.if GPR64
- | // Need to check overflow for (a<<32) + (b<<32).
-@@ -5523,15 +5945,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addo. CARG1, CARG1, CARG3
- |.endif
- | cmpwi cr6, CARG3, 0
-- | lwz CARG2, FORL_STOP*8+4(RA)
-- | bso >6
-+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
-+ | bso >2
- |4:
-- | stw CARG1, FORL_IDX*8+4(RA)
-+ | stw CARG1, FORL_IDX*8+WORD_LO(RA)
- } else {
-- | lwz SAVE0, FORL_STEP*8(RA)
-- | lwz CARG3, FORL_STEP*8+4(RA)
-- | lwz TMP2, FORL_STOP*8(RA)
-- | lwz CARG2, FORL_STOP*8+4(RA)
-+ | lwz SAVE0, FORL_STEP*8+WORD_HI(RA)
-+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
-+ | lwz TMP2, FORL_STOP*8+WORD_HI(RA)
-+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
- | cmplw cr7, SAVE0, TISNUM
- | cmplw cr1, TMP2, TISNUM
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
-@@ -5542,11 +5964,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | blt cr6, >5
- | cmpw CARG1, CARG2
- |1:
-- | stw TISNUM, FORL_EXT*8(RA)
-+ | stw TISNUM, FORL_EXT*8+WORD_HI(RA)
- if (op != BC_JFORL) {
- | srwi RD, RD, 1
- }
-- | stw CARG1, FORL_EXT*8+4(RA)
-+ | stw CARG1, FORL_EXT*8+WORD_LO(RA)
- if (op != BC_JFORL) {
- | add RD, PC, RD
- }
-@@ -5566,11 +5988,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5: // Invert check for negative step.
- | cmpw CARG2, CARG1
- | b <1
-- if (vk) {
-- |6: // Potential overflow.
-- | checkov TMP0, <4 // Ignore unrelated overflow.
-- | b <2
-- }
- |.endif
- if (vk) {
- |.if DUALNUM
-@@ -5600,12 +6017,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz CARG3, FORL_STOP*8(RA)
- | lwz CARG4, FORL_STOP*8+4(RA)
- |.endif
-- | lwz SAVE0, FORL_STEP*8(RA)
-+ | lwz SAVE0, FORL_STEP*8+WORD_HI(RA)
- } else {
- |.if DUALNUM
- |9: // FP loop.
- |.else
-+ |.if ENDIAN_LE
-+ | lwzx TMP1, RA, BASE_LO
-+ | add RA, RA, BASE
-+ |.else
- | lwzux TMP1, RA, BASE
-+ |.endif
- | lwz SAVE0, FORL_STEP*8(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | cmplw cr0, TMP1, TISNUM
-@@ -5708,17 +6130,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- #endif
- case BC_IITERL:
- | // RA = base*8, RD = target
-- | lwzux TMP1, RA, BASE
-- | lwz TMP2, 4(RA)
-+ | lwzux2 TMP1, TMP2, RA, BASE
- | checknil TMP1; beq >1 // Stop if iterator returned nil.
- if (op == BC_JITERL) {
-- | stw TMP1, -8(RA)
-- | stw TMP2, -4(RA)
-+ | stw TMP1, WORD_HI-8(RA)
-+ | stw TMP2, WORD_LO-8(RA)
- | b =>BC_JLOOP
- } else {
- | branch_RD // Otherwise save control var + branch.
-- | stw TMP1, -8(RA)
-- | stw TMP2, -4(RA)
-+ | stw TMP1, WORD_HI-8(RA)
-+ | stw TMP2, WORD_LO-8(RA)
- }
- |1:
- | ins_next
-@@ -5747,7 +6168,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Traces on PPC don't store the trace number, so use 0.
- | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH)
- | lwzx TRACE:TMP2, TMP1, RD
-- | clrso TMP1
-+ | mtxer ZERO
- | lp TMP2, TRACE:TMP2->mcode
- | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
- | mtctr TMP2
-@@ -5799,7 +6220,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- |
- |3: // Clear missing parameters.
-- | stwx TISNIL, BASE, NARGS8:RC
-+ | stwx TISNIL, BASE_HI, NARGS8:RC
- | addi NARGS8:RC, NARGS8:RC, 8
- | b <2
- break;
-@@ -5816,11 +6237,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz TMP2, L->maxstack
- | add TMP1, BASE, RC
- | add TMP0, RA, RC
-- | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
-+ | stw LFUNC:RB, WORD_LO(TMP1) // Store copy of LFUNC.
- | addi TMP3, RC, 8+FRAME_VARG
- | lwz KBASE, -4+PC2PROTO(k)(PC)
- | cmplw TMP0, TMP2
-- | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
-+ | stw TMP3, WORD_HI(TMP1) // Store delta + FRAME_VARG.
- | bge ->vm_growstack_l
- | lbz TMP2, -4+PC2PROTO(numparams)(PC)
- | mr RA, BASE
-@@ -5831,18 +6252,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | beq >3
- |1:
- | cmplw RA, RC // Less args than parameters?
-- | lwz TMP0, 0(RA)
-- | lwz TMP3, 4(RA)
-+ | lwz TMP0, WORD_HI(RA)
-+ | lwz TMP3, WORD_LO(RA)
- | bge >4
-- | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
-+ | stw TISNIL, WORD_HI(RA) // Clear old fixarg slot (help the GC).
- | addi RA, RA, 8
- |2:
- | addic. TMP2, TMP2, -1
-- | stw TMP0, 8(TMP1)
-- | stw TMP3, 12(TMP1)
-+ | stw TMP0, WORD_HI+8(TMP1)
-+ | stw TMP3, WORD_LO+8(TMP1)
- | addi TMP1, TMP1, 8
- | bne <1
- |3:
-+ | addi BASEP4, BASE, 4
- | ins_next2
- |
- |4: // Clear missing parameters.
-@@ -5854,35 +6276,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_FUNCCW:
- | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
- if (op == BC_FUNCC) {
-- | lp RD, CFUNC:RB->f
-+ | lp FUNCREG, CFUNC:RB->f
- } else {
-- | lp RD, DISPATCH_GL(wrapf)(DISPATCH)
-+ | lp FUNCREG, DISPATCH_GL(wrapf)(DISPATCH)
- }
- | add TMP1, RA, NARGS8:RC
- | lwz TMP2, L->maxstack
-- | .toc lp TMP3, 0(RD)
-+ | .opd lp TMP3, 0(FUNCREG)
- | add RC, BASE, NARGS8:RC
- | stp BASE, L->base
- | cmplw TMP1, TMP2
- | stp RC, L->top
- | li_vmstate C
-- |.if TOC
-+ |.if OPD
- | mtctr TMP3
- |.else
-- | mtctr RD
-+ | mtctr FUNCREG
- |.endif
- if (op == BC_FUNCCW) {
- | lp CARG2, CFUNC:RB->f
- }
- | mr CARG1, L
- | bgt ->vm_growstack_c // Need to grow stack.
-- | .toc lp TOCREG, TOC_OFS(RD)
-- | .tocenv lp ENVREG, ENV_OFS(RD)
-+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
-+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
- | st_vmstate
- | bctrl // (lua_State *L [, lua_CFunction f])
-+ | .toc lp TOCREG, SAVE_TOC
- | // Returns nresults.
- | lp BASE, L->base
-- | .toc ld TOCREG, SAVE_TOC
- | slwi RD, CRET1, 3
- | lp TMP1, L->top
- | li_vmstate INTERP
-@@ -5933,7 +6355,11 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.byte 0x1\n"
- "\t.string \"\"\n"
- "\t.uleb128 0x1\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.sleb128 -8\n"
-+#else
- "\t.sleb128 -4\n"
-+#endif
- "\t.byte 65\n"
- "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
- "\t.align 2\n"
-@@ -5946,14 +6372,24 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long .Lbegin\n"
- "\t.long %d\n"
- "\t.byte 0xe\n\t.uleb128 %d\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-+#endif
- fcofs, CFRAME_SIZE);
- for (i = 14; i <= 31; i++)
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n"
- "\t.byte %d\n\t.uleb128 %d\n",
-- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
-+#if LJ_ARCH_PPC32ON64
-+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
-+#else
-+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
-+#endif
-+ );
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE0:\n\n");
-@@ -5969,8 +6405,12 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long lj_vm_ffi_call\n"
- #endif
- "\t.long %d\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
-- "\t.byte 0x8e\n\t.uleb128 2\n"
-+#endif
-+ "\t.byte 0x8e\n\t.uleb128 1\n"
- "\t.byte 0xd\n\t.uleb128 0xe\n"
- "\t.align 2\n"
- ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
-@@ -5985,7 +6425,11 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.byte 0x1\n"
- "\t.string \"zPR\"\n"
- "\t.uleb128 0x1\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.sleb128 -8\n"
-+#else
- "\t.sleb128 -4\n"
-+#endif
- "\t.byte 65\n"
- "\t.uleb128 6\n" /* augmentation length */
- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-@@ -6003,14 +6447,24 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long %d\n"
- "\t.uleb128 0\n" /* augmentation length */
- "\t.byte 0xe\n\t.uleb128 %d\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-+#endif
- fcofs, CFRAME_SIZE);
- for (i = 14; i <= 31; i++)
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n"
- "\t.byte %d\n\t.uleb128 %d\n",
-- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
-+#if LJ_ARCH_PPC32ON64
-+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
-+#else
-+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
-+#endif
-+ );
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE2:\n\n");
-@@ -6038,8 +6492,12 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long lj_vm_ffi_call-.\n"
- "\t.long %d\n"
- "\t.uleb128 0\n" /* augmentation length */
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
-- "\t.byte 0x8e\n\t.uleb128 2\n"
-+#endif
-+ "\t.byte 0x8e\n\t.uleb128 1\n"
- "\t.byte 0xd\n\t.uleb128 0xe\n"
- "\t.align 2\n"
- ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
---
-2.21.0
-
diff --git a/remove-setrlimit-on-freebsd.patch b/remove-setrlimit-on-freebsd.patch
deleted file mode 100644
index 34f0986..0000000
--- a/remove-setrlimit-on-freebsd.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-commit 320b751a3ec8c2495503ec0b9270f61e3459283a
-Author: myfreeweb <greg(a)unrelenting.technology>
-Date: Mon May 20 21:53:04 2019 +0300
-
- Remove setrlimit on FreeBSD
-
- An embeddable interpreter setting a limit that's inherited by spawned child processes is a disaster.. (in fact, the weird limit in Neovim's terminal was what caused me to discover this.)
-
- This code is a relic from the FreeBSD <10 days, and the check for FreeBSD version was not correct (__FreeBSD_kernel__ is defined anyway on newer versions). Let's just get rid of this.
-
-diff --git a/src/lj_alloc.c b/src/lj_alloc.c
-index 33a2eb8f..73913011 100644
---- a/src/lj_alloc.c
-+++ b/src/lj_alloc.c
-@@ -344,20 +344,6 @@ static void *CALL_MMAP(size_t size)
- }
- #endif
-
--#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
--
--#include <sys/resource.h>
--
--static void init_mmap(void)
--{
-- struct rlimit rlim;
-- rlim.rlim_cur = rlim.rlim_max = 0x10000;
-- setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
--}
--#define INIT_MMAP() init_mmap()
--
--#endif
--
- static int CALL_MUNMAP(void *ptr, size_t size)
- {
- int olderr = errno;
diff --git a/test-check-for-package_searchers-only-in-compat5_2.patch b/test-check-for-package_searchers-only-in-compat5_2.patch
deleted file mode 100644
index 04e58a1..0000000
--- a/test-check-for-package_searchers-only-in-compat5_2.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From 2d4d73df7f04c0e3cb6ec2983d23e418342f17d9 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Mon, 17 Jun 2019 13:50:57 +0530
-Subject: [PATCH] test: Check for package.searchers only in compat5.2
-
-LuaJIT version check for lua will return true for +lua<5.2 since it
-does not fully implement 5.2. Move the (not package.searchers) check
-to +compat5.2 instead of the version check since it is implemented by
-compat5.2.
----
- test/lib/contents.lua | 5 ++++-
- 1 file changed, 4 insertions(+), 1 deletion(-)
-
-diff --git a/test/lib/contents.lua b/test/lib/contents.lua
-index 2baacd5c..09866f6f 100644
---- a/test/lib/contents.lua
-+++ b/test/lib/contents.lua
-@@ -121,10 +121,13 @@ end
-
- do --- pre-5.2 package +lua<5.2
- assert(package.loaders)
-- assert(not package.searchers)
- assert(package.seeall)
- end
-
-+do --- 5.2 compat package +compat5.2
-+ assert(package.searchers)
-+end
-+
- do --- 5.2 package +lua>=5.2
- assert(not package.loaders)
- assert(package.searchers)
---
-2.21.0
-