[iortcw] 431/497: All: Add Ludwig Nussel's ARMv7 VM support
Simon McVittie
smcv at debian.org
Fri Sep 8 10:37:42 UTC 2017
This is an automated email from the git hooks/post-receive script.
smcv pushed a commit to annotated tag 1.42d
in repository iortcw.
commit 001a7e0b4246845af54436a38cea15ca3e9c0ad2
Author: Donny <donny at raspberrypi.(none)>
Date: Wed Nov 18 05:58:07 2015 -0500
All: Add Ludwig Nussel's ARMv7 VM support
---
MP/Makefile | 12 +
MP/code/qcommon/vm_armv7l.c | 1210 +++++++++++++++++++++++++++++++++++++++++++
MP/make-raspberrypi.sh | 5 +-
SP/Makefile | 12 +
SP/code/qcommon/vm_armv7l.c | 1210 +++++++++++++++++++++++++++++++++++++++++++
SP/make-raspberrypi.sh | 1 -
6 files changed, 2446 insertions(+), 4 deletions(-)
diff --git a/MP/Makefile b/MP/Makefile
index 5f1ff8a..1a35b4e 100644
--- a/MP/Makefile
+++ b/MP/Makefile
@@ -8,6 +8,8 @@ COMPILE_PLATFORM=$(shell uname|sed -e s/_.*//|tr '[:upper:]' '[:lower:]'|sed -e
COMPILE_ARCH=$(shell uname -m | sed -e s/i.86/i386/ | sed -e 's/^arm.*/arm/')
+ARM_VER_CHECK=$(shell uname -m)
+
ifeq ($(COMPILE_PLATFORM),sunos)
# Solaris uname and GNU uname differ
COMPILE_ARCH=$(shell uname -p | sed -e s/i.86/i386/)
@@ -396,6 +398,9 @@ ifneq (,$(findstring "$(PLATFORM)", "linux" "gnu_kfreebsd" "kfreebsd-gnu" "gnu")
OPTIMIZEVM += -mtune=ultrasparc3 -mv8plus
HAVE_VM_COMPILED=true
endif
+ ifeq ($(ARM_VER_CHECK),armv7l)
+ HAVE_VM_COMPILED=true
+ endif
ifeq ($(ARCH),alpha)
# According to http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=410555
# -ffast-math will cause the client to die with SIGFPE on Alpha
@@ -1356,6 +1361,7 @@ targets: makedirs
@echo " VERSION: $(VERSION)"
@echo " COMPILE_PLATFORM: $(COMPILE_PLATFORM)"
@echo " COMPILE_ARCH: $(COMPILE_ARCH)"
+ @echo " HAVE_VM_COMPILED: $(HAVE_VM_COMPILED)"
@echo " CC: $(CC)"
@echo " CXX: $(CXX)"
ifdef MINGW
@@ -2155,6 +2161,9 @@ ifeq ($(HAVE_VM_COMPILED),true)
ifeq ($(ARCH),sparc)
Q3OBJ += $(B)/client/vm_sparc.o
endif
+ ifeq ($(ARM_VER_CHECK),armv7l)
+ Q3OBJ += $(B)/client/vm_armv7l.o
+ endif
endif
ifdef MINGW
@@ -2328,6 +2337,9 @@ ifeq ($(HAVE_VM_COMPILED),true)
ifeq ($(ARCH),sparc)
Q3DOBJ += $(B)/ded/vm_sparc.o
endif
+ ifeq ($(ARM_VER_CHECK),armv7l)
+  Q3DOBJ += $(B)/ded/vm_armv7l.o
+ endif
endif
ifdef MINGW
diff --git a/MP/code/qcommon/vm_armv7l.c b/MP/code/qcommon/vm_armv7l.c
new file mode 100644
index 0000000..dd63f5c
--- /dev/null
+++ b/MP/code/qcommon/vm_armv7l.c
@@ -0,0 +1,1210 @@
+/*
+===========================================================================
+Copyright (C) 2009 David S. Miller <davem at davemloft.net>
+Copyright (C) 2013,2014 SUSE Linux Products GmbH
+
+This file is part of Quake III Arena source code.
+
+Quake III Arena source code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Quake III Arena source code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Quake III Arena source code; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+===========================================================================
+
+ARMv7l VM by Ludwig Nussel <ludwig.nussel at suse.de>
+
+TODO: optimization
+
+Docs:
+http://www.coranac.com/tonc/text/asm.htm
+http://www.heyrick.co.uk/armwiki/Category:Opcodes
+ARMv7-A_ARMv7-R_DDI0406_2007.pdf
+*/
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stddef.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "vm_local.h"
+#define R0 0
+#define R1 1
+#define R2 2
+#define R3 3
+#define R4 4
+
+#define R12 12
+
+#define FP 11
+#define SP 13
+#define LR 14
+#define PC 15
+
+#define APSR_nzcv 15
+
+#define S14 14
+#define S15 15
+
+#define rOPSTACK 5
+#define rOPSTACKBASE 6
+#define rCODEBASE 7
+#define rPSTACK 8
+#define rDATABASE 9
+#define rDATAMASK 10
+
+#define bit(x) (1<<x)
+
+/* arm eabi, builtin gcc functions */
+int __aeabi_idiv (int, int);
+unsigned __aeabi_uidiv (unsigned, unsigned);
+void __aeabi_idivmod(void);
+void __aeabi_uidivmod(void);
+
+/* exit() won't actually be called, but it is used because it is marked noreturn */
+#define DIE( reason, args... ) \
+ do { \
+ Com_Error(ERR_DROP, "vm_arm compiler error: " reason, ##args); \
+ exit(1); \
+ } while(0)
+
+/*
+ * opcode information table:
+ * - length of immediate value
+ * - returned register type
+ * - required register(s) type
+ */
+#define opImm0 0x0000 /* no immediate */
+#define opImm1 0x0001 /* 1-byte immediate value after opcode */
+#define opImm4 0x0002 /* 4-byte immediate value after opcode */
+
+#define opRet0 0x0000 /* returns nothing */
+#define opRetI 0x0004 /* returns integer */
+#define opRetF 0x0008 /* returns float */
+#define opRetIF (opRetI | opRetF) /* returns integer or float */
+
+#define opArg0 0x0000 /* requires nothing */
+#define opArgI 0x0010 /* requires integer(s) */
+#define opArgF 0x0020 /* requires float(s) */
+#define opArgIF (opArgI | opArgF) /* requires integer or float */
+
+#define opArg2I 0x0040 /* requires second argument, integer */
+#define opArg2F 0x0080 /* requires second argument, float */
+#define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */
+
+static const unsigned char vm_opInfo[256] =
+{
+ [OP_UNDEF] = opImm0,
+ [OP_IGNORE] = opImm0,
+ [OP_BREAK] = opImm0,
+ [OP_ENTER] = opImm4,
+ /* OP_LEAVE has to accept floats, they will be converted to ints */
+ [OP_LEAVE] = opImm4 | opRet0 | opArgIF,
+ /* only STORE4 and POP use values from OP_CALL,
+ * no need to convert floats back */
+ [OP_CALL] = opImm0 | opRetI | opArgI,
+ [OP_PUSH] = opImm0 | opRetIF,
+ [OP_POP] = opImm0 | opRet0 | opArgIF,
+ [OP_CONST] = opImm4 | opRetIF,
+ [OP_LOCAL] = opImm4 | opRetI,
+ [OP_JUMP] = opImm0 | opRet0 | opArgI,
+
+ [OP_EQ] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_NE] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LTI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LEI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GTI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GEI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LTU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LEU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GTU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GEU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_EQF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_NEF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_LTF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_LEF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_GTF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_GEF] = opImm4 | opRet0 | opArgF | opArg2F,
+
+ [OP_LOAD1] = opImm0 | opRetI | opArgI,
+ [OP_LOAD2] = opImm0 | opRetI | opArgI,
+ [OP_LOAD4] = opImm0 | opRetIF| opArgI,
+ [OP_STORE1] = opImm0 | opRet0 | opArgI | opArg2I,
+ [OP_STORE2] = opImm0 | opRet0 | opArgI | opArg2I,
+ [OP_STORE4] = opImm0 | opRet0 | opArgIF| opArg2I,
+ [OP_ARG] = opImm1 | opRet0 | opArgIF,
+ [OP_BLOCK_COPY] = opImm4 | opRet0 | opArgI | opArg2I,
+
+ [OP_SEX8] = opImm0 | opRetI | opArgI,
+ [OP_SEX16] = opImm0 | opRetI | opArgI,
+ [OP_NEGI] = opImm0 | opRetI | opArgI,
+ [OP_ADD] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_SUB] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_DIVI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_DIVU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MODI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MODU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MULI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MULU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BAND] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BOR] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BXOR] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BCOM] = opImm0 | opRetI | opArgI,
+ [OP_LSH] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_RSHI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_RSHU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_NEGF] = opImm0 | opRetF | opArgF,
+ [OP_ADDF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_SUBF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_DIVF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_MULF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_CVIF] = opImm0 | opRetF | opArgI,
+ [OP_CVFI] = opImm0 | opRetI | opArgF,
+};
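+/* Example reading of the table: OP_ADD is opImm0|opRetI|opArgI|opArg2I,
+ * i.e. it carries no inline immediate, consumes two integers from the
+ * opstack and leaves one integer result. */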
+
+#ifdef DEBUG_VM
+static const char *opnames[256] = {
+ "OP_UNDEF", "OP_IGNORE", "OP_BREAK", "OP_ENTER", "OP_LEAVE", "OP_CALL",
+ "OP_PUSH", "OP_POP", "OP_CONST", "OP_LOCAL", "OP_JUMP",
+ "OP_EQ", "OP_NE", "OP_LTI", "OP_LEI", "OP_GTI", "OP_GEI",
+ "OP_LTU", "OP_LEU", "OP_GTU", "OP_GEU", "OP_EQF", "OP_NEF",
+ "OP_LTF", "OP_LEF", "OP_GTF", "OP_GEF",
+ "OP_LOAD1", "OP_LOAD2", "OP_LOAD4", "OP_STORE1", "OP_STORE2",
+ "OP_STORE4", "OP_ARG", "OP_BLOCK_COPY",
+ "OP_SEX8", "OP_SEX16",
+ "OP_NEGI", "OP_ADD", "OP_SUB", "OP_DIVI", "OP_DIVU",
+ "OP_MODI", "OP_MODU", "OP_MULI", "OP_MULU", "OP_BAND",
+ "OP_BOR", "OP_BXOR", "OP_BCOM", "OP_LSH", "OP_RSHI", "OP_RSHU",
+ "OP_NEGF", "OP_ADDF", "OP_SUBF", "OP_DIVF", "OP_MULF",
+ "OP_CVIF", "OP_CVFI",
+};
+
+#define NOTIMPL(x) \
+ do { Com_Error(ERR_DROP, "instruction not implemented: %s", opnames[x]); } while(0)
+#else
+#define NOTIMPL(x) \
+ do { Com_Printf(S_COLOR_RED "instruction not implemented: %x\n", x); vm->compiled = qfalse; return; } while(0)
+#endif
+
+static void VM_Destroy_Compiled(vm_t *vm)
+{
+ if (vm->codeBase) {
+ if (munmap(vm->codeBase, vm->codeLength))
+ Com_Printf(S_COLOR_RED "Memory unmap failed, possible memory leak\n");
+ }
+ vm->codeBase = NULL;
+}
+
+/*
+=================
+ErrJump
+Error handler for jump/call to invalid instruction number
+=================
+*/
+
+static void __attribute__((__noreturn__)) ErrJump(unsigned num)
+{
+ Com_Error(ERR_DROP, "program tried to execute code outside VM (%x)", num);
+}
+
+static int asmcall(int call, int pstack)
+{
+ // save currentVM so as to allow for recursive VM entry
+ vm_t *savedVM = currentVM;
+ int i, ret;
+
+ // modify VM stack pointer for recursive VM entry
+ currentVM->programStack = pstack - 4;
+
+ if (sizeof(intptr_t) == sizeof(int)) {
+ intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + pstack + 4);
+ argPosition[0] = -1 - call;
+ ret = currentVM->systemCall(argPosition);
+ } else {
+ intptr_t args[MAX_VMSYSCALL_ARGS];
+
+ args[0] = -1 - call;
+ int *argPosition = (int *)((byte *)currentVM->dataBase + pstack + 4);
+ for( i = 1; i < ARRAY_LEN(args); i++ )
+ args[i] = argPosition[i];
+
+ ret = currentVM->systemCall(args);
+ }
+
+ currentVM = savedVM;
+
+ return ret;
+}
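+/* Illustration of the numbering above: the VM encodes system call n as
+ * the negative instruction address -1-n, so asmcall receiving call=-1
+ * computes args[0] = -1 - (-1) = 0, i.e. system call 0. */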
+
+void _emit(vm_t *vm, unsigned isn, int pass)
+{
+#if 0
+ static int fd = -2;
+ if (fd == -2)
+ fd = open("code.bin", O_TRUNC|O_WRONLY|O_CREAT, 0644);
+ if (fd > 0)
+ write(fd, &isn, 4);
+#endif
+
+ if (pass)
+ memcpy(vm->codeBase+vm->codeLength, &isn, 4);
+ vm->codeLength+=4;
+}
+
+#define emit(isn) _emit(vm, isn, pass)
+
+static unsigned char off8(unsigned val)
+{
+ if (val&3)
+ DIE("offset must be multiple of four");
+ if (val > 1020)
+ DIE("offset too large");
+ return val>>2;
+}
+
+// ARM is really crazy ...
+static unsigned short rimm(unsigned val)
+{
+ unsigned shift = 0;
+ if (val < 256)
+ return val;
+ // rotate the value until it fits
+ while (shift < 16 && (val>255 || !(val&3))) {
+ val = (val&3)<<30 | val>>2;
+ ++shift;
+ }
+ if (shift > 15 || val > 255) {
+ DIE("immediate cannot be encoded (%d, %d)\n", shift, val);
+ }
+ return (16-shift)<<8 | val;
+}
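+/* Worked example: rimm(0x3F000000) rotates the value right two bits at a
+ * time until only 0x3F remains (12 rotations), then returns
+ * (16-12)<<8 | 0x3F = 0x43F -- imm8 0x3F with rotate field 4, which the
+ * CPU decodes as 0x3F ROR 8 = 0x3F000000. */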
+
+// same as rimm() but doesn't DIE; returns 0 if the value is not encodable, so don't call it with zero as the argument!
+static unsigned short can_encode(unsigned val)
+{
+ unsigned shift = 0;
+ if (!val)
+ DIE("can_encode: invalid argument");
+ if (val < 256)
+ return val;
+ // rotate the value until it fits
+ while (shift < 16 && (val>255 || !(val&3))) {
+ val = (val&3)<<30 | val>>2;
+ ++shift;
+ }
+ if (shift > 15 || val > 255) {
+ return 0;
+ }
+ return (16-shift)<<8 | val;
+}
+
+#define PREINDEX (1<<24)
+
+#define rASR(i, reg) (0b10<<5 | ((i&31)<<7) | reg)
+#define rLSL(i, reg) (0b00<<5 | ((i&31)<<7) | reg)
+#define rLSR(i, reg) (0b01<<5 | ((i&31)<<7) | reg)
+#define rROR(i, reg) (0b11<<5 | ((i&31)<<7) | reg)
+
+// conditions
+#define EQ (0b0000<<28)
+#define NE (0b0001<<28)
+#define CS (0b0010<<28)
+#define HS CS
+#define CC (0b0011<<28)
+#define LO CC
+#define MI (0b0100<<28)
+#define PL (0b0101<<28)
+#define VS (0b0110<<28)
+#define VC (0b0111<<28)
+#define HI (0b1000<<28)
+#define LS (0b1001<<28)
+#define GE (0b1010<<28)
+#define LT (0b1011<<28)
+#define GT (0b1100<<28)
+#define LE (0b1101<<28)
+#define AL (0b1110<<28)
+#define cond(what, op) (what | (op&~AL))
+
+// XXX: v not correctly computed
+#define BKPT(v) (AL | 0b10010<<20 | ((v&~0xF)<<4) | 0b0111<<4 | (v&0xF))
+
+#define YIELD (0b110010<<20 | 0b1111<<12 | 1)
+#define NOP cond(AL, YIELD)
+
+// immediate value must be encodable as a rotated 8-bit value (see rimm)
+#define ANDi(dst, src, i) (AL | (0b001<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define EORi(dst, src, i) (AL | (0b001<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define SUBi(dst, src, i) (AL | (0b001<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define RSBi(dst, src, i) (AL | (0b001<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define ADDi(dst, src, i) (AL | (0b001<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define ADCi(dst, src, i) (AL | (0b001<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define SBCi(dst, src, i) (AL | (0b001<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define RSCi(dst, src, i) (AL | (0b001<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | rimm(i))
+
+#define ORRi(dst, src, i) (AL | (0b001<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define MOVi(dst, i) (AL | (0b001<<25) | (0b11010<<20) | (dst<<12) | rimm(i))
+#define BICi(dst, src, i) (AL | (0b001<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define MVNi(dst, i) (AL | (0b001<<25) | (0b11110<<20) | (dst<<12) | rimm(i))
+
+#define MOVW(dst, i) (AL | (0b11<<24) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
+#define MOVT(dst, i) (AL | (0b11<<24) | (0b0100<<20) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
+
+#define TSTi( src, i) (AL | (0b001<<25) | (0b10001<<20) | (src<<16) | rimm(i))
+#define TEQi( src, i) (AL | (0b001<<25) | (0b10011<<20) | (src<<16) | rimm(i))
+#define CMPi( src, i) (AL | (0b001<<25) | (0b10101<<20) | (src<<16) | rimm(i))
+#define CMNi( src, i) (AL | (0b001<<25) | (0b10111<<20) | (src<<16) | rimm(i))
+
+#define ANDSi(dst, src, i) (ANDi(dst, src, i) | (1<<20))
+#define EORSi(dst, src, i) (EORi(dst, src, i) | (1<<20))
+#define SUBSi(dst, src, i) (SUBi(dst, src, i) | (1<<20))
+#define RSBSi(dst, src, i) (RSBi(dst, src, i) | (1<<20))
+#define ADDSi(dst, src, i) (ADDi(dst, src, i) | (1<<20))
+#define ADCSi(dst, src, i) (ADCi(dst, src, i) | (1<<20))
+#define SBCSi(dst, src, i) (SBCi(dst, src, i) | (1<<20))
+#define RSCSi(dst, src, i) (RSCi(dst, src, i) | (1<<20))
+
+#define ORRSi(dst, src, i) (ORRi(dst, src, i) | (1<<20))
+#define MOVSi(dst, i) (MOVi(dst, i) | (1<<20))
+#define BICSi(dst, src, i) (BICi(dst, src, i) | (1<<20))
+#define MVNSi(dst, i) (MVNi(dst, i) | (1<<20))
+
+#define AND(dst, src, reg) (AL | (0b000<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | reg)
+#define EOR(dst, src, reg) (AL | (0b000<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | reg)
+#define SUB(dst, src, reg) (AL | (0b000<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | reg)
+#define RSB(dst, src, reg) (AL | (0b000<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | reg)
+#define ADD(dst, src, reg) (AL | (0b000<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | reg)
+#define ADC(dst, src, reg) (AL | (0b000<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | reg)
+#define SBC(dst, src, reg) (AL | (0b000<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | reg)
+#define RSC(dst, src, reg) (AL | (0b000<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | reg)
+
+#define ORR(dst, src, reg) (AL | (0b000<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | reg)
+#define MOV(dst, src) (AL | (0b000<<25) | (0b11010<<20) | (dst<<12) | src)
+
+#define LSL(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0001<<4) | src)
+#define LSR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0011<<4) | src)
+#define ASR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0101<<4) | src)
+#define ROR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0111<<4) | src)
+
+#define LSLi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b000<<4) | src)
+#define LSRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b010<<4) | src)
+#define ASRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b100<<4) | src)
+#define RORi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b110<<4) | src)
+#define RRX(dst, src) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (0b110<<4) | src)
+
+#define BIC(dst, src, reg) (AL | (0b000<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | reg)
+#define MVN(dst, reg) (AL | (0b000<<25) | (0b11110<<20) | (dst<<12) | reg)
+
+#define TST( src, reg) (AL | (0b000<<25) | (0b10001<<20) | (src<<16) | reg)
+#define TEQ( src, reg) (AL | (0b000<<25) | (0b10011<<20) | (src<<16) | reg)
+#define CMP( src, reg) (AL | (0b000<<25) | (0b10101<<20) | (src<<16) | reg)
+#define CMN( src, reg) (AL | (0b000<<25) | (0b10111<<20) | (src<<16) | reg)
+
+#define LDRa(dst, base, off) (AL | (0b011<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRx(dst, base, off) (AL | (0b011<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | off)
+
+#define LDRai(dst, base, off) (AL | (0b010<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRxi(dst, base, off) (AL | (0b010<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+
+#define LDRTa(dst, base, off) (AL | (0b011<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRTx(dst, base, off) (AL | (0b011<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRTai(dst, base, off) (AL | (0b010<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRTxi(dst, base, off) (AL | (0b010<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+
+#define LDRBa(dst, base, off) (AL | (0b011<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRSBai(dst, base, off) (AL | (0b000<<25) | (0b0110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1101<<4|(off&0x0F))
+#define STRBa(dst, base, off) (AL | (0b011<<25) | (0b1110<<21) | (0<<20) | base<<16 | dst<<12 | off)
+
+#define LDRHa(dst, base, off) (AL | (0b000<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
+#define LDRSHai(dst, base, off) (AL | (0b000<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1111<<4|(off&0x0F))
+#define STRHa(dst, base, off) (AL | (0b000<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
+
+#define STRa(dst, base, off) (AL | (0b011<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | off)
+#define STRx(dst, base, off) (AL | (0b011<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | off)
+#define STRai(dst, base, off) (AL | (0b010<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRxi(dst, base, off) (AL | (0b010<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRaiw(dst, base, off) (AL | (0b010<<25) | (0b1101<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+
+// load with post-increment
+#define POP1(reg) (AL | (0b010<<25) | (0b0100<<21) | (1<<20) | SP<<16 | reg<<12 | reg)
+// store with pre-decrement (push)
+#define PUSH1(reg) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | SP<<16 | reg<<12 | 4)
+
+// branch to target address (for small jumps)
+#define Bi(i) \
+ (AL | (0b10)<<26 | (1<<25) /*I*/ | (0<<24) /*L*/ | (i))
+// call subroutine
+#define BLi(i) \
+ (AL | (0b10)<<26 | (1<<25) /*I*/ | (1<<24) /*L*/ | (i))
+// branch and exchange (register)
+#define BX(reg) \
+ (AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0001<<4 | reg)
+// call subroutine (register)
+#define BLX(reg) \
+ (AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0011<<4 | reg)
+
+#define PUSH(mask) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | mask)
+#define PUSH2(r1, r2) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | 1<<r1 | 1<<r2)
+//#define PUSH1(reg) STRxiw(SP, reg, 4)
+
+#define POP(mask) (0xe8bd0000|mask)
+
+#define STM(base, regs) \
+ (AL | 0b100<<25 | 0<<24/*P*/| 0<<24/*U*/| 0<<24/*S*/| 0<<24/*W*/ | (base<<16) | (regs&~(1<<16)))
+
+// note: op1 and op2 must not be the same
+#define MUL(op1, op2, op3) \
+ (AL | 0b0000000<<21 | (1<<20) /*S*/ | (op1<<16) | (op3<<8) | 0b1001<<4 | (op2))
+
+// puts integer in R0
+#define emit_MOVR0i(arg) emit_MOVRxi(R0, arg)
+
+// puts integer arg in register reg
+#define emit_MOVRxi(reg, arg) do { \
+ emit(MOVW(reg, (arg&0xFFFF))); \
+ if (arg > 0xFFFF) \
+ emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
+ } while(0)
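+/* e.g. emit_MOVRxi(R0, 0x12345678) emits MOVW r0,#0x5678 then
+ * MOVT r0,#0x1234: the MOVW/MOVT pair can materialize any 32-bit
+ * constant in at most two instructions. */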
+
+// puts integer arg in register reg; emits a NOP when only one
+// instruction is needed, so the emitted size stays constant
+#define emit_MOVRxi_or_NOP(reg, arg) do { \
+ emit(MOVW(reg, (arg&0xFFFF))); \
+ if (arg > 0xFFFF) \
+ emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
+ else \
+ emit(NOP); \
+ } while(0)
+
+// arm core register -> single precision register
+#define VMOVass(Vn, Rt) (AL|(0b1110<<24)|(0b000<<21)|(0<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
+// single precision register -> arm core register
+#define VMOVssa(Rt, Vn) (AL|(0b1110<<24)|(0b000<<21)|(1<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
+
+#define _VCVT_F(Vd, Vm, opc2, op) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b111<<19)|(opc2<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(op<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VCVT_F32_U32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 0 /* unsigned */)
+#define VCVT_U32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b100, 1 /* round zero */)
+#define VCVT_F32_S32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 1 /* signed */)
+#define VCVT_S32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b101, 1 /* round zero */)
+
+#define VLDRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|1<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
+#define VSTRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|0<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
+
+#define VNEG_F32(Vd, Vm) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|(1<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+
+#define VADD_F32(Vd, Vn, Vm) \
+ (AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VSUB_F32(Vd, Vn, Vm) \
+ (AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VMUL_F32(Vd, Vn, Vm) \
+ (AL|(0b11100<<23)|((Vd&1)<<22)|(0b10<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101)<<9|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VDIV_F32(Vd, Vn, Vm) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b00<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+
+#define _VCMP_F32(Vd, Vm, E) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|((0b0100)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(E<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VCMP_F32(Vd, Vm) _VCMP_F32(Vd, Vm, 0)
+
+#define VMRS(Rt) \
+ (AL|(0b11101111<<20)|(0b0001<<16)|(Rt<<12)|(0b1010<<8)|(1<<4))
+
+// check if instruction in R0 is within range. Clobbers R1, R12
+#define CHECK_JUMP do { \
+ static int bytes_to_skip = -1; \
+ static unsigned branch = -1; \
+ emit_MOVRxi(R1, (unsigned)vm->instructionCount); \
+ emit(CMP(R0, R1)); \
+ if (branch == -1) \
+ branch = vm->codeLength; \
+ emit(cond(LT, Bi(j_rel(bytes_to_skip)))); \
+ emit_MOVRxi_or_NOP(R12, (unsigned)ErrJump); \
+ emit(BLX(R12)); \
+ if (bytes_to_skip == -1) \
+ bytes_to_skip = vm->codeLength - branch; \
+} while(0)
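+/* The static branch/bytes_to_skip pair exploits the two-pass layout used
+ * by VM_Compile below: the forward-branch distance is measured on the
+ * sizing pass (pass 0) and replayed as a fixed offset when pass 1
+ * actually emits code. */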
+
+//#define CONST_OPTIMIZE
+#ifdef CONST_OPTIMIZE
+#define MAYBE_EMIT_CONST() \
+ if (got_const) \
+ { \
+ got_const = 0; \
+ vm->instructionPointers[instruction-1] = assembler_get_code_size(); \
+ STACK_PUSH(4); \
+ emit("movl $%d, (%%r9, %%rbx, 4)", const_value); \
+ }
+#else
+#define MAYBE_EMIT_CONST()
+#endif
+
+// optimize: use load multiple
+#define IJ(comparator) do { \
+ MAYBE_EMIT_CONST(); \
+ emit_MOVRxi(R0, arg.i); \
+ CHECK_JUMP; \
+ emit(LDRTxi(R0, rOPSTACK, 4)); \
+ emit(LDRTxi(R1, rOPSTACK, 4)); \
+ emit(CMP(R1, R0)); \
+ emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
+} while (0)
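+/* IJ pops the right-hand operand into R0 and the left-hand one into R1,
+ * compares R1 against R0 and emits a conditional branch to the native
+ * offset recorded for VM instruction arg.i. */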
+
+#define FJ(comparator) do { \
+ emit_MOVRxi(R0, arg.i); \
+ CHECK_JUMP; \
+ emit(SUBi(rOPSTACK, rOPSTACK, 8)); \
+ emit(VLDRa(S15, rOPSTACK, 4)); \
+ emit(VLDRa(S14, rOPSTACK, 8)); \
+ emit(VCMP_F32(S15, S14)); \
+ emit(VMRS(APSR_nzcv)); \
+ emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
+} while (0)
+
+#define printreg(reg) emit(PUSH1(R3)); emit(BLX(reg)); emit(POP1(R3));
+
+static inline unsigned _j_rel(int x, int pc)
+{
+ if (x&3) goto err;
+ x = (x>>2)-2;
+ if (x < 0)
+ {
+ if ((x&(0xFF<<24)) != 0xFF<<24)
+ goto err;
+ x &= ~(0xFF<<24);
+ }
+ else if (x&(0xFF<<24))
+ goto err;
+ return x;
+err:
+ DIE("jump %d out of range at %d", x, pc);
+}
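+/* _j_rel converts a byte distance into the 24-bit word offset of B/BL:
+ * ARM branch offsets count words (hence >>2) relative to PC+8, i.e. two
+ * instructions ahead (hence the -2), and must fit in 24 signed bits. */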
+
+void VM_Compile(vm_t *vm, vmHeader_t *header)
+{
+ unsigned char *code;
+ int i_count, pc = 0;
+ int pass;
+ int codeoffsets[1024];
+
+#define j_rel(x) (pass?_j_rel(x, pc):0xBAD)
+#define OFFSET(i) (pass?(j_rel(codeoffsets[i]-vm->codeLength)):(0xF000000F))
+#define new_offset() (offsidx++)
+#define get_offset(i) (codeoffsets[i])
+#define save_offset(i) (codeoffsets[i] = vm->codeLength)
+#define OFF_CODE 0
+#define OFF_IMMEDIATES 1
+
+ vm->compiled = qfalse;
+
+ vm->codeBase = NULL;
+ vm->codeLength = 0;
+
+ for (pass = 0; pass < 2; ++pass) {
+
+ int offsidx = 0;
+
+ // const optimization
+ unsigned got_const = 0, const_value = 0;
+
+ if(pass)
+ {
+ vm->codeBase = mmap(NULL, vm->codeLength, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if(vm->codeBase == MAP_FAILED)
+ Com_Error(ERR_FATAL, "VM_CompileARM: can't mmap memory");
+ vm->codeLength = 0;
+ }
+
+ //int (*entry)(vm_t*, int*, int*);
+ emit(PUSH((((1<<8)-1)<<4)|(1<<14))); // push R4-R11, LR
+ emit(SUBi(SP, SP, 12)); // align stack!
+ emit(LDRai(rCODEBASE, R0, offsetof(vm_t, codeBase)));
+ emit(LDRai(rDATABASE, R0, offsetof(vm_t, dataBase)));
+ emit(LDRai(rDATAMASK, R0, offsetof(vm_t, dataMask)));
+ emit(LDRai(rPSTACK, R1, 0));
+ emit(MOV(rOPSTACK, R2)); // TODO: reverse opstack to avoid writing to return address
+ emit(MOV(rOPSTACKBASE, rOPSTACK));
+
+ emit(BLi(OFFSET(OFF_CODE)));
+
+ // save return value in r0
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+
+ emit(ADDi(SP, SP, 12)); // align stack!
+ emit(POP((((1<<8)-1)<<4)|(1<<15))); // pop R4-R11, LR -> PC
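+ /* note: the 12-byte pad keeps SP 8-byte aligned as the AAPCS requires;
+  * the PUSH above stores nine registers (36 bytes), so the pad rounds
+  * the frame up to 48 bytes. */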
+
+ /* save some immediates here */
+ emit(BKPT(0));
+ emit(BKPT(0));
+ save_offset(OFF_IMMEDIATES);
+// emit((unsigned)whatever);
+ emit(BKPT(0));
+ emit(BKPT(0));
+
+ save_offset(OFF_CODE);
+ offsidx = OFF_IMMEDIATES+1;
+
+ code = (unsigned char *) header + header->codeOffset;
+ pc = 0;
+
+ for (i_count = 0; i_count < header->instructionCount; i_count++) {
+ union {
+ unsigned char b[4];
+ unsigned int i;
+ } arg;
+ unsigned char op = code[pc++];
+
+ vm->instructionPointers[i_count] = vm->codeLength;
+
+ if (vm_opInfo[op] & opImm4)
+ {
+ memcpy(arg.b, &code[pc], 4);
+ pc += 4;
+#ifdef EXCESSIVE_DEBUG
+ Com_Printf("%d: instruction %d (%s %d), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
+#endif
+ }
+ else if (vm_opInfo[op] & opImm1)
+ {
+ arg.b[0] = code[pc];
+ ++pc;
+#ifdef EXCESSIVE_DEBUG
+ Com_Printf("%d: instruction %d (%s %hhd), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
+#endif
+ }
+ else
+ {
+#ifdef EXCESSIVE_DEBUG
+ Com_Printf("%d: instruction %d (%s), offset %d\n", pass, i_count, opnames[op], vm->codeLength);
+#endif
+ }
+
+ // TODO: for debug only
+ //emit_MOVRxi(R4, i_count);
+
+ switch ( op )
+ {
+ case OP_UNDEF:
+ break;
+
+ case OP_IGNORE:
+ NOTIMPL(op);
+ break;
+
+ case OP_BREAK:
+ emit(BKPT(0));
+ break;
+
+ case OP_ENTER:
+ MAYBE_EMIT_CONST();
+ emit(PUSH1(LR));
+ emit(SUBi(SP, SP, 12)); // align stack
+ if (arg.i == 0 || can_encode(arg.i))
+ {
+ emit(SUBi(rPSTACK, rPSTACK, arg.i)); // pstack -= arg
+ }
+ else
+ {
+ emit_MOVR0i(arg.i);
+ emit(SUB(rPSTACK, rPSTACK, R0)); // pstack -= arg
+ }
+ break;
+
+ case OP_LEAVE:
+ if (arg.i == 0 || can_encode(arg.i))
+ {
+ emit(ADDi(rPSTACK, rPSTACK, arg.i)); // pstack += arg
+ }
+ else
+ {
+ emit_MOVR0i(arg.i);
+ emit(ADD(rPSTACK, rPSTACK, R0)); // pstack += arg
+ }
+ emit(ADDi(SP, SP, 12));
+ emit(0xe49df004); // pop pc
+ break;
+
+ case OP_CALL:
+#if 0
+ // save next instruction
+ emit_MOVR0i(i_count);
+ emit(STRa(R0, rDATABASE, rPSTACK)); // dataBase[pstack] = r0
+#endif
+ if (got_const) {
+ NOTIMPL(op);
+ } else {
+ static int bytes_to_skip = -1;
+ static unsigned start_block = -1;
+ MAYBE_EMIT_CONST();
+ // get instruction nr from stack
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(CMPi(R0, 0)); // check if syscall
+ if (start_block == -1)
+ start_block = vm->codeLength;
+ emit(cond(LT, Bi(j_rel(bytes_to_skip))));
+ CHECK_JUMP;
+ emit_MOVRxi_or_NOP(R1, (unsigned)vm->instructionPointers);
+ emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
+ emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
+ emit(BLX(R0));
+ emit(Bi(j_rel(vm->instructionPointers[i_count+1]-vm->codeLength)));
+ if (bytes_to_skip == -1)
+ bytes_to_skip = vm->codeLength - start_block;
+ emit(MOV(R1, rPSTACK));
+ emit_MOVRxi(R12, (unsigned)asmcall);
+ emit(BLX(R12));
+ // store return value
+ emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+ }
+ break;
+
+ case OP_PUSH:
+ MAYBE_EMIT_CONST();
+ emit(ADDi(rOPSTACK, rOPSTACK, 4));
+ break;
+
+ case OP_POP:
+ MAYBE_EMIT_CONST();
+ emit(SUBi(rOPSTACK, rOPSTACK, 4));
+ break;
+
+ case OP_CONST:
+ MAYBE_EMIT_CONST();
+ emit_MOVR0i(arg.i);
+ emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+ break;
+
+ case OP_LOCAL:
+ MAYBE_EMIT_CONST();
+ if (arg.i == 0 || can_encode(arg.i))
+ {
+ emit(ADDi(R0, rPSTACK, arg.i)); // r0 = pstack+arg
+ }
+ else
+ {
+ emit_MOVR0i(arg.i);
+ emit(ADD(R0, rPSTACK, R0)); // r0 = pstack+arg
+ }
+ emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+ break;
+
+ case OP_JUMP:
+ if(got_const) {
+ NOTIMPL(op);
+ } else {
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ CHECK_JUMP;
+ emit_MOVRxi(R1, (unsigned)vm->instructionPointers);
+ emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
+ emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
+ emit(BLX(R0));
+ }
+ break;
+
+ case OP_EQ:
+ IJ(EQ);
+ break;
+
+ case OP_NE:
+ IJ(NE);
+ break;
+
+ case OP_LTI:
+ IJ(LT);
+ break;
+
+ case OP_LEI:
+ IJ(LE);
+ break;
+
+ case OP_GTI:
+ IJ(GT);
+ break;
+
+ case OP_GEI:
+ IJ(GE);
+ break;
+
+ case OP_LTU:
+ IJ(LO);
+ break;
+
+ case OP_LEU:
+ IJ(LS);
+ break;
+
+ case OP_GTU:
+ IJ(HI);
+ break;
+
+ case OP_GEU:
+ IJ(HS);
+ break;
+
+ case OP_EQF:
+ FJ(EQ);
+ break;
+
+ case OP_NEF:
+ FJ(NE);
+ break;
+
+ case OP_LTF:
+ FJ(LT);
+ break;
+
+ case OP_LEF:
+ FJ(LE);
+ break;
+
+ case OP_GTF:
+ FJ(GT);
+ break;
+
+ case OP_GEF:
+ FJ(GE);
+ break;
+
+ case OP_LOAD1:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+ emit(LDRBa(R0, rDATABASE, R0)); // r0 = (unsigned char)dataBase[r0]
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_LOAD2:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+ emit(LDRHa(R0, rDATABASE, R0)); // r0 = (unsigned short)dataBase[r0]
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_LOAD4:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+ emit(LDRa(R0, rDATABASE, R0)); // r0 = dataBase[r0]
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_STORE1:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ emit(STRBa(R0, rDATABASE, R1)); // database[r1] = r0
+ break;
+
+ case OP_STORE2:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ emit(STRHa(R0, rDATABASE, R1)); // database[r1] = r0
+ break;
+
+ case OP_STORE4:
+ MAYBE_EMIT_CONST();
+ // optimize: use load multiple
+ // value
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ // pointer
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ // store value at pointer
+ emit(STRa(R0, rDATABASE, R1)); // database[r1] = r0
+ break;
+
+ case OP_ARG:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(ADDi(R1, rPSTACK, arg.b[0])); // r1 = programStack+arg
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ emit(STRa(R0, rDATABASE, R1)); // dataBase[r1] = r0
+ break;
+
+ case OP_BLOCK_COPY:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(LDRTxi(R0, rOPSTACK, 4));
+ emit_MOVRxi(R2, arg.i);
+ emit_MOVRxi(R12, (unsigned)VM_BlockCopy);
+ emit(BLX(R12));
+ break;
+
+ case OP_SEX8:
+ MAYBE_EMIT_CONST();
+ emit(LDRSBai(R0, rOPSTACK, 0)); // sign extend *opstack
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_SEX16:
+ MAYBE_EMIT_CONST();
+ emit(LDRSHai(R0, rOPSTACK, 0)); // sign extend *opstack
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_NEGI:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(RSBi(R0, R0, 0)); // r0 = -r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_ADD:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(ADD(R0, R1, R0)); // r0 = r1 + r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_SUB:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(SUB(R0, R1, R0)); // r0 = r1 - r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_DIVI:
+ case OP_DIVU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ if ( op == OP_DIVI )
+ emit_MOVRxi(R12, (unsigned)__aeabi_idiv);
+ else
+ emit_MOVRxi(R12, (unsigned)__aeabi_uidiv);
+ emit(BLX(R12));
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_MODI:
+ case OP_MODU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ if ( op == OP_MODI )
+ emit_MOVRxi(R12, (unsigned)__aeabi_idivmod);
+ else
+ emit_MOVRxi(R12, (unsigned)__aeabi_uidivmod);
+ emit(BLX(R12));
+ emit(STRai(R1, rOPSTACK, 0)); // *opstack = r1
+ break;
+
+ case OP_MULI:
+ case OP_MULU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(MUL(R0, R1, R0)); // r0 = r1 * r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BAND:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(AND(R0, R1, R0)); // r0 = r1 & r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BOR:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(ORR(R0, R1, R0)); // r0 = r1 | r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BXOR:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(EOR(R0, R1, R0)); // r0 = r1 ^ r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BCOM:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(MVN(R0, R0)); // r0 = ~r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_LSH:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(LSL(R0, R1, R0)); // r0 = r1 << r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_RSHI:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(ASR(R0, R1, R0)); // r0 = r1 >> r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_RSHU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(LSR(R0, R1, R0)); // r0 = (unsigned)r1 >> r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_NEGF:
+ MAYBE_EMIT_CONST();
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ emit(VNEG_F32(S14, S14)); // s14 = -s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_ADDF:
+ MAYBE_EMIT_CONST();
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // vldr can't modify rOPSTACK so
+ // we'd either need to change it
+ // with sub or use regular ldr+vmov
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VADD_F32(S14, S15, S14)); // s14 = s15 + s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_SUBF:
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // see OP_ADDF
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VSUB_F32(S14, S15, S14)); // s14 = s15 - s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_DIVF:
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // see OP_ADDF
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VDIV_F32(S14, S15, S14)); // s14 = s15 / s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_MULF:
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // see OP_ADDF
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VMUL_F32(S14, S15, S14)); // s14 = s15 * s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_CVIF:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(VMOVass(S14,R0)); // s14 = r0
+ emit(VCVT_F32_S32(S14, S14)); // s14 = (float)s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_CVFI:
+ MAYBE_EMIT_CONST();
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ emit(VCVT_S32_F32(S14, S14)); // s14 = (int)s14
+ emit(VMOVssa(R0,S14)); // r0 = s14
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+ }
+ }
+
+ // never reached
+ emit(BKPT(0));
+ } // pass
+
+ if (mprotect(vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC/* |PROT_WRITE */)) {
+ VM_Destroy_Compiled(vm);
+ DIE("mprotect failed");
+ }
+
+ // clear icache, http://blogs.arm.com/software-enablement/141-caches-and-self-modifying-code/
+ __clear_cache(vm->codeBase, vm->codeBase+vm->codeLength);
+
+ vm->destroy = VM_Destroy_Compiled;
+ vm->compiled = qtrue;
+}
+
+int VM_CallCompiled(vm_t *vm, int *args)
+{
+ byte stack[OPSTACK_SIZE + 15];
+ int *opStack;
+ int programStack = vm->programStack;
+ int stackOnEntry = programStack;
+ byte *image = vm->dataBase;
+ int *argPointer;
+ int retVal;
+
+ currentVM = vm;
+
+ vm->currentlyInterpreting = qtrue;
+
+ programStack -= ( 8 + 4 * MAX_VMMAIN_ARGS );
+ argPointer = (int *)&image[ programStack + 8 ];
+ memcpy( argPointer, args, 4 * MAX_VMMAIN_ARGS );
+ argPointer[-1] = 0;
+ argPointer[-2] = -1;
+
+
+ opStack = PADP(stack, 16);
+ *opStack = 0xDEADBEEF;
+
+#if 0
+ Com_Printf("r5 opStack:\t\t%p\n", opStack);
+ Com_Printf("r7 codeBase:\t\t%p\n", vm->codeBase);
+ Com_Printf("r8 programStack:\t0x%x\n", programStack);
+ Com_Printf("r9 dataBase:\t\t%p\n", vm->dataBase);
+#endif
+
+ /* call generated code */
+ {
+ //int (*entry)(void *, int, void *, int);
+ int (*entry)(vm_t*, int*, int*);
+
+ entry = (void *)(vm->codeBase);
+ //__asm__ volatile("bkpt");
+ //retVal = entry(vm->codeBase, programStack, vm->dataBase, vm->dataMask);
+ retVal = entry(vm, &programStack, opStack);
+ }
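+ /* under the AAPCS the three arguments arrive in R0-R2, matching the
+  * prologue emitted by VM_Compile: vm in R0 (codeBase/dataBase/dataMask
+  * loads), &programStack in R1, opStack in R2 */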
+
+ if(*opStack != 0xDEADBEEF)
+ {
+ Com_Error(ERR_DROP, "opStack corrupted in compiled code");
+ }
+
+ if(programStack != stackOnEntry - (8 + 4 * MAX_VMMAIN_ARGS))
+ Com_Error(ERR_DROP, "programStack corrupted in compiled code");
+
+ vm->programStack = stackOnEntry;
+ vm->currentlyInterpreting = qfalse;
+
+ return retVal;
+}
diff --git a/MP/make-raspberrypi.sh b/MP/make-raspberrypi.sh
index 3e9a1ba..b9ae24f 100755
--- a/MP/make-raspberrypi.sh
+++ b/MP/make-raspberrypi.sh
@@ -2,8 +2,8 @@
USE_CODEC_VORBIS=0 \
USE_CODEC_OPUS=0 \
- USE_CURL=1 \
- USE_CURL_DLOPEN=1 \
+ USE_CURL=0 \
+ USE_CURL_DLOPEN=0 \
USE_OPENAL=1 \
USE_OPENAL_DLOPEN=1 \
USE_RENDERER_DLOPEN=0 \
@@ -17,7 +17,6 @@
RASPBERRY_PI=1 \
USE_MUMBLE=0 \
BUILD_GAME_SO=1 \
- BUILD_GAME_QVM=0 \
BUILD_RENDERER_REND2=0 \
ARCH=arm \
PLATFORM=linux \
diff --git a/SP/Makefile b/SP/Makefile
index f6de890..80e1517 100644
--- a/SP/Makefile
+++ b/SP/Makefile
@@ -8,6 +8,8 @@ COMPILE_PLATFORM=$(shell uname|sed -e s/_.*//|tr '[:upper:]' '[:lower:]'|sed -e
COMPILE_ARCH=$(shell uname -m | sed -e s/i.86/i386/ | sed -e 's/^arm.*/arm/')
+ARM_VER_CHECK=$(shell uname -m)
+
ifeq ($(COMPILE_PLATFORM),sunos)
# Solaris uname and GNU uname differ
COMPILE_ARCH=$(shell uname -p | sed -e s/i.86/i386/)
@@ -388,6 +390,9 @@ ifneq (,$(findstring "$(PLATFORM)", "linux" "gnu_kfreebsd" "kfreebsd-gnu" "gnu")
OPTIMIZEVM += -mtune=ultrasparc3 -mv8plus
HAVE_VM_COMPILED=true
endif
+ ifeq ($(ARM_VER_CHECK),armv7l)
+ HAVE_VM_COMPILED=true
+ endif
ifeq ($(ARCH),alpha)
# According to http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=410555
# -ffast-math will cause the client to die with SIGFPE on Alpha
@@ -1340,6 +1345,7 @@ targets: makedirs
@echo " VERSION: $(VERSION)"
@echo " COMPILE_PLATFORM: $(COMPILE_PLATFORM)"
@echo " COMPILE_ARCH: $(COMPILE_ARCH)"
+ @echo " HAVE_VM_COMPILED: $(HAVE_VM_COMPILED) "
@echo " CC: $(CC)"
@echo " CXX: $(CXX)"
ifdef MINGW
@@ -2133,6 +2139,9 @@ ifeq ($(HAVE_VM_COMPILED),true)
ifeq ($(ARCH),sparc)
Q3OBJ += $(B)/client/vm_sparc.o
endif
+ ifeq ($(ARM_VER_CHECK),armv7l)
+ Q3OBJ += $(B)/client/vm_armv7l.o
+ endif
endif
ifdef MINGW
@@ -2302,6 +2311,9 @@ ifeq ($(HAVE_VM_COMPILED),true)
ifeq ($(ARCH),sparc)
Q3DOBJ += $(B)/ded/vm_sparc.o
endif
+ ifeq ($(ARM_VER_CHECK),armv7l)
+ Q3DOBJ += $(B)/ded/vm_armv7l.o
+ endif
endif
ifdef MINGW
diff --git a/SP/code/qcommon/vm_armv7l.c b/SP/code/qcommon/vm_armv7l.c
new file mode 100644
index 0000000..dd63f5c
--- /dev/null
+++ b/SP/code/qcommon/vm_armv7l.c
@@ -0,0 +1,1210 @@
+/*
+===========================================================================
+Copyright (C) 2009 David S. Miller <davem at davemloft.net>
+Copyright (C) 2013,2014 SUSE Linux Products GmbH
+
+This file is part of Quake III Arena source code.
+
+Quake III Arena source code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Quake III Arena source code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Quake III Arena source code; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+===========================================================================
+
+ARMv7l VM by Ludwig Nussel <ludwig.nussel at suse.de>
+
+TODO: optimization
+
+Docs:
+http://www.coranac.com/tonc/text/asm.htm
+http://www.heyrick.co.uk/armwiki/Category:Opcodes
+ARMv7-A_ARMv7-R_DDI0406_2007.pdf
+*/
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stddef.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "vm_local.h"
+#define R0 0
+#define R1 1
+#define R2 2
+#define R3 3
+#define R4 4
+
+#define R12 12
+
+#define FP 11
+#define SP 13
+#define LR 14
+#define PC 15
+
+#define APSR_nzcv 15
+
+#define S14 14
+#define S15 15
+
+#define rOPSTACK 5
+#define rOPSTACKBASE 6
+#define rCODEBASE 7
+#define rPSTACK 8
+#define rDATABASE 9
+#define rDATAMASK 10
+
+#define bit(x) (1<<x)
+
+/* arm eabi, builtin gcc functions */
+int __aeabi_idiv (int, int);
+unsigned __aeabi_uidiv (unsigned, unsigned);
+void __aeabi_idivmod(void);
+void __aeabi_uidivmod(void);
+
+/* exit() won't actually be called, but it is used because it is marked noreturn */
+#define DIE( reason, args... ) \
+ do { \
+ Com_Error(ERR_DROP, "vm_arm compiler error: " reason, ##args); \
+ exit(1); \
+ } while(0)
+
+/*
+ * opcode information table:
+ * - length of immediate value
+ * - returned register type
+ * - required register(s) type
+ */
+#define opImm0 0x0000 /* no immediate */
+#define opImm1 0x0001 /* 1-byte immediate value after opcode */
+#define opImm4 0x0002 /* 4-byte immediate value after opcode */
+
+#define opRet0 0x0000 /* returns nothing */
+#define opRetI 0x0004 /* returns integer */
+#define opRetF 0x0008 /* returns float */
+#define opRetIF (opRetI | opRetF) /* returns integer or float */
+
+#define opArg0 0x0000 /* requires nothing */
+#define opArgI 0x0010 /* requires integer(s) */
+#define opArgF 0x0020 /* requires float(s) */
+#define opArgIF (opArgI | opArgF) /* requires integer or float */
+
+#define opArg2I 0x0040 /* requires second argument, integer */
+#define opArg2F 0x0080 /* requires second argument, float */
+#define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */
+
+static const unsigned char vm_opInfo[256] =
+{
+ [OP_UNDEF] = opImm0,
+ [OP_IGNORE] = opImm0,
+ [OP_BREAK] = opImm0,
+ [OP_ENTER] = opImm4,
+ /* OP_LEAVE has to accept floats, they will be converted to ints */
+ [OP_LEAVE] = opImm4 | opRet0 | opArgIF,
+ /* only STORE4 and POP use values from OP_CALL,
+ * no need to convert floats back */
+ [OP_CALL] = opImm0 | opRetI | opArgI,
+ [OP_PUSH] = opImm0 | opRetIF,
+ [OP_POP] = opImm0 | opRet0 | opArgIF,
+ [OP_CONST] = opImm4 | opRetIF,
+ [OP_LOCAL] = opImm4 | opRetI,
+ [OP_JUMP] = opImm0 | opRet0 | opArgI,
+
+ [OP_EQ] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_NE] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LTI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LEI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GTI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GEI] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LTU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_LEU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GTU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_GEU] = opImm4 | opRet0 | opArgI | opArg2I,
+ [OP_EQF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_NEF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_LTF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_LEF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_GTF] = opImm4 | opRet0 | opArgF | opArg2F,
+ [OP_GEF] = opImm4 | opRet0 | opArgF | opArg2F,
+
+ [OP_LOAD1] = opImm0 | opRetI | opArgI,
+ [OP_LOAD2] = opImm0 | opRetI | opArgI,
+ [OP_LOAD4] = opImm0 | opRetIF| opArgI,
+ [OP_STORE1] = opImm0 | opRet0 | opArgI | opArg2I,
+ [OP_STORE2] = opImm0 | opRet0 | opArgI | opArg2I,
+ [OP_STORE4] = opImm0 | opRet0 | opArgIF| opArg2I,
+ [OP_ARG] = opImm1 | opRet0 | opArgIF,
+ [OP_BLOCK_COPY] = opImm4 | opRet0 | opArgI | opArg2I,
+
+ [OP_SEX8] = opImm0 | opRetI | opArgI,
+ [OP_SEX16] = opImm0 | opRetI | opArgI,
+ [OP_NEGI] = opImm0 | opRetI | opArgI,
+ [OP_ADD] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_SUB] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_DIVI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_DIVU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MODI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MODU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MULI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_MULU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BAND] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BOR] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BXOR] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_BCOM] = opImm0 | opRetI | opArgI,
+ [OP_LSH] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_RSHI] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_RSHU] = opImm0 | opRetI | opArgI | opArg2I,
+ [OP_NEGF] = opImm0 | opRetF | opArgF,
+ [OP_ADDF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_SUBF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_DIVF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_MULF] = opImm0 | opRetF | opArgF | opArg2F,
+ [OP_CVIF] = opImm0 | opRetF | opArgI,
+ [OP_CVFI] = opImm0 | opRetI | opArgF,
+};
+
+#ifdef DEBUG_VM
+static const char *opnames[256] = {
+ "OP_UNDEF", "OP_IGNORE", "OP_BREAK", "OP_ENTER", "OP_LEAVE", "OP_CALL",
+ "OP_PUSH", "OP_POP", "OP_CONST", "OP_LOCAL", "OP_JUMP",
+ "OP_EQ", "OP_NE", "OP_LTI", "OP_LEI", "OP_GTI", "OP_GEI",
+ "OP_LTU", "OP_LEU", "OP_GTU", "OP_GEU", "OP_EQF", "OP_NEF",
+ "OP_LTF", "OP_LEF", "OP_GTF", "OP_GEF",
+ "OP_LOAD1", "OP_LOAD2", "OP_LOAD4", "OP_STORE1", "OP_STORE2",
+ "OP_STORE4", "OP_ARG", "OP_BLOCK_COPY",
+ "OP_SEX8", "OP_SEX16",
+ "OP_NEGI", "OP_ADD", "OP_SUB", "OP_DIVI", "OP_DIVU",
+ "OP_MODI", "OP_MODU", "OP_MULI", "OP_MULU", "OP_BAND",
+ "OP_BOR", "OP_BXOR", "OP_BCOM", "OP_LSH", "OP_RSHI", "OP_RSHU",
+ "OP_NEGF", "OP_ADDF", "OP_SUBF", "OP_DIVF", "OP_MULF",
+ "OP_CVIF", "OP_CVFI",
+};
+
+#define NOTIMPL(x) \
+ do { Com_Error(ERR_DROP, "instruction not implemented: %s", opnames[x]); } while(0)
+#else
+#define NOTIMPL(x) \
+ do { Com_Printf(S_COLOR_RED "instruction not implemented: %x\n", x); vm->compiled = qfalse; return; } while(0)
+#endif
+
+static void VM_Destroy_Compiled(vm_t *vm)
+{
+ if (vm->codeBase) {
+ if (munmap(vm->codeBase, vm->codeLength))
+ Com_Printf(S_COLOR_RED "Memory unmap failed, possible memory leak\n");
+ }
+ vm->codeBase = NULL;
+}
+
+/*
+=================
+ErrJump
+Error handler for jump/call to invalid instruction number
+=================
+*/
+
+static void __attribute__((__noreturn__)) ErrJump(unsigned num)
+{
+ Com_Error(ERR_DROP, "program tried to execute code outside VM (%x)", num);
+}
+
+static int asmcall(int call, int pstack)
+{
+ // save currentVM so as to allow for recursive VM entry
+ vm_t *savedVM = currentVM;
+ int i, ret;
+
+ // modify VM stack pointer for recursive VM entry
+ currentVM->programStack = pstack - 4;
+
+ if (sizeof(intptr_t) == sizeof(int)) {
+ intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + pstack + 4);
+ argPosition[0] = -1 - call;
+ ret = currentVM->systemCall(argPosition);
+ } else {
+ intptr_t args[MAX_VMSYSCALL_ARGS];
+
+ args[0] = -1 - call;
+ int *argPosition = (int *)((byte *)currentVM->dataBase + pstack + 4);
+ for( i = 1; i < ARRAY_LEN(args); i++ )
+ args[i] = argPosition[i];
+
+ ret = currentVM->systemCall(args);
+ }
+
+ currentVM = savedVM;
+
+ return ret;
+}
+
+void _emit(vm_t *vm, unsigned isn, int pass)
+{
+#if 0
+ static int fd = -2;
+ if (fd == -2)
+ fd = open("code.bin", O_TRUNC|O_WRONLY|O_CREAT, 0644);
+ if (fd > 0)
+ write(fd, &isn, 4);
+#endif
+
+ if (pass)
+ memcpy(vm->codeBase+vm->codeLength, &isn, 4);
+ vm->codeLength+=4;
+}
+
+#define emit(isn) _emit(vm, isn, pass)
+
+static unsigned char off8(unsigned val)
+{
+ if (val&3)
+ DIE("offset must be multiple of four");
+ if (val > 1020)
+ DIE("offset too large");
+ return val>>2;
+}
+
+// ARM is really crazy ...
+static unsigned short rimm(unsigned val)
+{
+ unsigned shift = 0;
+ if (val < 256)
+ return val;
+ // rotate the value until it fits
+ while (shift < 16 && (val>255 || !(val&3))) {
+ val = (val&3)<<30 | val>>2;
+ ++shift;
+ }
+ if (shift > 15 || val > 255) {
+ DIE("immediate cannot be encoded (%d, %d)\n", shift, val);
+ }
+ return (16-shift)<<8 | val;
+}
+
+// same as rimm() but doesn't DIE; returns 0 if the value is not encodable, so don't call it with zero as the argument!
+static unsigned short can_encode(unsigned val)
+{
+ unsigned shift = 0;
+ if (!val)
+ DIE("can_encode: invalid argument");
+ if (val < 256)
+ return val;
+ // rotate the value until it fits
+ while (shift < 16 && (val>255 || !(val&3))) {
+ val = (val&3)<<30 | val>>2;
+ ++shift;
+ }
+ if (shift > 15 || val > 255) {
+ return 0;
+ }
+ return (16-shift)<<8 | val;
+}
+
+#define PREINDEX (1<<24)
+
+#define rASR(i, reg) (0b10<<5 | ((i&31)<<7) | reg)
+#define rLSL(i, reg) (0b00<<5 | ((i&31)<<7) | reg)
+#define rLSR(i, reg) (0b01<<5 | ((i&31)<<7) | reg)
+#define rROR(i, reg) (0b11<<5 | ((i&31)<<7) | reg)
+
+// conditions
+#define EQ (0b0000<<28)
+#define NE (0b0001<<28)
+#define CS (0b0010<<28)
+#define HS CS
+#define CC (0b0011<<28)
+#define LO CC
+#define MI (0b0100<<28)
+#define PL (0b0101<<28)
+#define VS (0b0110<<28)
+#define VC (0b0111<<28)
+#define HI (0b1000<<28)
+#define LS (0b1001<<28)
+#define GE (0b1010<<28)
+#define LT (0b1011<<28)
+#define GT (0b1100<<28)
+#define LE (0b1101<<28)
+#define AL (0b1110<<28)
+#define cond(what, op) (what | (op&~AL))
+
+// XXX: v not correctly computed
+#define BKPT(v) (AL | 0b10010<<20 | ((v&~0xF)<<4) | 0b0111<<4 | (v&0xF))
+
+#define YIELD (0b110010<<20 | 0b1111<<12 | 1)
+#define NOP cond(AL, YIELD)
+
+// immediate value must be encodable as a rotated 8-bit value (see rimm)
+#define ANDi(dst, src, i) (AL | (0b001<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define EORi(dst, src, i) (AL | (0b001<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define SUBi(dst, src, i) (AL | (0b001<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define RSBi(dst, src, i) (AL | (0b001<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define ADDi(dst, src, i) (AL | (0b001<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define ADCi(dst, src, i) (AL | (0b001<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define SBCi(dst, src, i) (AL | (0b001<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define RSCi(dst, src, i) (AL | (0b001<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | rimm(i))
+
+#define ORRi(dst, src, i) (AL | (0b001<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define MOVi(dst, i) (AL | (0b001<<25) | (0b11010<<20) | (dst<<12) | rimm(i))
+#define BICi(dst, src, i) (AL | (0b001<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define MVNi(dst, i) (AL | (0b001<<25) | (0b11110<<20) | (dst<<12) | rimm(i))
+
+#define MOVW(dst, i) (AL | (0b11<<24) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
+#define MOVT(dst, i) (AL | (0b11<<24) | (0b0100<<20) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
+
+#define TSTi( src, i) (AL | (0b001<<25) | (0b10001<<20) | (src<<16) | rimm(i))
+#define TEQi( src, i) (AL | (0b001<<25) | (0b10011<<20) | (src<<16) | rimm(i))
+#define CMPi( src, i) (AL | (0b001<<25) | (0b10101<<20) | (src<<16) | rimm(i))
+#define CMNi( src, i) (AL | (0b001<<25) | (0b10111<<20) | (src<<16) | rimm(i))
+
+#define ANDSi(dst, src, i) (ANDi(dst, src, i) | (1<<20))
+#define EORSi(dst, src, i) (EORi(dst, src, i) | (1<<20))
+#define SUBSi(dst, src, i) (SUBi(dst, src, i) | (1<<20))
+#define RSBSi(dst, src, i) (RSBi(dst, src, i) | (1<<20))
+#define ADDSi(dst, src, i) (ADDi(dst, src, i) | (1<<20))
+#define ADCSi(dst, src, i) (ADCi(dst, src, i) | (1<<20))
+#define SBCSi(dst, src, i) (SBCi(dst, src, i) | (1<<20))
+#define RSCSi(dst, src, i) (RSCi(dst, src, i) | (1<<20))
+
+#define ORRSi(dst, src, i) (ORRi(dst, src, i) | (1<<20))
+#define MOVSi(dst, i) (MOVi(dst, i) | (1<<20))
+#define BICSi(dst, src, i) (BICi(dst, src, i) | (1<<20))
+#define MVNSi(dst, i) (MVNi(dst, i) | (1<<20))
+
+#define AND(dst, src, reg) (AL | (0b000<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | reg)
+#define EOR(dst, src, reg) (AL | (0b000<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | reg)
+#define SUB(dst, src, reg) (AL | (0b000<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | reg)
+#define RSB(dst, src, reg) (AL | (0b000<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | reg)
+#define ADD(dst, src, reg) (AL | (0b000<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | reg)
+#define ADC(dst, src, reg) (AL | (0b000<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | reg)
+#define SBC(dst, src, reg) (AL | (0b000<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | reg)
+#define RSC(dst, src, reg) (AL | (0b000<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | reg)
+
+#define ORR(dst, src, reg) (AL | (0b000<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | reg)
+#define MOV(dst, src) (AL | (0b000<<25) | (0b11010<<20) | (dst<<12) | src)
+
+#define LSL(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0001<<4) | src)
+#define LSR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0011<<4) | src)
+#define ASR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0101<<4) | src)
+#define ROR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0111<<4) | src)
+
+#define LSLi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b000<<4) | src)
+#define LSRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b010<<4) | src)
+#define ASRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b100<<4) | src)
+#define RORi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b110<<4) | src)
+#define RRX(dst, src) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (0b110<<4) | src)
+
+#define BIC(dst, src, reg) (AL | (0b000<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | reg)
+#define MVN(dst, reg) (AL | (0b000<<25) | (0b11110<<20) | (dst<<12) | reg)
+
+#define TST( src, reg) (AL | (0b000<<25) | (0b10001<<20) | (src<<16) | reg)
+#define TEQ( src, reg) (AL | (0b000<<25) | (0b10011<<20) | (src<<16) | reg)
+#define CMP( src, reg) (AL | (0b000<<25) | (0b10101<<20) | (src<<16) | reg)
+#define CMN( src, reg) (AL | (0b000<<25) | (0b10111<<20) | (src<<16) | reg)
+
+#define LDRa(dst, base, off) (AL | (0b011<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRx(dst, base, off) (AL | (0b011<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | off)
+
+#define LDRai(dst, base, off) (AL | (0b010<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRxi(dst, base, off) (AL | (0b010<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+
+#define LDRTa(dst, base, off) (AL | (0b011<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRTx(dst, base, off) (AL | (0b011<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRTai(dst, base, off) (AL | (0b010<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRTxi(dst, base, off) (AL | (0b010<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+
+#define LDRBa(dst, base, off) (AL | (0b011<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRSBai(dst, base, off) (AL | (0b000<<25) | (0b0110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1101<<4|(off&0x0F))
+#define STRBa(dst, base, off) (AL | (0b011<<25) | (0b1110<<21) | (0<<20) | base<<16 | dst<<12 | off)
+
+#define LDRHa(dst, base, off) (AL | (0b000<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
+#define LDRSHai(dst, base, off) (AL | (0b000<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1111<<4|(off&0x0F))
+#define STRHa(dst, base, off) (AL | (0b000<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
+
+#define STRa(dst, base, off) (AL | (0b011<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | off)
+#define STRx(dst, base, off) (AL | (0b011<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | off)
+#define STRai(dst, base, off) (AL | (0b010<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRxi(dst, base, off) (AL | (0b010<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRaiw(dst, base, off) (AL | (0b010<<25) | (0b1101<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+
+// load with post-increment: ldr reg, [sp], #4
+#define POP1(reg) (AL | (0b010<<25) | (0b0100<<21) | (1<<20) | SP<<16 | reg<<12 | 4)
+// store with pre-decrement: str reg, [sp, #-4]!
+#define PUSH1(reg) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | SP<<16 | reg<<12 | 4)
+
+// branch to target address (for small jumps)
+#define Bi(i) \
+ (AL | (0b10)<<26 | (1<<25) /*I*/ | (0<<24) /*L*/ | (i))
+// call subroutine
+#define BLi(i) \
+ (AL | (0b10)<<26 | (1<<25) /*I*/ | (1<<24) /*L*/ | (i))
+// branch and exchange (register)
+#define BX(reg) \
+ (AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0001<<4 | reg)
+// call subroutine (register)
+#define BLX(reg) \
+ (AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0011<<4 | reg)
+
+#define PUSH(mask) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | mask)
+#define PUSH2(r1, r2) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | 1<<r1 | 1<<r2)
+//#define PUSH1(reg) STRxiw(SP, reg, 4)
+
+#define POP(mask) (0xe8bd0000|mask)
+
+#define STM(base, regs) \
+ (AL | 0b100<<25 | 0<<24/*P*/| 0<<23/*U*/| 0<<22/*S*/| 0<<21/*W*/ | (base<<16) | (regs&~(1<<16)))
+
+// note: op1 and op2 must not be the same
+#define MUL(op1, op2, op3) \
+ (AL | 0b0000000<<21 | (1<<20) /*S*/ | (op1<<16) | (op3<<8) | 0b1001<<4 | (op2))
+
+// puts integer in R0
+#define emit_MOVR0i(arg) emit_MOVRxi(R0, arg)
+
+// puts integer arg in register reg
+#define emit_MOVRxi(reg, arg) do { \
+ emit(MOVW(reg, (arg&0xFFFF))); \
+ if (arg > 0xFFFF) \
+ emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
+ } while(0)
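+// e.g. emit_MOVRxi(R12, 0x12345678) expands to MOVW r12,#0x5678 then
+// MOVT r12,#0x1234; constants <= 0xFFFF need only the MOVW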
+
+// puts integer arg in register reg; emits a NOP when one instruction
+// suffices, so the sequence is always exactly two instructions long
+#define emit_MOVRxi_or_NOP(reg, arg) do { \
+ emit(MOVW(reg, (arg&0xFFFF))); \
+ if (arg > 0xFFFF) \
+ emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
+ else \
+ emit(NOP); \
+ } while(0)
+
+// ARM core register -> single precision register
+#define VMOVass(Vn, Rt) (AL|(0b1110<<24)|(0b000<<21)|(0<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
+// single precision register -> ARM core register
+#define VMOVssa(Rt, Vn) (AL|(0b1110<<24)|(0b000<<21)|(1<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
+
+#define _VCVT_F(Vd, Vm, opc2, op) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b111<<19)|(opc2<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(op<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VCVT_F32_U32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 0 /* unsigned */)
+#define VCVT_U32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b100, 1 /* round zero */)
+#define VCVT_F32_S32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 1 /* signed */)
+#define VCVT_S32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b101, 1 /* round zero */)
+
+#define VLDRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|1<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
+#define VSTRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|0<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
+
+#define VNEG_F32(Vd, Vm) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|(1<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+
+#define VADD_F32(Vd, Vn, Vm) \
+ (AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VSUB_F32(Vd, Vn, Vm) \
+ (AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VMUL_F32(Vd, Vn, Vm) \
+ (AL|(0b11100<<23)|((Vd&1)<<22)|(0b10<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101)<<9|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VDIV_F32(Vd, Vn, Vm) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b00<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+
+#define _VCMP_F32(Vd, Vm, E) \
+ (AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|((0b0100)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(E<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VCMP_F32(Vd, Vm) _VCMP_F32(Vd, Vm, 0)
+
+#define VMRS(Rt) \
+ (AL|(0b11101111<<20)|(0b0001<<16)|(Rt<<12)|(0b1010<<8)|(1<<4))
+
+// check if instruction in R0 is within range. Clobbers R1, R12
+#define CHECK_JUMP do { \
+ static int bytes_to_skip = -1; \
+ static unsigned branch = -1; \
+ emit_MOVRxi(R1, (unsigned)vm->instructionCount); \
+ emit(CMP(R0, R1)); \
+ if (branch == -1) \
+ branch = vm->codeLength; \
+ emit(cond(LT, Bi(j_rel(bytes_to_skip)))); \
+ emit_MOVRxi_or_NOP(R12, (unsigned)ErrJump); \
+ emit(BLX(R12)); \
+ if (bytes_to_skip == -1) \
+ bytes_to_skip = vm->codeLength - branch; \
+} while(0)
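+// the statics are resolved over the two compilation passes: pass 0 records
+// where the error path starts and how many bytes the forward branch must
+// skip; pass 1 then emits the branch with the real distance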
+
+//#define CONST_OPTIMIZE
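+// note: the disabled block below still emits x86_64 assembly text (movl
+// with %r9/%rbx), a leftover from the x86 compiler; it would need porting
+// before CONST_OPTIMIZE could be enabled here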
+#ifdef CONST_OPTIMIZE
+#define MAYBE_EMIT_CONST() \
+ if (got_const) \
+ { \
+ got_const = 0; \
+ vm->instructionPointers[instruction-1] = assembler_get_code_size(); \
+ STACK_PUSH(4); \
+ emit("movl $%d, (%%r9, %%rbx, 4)", const_value); \
+ }
+#else
+#define MAYBE_EMIT_CONST()
+#endif
+
+// optimize: use load multiple
+#define IJ(comparator) do { \
+ MAYBE_EMIT_CONST(); \
+ emit_MOVRxi(R0, arg.i); \
+ CHECK_JUMP; \
+ emit(LDRTxi(R0, rOPSTACK, 4)); \
+ emit(LDRTxi(R1, rOPSTACK, 4)); \
+ emit(CMP(R1, R0)); \
+ emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
+} while (0)
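+// after the range check, IJ's stack effect in C terms (illustrative):
+//   r0 = *opstack; opstack -= 4; // top operand
+//   r1 = *opstack; opstack -= 4; // second operand
+//   if (r1 <comparator> r0) goto instruction arg.i;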
+
+#define FJ(comparator) do { \
+ emit_MOVRxi(R0, arg.i); \
+ CHECK_JUMP; \
+ emit(SUBi(rOPSTACK, rOPSTACK, 8)); \
+ emit(VLDRa(S15, rOPSTACK, 4)); \
+ emit(VLDRa(S14, rOPSTACK, 8)); \
+ emit(VCMP_F32(S15, S14)); \
+ emit(VMRS(APSR_nzcv)); \
+ emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
+} while (0)
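+// FJ is the floating-point analogue of IJ: it loads the second-from-top
+// operand into s15 and the top into s14, compares with VCMP, and branches
+// on the flags that VMRS copies from the VFP status register into APSR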
+
+#define printreg(reg) do { emit(PUSH1(R3)); emit(BLX(reg)); emit(POP1(R3)); } while(0)
+
+static inline unsigned _j_rel(int x, int pc)
+{
+ if (x&3) goto err;
+ x = (x>>2)-2;
+ if (x < 0)
+ {
+ if ((x&(0xFF<<24)) != 0xFF<<24)
+ goto err;
+ x &= ~(0xFF<<24);
+ }
+ else if (x&(0xFF<<24))
+ goto err;
+ return x;
+err:
+ DIE("jump %d out of range at %d", x, pc);
+}
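+/* B/BL offsets count words relative to pc+8 (the classic ARM prefetch),
+ * hence (x>>2)-2 above; e.g. a branch to the very next instruction has
+ * x == 4 and is encoded as -1, i.e. 0x00FFFFFF after masking */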
+
+void VM_Compile(vm_t *vm, vmHeader_t *header)
+{
+ unsigned char *code;
+ int i_count, pc = 0;
+ int pass;
+ int codeoffsets[1024];
+
+#define j_rel(x) (pass?_j_rel(x, pc):0xBAD)
+#define OFFSET(i) (pass?(j_rel(codeoffsets[i]-vm->codeLength)):(0xF000000F))
+#define new_offset() (offsidx++)
+#define get_offset(i) (codeoffsets[i])
+#define save_offset(i) (codeoffsets[i] = vm->codeLength)
+#define OFF_CODE 0
+#define OFF_IMMEDIATES 1
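+/* two-pass scheme (sketch): on pass 0 vm->codeBase is still NULL, so
+ * emitting only advances vm->codeLength and jump targets stay at the
+ * 0xBAD/0xF000000F placeholders; pass 1 mmap()s a buffer of the measured
+ * size and emits the final code with all offsets known */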
+
+ vm->compiled = qfalse;
+
+ vm->codeBase = NULL;
+ vm->codeLength = 0;
+
+ for (pass = 0; pass < 2; ++pass) {
+
+ int offsidx = 0;
+
+ // const optimization
+ unsigned got_const = 0, const_value = 0;
+
+ if(pass)
+ {
+ vm->codeBase = mmap(NULL, vm->codeLength, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if(vm->codeBase == MAP_FAILED)
+ Com_Error(ERR_FATAL, "VM_CompileARM: can't mmap memory");
+ vm->codeLength = 0;
+ }
+
+ //int (*entry)(vm_t*, int*, int*);
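+ // entered from VM_CallCompiled below as entry(vm, &programStack, opStack),
+ // so per the AAPCS r0 = vm, r1 = &programStack, r2 = opStack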
+ emit(PUSH((((1<<8)-1)<<4)|(1<<14))); // push R4-R11, LR
+ emit(SUBi(SP, SP, 12)); // align stack!
+ emit(LDRai(rCODEBASE, R0, offsetof(vm_t, codeBase)));
+ emit(LDRai(rDATABASE, R0, offsetof(vm_t, dataBase)));
+ emit(LDRai(rDATAMASK, R0, offsetof(vm_t, dataMask)));
+ emit(LDRai(rPSTACK, R1, 0));
+ emit(MOV(rOPSTACK, R2)); // TODO: reverse opstack to avoid writing to return address
+ emit(MOV(rOPSTACKBASE, rOPSTACK));
+
+ emit(BLi(OFFSET(OFF_CODE)));
+
+ // save return value in r0
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+
+ emit(ADDi(SP, SP, 12)); // align stack!
+ emit(POP((((1<<8)-1)<<4)|(1<<15))); // pop R4-R11, LR -> PC
+
+ /* save some immediates here */
+ emit(BKPT(0));
+ emit(BKPT(0));
+ save_offset(OFF_IMMEDIATES);
+// emit((unsigned)whatever);
+ emit(BKPT(0));
+ emit(BKPT(0));
+
+ save_offset(OFF_CODE);
+ offsidx = OFF_IMMEDIATES+1;
+
+ code = (unsigned char *) header + header->codeOffset;
+ pc = 0;
+
+ for (i_count = 0; i_count < header->instructionCount; i_count++) {
+ union {
+ unsigned char b[4];
+ unsigned int i;
+ } arg;
+ unsigned char op = code[pc++];
+
+ vm->instructionPointers[i_count] = vm->codeLength;
+
+ if (vm_opInfo[op] & opImm4)
+ {
+ memcpy(arg.b, &code[pc], 4);
+ pc += 4;
+#ifdef EXCESSIVE_DEBUG
+ Com_Printf("%d: instruction %d (%s %d), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
+#endif
+ }
+ else if (vm_opInfo[op] & opImm1)
+ {
+ arg.b[0] = code[pc];
+ ++pc;
+#ifdef EXCESSIVE_DEBUG
+ Com_Printf("%d: instruction %d (%s %hhd), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
+#endif
+ }
+ else
+ {
+#ifdef EXCESSIVE_DEBUG
+ Com_Printf("%d: instruction %d (%s), offset %d\n", pass, i_count, opnames[op], vm->codeLength);
+#endif
+ }
+
+ // TODO: for debug only
+ //emit_MOVRxi(R4, i_count);
+
+ switch ( op )
+ {
+ case OP_UNDEF:
+ break;
+
+ case OP_IGNORE:
+ NOTIMPL(op);
+ break;
+
+ case OP_BREAK:
+ emit(BKPT(0));
+ break;
+
+ case OP_ENTER:
+ MAYBE_EMIT_CONST();
+ emit(PUSH1(LR));
+ emit(SUBi(SP, SP, 12)); // align stack
+ if (arg.i == 0 || can_encode(arg.i))
+ {
+ emit(SUBi(rPSTACK, rPSTACK, arg.i)); // pstack -= arg
+ }
+ else
+ {
+ emit_MOVR0i(arg.i);
+ emit(SUB(rPSTACK, rPSTACK, R0)); // pstack -= arg
+ }
+ break;
+
+ case OP_LEAVE:
+ if (arg.i == 0 || can_encode(arg.i))
+ {
+ emit(ADDi(rPSTACK, rPSTACK, arg.i)); // pstack += arg
+ }
+ else
+ {
+ emit_MOVR0i(arg.i);
+ emit(ADD(rPSTACK, rPSTACK, R0)); // pstack += arg
+ }
+ emit(ADDi(SP, SP, 12));
+ emit(0xe49df004); // ldr pc, [sp], #4 (pop pc)
+ break;
+
+ case OP_CALL:
+#if 0
+ // save next instruction
+ emit_MOVR0i(i_count);
+ emit(STRa(R0, rDATABASE, rPSTACK)); // dataBase[pstack] = r0
+#endif
+ if (got_const) {
+ NOTIMPL(op);
+ } else {
+ static int bytes_to_skip = -1;
+ static unsigned start_block = -1;
+ MAYBE_EMIT_CONST();
+ // get instruction nr from stack
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(CMPi(R0, 0)); // check if syscall
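+ // negative call numbers denote engine syscalls; the LT branch below
+ // jumps past the in-VM dispatch to the asmcall trampoline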
+ if (start_block == -1)
+ start_block = vm->codeLength;
+ emit(cond(LT, Bi(j_rel(bytes_to_skip))));
+ CHECK_JUMP;
+ emit_MOVRxi_or_NOP(R1, (unsigned)vm->instructionPointers);
+ emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
+ emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
+ emit(BLX(R0));
+ emit(Bi(j_rel(vm->instructionPointers[i_count+1]-vm->codeLength)));
+ if (bytes_to_skip == -1)
+ bytes_to_skip = vm->codeLength - start_block;
+ emit(MOV(R1, rPSTACK));
+ emit_MOVRxi(R12, (unsigned)asmcall);
+ emit(BLX(R12));
+ // store return value
+ emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+ }
+ break;
+
+ case OP_PUSH:
+ MAYBE_EMIT_CONST();
+ emit(ADDi(rOPSTACK, rOPSTACK, 4));
+ break;
+
+ case OP_POP:
+ MAYBE_EMIT_CONST();
+ emit(SUBi(rOPSTACK, rOPSTACK, 4));
+ break;
+
+ case OP_CONST:
+ MAYBE_EMIT_CONST();
+ emit_MOVR0i(arg.i);
+ emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+ break;
+
+ case OP_LOCAL:
+ MAYBE_EMIT_CONST();
+ if (arg.i == 0 || can_encode(arg.i))
+ {
+ emit(ADDi(R0, rPSTACK, arg.i)); // r0 = pstack+arg
+ }
+ else
+ {
+ emit_MOVR0i(arg.i);
+ emit(ADD(R0, rPSTACK, R0)); // r0 = pstack+arg
+ }
+ emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+ break;
+
+ case OP_JUMP:
+ if(got_const) {
+ NOTIMPL(op);
+ } else {
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ CHECK_JUMP;
+ emit_MOVRxi(R1, (unsigned)vm->instructionPointers);
+ emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
+ emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
+ emit(BLX(R0));
+ }
+ break;
+
+ case OP_EQ:
+ IJ(EQ);
+ break;
+
+ case OP_NE:
+ IJ(NE);
+ break;
+
+ case OP_LTI:
+ IJ(LT);
+ break;
+
+ case OP_LEI:
+ IJ(LE);
+ break;
+
+ case OP_GTI:
+ IJ(GT);
+ break;
+
+ case OP_GEI:
+ IJ(GE);
+ break;
+
+ case OP_LTU:
+ IJ(LO);
+ break;
+
+ case OP_LEU:
+ IJ(LS);
+ break;
+
+ case OP_GTU:
+ IJ(HI);
+ break;
+
+ case OP_GEU:
+ IJ(HS);
+ break;
+
+ case OP_EQF:
+ FJ(EQ);
+ break;
+
+ case OP_NEF:
+ FJ(NE);
+ break;
+
+ case OP_LTF:
+ FJ(LT);
+ break;
+
+ case OP_LEF:
+ FJ(LE);
+ break;
+
+ case OP_GTF:
+ FJ(GT);
+ break;
+
+ case OP_GEF:
+ FJ(GE);
+ break;
+
+ case OP_LOAD1:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+ emit(LDRBa(R0, rDATABASE, R0)); // r0 = (unsigned char)dataBase[r0]
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_LOAD2:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+ emit(LDRHa(R0, rDATABASE, R0)); // r0 = (unsigned short)dataBase[r0]
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_LOAD4:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+ emit(LDRa(R0, rDATABASE, R0)); // r0 = dataBase[r0]
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_STORE1:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ emit(STRBa(R0, rDATABASE, R1)); // database[r1] = r0
+ break;
+
+ case OP_STORE2:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ emit(STRHa(R0, rDATABASE, R1)); // database[r1] = r0
+ break;
+
+ case OP_STORE4:
+ MAYBE_EMIT_CONST();
+ // optimize: use load multiple
+ // value
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ // pointer
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ // store value at pointer
+ emit(STRa(R0, rDATABASE, R1)); // database[r1] = r0
+ break;
+
+ case OP_ARG:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit(ADDi(R1, rPSTACK, arg.b[0])); // r1 = programStack+arg
+ emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+ emit(STRa(R0, rDATABASE, R1)); // dataBase[r1] = r0
+ break;
+
+ case OP_BLOCK_COPY:
+ MAYBE_EMIT_CONST();
+ emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+ emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+ emit_MOVRxi(R2, arg.i);
+ emit_MOVRxi(R12, (unsigned)VM_BlockCopy);
+ emit(BLX(R12));
+ break;
+
+ case OP_SEX8:
+ MAYBE_EMIT_CONST();
+ emit(LDRSBai(R0, rOPSTACK, 0)); // sign extend *opstack
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_SEX16:
+ MAYBE_EMIT_CONST();
+ emit(LDRSHai(R0, rOPSTACK, 0)); // sign extend *opstack
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_NEGI:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(RSBi(R0, R0, 0)); // r0 = -r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_ADD:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(ADD(R0, R1, R0)); // r0 = r1 + r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_SUB:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(SUB(R0, R1, R0)); // r0 = r1 - r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_DIVI:
+ case OP_DIVU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ if ( op == OP_DIVI )
+ emit_MOVRxi(R12, (unsigned)__aeabi_idiv);
+ else
+ emit_MOVRxi(R12, (unsigned)__aeabi_uidiv);
+ emit(BLX(R12));
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_MODI:
+ case OP_MODU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ if ( op == OP_MODI )
+ emit_MOVRxi(R12, (unsigned)__aeabi_idivmod);
+ else
+ emit_MOVRxi(R12, (unsigned)__aeabi_uidivmod);
+ emit(BLX(R12));
+ emit(STRai(R1, rOPSTACK, 0)); // *opstack = r1
+ break;
+
+ case OP_MULI:
+ case OP_MULU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(MUL(R0, R1, R0)); // r0 = r1 * r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BAND:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(AND(R0, R1, R0)); // r0 = r1 & r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BOR:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(ORR(R0, R1, R0)); // r0 = r1 | r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BXOR:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(EOR(R0, R1, R0)); // r0 = r1 ^ r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_BCOM:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(MVN(R0, R0)); // r0 = ~r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_LSH:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(LSL(R0, R1, R0)); // r0 = r1 << r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_RSHI:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(ASR(R0, R1, R0)); // r0 = r1 >> r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_RSHU:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+ emit(LSR(R0, R1, R0)); // r0 = (unsigned)r1 >> r0
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+
+ case OP_NEGF:
+ MAYBE_EMIT_CONST();
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ emit(VNEG_F32(S14, S14)); // s14 = -s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_ADDF:
+ MAYBE_EMIT_CONST();
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // vldr can't modify rOPSTACK so
+ // we'd either need to change it
+ // with sub or use regular ldr+vmov
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VADD_F32(S14, S15, S14)); // s14 = s15 + s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_SUBF:
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // see OP_ADDF
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VSUB_F32(S14, S15, S14)); // s14 = s15 - s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_DIVF:
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // see OP_ADDF
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VDIV_F32(S14, S15, S14)); // s14 = s15 / s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_MULF:
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ // see OP_ADDF
+ emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+ emit(VMOVass(S15,R0)); // s15 = r0
+ emit(VMUL_F32(S14, S15, S14)); // s14 = s15 * s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_CVIF:
+ MAYBE_EMIT_CONST();
+ emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+ emit(VMOVass(S14,R0)); // s14 = r0
+ emit(VCVT_F32_S32(S14, S14)); // s14 = (float)s14
+ emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+ break;
+
+ case OP_CVFI:
+ MAYBE_EMIT_CONST();
+ emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+ emit(VCVT_S32_F32(S14, S14)); // s14 = (int)s14
+ emit(VMOVssa(R0,S14)); // r0 = s14
+ emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+ break;
+ }
+ }
+
+ // never reached
+ emit(BKPT(0));
+ } // pass
+
+ if (mprotect(vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC/* |PROT_WRITE */)) {
+ VM_Destroy_Compiled(vm);
+ DIE("mprotect failed");
+ }
+
+ // clear icache, http://blogs.arm.com/software-enablement/141-caches-and-self-modifying-code/
+ __clear_cache(vm->codeBase, vm->codeBase+vm->codeLength);
+
+ vm->destroy = VM_Destroy_Compiled;
+ vm->compiled = qtrue;
+}
+
+int VM_CallCompiled(vm_t *vm, int *args)
+{
+ byte stack[OPSTACK_SIZE + 15];
+ int *opStack;
+ int programStack = vm->programStack;
+ int stackOnEntry = programStack;
+ byte *image = vm->dataBase;
+ int *argPointer;
+ int retVal;
+
+ currentVM = vm;
+
+ vm->currentlyInterpreting = qtrue;
+
+ programStack -= ( 8 + 4 * MAX_VMMAIN_ARGS );
+ argPointer = (int *)&image[ programStack + 8 ];
+ memcpy( argPointer, args, 4 * MAX_VMMAIN_ARGS );
+ argPointer[-1] = 0;
+ argPointer[-2] = -1;
+
+
+ opStack = PADP(stack, 16);
+ *opStack = 0xDEADBEEF;
+
+#if 0
+ Com_Printf("r5 opStack:\t\t%p\n", opStack);
+ Com_Printf("r7 codeBase:\t\t%p\n", vm->codeBase);
+ Com_Printf("r8 programStack:\t0x%x\n", programStack);
+ Com_Printf("r9 dataBase:\t\t%p\n", vm->dataBase);
+#endif
+
+ /* call generated code */
+ {
+ //int (*entry)(void *, int, void *, int);
+ int (*entry)(vm_t*, int*, int*);
+
+ entry = (void *)(vm->codeBase);
+ //__asm__ volatile("bkpt");
+ //retVal = entry(vm->codeBase, programStack, vm->dataBase, vm->dataMask);
+ retVal = entry(vm, &programStack, opStack);
+ }
+
+ if(*opStack != 0xDEADBEEF)
+ {
+ Com_Error(ERR_DROP, "opStack corrupted in compiled code");
+ }
+
+ if(programStack != stackOnEntry - (8 + 4 * MAX_VMMAIN_ARGS))
+ Com_Error(ERR_DROP, "programStack corrupted in compiled code");
+
+ vm->programStack = stackOnEntry;
+ vm->currentlyInterpreting = qfalse;
+
+ return retVal;
+}
diff --git a/SP/make-raspberrypi.sh b/SP/make-raspberrypi.sh
index 56bcc6c..b9ae24f 100755
--- a/SP/make-raspberrypi.sh
+++ b/SP/make-raspberrypi.sh
@@ -17,7 +17,6 @@
RASPBERRY_PI=1 \
USE_MUMBLE=0 \
BUILD_GAME_SO=1 \
- BUILD_GAME_QVM=0 \
BUILD_RENDERER_REND2=0 \
ARCH=arm \
PLATFORM=linux \
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/iortcw.git