[Pkg-mono-svn-commits] [mono] 02/04: Backport 3cd04f97199ff38d7316587e44381638ba469565 to 4.2 branch

Jo Shields directhex at moszumanska.debian.org
Mon Apr 11 18:24:09 UTC 2016


This is an automated email from the git hooks/post-receive script.

directhex pushed a commit to branch master-patches/arm64_port
in repository mono.

commit bba162fb4cd69d60947409a91b0007332d2d350c
Author: Jo Shields <jo.shields@xamarin.com>
Date:   Mon Apr 11 14:57:05 2016 +0100

    Backport 3cd04f97199ff38d7316587e44381638ba469565 to 4.2 branch
---
 mono/arch/arm64/arm64-codegen.h |  850 ++++++-
 mono/mini/aot-compiler.c        |  325 ++-
 mono/mini/exceptions-arm64.c    |  586 ++++-
 mono/mini/mini-arm64.c          | 5203 ++++++++++++++++++++++++++++++++++++++-
 mono/mini/mini-arm64.h          |  251 +-
 mono/mini/tramp-arm64.c         |  658 ++++-
 6 files changed, 7867 insertions(+), 6 deletions(-)

diff --git a/mono/arch/arm64/arm64-codegen.h b/mono/arch/arm64/arm64-codegen.h
index 259ff96..1744235 100644
--- a/mono/arch/arm64/arm64-codegen.h
+++ b/mono/arch/arm64/arm64-codegen.h
@@ -1,3 +1,851 @@
-#include "../../../../mono-extensions/mono/arch/arm64/arm64-codegen.h"
+/*
+ * arm64-codegen.h: ARM64 code generation macros
+ *
+ * Author:
+ *   Zoltan Varga (vargaz at gmail.com)
+ *
+ * Copyright 2013 Xamarin, Inc (http://www.xamarin.com)
+ */
 
+#ifndef __ARM64_CODEGEN_H__
+#define __ARM64_CODEGEN_H__
 
+#include <glib.h>
+
+enum {
+	ARMREG_R0 = 0,
+	ARMREG_R1 = 1,
+	ARMREG_R2 = 2,
+	ARMREG_R3 = 3,
+	ARMREG_R4 = 4,
+	ARMREG_R5 = 5,
+	ARMREG_R6 = 6,
+	ARMREG_R7 = 7,
+	ARMREG_R8 = 8,
+	ARMREG_R9 = 9,
+	ARMREG_R10 = 10,
+	ARMREG_R11 = 11,
+	ARMREG_R12 = 12,
+	ARMREG_R13 = 13,
+	ARMREG_R14 = 14,
+	ARMREG_R15 = 15,
+	ARMREG_R16 = 16,
+	ARMREG_R17 = 17,
+	ARMREG_R18 = 18,
+	ARMREG_R19 = 19,
+	ARMREG_R20 = 20,
+	ARMREG_R21 = 21,
+	ARMREG_R22 = 22,
+	ARMREG_R23 = 23,
+	ARMREG_R24 = 24,
+	ARMREG_R25 = 25,
+	ARMREG_R26 = 26,
+	ARMREG_R27 = 27,
+	ARMREG_R28 = 28,
+	ARMREG_R29 = 29,
+	ARMREG_R30 = 30,
+	ARMREG_SP = 31,
+	ARMREG_RZR = 31,
+
+	ARMREG_IP0 = ARMREG_R16,
+	ARMREG_IP1 = ARMREG_R17,
+	ARMREG_FP = ARMREG_R29,
+	ARMREG_LR = ARMREG_R30
+};
+
+enum {
+	ARMREG_D0 = 0,
+	ARMREG_D1 = 1,
+	ARMREG_D2 = 2,
+	ARMREG_D3 = 3,
+	ARMREG_D4 = 4,
+	ARMREG_D5 = 5,
+	ARMREG_D6 = 6,
+	ARMREG_D7 = 7,
+	ARMREG_D8 = 8,
+	ARMREG_D9 = 9,
+	ARMREG_D10 = 10,
+	ARMREG_D11 = 11,
+	ARMREG_D12 = 12,
+	ARMREG_D13 = 13,
+	ARMREG_D14 = 14,
+	ARMREG_D15 = 15,
+	ARMREG_D16 = 16,
+	ARMREG_D17 = 17,
+	ARMREG_D18 = 18,
+	ARMREG_D19 = 19,
+	ARMREG_D20 = 20,
+	ARMREG_D21 = 21,
+	ARMREG_D22 = 22,
+	ARMREG_D23 = 23,
+	ARMREG_D24 = 24,
+	ARMREG_D25 = 25,
+	ARMREG_D26 = 26,
+	ARMREG_D27 = 27,
+	ARMREG_D28 = 28,
+	ARMREG_D29 = 29,
+	ARMREG_D30 = 30,
+	ARMREG_D31 = 31
+};
+
+typedef enum {
+	ARMCOND_EQ = 0x0,          /* Equal; Z = 1 */
+	ARMCOND_NE = 0x1,          /* Not equal, or unordered; Z = 0 */
+	ARMCOND_CS = 0x2,          /* Carry set; C = 1 */
+	ARMCOND_HS = ARMCOND_CS,   /* Unsigned higher or same */
+	ARMCOND_CC = 0x3,          /* Carry clear; C = 0 */
+	ARMCOND_LO = ARMCOND_CC,   /* Unsigned lower */
+	ARMCOND_MI = 0x4,          /* Negative; N = 1 */
+	ARMCOND_PL = 0x5,          /* Positive or zero; N = 0 */
+	ARMCOND_VS = 0x6,          /* Overflow; V = 1 */
+	ARMCOND_VC = 0x7,          /* No overflow; V = 0 */
+	ARMCOND_HI = 0x8,          /* Unsigned higher; C = 1 && Z = 0 */
+	ARMCOND_LS = 0x9,          /* Unsigned lower or same; C = 0 || Z = 1 */
+	ARMCOND_GE = 0xA,          /* Signed greater than or equal; N = V */
+	ARMCOND_LT = 0xB,          /* Signed less than; N != V */
+	ARMCOND_GT = 0xC,          /* Signed greater than; Z = 0 && N = V */
+	ARMCOND_LE = 0xD,          /* Signed less than or equal; Z = 1 || N != V */
+	ARMCOND_AL = 0xE,          /* Always */
+	ARMCOND_NV = 0xF,          /* Never */
+} ARMCond;
+
+typedef enum {
+	ARMSHIFT_LSL = 0x0,
+	ARMSHIFT_LSR = 0x1,
+	ARMSHIFT_ASR = 0x2
+} ARMShift;
+
+typedef enum {
+	ARMSIZE_B = 0x0,
+	ARMSIZE_H = 0x1,
+	ARMSIZE_W = 0x2,
+	ARMSIZE_X = 0x3
+} ARMSize;
+
+#define arm_emit(p, ins) do { *(guint32*)(p) = (ins); (p) += 4; } while (0)
+
+/* Overwrite bits [offset, offset+nbits) with value */
+static G_GNUC_UNUSED inline void
+arm_set_ins_bits (void *p, int offset, int nbits, guint32 value)
+{
+	*(guint32*)p = (*(guint32*)p & ~(((1 << nbits) - 1) << offset)) | (value << offset);
+}
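
For readers new to these helpers: arm_emit writes one 32-bit instruction and advances the code pointer, while arm_set_ins_bits rewrites a field of an already-emitted instruction in place (this is how the branch-patching macros further down work). A minimal sketch of the mechanics, not part of this commit:

    guint8 buf [16];
    guint8 *code = buf;

    arm_emit (code, 0xd503201f);       /* emit one NOP; 'code' advances by 4 bytes */
    arm_set_ins_bits (buf, 5, 19, 42); /* later: overwrite bits [5,24) of that word */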
+
+/*
+ * Naming conventions for codegen macros:
+ * - 64 bit opcodes have an 'X' suffix
+ * - 32 bit opcodes have a 'W' suffix
+ * - the order of operands is the same as in assembly
+ */
+
+/*
+ * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a/index.html
+ */
+
+/* Unconditional branch (register) */
+
+// 0b1101011 == 0x6b
+#define arm_format_breg(p, opc, op2, op3, op4, rn) arm_emit ((p), (0x6b << 25) | ((opc) << 21) | ((op2) << 16) | ((op3) << 10) | ((rn) << 5) | ((op4) << 0))
+
+// 0b0000 == 0x0, 0b11111 == 0x1f
+#define arm_brx(p, reg) arm_format_breg ((p), 0x0, 0x1f, 0x0, 0x0, (reg))
+
+// 0b0001 == 0x1
+#define arm_blrx(p, reg) arm_format_breg ((p), 0x1, 0x1f, 0x0, 0x0, (reg))
+
+//0b0010 == 0x2
+#define arm_retx(p, reg) arm_format_breg ((p), 0x2, 0x1f, 0x0, 0x0, (reg))
+
+/* Unconditional branch (immediate) */
+
+static G_GNUC_UNUSED inline gboolean
+arm_is_bl_disp (void *code, void *target)
+{
+	gint64 disp = ((char*)(target) - (char*)(code)) / 4;
+
+	return (disp > -(1 << 25)) && (disp < (1 << 25));
+}
+
+static G_GNUC_UNUSED inline unsigned int
+arm_get_disp (void *p, void *target)
+{
+	unsigned int disp = ((char*)target - (char*)p) / 4;
+
+	if (target)
+		g_assert (arm_is_bl_disp (p, target));
+
+	return (disp & 0x3ffffff);
+}
+
+// 0b00101 == 0x5
+#define arm_b(p, target) arm_emit (p, (0x0 << 31) | (0x5 << 26) | ((arm_get_disp ((p), (target)) << 0)))
+
+#define arm_bl(p, target) arm_emit (p, (0x1 << 31) | (0x5 << 26) | ((arm_get_disp ((p), (target)) << 0)))
+
+/* Conditional branch */
+
+static G_GNUC_UNUSED inline gboolean
+arm_is_disp19 (void *code, void *target)
+{
+	gint64 disp = ((char*)(target) - (char*)(code)) / 4;
+
+	return (disp > -(1 << 18)) && (disp < (1 << 18));
+}
+
+static G_GNUC_UNUSED inline unsigned int
+arm_get_disp19 (void *p, void *target)
+{
+	unsigned int disp = ((char*)target - (char*)p) / 4;
+
+	if (target)
+		g_assert (arm_is_disp19 (p, target));
+
+	return (disp & 0x7ffff);
+}
+
+// 0b0101010 == 0x2a
+#define arm_format_condbr(p, o1, o0, cond, disp) arm_emit ((p), (0x2a << 25) | ((o1) << 24) | ((disp) << 5) | ((o0) << 4) | ((cond) << 0))
+#define arm_get_bcc_cond(p) ((*(guint32*)p) & 0xf)
+
+#define arm_bcc(p, cond, target) arm_format_condbr ((p), 0x0, 0x0, (cond), arm_get_disp19 ((p), (target)))
+
+// 0b011010 == 0x1a
+#define arm_format_cmpbr(p, sf, op, rt, target) arm_emit ((p), ((sf) << 31) | (0x1a << 25) | ((op) << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0))
+
+#define arm_set_cbz_target(p, target) arm_set_ins_bits (p, 5, 19, arm_get_disp19 ((p), (target)))
+
+#define arm_cbzx(p, rt, target) arm_format_cmpbr ((p), 0x1, 0x0, (rt), (target))
+#define arm_cbzw(p, rt, target) arm_format_cmpbr ((p), 0x0, 0x0, (rt), (target))
+
+#define arm_cbnzx(p, rt, target) arm_format_cmpbr ((p), 0x1, 0x1, (rt), (target))
+#define arm_cbnzw(p, rt, target) arm_format_cmpbr ((p), 0x0, 0x1, (rt), (target))
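
These compare-and-branch macros combine with arm_set_cbz_target in the usual emit-then-backpatch pattern when the branch target is not yet known (the IMT thunk code later in this commit does the same thing via mono_arm_patch). A sketch, not part of the commit:

    guint8 buf [64], *code = buf;
    guint8 *jump = code;

    arm_cbzx (code, ARMREG_R0, code);  /* placeholder target: branch to self */
    /* ... emit the instructions to be skipped when x0 == 0 ... */
    arm_set_cbz_target (jump, code);   /* backpatch the real target */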
+
+static G_GNUC_UNUSED inline unsigned int
+arm_get_disp15 (void *p, void *target)
+{
+	unsigned int disp = ((char*)target - (char*)p) / 4;
+	return (disp & 0x7fff);
+}
+
+// 0b011011 == 0x1b
+#define arm_format_tbimm(p, op, rt, bit, target) arm_emit ((p), ((((bit) >> 5) & 1) << 31) | (0x1b << 25) | ((op) << 24) | (((bit) & 0x1f) << 19) | (arm_get_disp15 ((p), (target)) << 5) | ((rt) << 0))
+
+#define arm_tbz(p, rt, bit, target) arm_format_tbimm ((p), 0x0, (rt), (bit), (target))
+#define arm_tbnz(p, rt, bit, target) arm_format_tbimm ((p), 0x1, (rt), (bit), (target))
+
+/* Memory access */
+
+#define arm_is_pimm12_scaled(pimm,size) ((pimm) >= 0 && (pimm) / (size) <= 0xfff && ((pimm) % (size)) == 0)
+
+static G_GNUC_UNUSED unsigned int
+arm_encode_pimm12 (int pimm, int size)
+{
+	g_assert (arm_is_pimm12_scaled (pimm, size));
+	return ((unsigned int)(pimm / size)) & 0xfff;
+}
+
+#define arm_is_strb_imm(pimm) arm_is_pimm12_scaled((pimm), 1)
+#define arm_is_strh_imm(pimm) arm_is_pimm12_scaled((pimm), 2)
+#define arm_is_strw_imm(pimm) arm_is_pimm12_scaled((pimm), 4)
+#define arm_is_strx_imm(pimm) arm_is_pimm12_scaled((pimm), 8)
+
+/* Load/Store register + scaled immediate */
+/* No pre-index/post-index yet */
+#define arm_format_mem_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0x39 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0))
+
+/* C5.6.83 LDR (immediate) */
+#define arm_ldrx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_X, 0x1, (rt), (rn), (pimm), 8)
+#define arm_ldrw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_W, 0x1, (rt), (rn), (pimm), 4)
+/* C5.6.86 LDRB (immediate) */
+#define arm_ldrb(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x1, (rt), (rn), (pimm), 1)
+/* C5.6.88 LDRH (immediate) */
+#define arm_ldrh(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x1, (rt), (rn), (pimm), 2)
+/* C5.6.90 LDRSB (immediate) */
+#define arm_ldrsbx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x2, (rt), (rn), (pimm), 1)
+#define arm_ldrsbw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x3, (rt), (rn), (pimm), 1)
+/* C5.6.92 LDRSH (immediate) */
+#define arm_ldrshx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x2, (rt), (rn), (pimm), 2)
+#define arm_ldrshw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x3, (rt), (rn), (pimm), 2)
+/* C5.6.94 LDRSW (immediate) */
+#define arm_ldrswx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_W, 0x2, (rt), (rn), (pimm), 4)
+
+/* C5.6.178 STR (immediate) */
+#define arm_strx(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_X, 0x0, (rt), (rn), (pimm), 8)
+#define arm_strw(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_W, 0x0, (rt), (rn), (pimm), 4)
+/* C5.6.182 STR (immediate) */
+#define arm_strh(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_H, 0x0, (rt), (rn), (pimm), 2)
+#define arm_strb(p, rt, rn, pimm) arm_format_mem_imm (p, ARMSIZE_B, 0x0, (rt), (rn), (pimm), 1)
+
+/* C3.3.9 Load/store register (immediate post-indexed) */
+static G_GNUC_UNUSED unsigned int
+arm_encode_simm9 (int simm)
+{
+	g_assert (simm >= -256 && simm <= 255);
+	return ((unsigned int)simm) & 0x1ff;
+}
+
+#define arm_format_mem_imm_post(p, size, V, opc, rt, rn, simm) arm_emit ((p), ((size) << 30) | (0x7 << 27) | ((V) << 26) | (0x0 << 24) | ((opc) << 22) | (arm_encode_simm9 ((simm)) << 12) | (0x1 << 10) | ((rn) << 5) | ((rt) << 0))
+
+#define arm_ldrx_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_X, 0x0, 0x1, (rt), (rn), (simm))
+#define arm_ldrw_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_W, 0x0, 0x1, (rt), (rn), (simm))
+
+#define arm_strx_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_X, 0x0, 0x0, (rt), (rn), (simm))
+#define arm_strw_post(p, rt, rn, simm) arm_format_mem_imm_post (p, ARMSIZE_W, 0x0, 0x0, (rt), (rn), (simm))
+
+/* C3.3.9 Load/store register (immediate pre-indexed) */
+#define arm_format_mem_imm_pre(p, size, V, opc, rt, rn, simm) arm_emit ((p), ((size) << 30) | (0x7 << 27) | ((V) << 26) | (0x0 << 24) | ((opc) << 22) | (arm_encode_simm9 ((simm)) << 12) | (0x3 << 10) | ((rn) << 5) | ((rt) << 0))
+
+#define arm_ldrx_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_X, 0x0, 0x1, (rt), (rn), (simm))
+#define arm_ldrw_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_W, 0x0, 0x1, (rt), (rn), (simm))
+
+#define arm_strx_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_X, 0x0, 0x0, (rt), (rn), (simm))
+#define arm_strw_pre(p, rt, rn, simm) arm_format_mem_imm_pre (p, ARMSIZE_W, 0x0, 0x0, (rt), (rn), (simm))
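
The post-/pre-indexed forms update the base register in the same instruction, which gives the usual pointer-walking idiom. An illustrative fragment, not part of the commit:

    guint8 buf [64], *code = buf;

    arm_ldrx_post (code, ARMREG_R2, ARMREG_R0, 8);  /* x2 = *(guint64*)x0; x0 += 8 */
    arm_strx_post (code, ARMREG_R2, ARMREG_R1, 8);  /* *(guint64*)x1 = x2; x1 += 8 */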
+
+/* Load/Store register + register */
+/* No extend/scale yet */
+#define arm_format_mem_reg(p, size, opc, rt, rn, rm) arm_emit ((p), ((size) << 30) | (0x38 << 24) | ((opc) << 22) | (0x1 << 21) | ((rm) << 16) | (0x3 << 13) | (0 << 12) | (0x2 << 10) | ((rn) << 5) | ((rt) << 0))
+
+/* C5.6.85 LDR (register) */
+#define arm_ldrx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_X, 0x1, (rt), (rn), (rm))
+#define arm_ldrw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_W, 0x1, (rt), (rn), (rm))
+/* C5.6.87 LDRB (register) */
+#define arm_ldrb_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x1, (rt), (rn), (rm))
+/* C5.6.88 LDRH (register) */
+#define arm_ldrh_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x1, (rt), (rn), (rm))
+/* C5.6.91 LDRSB (register) */
+#define arm_ldrsbx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x2, (rt), (rn), (rm))
+#define arm_ldrsbw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x3, (rt), (rn), (rm))
+/* C5.6.93 LDRSH (register) */
+#define arm_ldrshx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x2, (rt), (rn), (rm))
+#define arm_ldrshw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x3, (rt), (rn), (rm))
+/* C5.6.96 LDRSW (register) */
+#define arm_ldrswx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_W, 0x2, (rt), (rn), (rm))
+
+/* C5.6.179 STR (register) */
+#define arm_strx_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_X, 0x0, (rt), (rn), (rm))
+#define arm_strw_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_W, 0x0, (rt), (rn), (rm))
+/* C5.6.181 STRB (register) */
+#define arm_strb_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_B, 0x0, (rt), (rn), (rm))
+/* C5.6.183 STRH (register) */
+#define arm_strh_reg(p, rt, rn, rm) arm_format_mem_reg ((p), ARMSIZE_H, 0x0, (rt), (rn), (rm))
+
+/* PC relative */
+
+/* C5.6.84 LDR (literal) */
+
+#define arm_get_ldr_lit_reg(p) (*(guint32*)(p) & 0x1f)
+
+#define arm_ldrx_lit(p, rt, target) arm_emit ((p), (0x01 << 30) | (0x18 << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0))
+#define arm_ldrw_lit(p, rt, target) arm_emit ((p), (0x00 << 30) | (0x18 << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0))
+#define arm_ldrswx_lit(p, rt, target) arm_emit ((p), (0x2 << 30) | (0x18 << 24) | (arm_get_disp19 ((p), (target)) << 5) | ((rt) << 0))
+
+/* Unscaled offset */
+/* FIXME: Not yet */
+
+/* Load/Store Pair */
+
+static G_GNUC_UNUSED unsigned int
+arm_encode_imm7 (int imm, int size)
+{
+	g_assert (imm / size >= -64 && imm / size <= 63 && (imm % size) == 0);
+	return ((unsigned int)(imm / size)) & 0x7f;
+}
+
+#define arm_is_imm7_scaled(imm, size) ((imm) / (size) >= -64 && (imm) / (size) <= 63 && ((imm) % (size)) == 0)
+
+#define arm_is_ldpx_imm(imm) arm_is_imm7_scaled ((imm), 8)
+
+/* C3.3.14 */
+#define arm_format_mem_p(p, size, opc, L, rt1, rt2, rn, imm) arm_emit ((p), (opc << 30) | (0x52 << 23) | ((L) << 22) | (arm_encode_imm7 (imm, size) << 15) | ((rt2) << 10) | ((rn) << 5) | ((rt1) << 0))
+
+#define arm_ldpx(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 8, 0x2, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpw(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 4, 0x0, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpsw(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 4, 0x1, 1, (rt1), (rt2), (rn), (imm))
+#define arm_stpx(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 8, 0x2, 0, (rt1), (rt2), (rn), (imm))
+#define arm_stpw(p, rt1, rt2, rn, imm) arm_format_mem_p ((p), 4, 0x0, 0, (rt1), (rt2), (rn), (imm))
+
+/* Load/Store Pair (Pre-indexed) */
+/* C3.3.16 */
+#define arm_format_mem_p_pre(p, size, opc, L, rt1, rt2, rn, imm) arm_emit ((p), (opc << 30) | (0x53 << 23) | ((L) << 22) | (arm_encode_imm7 (imm, size) << 15) | ((rt2) << 10) | ((rn) << 5) | ((rt1) << 0))
+
+#define arm_ldpx_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 8, 0x2, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpw_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 4, 0x0, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpsw_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 4, 0x1, 1, (rt1), (rt2), (rn), (imm))
+#define arm_stpx_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 8, 0x2, 0, (rt1), (rt2), (rn), (imm))
+#define arm_stpw_pre(p, rt1, rt2, rn, imm) arm_format_mem_p_pre ((p), 4, 0x0, 0, (rt1), (rt2), (rn), (imm))
+
+/* Not an official alias */
+#define arm_pushpx(p, rt1, rt2) arm_stpx_pre ((p), (rt1), (rt2), ARMREG_SP, -16)
+
+/* Load/Store Pair (Post-indexed) */
+/* C3.3.15 */
+#define arm_format_mem_p_post(p, size, opc, L, rt1, rt2, rn, imm) arm_emit ((p), (opc << 30) | (0x51 << 23) | ((L) << 22) | (arm_encode_imm7 (imm, size) << 15) | ((rt2) << 10) | ((rn) << 5) | ((rt1) << 0))
+
+#define arm_ldpx_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 8, 0x2, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpw_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 4, 0x0, 1, (rt1), (rt2), (rn), (imm))
+#define arm_ldpsw_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 4, 0x1, 1, (rt1), (rt2), (rn), (imm))
+#define arm_stpx_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 8, 0x2, 0, (rt1), (rt2), (rn), (imm))
+#define arm_stpw_post(p, rt1, rt2, rn, imm) arm_format_mem_p_post ((p), 4, 0x0, 0, (rt1), (rt2), (rn), (imm))
+
+/* Not an official alias */
+#define arm_poppx(p, rt1, rt2) arm_ldpx_post ((p), (rt1), (rt2), ARMREG_SP, 16)
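
Together the pair macros express the standard AArch64 frame setup and teardown. A minimal prologue/epilogue sketch (illustrative, not in this commit):

    guint8 buf [64], *code = buf;

    /* prologue: push fp/lr as a pair, pre-decrementing sp by 16 */
    arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -16);
    /* ... function body ... */
    /* epilogue: pop fp/lr, post-incrementing sp by 16, then return */
    arm_ldpx_post (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 16);
    arm_retx (code, ARMREG_LR);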
+
+/* Load/Store Exclusive */
+#define arm_format_ldxr(p, size, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x1 << 22) | (0x0 << 21) | (0x1f << 16) | (0x0 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0))
+#define arm_format_ldxp(p, size, rt1, rt2, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x1 << 22) | (0x1 << 21) | (0x1f << 16) | (0x0 << 15) | ((rt2) << 10)| ((rn) << 5) | ((rt1) << 0))
+#define arm_format_stxr(p, size, rs, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x0 << 22) | (0x0 << 21) | ((rs) << 16) | (0x0 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0))
+#define arm_format_stxp(p, size, rs, rt1, rt2, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x0 << 22) | (0x1 << 21) | ((rs) << 16) | (0x0 << 15) | ((rt2) << 10)| ((rn) << 5) | ((rt1) << 0))
+
+#define arm_ldxrx(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_X, (rt), (rn))
+#define arm_ldxrw(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_W, (rt), (rn))
+#define arm_ldxrh(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_H, (rt), (rn))
+#define arm_ldxrb(p, rt, rn) arm_format_ldxr ((p), ARMSIZE_B, (rt), (rn))
+#define arm_ldxpx(p, rt1, rt2, rn) arm_format_ldxp ((p), ARMSIZE_X, (rt1), (rt2), (rn))
+#define arm_ldxpw(p, rt1, rt2, rn) arm_format_ldxp ((p), ARMSIZE_W, (rt1), (rt2), (rn))
+#define arm_stxrx(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_X, (rs), (rt), (rn))
+#define arm_stxrw(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_W, (rs), (rt), (rn))
+#define arm_stxrh(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_H, (rs), (rt), (rn))
+#define arm_stxrb(p, rs, rt, rn) arm_format_stxr ((p), ARMSIZE_B, (rs), (rt), (rn))
+#define arm_stxpx(p, rs, rt1, rt2, rn) arm_format_stxp ((p), ARMSIZE_X, (rs), (rt1), (rt2), (rn))
+#define arm_stxpw(p, rs, rt1, rt2, rn) arm_format_stxp ((p), ARMSIZE_W, (rs), (rt1), (rt2), (rn))
+
+/* C5.6.73 LDAR: Load-Acquire Register */
+
+#define arm_format_ldar(p, size, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x1 << 23) | (0x1 << 22) | (0x0 << 21) | (0x1f << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0))
+
+#define arm_ldarx(p, rt, rn) arm_format_ldar ((p), ARMSIZE_X, (rt), (rn))
+#define arm_ldarw(p, rt, rn) arm_format_ldar ((p), ARMSIZE_W, (rt), (rn))
+#define arm_ldarh(p, rt, rn) arm_format_ldar ((p), ARMSIZE_H, (rt), (rn))
+#define arm_ldarb(p, rt, rn) arm_format_ldar ((p), ARMSIZE_B, (rt), (rn))
+
+/* C5.6.169 STLR: Store-Release Register */
+
+#define arm_format_stlr(p, size, rt, rn) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x1 << 23) | (0x0 << 22) | (0x0 << 21) | (0x1f << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0))
+
+#define arm_stlrx(p, rn, rt) arm_format_stlr ((p), ARMSIZE_X, (rt), (rn))
+#define arm_stlrw(p, rn, rt) arm_format_stlr ((p), ARMSIZE_W, (rt), (rn))
+#define arm_stlrh(p, rn, rt) arm_format_stlr ((p), ARMSIZE_H, (rt), (rn))
+#define arm_stlrb(p, rn, rt) arm_format_stlr ((p), ARMSIZE_B, (rt), (rn))
+
+/* C5.6.77 LDAXR */
+#define arm_format_ldaxr(p, size, rn, rt) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x1 << 22) | (0x0 << 21) | (0x1f << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0))
+
+#define arm_ldaxrx(p, rt, rn) arm_format_ldaxr ((p), 0x3, (rn), (rt))
+#define arm_ldaxrw(p, rt, rn) arm_format_ldaxr ((p), 0x2, (rn), (rt))
+
+/* C5.6.173 STLXR */
+#define arm_format_stlxr(p, size, rs, rn, rt) arm_emit ((p), ((size) << 30) | (0x8 << 24) | (0x0 << 23) | (0x0 << 22) | (0x0 << 21) | ((rs) << 16) | (0x1 << 15) | (0x1f << 10) | ((rn) << 5) | ((rt) << 0))
+
+#define arm_stlxrx(p, rs, rt, rn) arm_format_stlxr ((p), 0x3, (rs), (rn), (rt))
+#define arm_stlxrw(p, rs, rt, rn) arm_format_stlxr ((p), 0x2, (rs), (rn), (rt))
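
The exclusive and acquire/release macros combine into the standard load-linked/store-conditional retry loop. A sketch of an atomic exchange with acquire/release semantics (illustrative, not in this commit):

    guint8 buf [64], *code = buf;
    guint8 *retry = code;

    arm_ldaxrx (code, ARMREG_R2, ARMREG_R0);            /* x2 = [x0], exclusive + acquire */
    arm_stlxrx (code, ARMREG_R3, ARMREG_R1, ARMREG_R0); /* try [x0] = x1 with release; w3 = status */
    arm_cbnzw (code, ARMREG_R3, retry);                 /* non-zero status: monitor lost, retry */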
+
+/* Load/Store SIMD&FP */
+
+/* C6.3.285 STR (immediate, SIMD&FP) */
+#define arm_format_strfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0))
+
+/* Store double */
+#define arm_strfpx(p, dt, xn, simm) arm_format_strfp_imm ((p), ARMSIZE_X, 0x0, (dt), (xn), (simm), 8)
+/* Store single */
+#define arm_strfpw(p, st, xn, simm) arm_format_strfp_imm ((p), ARMSIZE_W, 0x0, (st), (xn), (simm), 4)
+
+/* C6.3.166 LDR (immediate, SIMD&FP) */
+#define arm_format_ldrfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0))
+
+/* Load double */
+#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, dt, xn, simm, 8)
+/* Load single */
+#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, dt, xn, simm, 4)
+
+/* Arithmetic (immediate) */
+static G_GNUC_UNUSED inline guint32
+arm_encode_arith_imm (int imm, guint32 *shift)
+{
+	// FIXME:
+	g_assert ((imm >= 0) && (imm < 0xfff));
+	*shift = 0;
+	return (guint32)imm;
+}
+
+// FIXME:
+#define arm_is_arith_imm(imm)  (((imm) >= 0) && ((imm) < 0xfff))
+
+#define arm_format_alu_imm(p, sf, op, S, rd, rn, imm) do { \
+	guint32 _imm12, _shift; \
+	_imm12 = arm_encode_arith_imm ((imm), &_shift); arm_emit ((p), ((sf) << 31) | ((op) << 30) | ((S) << 29) | (0x11 << 24) | ((_shift) << 22) | ((_imm12) << 10) | ((rn) << 5) | ((rd) << 0)); \
+} while (0)
+
+/* rd/rn can be SP for addx/subx */
+#define arm_addx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x0, 0x0, (rd), (rn), (imm))
+#define arm_addw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x0, 0x0, (rd), (rn), (imm))
+#define arm_addsx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x0, 0x1, (rd), (rn), (imm))
+#define arm_addsw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x0, 0x1, (rd), (rn), (imm))
+#define arm_subx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x1, 0x0, (rd), (rn), (imm))
+#define arm_subw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x1, 0x0, (rd), (rn), (imm))
+#define arm_subsx_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x1, 0x1, 0x1, (rd), (rn), (imm))
+#define arm_subsw_imm(p, rd, rn, imm) arm_format_alu_imm ((p), 0x0, 0x1, 0x1, (rd), (rn), (imm))
+
+#define arm_cmpx_imm(p, rn, imm) arm_subsx_imm ((p), ARMREG_RZR, (rn), (imm))
+#define arm_cmpw_imm(p, rn, imm) arm_subsw_imm ((p), ARMREG_RZR, (rn), (imm))
+#define arm_cmnx_imm(p, rn, imm) arm_addsx_imm ((p), ARMREG_RZR, (rn), (imm))
+#define arm_cmnw_imm(p, rn, imm) arm_addsw_imm ((p), ARMREG_RZR, (rn), (imm))
+
+/* Logical (immediate) */
+
+// FIXME: imm
+#if 0
+#define arm_format_and(p, sf, opc, rd, rn, imm) arm_emit ((p), ((sf) << 31) | ((opc) << 29) | (0x24 << 23) | ((0) << 22) | ((imm) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_andx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x0, (rd), (rn), (imm))
+#define arm_andw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x0, (rd), (rn), (imm))
+#define arm_andsx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x3, (rd), (rn), (imm))
+#define arm_andsw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x3, (rd), (rn), (imm))
+#define arm_eorx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x2, (rd), (rn), (imm))
+#define arm_eorw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x2, (rd), (rn), (imm))
+#define arm_orrx_imm(p, rd, rn, imm) arm_format_and ((p), 0x1, 0x1, (rd), (rn), (imm))
+#define arm_orrw_imm(p, rd, rn, imm) arm_format_and ((p), 0x0, 0x1, (rd), (rn), (imm))
+
+#define arm_tstx_imm(p, rn, imm) arm_andsx_imm ((p), ARMREG_RZR, (rn), (imm))
+#define arm_tstw_imm(p, rn, imm) arm_andsw_imm ((p), ARMREG_RZR, (rn), (imm))
+#endif
+
+/* Move (wide immediate) */
+#define arm_format_mov(p, sf, opc, hw, rd, imm16) arm_emit ((p), ((sf) << 31) | ((opc) << 29) | (0x25 << 23) | ((hw) << 21) | (((guint32)(imm16) & 0xffff) << 5) | ((rd) << 0))
+
+#define arm_get_movzx_rd(p) ((*(guint32*)p) & 0x1f)
+
+#define arm_movzx(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x1, 0x2, (shift) / 16, (rd), (imm)); } while (0)
+#define arm_movzw(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x0, 0x2, (shift) / 16, (rd), (imm)); } while (0)
+#define arm_movnx(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x1, 0x0, (shift) / 16, (rd), (imm)); } while (0)
+#define arm_movnw(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x0, 0x0, (shift) / 16, (rd), (imm)); } while (0)
+#define arm_movkx(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x1, 0x3, (shift) / 16, (rd), (imm)); } while (0)
+#define arm_movkw(p, rd, imm, shift) do { g_assert ((shift) % 16 == 0); arm_format_mov ((p), 0x0, 0x3, (shift) / 16, (rd), (imm)); } while (0)
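
The classic use of these is materializing a 64-bit constant sixteen bits at a time: movz for the low halfword, movk for the rest (the trampoline-page code in aot-compiler.c below uses the two-instruction variant of this for the page size). A sketch, not part of the commit:

    guint8 buf [64], *code = buf;
    guint64 val = 0x123456789abcdef0ULL;  /* arbitrary example constant */

    arm_movzx (code, ARMREG_R0, val & 0xffff, 0);
    arm_movkx (code, ARMREG_R0, (val >> 16) & 0xffff, 16);
    arm_movkx (code, ARMREG_R0, (val >> 32) & 0xffff, 32);
    arm_movkx (code, ARMREG_R0, (val >> 48) & 0xffff, 48);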
+
+/* PC-relative address calculation */
+#define arm_format_adrp(p, op, rd, target) do { guint64 imm1 = (guint64)(target); guint64 imm2 = (guint64)(p); int _imm = imm1 - imm2; arm_emit ((p), ((op) << 31) | (((_imm) & 0x3) << 29) | (0x10 << 24) | (((_imm >> 2) & 0x7ffff) << 5) | ((rd) << 0)); } while (0)
+
+#define arm_adrpx(p, rd, target) arm_format_adrp ((p), 0x1, (rd), (target))
+#define arm_adrx(p, rd, target) arm_format_adrp ((p), 0x0, (rd), (target))
+
+/* Bitfield move */
+#define arm_format_bfm(p, sf, opc, N, immr, imms, rn, rd) arm_emit ((p), ((sf) << 31) | ((opc) << 29) | (0x26 << 23) | ((N) << 22) | ((immr) << 16) | ((imms) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_bfmx(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x1, 0x1, 0x1, (immr), (imms), (rn), (rd))
+#define arm_bfmw(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x0, 0x1, 0x0, (immr), (imms), (rn), (rd))
+#define arm_sbfmx(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x1, 0x0, 0x1, (immr), (imms), (rn), (rd))
+#define arm_sbfmw(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x0, 0x0, 0x0, (immr), (imms), (rn), (rd))
+#define arm_ubfmx(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x1, 0x2, 0x1, (immr), (imms), (rn), (rd))
+#define arm_ubfmw(p, rd, rn, immr, imms) arm_format_bfm ((p), 0x0, 0x2, 0x0, (immr), (imms), (rn), (rd))
+
+/* Sign extend and Zero-extend */
+#define arm_sxtbx(p, rd, rn) arm_sbfmx ((p), (rd), (rn), 0, 7)
+#define arm_sxtbw(p, rd, rn) arm_sbfmw ((p), (rd), (rn), 0, 7)
+#define arm_sxthx(p, rd, rn) arm_sbfmx ((p), (rd), (rn), 0, 15)
+#define arm_sxthw(p, rd, rn) arm_sbfmw ((p), (rd), (rn), 0, 15)
+#define arm_sxtwx(p, rd, rn) arm_sbfmx ((p), (rd), (rn), 0, 31)
+#define arm_uxtbx(p, rd, rn) arm_ubfmx ((p), (rd), (rn), 0, 7)
+#define arm_uxtbw(p, rd, rn) arm_ubfmw ((p), (rd), (rn), 0, 7)
+#define arm_uxthx(p, rd, rn) arm_ubfmx ((p), (rd), (rn), 0, 15)
+#define arm_uxthw(p, rd, rn) arm_ubfmw ((p), (rd), (rn), 0, 15)
+
+/* Extract register */
+#define arm_format_extr(p, sf, N, rd, rn, rm, imms) arm_emit ((p), ((sf) << 31) | (0x27 << 23) | ((N) << 22) | (0x0 << 21) | ((rm) << 16) | ((imms) << 10) | ((rn) << 5) | ((rd) << 0))
+#define arm_extrx(p, rd, rn, rm, lsb) arm_format_extr ((p), 0x1, 0x1, (rd), (rn), (rm), (lsb))
+#define arm_extrw(p, rd, rn, rm, lsb) arm_format_extr ((p), 0x0, 0x0, (rd), (rn), (rm), (lsb))
+
+/* Shift (immediate) */
+#define arm_asrx(p, rd, rn, shift) arm_sbfmx ((p), (rd), (rn), (shift), 63)
+#define arm_asrw(p, rd, rn, shift) arm_sbfmw ((p), (rd), (rn), (shift), 31)
+#define arm_lslx(p, rd, rn, shift) arm_ubfmx ((p), (rd), (rn), 64 - ((shift) % 64), 63 - ((shift) % 64))
+#define arm_lslw(p, rd, rn, shift) arm_ubfmw ((p), (rd), (rn), 32 - ((shift) % 32), 31 - ((shift) % 32))
+#define arm_lsrx(p, rd, rn, shift) arm_ubfmx ((p), (rd), (rn), shift, 63)
+#define arm_lsrw(p, rd, rn, shift) arm_ubfmw ((p), (rd), (rn), shift, 31)
+#define arm_rorx(p, rd, rs, shift) arm_extrx ((p), (rd), (rs), (rs), (shift))
+#define arm_rorw(p, rd, rs, shift) arm_extrw ((p), (rd), (rs), (rs), (shift))
+
+/* Arithmetic (shifted register) */
+#define arm_format_alu_shift(p, sf, op, S, rd, rn, rm, shift, imm6) arm_emit ((p), ((sf) << 31) | ((op) << 30) | ((S) << 29) | (0xb << 24) | ((shift) << 22) | (0x0 << 21) | ((rm) << 16) | ((imm6) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_addx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_addw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_addsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_addsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_subx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_subw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_subsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x1, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_subsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_alu_shift ((p), 0x0, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_cmnx_shift(p, rn, rm, shift_type, amount) arm_addsx_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+#define arm_cmnw_shift(p, rn, rm, shift_type, amount) arm_addsw_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+#define arm_cmpx_shift(p, rn, rm, shift_type, amount) arm_subsx_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+#define arm_cmpw_shift(p, rn, rm, shift_type, amount) arm_subsw_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+#define arm_negx_shift(p, rd, rm, shift_type, amount) arm_subx_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+#define arm_negw_shift(p, rd, rm, shift_type, amount) arm_subw_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+#define arm_negsx_shift(p, rd, rm, shift_type, amount) arm_subsx_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+#define arm_negsw_shift(p, rd, rm, shift_type, amount) arm_subsw_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+
+#define arm_addx(p, rd, rn, rm) arm_addx_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_addw(p, rd, rn, rm) arm_addw_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_subx(p, rd, rn, rm) arm_subx_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_subw(p, rd, rn, rm) arm_subw_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_addsx(p, rd, rn, rm) arm_addsx_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_addsw(p, rd, rn, rm) arm_addsw_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_subsx(p, rd, rn, rm) arm_subsx_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_subsw(p, rd, rn, rm) arm_subsw_shift ((p), (rd), (rn), (rm), 0, 0)
+#define arm_cmpx(p, rd, rn) arm_cmpx_shift ((p), (rd), (rn), 0, 0)
+#define arm_cmpw(p, rd, rn) arm_cmpw_shift ((p), (rd), (rn), 0, 0)
+#define arm_negx(p, rd, rn) arm_negx_shift ((p), (rd), (rn), 0, 0)
+#define arm_negw(p, rd, rn) arm_negw_shift ((p), (rd), (rn), 0, 0)
+
+/* Arithmetic with carry */
+#define arm_format_adc(p, sf, op, S, rd, rn, rm) arm_emit ((p), ((sf) << 31) | ((op) << 30) | ((S) << 29) | (0xd0 << 21) | ((rm) << 16) | (0x0 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_adcx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x0, 0x0, (rd), (rn), (rm))
+#define arm_adcw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x0, 0x0, (rd), (rn), (rm))
+#define arm_adcsx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x0, 0x1, (rd), (rn), (rm))
+#define arm_adcsw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x0, 0x1, (rd), (rn), (rm))
+#define arm_sbcx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x1, 0x0, (rd), (rn), (rm))
+#define arm_sbcw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x1, 0x0, (rd), (rn), (rm))
+#define arm_sbcsx(p, rd, rn, rm) arm_format_adc ((p), 0x1, 0x1, 0x1, (rd), (rn), (rm))
+#define arm_sbcsw(p, rd, rn, rm) arm_format_adc ((p), 0x0, 0x1, 0x1, (rd), (rn), (rm))
+#define arm_ngcx(p, rd, rm) arm_sbcx ((p), (rd), ARMREG_RZR, (rm))
+#define arm_ngcw(p, rd, rm) arm_sbcw ((p), (rd), ARMREG_RZR, (rm))
+#define arm_ngcsx(p, rd, rm) arm_sbcsx ((p), (rd), ARMREG_RZR, (rm))
+#define arm_ngcsw(p, rd, rm) arm_sbcsw ((p), (rd), ARMREG_RZR, (rm))
+
+/* Logical (shifted register) */
+#define arm_format_logical_shift(p, sf, op, N, rd, rn, rm, shift, imm6) arm_emit ((p), ((sf) << 31) | ((op) << 29) | (0xa << 24) | ((shift) << 22) | ((N) << 21) | ((rm) << 16) | ((imm6) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_andx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_andw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x0, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_andsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x3, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_andsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x3, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_bicx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_bicw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x0, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_bicsx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x3, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_bicsw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x3, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_eonx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x2, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_eonw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x2, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_eorx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x2, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_eorw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x2, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_orrx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_orrw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x1, 0x0, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_ornx_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x1, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_ornw_shift(p, rd, rn, rm, shift_type, amount) arm_format_logical_shift ((p), 0x0, 0x1, 0x1, (rd), (rn), (rm), (shift_type), (amount))
+#define arm_mvnx_shift(p, rd, rm, shift_type, amount) arm_ornx_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+#define arm_mvnw_shift(p, rd, rm, shift_type, amount) arm_ornw_shift ((p), (rd), ARMREG_RZR, (rm), (shift_type), (amount))
+#define arm_tstx_shift(p, rn, rm, shift_type, amount) arm_andsx_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+#define arm_tstw_shift(p, rn, rm, shift_type, amount) arm_andsw_shift ((p), ARMREG_RZR, (rn), (rm), (shift_type), (amount))
+/* Aliases */
+#define arm_andx(p, rd, rn, rm) arm_andx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_andw(p, rd, rn, rm) arm_andw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_andsx(p, rd, rn, rm) arm_andsx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_andsw(p, rd, rn, rm) arm_andsw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicx(p, rd, rn, rm) arm_bicx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicw(p, rd, rn, rm) arm_bicw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicsx(p, rd, rn, rm) arm_bicsx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_bicsw(p, rd, rn, rm) arm_bicsw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eonx(p, rd, rn, rm) arm_eonx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eonw(p, rd, rn, rm) arm_eonw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eorx(p, rd, rn, rm) arm_eorx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_eorw(p, rd, rn, rm) arm_eorw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_orrx(p, rd, rn, rm) arm_orrx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_orrw(p, rd, rn, rm) arm_orrw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_ornx(p, rd, rn, rm) arm_ornx_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_ornw(p, rd, rn, rm) arm_ornw_shift(p, rd, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_mvnx(p, rd, rm) arm_mvnx_shift(p, rd, rm, ARMSHIFT_LSL, 0)
+#define arm_mvnw(p, rd, rm) arm_mvnw_shift(p, rd, rm, ARMSHIFT_LSL, 0)
+#define arm_tstx(p, rn, rm) arm_tstx_shift(p, rn, rm, ARMSHIFT_LSL, 0)
+#define arm_tstw(p, rn, rm) arm_tstw_shift(p, rn, rm, ARMSHIFT_LSL, 0)
+
+/* Move (register) */
+#define arm_movx(p, rn, rm) arm_orrx_shift ((p), (rn), ARMREG_RZR, (rm), ARMSHIFT_LSL, 0)
+#define arm_movw(p, rn, rm) arm_orrw_shift ((p), (rn), ARMREG_RZR, (rm), ARMSHIFT_LSL, 0)
+
+/* Not an official alias */
+#define arm_movspx(p, rn, rm) arm_addx_imm ((p), (rn), (rm), 0)
+
+/* Shift (register) */
+#define arm_format_shift_reg(p, sf, op2, rd, rn, rm) arm_emit ((p), ((sf) << 31) | (0xd6 << 21) | ((rm) << 16) | (0x2 << 12) | ((op2) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_asrvx(p, rd, rn, rm) arm_format_shift_reg ((p), 0x1, 0x2, (rd), (rn), (rm))
+#define arm_asrvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x2, (rd), (rn), (rm))
+#define arm_lslvx(p, rd, rn, rm) arm_format_shift_reg ((p), 0x1, 0x0, (rd), (rn), (rm))
+#define arm_lslvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x0, (rd), (rn), (rm))
+#define arm_lsrvx(p, rd, rn, rm) arm_format_shift_reg ((p), 0x1, 0x1, (rd), (rn), (rm))
+#define arm_lsrvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x1, (rd), (rn), (rm))
+#define arm_rorvx(p, rd, rn, rm) arm_format_shift_reg ((p), 0x1, 0x3, (rd), (rn), (rm))
+#define arm_rorvw(p, rd, rn, rm) arm_format_shift_reg ((p), 0x0, 0x3, (rd), (rn), (rm))
+
+/* Multiply */
+#define arm_format_mul(p, sf, o0, rd, rn, rm, ra) arm_emit ((p), ((sf) << 31) | (0x0 << 29) | (0x1b << 24) | (0x0 << 21) | ((rm) << 16) | ((o0) << 15) | ((ra) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_maddx(p, rd, rn, rm, ra) arm_format_mul((p), 0x1, 0x0, (rd), (rn), (rm), (ra))
+#define arm_maddw(p, rd, rn, rm, ra) arm_format_mul((p), 0x0, 0x0, (rd), (rn), (rm), (ra))
+#define arm_msubx(p, rd, rn, rm, ra) arm_format_mul((p), 0x1, 0x1, (rd), (rn), (rm), (ra))
+#define arm_msubw(p, rd, rn, rm, ra) arm_format_mul((p), 0x0, 0x1, (rd), (rn), (rm), (ra))
+#define arm_mnegx(p, rd, rn, rm) arm_msubx ((p), (rd), (rn), (rm), ARMREG_RZR)
+#define arm_mnegw(p, rd, rn, rm) arm_msubw ((p), (rd), (rn), (rm), ARMREG_RZR)
+#define arm_mulx(p, rd, rn, rm) arm_maddx ((p), (rd), (rn), (rm), ARMREG_RZR)
+#define arm_mulw(p, rd, rn, rm) arm_maddw ((p), (rd), (rn), (rm), ARMREG_RZR)
+
+/* FIXME: Missing multiple opcodes */
+
+/* Division */
+#define arm_format_div(p, sf, o1, rd, rn, rm) arm_emit ((p), ((sf) << 31) | (0xd6 << 21) | ((rm) << 16) | (0x1 << 11) | ((o1) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_sdivx(p, rd, rn, rm) arm_format_div ((p), 0x1, 0x1, (rd), (rn), (rm))
+#define arm_sdivw(p, rd, rn, rm) arm_format_div ((p), 0x0, 0x1, (rd), (rn), (rm))
+#define arm_udivx(p, rd, rn, rm) arm_format_div ((p), 0x1, 0x0, (rd), (rn), (rm))
+#define arm_udivw(p, rd, rn, rm) arm_format_div ((p), 0x0, 0x0, (rd), (rn), (rm))
+
+/* Conditional select */
+#define arm_format_csel(p, sf, op, op2, cond, rd, rn, rm) arm_emit ((p), ((sf) << 31) | ((op) << 30) | (0xd4 << 21) | ((rm) << 16) | ((cond) << 12) | ((op2) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_cselx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x0, 0x0, (cond), (rd), (rn), (rm))
+#define arm_cselw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x0, 0x0, (cond), (rd), (rn), (rm))
+#define arm_csincx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x0, 0x1, (cond), (rd), (rn), (rm))
+#define arm_csincw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x0, 0x1, (cond), (rd), (rn), (rm))
+#define arm_csinvx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x1, 0x0, (cond), (rd), (rn), (rm))
+#define arm_csinvw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x1, 0x0, (cond), (rd), (rn), (rm))
+#define arm_csnegx(p, cond, rd, rn, rm) arm_format_csel ((p), 0x1, 0x1, 0x1, (cond), (rd), (rn), (rm))
+#define arm_csnegw(p, cond, rd, rn, rm) arm_format_csel ((p), 0x0, 0x1, 0x1, (cond), (rd), (rn), (rm))
+
+#define arm_cset(p, cond, rd) arm_csincx ((p), ((cond) ^ 0x1), (rd), ARMREG_RZR, ARMREG_RZR)
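
The (cond) ^ 0x1 flips the low bit of the condition code, which inverts it in the A64 encoding; CSINC with the inverted condition then produces ZR+1 (i.e. 1) when the original condition holds and ZR (0) when it does not. Usage sketch, not part of the commit:

    guint8 buf [16], *code = buf;

    arm_cmpx_imm (code, ARMREG_R0, 0);       /* set flags from x0 - 0 */
    arm_cset (code, ARMCOND_EQ, ARMREG_R1);  /* x1 = (x0 == 0) ? 1 : 0 */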
+
+/* C5.6.68 (HINT) */
+#define arm_hint(p, imm) arm_emit ((p), (0xd5032 << 12) | ((imm) << 5) | (0x1f << 0))
+#define arm_nop(p) arm_hint ((p), 0x0)
+
+/* C5.6.29 BRK */
+#define arm_brk(p, imm) arm_emit ((p), (0xd4 << 24) | (0x1 << 21) | ((imm) << 5))
+
+/* C6.3.114 FMOV (General) */
+#define arm_format_fmov_gr(p, sf, type, rmode, opcode, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rmode) << 19) | ((opcode) << 16) | ((rn) << 5) | ((rd) << 0))
+
+/* Move gr->vfp */
+#define arm_fmov_rx_to_double(p, dd, xn) arm_format_fmov_gr ((p), 0x1, 0x1, 0x0, 0x7, (xn), (dd))
+
+/* Move vfp->gr */
+#define arm_fmov_double_to_rx(p, xd, dn) arm_format_fmov_gr ((p), 0x1, 0x1, 0x0, 0x6, (dn), (xd))
+
+/* C6.3.113 FMOV (register) */
+#define arm_format_fmov(p, type, rn, rd) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fmovd(p, dd, dn) arm_format_fmov ((p), 0x1, (dn), (dd))
+#define arm_fmovs(p, dd, dn) arm_format_fmov ((p), 0x0, (dn), (dd))
+
+/* C6.3.54 FCMP */
+#define arm_format_fcmp(p, type, opc, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x8 << 10) | ((rn) << 5) | ((opc) << 3))
+
+#define arm_fcmpd(p, dn, dm) arm_format_fcmp (p, 0x1, 0x0, (dn), (dm))
+#define arm_fcmps(p, dn, dm) arm_format_fcmp (p, 0x0, 0x0, (dn), (dm))
+
+/* Float precision */
+#define arm_format_fcvt(p, type, opc, rn, rd) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x1 << 17) | ((opc) << 15) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0))
+
+/* C6.3.57 FCVT */
+/* single->double */
+#define arm_fcvt_sd(p, dd, sn) arm_format_fcvt ((p), 0x0, 0x1, (sn), (dd))
+/* double->single */
+#define arm_fcvt_ds(p, sd, dn) arm_format_fcvt ((p), 0x1, 0x0, (dn), (sd))
+
+/* Float conversion to integer conversion */
+#define arm_format_fcvtz(p, sf, type, rmode, opcode, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rmode) << 19) | ((opcode) << 16) | ((rn) << 5) | ((rd) << 0))
+
+/* C6.3.80 FCVTZS (scalar, integer) */
+#define arm_fcvtzs_dw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x1, 0x3, 0x0, (rn), (rd))
+#define arm_fcvtzs_dx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x1, 0x3, 0x0, (rn), (rd))
+#define arm_fcvtzs_sw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x0, 0x3, 0x0, (rn), (rd))
+#define arm_fcvtzs_sx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x0, 0x3, 0x0, (rn), (rd))
+
+/* C6.3.84 FCVTZU (scalar, integer) */
+#define arm_fcvtzu_dw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x1, 0x3, 0x1, (rn), (rd))
+#define arm_fcvtzu_dx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x1, 0x3, 0x1, (rn), (rd))
+#define arm_fcvtzu_sw(p, rd, rn) arm_format_fcvtz ((p), 0x0, 0x0, 0x3, 0x1, (rn), (rd))
+#define arm_fcvtzu_sx(p, rd, rn) arm_format_fcvtz ((p), 0x1, 0x0, 0x3, 0x1, (rn), (rd))
+
+/* C6.3.208 SCVTF (vector, integer) */
+#define arm_format_scvtf_vector(p, sz, rn, rd) arm_emit ((p), (0x1 << 30) | (0x0 << 29) | (0x1e << 24) | ((sz) << 22) | (0x10 << 17) | (0x1d << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_scvtf_d(p, dd, dn) arm_format_scvtf_vector ((p), 0x1, (dn), (dd))
+#define arm_scvtf_s(p, sd, sn) arm_format_scvtf_vector ((p), 0x0, (sn), (sd))
+
+/* C6.3.210 SCVTF (scalar, integer) */
+#define arm_format_scvtf_scalar(p, sf, type, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x2 << 16) | (0x0 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_scvtf_rx_to_d(p, dd, rn) arm_format_scvtf_scalar ((p), 0x1, 0x1, rn, dd)
+#define arm_scvtf_rw_to_d(p, dd, rn) arm_format_scvtf_scalar ((p), 0x0, 0x1, rn, dd)
+#define arm_scvtf_rx_to_s(p, dd, rn) arm_format_scvtf_scalar ((p), 0x1, 0x0, rn, dd)
+#define arm_scvtf_rw_to_s(p, dd, rn) arm_format_scvtf_scalar ((p), 0x0, 0x0, rn, dd)
+
+/* C6.3.306 UCVTF (vector, integer) */
+#define arm_format_ucvtf_vector(p, sz, rn, rd) arm_emit ((p), (0x1 << 30) | (0x1 << 29) | (0x1e << 24) | ((sz) << 22) | (0x10 << 17) | (0x1d << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_ucvtf_d(p, dd, dn) arm_format_ucvtf_vector ((p), 0x1, (dn), (dd))
+#define arm_ucvtf_s(p, sd, sn) arm_format_ucvtf_vector ((p), 0x0, (sn), (sd))
+
+/* C6.3.308 UCVTF (scalar, integer) */
+#define arm_format_ucvtf_scalar(p, sf, type, rn, rd) arm_emit ((p), ((sf) << 31) | (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x3 << 16) | (0x0 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_ucvtf_rx_to_d(p, dd, rn) arm_format_ucvtf_scalar ((p), 0x1, 0x1, rn, dd)
+#define arm_ucvtf_rw_to_d(p, dd, rn) arm_format_ucvtf_scalar ((p), 0x0, 0x1, rn, dd)
+
+/* C6.3.41 FADD (scalar) */
+#define arm_format_fadd_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x1 << 13) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fadd_d(p, rd, rn, rm) arm_format_fadd_scalar ((p), 0x1, (rd), (rn), (rm))
+#define arm_fadd_s(p, rd, rn, rm) arm_format_fadd_scalar ((p), 0x0, (rd), (rn), (rm))
+
+/* C6.3.149 FSUB (scalar) */
+#define arm_format_fsub_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x1 << 13) | (0x1 << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fsub_d(p, rd, rn, rm) arm_format_fsub_scalar ((p), 0x1, (rd), (rn), (rm))
+#define arm_fsub_s(p, rd, rn, rm) arm_format_fsub_scalar ((p), 0x0, (rd), (rn), (rm))
+
+/* C6.3.119 FMUL (scalar) */
+#define arm_format_fmul_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fmul_d(p, rd, rn, rm) arm_format_fmul_scalar ((p), 0x1, (rd), (rn), (rm))
+#define arm_fmul_s(p, rd, rn, rm) arm_format_fmul_scalar ((p), 0x0, (rd), (rn), (rm))
+
+/* C6.3.86 FDIV (scalar) */
+#define arm_format_fdiv_scalar(p, type, rd, rn, rm) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((rm) << 16) | (0x1 << 12) | (0x2 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fdiv_d(p, rd, rn, rm) arm_format_fdiv_scalar ((p), 0x1, (rd), (rn), (rm))
+#define arm_fdiv_s(p, rd, rn, rm) arm_format_fdiv_scalar ((p), 0x0, (rd), (rn), (rm))
+
+/* C6.3.116 FMSUB */
+#define arm_format_fmsub(p, type, rd, rn, rm, ra) arm_emit ((p), (0x1f << 24) | ((type) << 22) | (0x0 << 21) | ((rm) << 16) | (0x1 << 15) | ((ra) << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fmsub_d(p, rd, rn, rm, ra) arm_format_fmsub ((p), 0x1, (rd), (rn), (rm), (ra))
+
+/* C6.3.123 FNEG */
+#define arm_format_fneg(p, type, rd, rn) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | (0x2 << 15) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fneg_d(p, rd, rn) arm_format_fneg ((p), 0x1, (rd), (rn))
+#define arm_fneg_s(p, rd, rn) arm_format_fneg ((p), 0x0, (rd), (rn))
+
+/* C6.3.37 FABS (scalar) */
+#define arm_format_fabs(p, type, opc, rd, rn) arm_emit ((p), (0x1e << 24) | ((type) << 22) | (0x1 << 21) | ((opc) << 15) | (0x10 << 10) | ((rn) << 5) | ((rd) << 0))
+
+#define arm_fabs_d(p, rd, rn) arm_format_fabs ((p), 0x1, 0x1, (rd), (rn))
+
+/* C5.6.60 DMB */
+#define arm_format_dmb(p, opc, CRm) arm_emit ((p), (0x354 << 22) | (0x3 << 16) | (0x3 << 12) | ((CRm) << 8) | (0x1 << 7) | ((opc) << 5) | (0x1f << 0))
+
+#define ARM_DMB_LD 0x1
+#define ARM_DMB_ST 0x2
+#define ARM_DMB_ALL 0x3
+#define ARM_DMB_SY 0xc
+
+#define arm_dmb(p, imm) arm_format_dmb ((p), 0x1, (imm))
+
+/* C5.6.129 MRS */
+
+#define ARM_MRS_REG_TPIDR_EL0 0x5e82
+
+#define arm_format_mrs(p, sysreg, rt) arm_emit ((p), (0x354 << 22) | (0x1 << 21) | (0x1 << 20) | ((sysreg) << 5) | ((rt) << 0))
+
+#define arm_mrs(p, rt, sysreg) arm_format_mrs ((p), (sysreg), (rt))
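
ARM_MRS_REG_TPIDR_EL0 is the encoded system-register operand for the user-space thread pointer, so the typical use of arm_mrs here is reading the TLS base; arm_dmb pairs with the ARM_DMB_* constants above. Illustrative fragment, not part of the commit:

    guint8 buf [16], *code = buf;

    arm_mrs (code, ARMREG_R0, ARM_MRS_REG_TPIDR_EL0);  /* x0 = tpidr_el0 (TLS base) */
    arm_dmb (code, ARM_DMB_SY);                        /* full data memory barrier */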
+
+#endif /* __ARM64_CODEGEN_H__ */
diff --git a/mono/mini/aot-compiler.c b/mono/mini/aot-compiler.c
index f90e82a..864359b 100644
--- a/mono/mini/aot-compiler.c
+++ b/mono/mini/aot-compiler.c
@@ -836,7 +836,330 @@ arch_init (MonoAotCompile *acfg)
 
 #ifdef TARGET_ARM64
 
-#include "../../../mono-extensions/mono/mini/aot-compiler-arm64.c"
+
+/* Load the contents of GOT_SLOT into dreg, clobbering ip0 */
+static void
+arm64_emit_load_got_slot (MonoAotCompile *acfg, int dreg, int got_slot)
+{
+	int offset;
+
+	g_assert (acfg->fp);
+	emit_unset_mode (acfg);
+	/* r16==ip0 */
+	offset = (int)(got_slot * sizeof (gpointer));
+#ifdef TARGET_MACH
+	/* clang's integrated assembler */
+	fprintf (acfg->fp, "adrp x16, %s at PAGE+%d\n", acfg->got_symbol, offset & 0xfffff000);
+	fprintf (acfg->fp, "add x16, x16, %s at PAGEOFF\n", acfg->got_symbol);
+	fprintf (acfg->fp, "ldr x%d, [x16, #%d]\n", dreg, offset & 0xfff);
+#else
+	/* Linux GAS */
+	fprintf (acfg->fp, "adrp x16, %s+%d\n", acfg->got_symbol, offset & 0xfffff000);
+	fprintf (acfg->fp, "add x16, x16, :lo12:%s\n", acfg->got_symbol);
+	fprintf (acfg->fp, "ldr x%d, [x16, %d]\n", dreg, offset & 0xfff);
+#endif
+}
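
For a concrete picture of what this emits (not part of the commit): a call such as arm64_emit_load_got_slot (acfg, ARMREG_R0, 2) would print, on the Linux path, roughly

    /*
     * adrp x16, <got_symbol>+0           -- page containing the GOT slot
     * add  x16, x16, :lo12:<got_symbol>
     * ldr  x0, [x16, 16]                 -- slot 2 * sizeof (gpointer)
     */

where <got_symbol> stands in for acfg->got_symbol.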
+
+static void
+arm64_emit_objc_selector_ref (MonoAotCompile *acfg, guint8 *code, int index, int *code_size)
+{
+	int reg;
+
+	g_assert (acfg->fp);
+	emit_unset_mode (acfg);
+
+	/* ldr rt, target */
+	reg = arm_get_ldr_lit_reg (code);
+
+	fprintf (acfg->fp, "adrp x%d, L_OBJC_SELECTOR_REFERENCES_%d at PAGE\n", reg, index);
+	fprintf (acfg->fp, "add x%d, x%d, L_OBJC_SELECTOR_REFERENCES_%d at PAGEOFF\n", reg, reg, index);
+	fprintf (acfg->fp, "ldr x%d, [x%d]\n", reg, reg);
+
+	*code_size = 12;
+}
+
+static void
+arm64_emit_direct_call (MonoAotCompile *acfg, const char *target, gboolean external, gboolean thumb, MonoJumpInfo *ji, int *call_size)
+{
+	g_assert (acfg->fp);
+	emit_unset_mode (acfg);
+	if (ji && ji->relocation == MONO_R_ARM64_B) {
+		fprintf (acfg->fp, "b %s\n", target);
+	} else {
+		if (ji)
+			g_assert (ji->relocation == MONO_R_ARM64_BL);
+		fprintf (acfg->fp, "bl %s\n", target);
+	}
+	*call_size = 4;
+}
+
+static void
+arm64_emit_got_access (MonoAotCompile *acfg, guint8 *code, int got_slot, int *code_size)
+{
+	int reg;
+
+	/* ldr rt, target */
+	reg = arm_get_ldr_lit_reg (code);
+	arm64_emit_load_got_slot (acfg, reg, got_slot);
+	*code_size = 12;
+}
+
+static void
+arm64_emit_plt_entry (MonoAotCompile *acfg, const char *got_symbol, int offset, int info_offset)
+{
+	arm64_emit_load_got_slot (acfg, ARMREG_R16, offset / sizeof (gpointer));
+	fprintf (acfg->fp, "br x16\n");
+	/* Used by mono_aot_get_plt_info_offset () */
+	fprintf (acfg->fp, "%s %d\n", acfg->inst_directive, info_offset);
+}
+
+static void
+arm64_emit_tramp_page_common_code (MonoAotCompile *acfg, int pagesize, int arg_reg, int *size)
+{
+	guint8 buf [256];
+	guint8 *code;
+	int imm;
+
+	/* The common code */
+	code = buf;
+	imm = pagesize;
+	/* The trampoline address is in IP0 */
+	arm_movzx (code, ARMREG_IP1, imm & 0xffff, 0);
+	arm_movkx (code, ARMREG_IP1, (imm >> 16) & 0xffff, 16);
+	/* Compute the data slot address */
+	arm_subx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1);
+	/* Trampoline argument */
+	arm_ldrx (code, arg_reg, ARMREG_IP0, 0);
+	/* Address */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 8);
+	arm_brx (code, ARMREG_IP0);
+
+	/* Emit it */
+	emit_code_bytes (acfg, buf, code - buf);
+
+	*size = code - buf;
+}
+
+static void
+arm64_emit_tramp_page_specific_code (MonoAotCompile *acfg, int pagesize, int common_tramp_size, int specific_tramp_size)
+{
+	guint8 buf [256];
+	guint8 *code;
+	int i, count;
+
+	count = (pagesize - common_tramp_size) / specific_tramp_size;
+	for (i = 0; i < count; ++i) {
+		code = buf;
+		arm_adrx (code, ARMREG_IP0, code);
+		/* Branch to the generic code */
+		arm_b (code, code - 4 - (i * specific_tramp_size) - common_tramp_size);
+		/* This has to be 2 pointers long */
+		arm_nop (code);
+		arm_nop (code);
+		g_assert (code - buf == specific_tramp_size);
+		emit_code_bytes (acfg, buf, code - buf);
+	}
+}
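
Each stub captures its own address into IP0 with arm_adrx and branches to the common code at the start of the page, which subtracts the page size; a stub's data therefore lives one page earlier, at the same offset. A sketch of that address math under the layout above (not in the commit):

    /* Given the address a stub captured via arm_adrx, its data slot is
     * one page back at the same offset (argument at +0, target at +8). */
    static guint8*
    tramp_data_slot (guint8 *stub_addr, int pagesize)
    {
    	return stub_addr - pagesize;
    }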
+
+static void
+arm64_emit_specific_trampoline_pages (MonoAotCompile *acfg)
+{
+	guint8 buf [128];
+	guint8 *code;
+	guint8 *labels [16];
+	int common_tramp_size;
+	int specific_tramp_size = 2 * 8;
+	int imm, pagesize;
+	char symbol [128];
+
+	if (!acfg->aot_opts.use_trampolines_page)
+		return;
+
+#ifdef TARGET_MACH
+	/* Have to match the target pagesize */
+	pagesize = 16384;
+#else
+	pagesize = mono_pagesize ();
+#endif
+	acfg->tramp_page_size = pagesize;
+
+	/* The specific trampolines */
+	sprintf (symbol, "%sspecific_trampolines_page", acfg->user_symbol_prefix);
+	emit_alignment (acfg, pagesize);
+	emit_global (acfg, symbol, TRUE);
+	emit_label (acfg, symbol);
+
+	/* The common code */
+	arm64_emit_tramp_page_common_code (acfg, pagesize, ARMREG_IP1, &common_tramp_size);
+	acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_SPECIFIC] = common_tramp_size;
+
+	arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size);
+
+	/* The rgctx trampolines */
+	/* These are the same as the specific trampolines, but they load the argument into MONO_ARCH_RGCTX_REG */
+	sprintf (symbol, "%srgctx_trampolines_page", acfg->user_symbol_prefix);
+	emit_alignment (acfg, pagesize);
+	emit_global (acfg, symbol, TRUE);
+	emit_label (acfg, symbol);
+
+	/* The common code */
+	arm64_emit_tramp_page_common_code (acfg, pagesize, MONO_ARCH_RGCTX_REG, &common_tramp_size);
+	acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_STATIC_RGCTX] = common_tramp_size;
+
+	arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size);
+
+	/* The gsharedvt arg trampolines */
+	/* These are the same as the specific trampolines */
+	sprintf (symbol, "%sgsharedvt_arg_trampolines_page", acfg->user_symbol_prefix);
+	emit_alignment (acfg, pagesize);
+	emit_global (acfg, symbol, TRUE);
+	emit_label (acfg, symbol);
+
+	arm64_emit_tramp_page_common_code (acfg, pagesize, ARMREG_IP1, &common_tramp_size);
+	acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_GSHAREDVT_ARG] = common_tramp_size;
+
+	arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size);
+
+	/* The IMT trampolines */
+	sprintf (symbol, "%simt_trampolines_page", acfg->user_symbol_prefix);
+	emit_alignment (acfg, pagesize);
+	emit_global (acfg, symbol, TRUE);
+	emit_label (acfg, symbol);
+
+	code = buf;
+	imm = pagesize;
+	/* The trampoline address is in IP0 */
+	arm_movzx (code, ARMREG_IP1, imm & 0xffff, 0);
+	arm_movkx (code, ARMREG_IP1, (imm >> 16) & 0xffff, 16);
+	/* Compute the data slot address */
+	arm_subx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1);
+	/* Trampoline argument */
+	arm_ldrx (code, ARMREG_IP1, ARMREG_IP0, 0);
+
+	/* Same as arch_emit_imt_thunk () */
+	labels [0] = code;
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 0);
+	arm_cmpx (code, ARMREG_IP0, MONO_ARCH_RGCTX_REG);
+	labels [1] = code;
+	arm_bcc (code, ARMCOND_EQ, 0);
+
+	/* End-of-loop check */
+	labels [2] = code;
+	arm_cbzx (code, ARMREG_IP0, 0);
+
+	/* Loop footer */
+	arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 2 * 8);
+	arm_b (code, labels [0]);
+
+	/* Match */
+	mono_arm_patch (labels [1], code, MONO_R_ARM64_BCC);
+	/* Load vtable slot addr */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8);
+	/* Load vtable slot */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0);
+	arm_brx (code, ARMREG_IP0);
+
+	/* No match */
+	mono_arm_patch (labels [2], code, MONO_R_ARM64_CBZ);
+	/* Load fail addr */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8);
+	arm_brx (code, ARMREG_IP0);
+
+	emit_code_bytes (acfg, buf, code - buf);
+
+	common_tramp_size = code - buf;
+	acfg->tramp_page_code_offsets [MONO_AOT_TRAMP_IMT_THUNK] = common_tramp_size;
+
+	arm64_emit_tramp_page_specific_code (acfg, pagesize, common_tramp_size, specific_tramp_size);
+}
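+
+/*
+ * The IMT entries scanned by the loop above are, as implied by the 2 * 8
+ * loop increment, (key, value) pointer pairs:
+ *
+ *   [method, vtable-slot-address]
+ *   ...
+ *   [NULL,   fail-address]
+ *
+ * A NULL key terminates the table, which is why the cbz on ip0 doubles as
+ * the no-match exit; both exits load the second word of the current pair.
+ */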
+
+static void
+arm64_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size)
+{
+	/* Load argument from second GOT slot */
+	arm64_emit_load_got_slot (acfg, ARMREG_R17, offset + 1);
+	/* Load generic trampoline address from first GOT slot */
+	arm64_emit_load_got_slot (acfg, ARMREG_R16, offset);
+	fprintf (acfg->fp, "br x16\n");
+	*tramp_size = 7 * 4;
+}
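+
+/*
+ * So each AOT specific trampoline uses a pair of consecutive GOT slots:
+ * slot [offset] holds the generic trampoline address (loaded into x16)
+ * and slot [offset + 1] holds its argument (loaded into x17/ip1). The
+ * 7 * 4 size implies each arm64_emit_load_got_slot () call expands to
+ * three instructions, plus the final br.
+ */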
+
+static void
+arm64_emit_unbox_trampoline (MonoAotCompile *acfg, MonoCompile *cfg, MonoMethod *method, const char *call_target)
+{
+	emit_unset_mode (acfg);
+	fprintf (acfg->fp, "add x0, x0, %d\n", (int)(sizeof (MonoObject)));
+	fprintf (acfg->fp, "b %s\n", call_target);
+}
+
+static void
+arm64_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size)
+{
+	/* Similar to the specific trampolines, but use the rgctx reg instead of ip1 */
+
+	/* Load argument from first GOT slot */
+	g_assert (MONO_ARCH_RGCTX_REG == 27);
+	arm64_emit_load_got_slot (acfg, ARMREG_R27, offset);
+	/* Load generic trampoline address from second GOT slot */
+	arm64_emit_load_got_slot (acfg, ARMREG_R16, offset + 1);
+	fprintf (acfg->fp, "br x16\n");
+	*tramp_size = 7 * 4;
+}
+
+static void
+arm64_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size)
+{
+	guint8 buf [128];
+	guint8 *code, *labels [16];
+
+	/* Load parameter from GOT slot into ip1 */
+	arm64_emit_load_got_slot (acfg, ARMREG_R17, offset);
+
+	code = buf;
+	labels [0] = code;
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 0);
+	arm_cmpx (code, ARMREG_IP0, MONO_ARCH_RGCTX_REG);
+	labels [1] = code;
+	arm_bcc (code, ARMCOND_EQ, 0);
+
+	/* End-of-loop check */
+	labels [2] = code;
+	arm_cbzx (code, ARMREG_IP0, 0);
+
+	/* Loop footer */
+	arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 2 * 8);
+	arm_b (code, labels [0]);
+
+	/* Match */
+	mono_arm_patch (labels [1], code, MONO_R_ARM64_BCC);
+	/* Load vtable slot addr */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8);
+	/* Load vtable slot */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0);
+	arm_brx (code, ARMREG_IP0);
+
+	/* No match */
+	mono_arm_patch (labels [2], code, MONO_R_ARM64_CBZ);
+	/* Load fail addr */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP1, 8);
+	arm_brx (code, ARMREG_IP0);
+
+	emit_code_bytes (acfg, buf, code - buf);
+
+	*tramp_size = code - buf + (3 * 4);
+}
+
+static void
+arm64_emit_gsharedvt_arg_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size)
+{
+	/* Similar to the specific trampolines, but the address is in the second slot */
+	/* Load argument from first GOT slot */
+	arm64_emit_load_got_slot (acfg, ARMREG_R17, offset);
+	/* Load generic trampoline address from second GOT slot */
+	arm64_emit_load_got_slot (acfg, ARMREG_R16, offset + 1);
+	fprintf (acfg->fp, "br x16\n");
+	*tramp_size = 7 * 4;
+}
 
 #endif
 
diff --git a/mono/mini/exceptions-arm64.c b/mono/mini/exceptions-arm64.c
index 333fd13..b2db64b 100644
--- a/mono/mini/exceptions-arm64.c
+++ b/mono/mini/exceptions-arm64.c
@@ -1 +1,585 @@
-#include "../../../mono-extensions/mono/mini/exceptions-arm64.c"
+/*
+ * exceptions-arm64.c: exception support for ARM64
+ *
+ * Copyright 2013 Xamarin Inc
+ *
+ * Based on exceptions-arm.c:
+ *
+ * Authors:
+ *   Dietmar Maurer (dietmar at ximian.com)
+ *   Paolo Molaro (lupus at ximian.com)
+ *
+ * (C) 2001 Ximian, Inc.
+ */
+
+#include "mini.h"
+
+#include <mono/arch/arm64/arm64-codegen.h>
+#include <mono/metadata/abi-details.h>
+
+#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
+
+#ifndef DISABLE_JIT
+
+gpointer
+mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot)
+{
+	guint8 *start, *code;
+	MonoJumpInfo *ji = NULL;
+	GSList *unwind_ops = NULL;
+	int i, ctx_reg, size;
+
+	size = 256;
+	code = start = mono_global_codeman_reserve (size);
+
+	arm_movx (code, ARMREG_IP0, ARMREG_R0);
+	ctx_reg = ARMREG_IP0;
+	/* Restore fregs */
+	for (i = 0; i < 32; ++i)
+		arm_ldrfpx (code, i, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, fregs) + (i * 8));
+	/* Restore gregs */
+	// FIXME: Restore less registers
+	// FIXME: fp should be restored later
+	code = mono_arm_emit_load_regarray (code, 0xffffffff & ~(1 << ctx_reg) & ~(1 << ARMREG_SP), ctx_reg, MONO_STRUCT_OFFSET (MonoContext, regs));
+	/* ip0/ip1 don't need to be restored */
+	/* ip1 = pc */
+	arm_ldrx (code, ARMREG_IP1, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, pc));
+	/* ip0 = sp */
+	arm_ldrx (code, ARMREG_IP0, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_SP * 8));
+	/* Restore sp, ctx is no longer valid */
+	arm_movspx (code, ARMREG_SP, ARMREG_IP0); 
+	/* Branch to pc */
+	arm_brx (code, ARMREG_IP1);
+	/* Not reached */
+	arm_brk (code, 0);
+
+	g_assert ((code - start) < size);
+	mono_arch_flush_icache (start, code - start);
+	mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL);
+
+	if (info)
+		*info = mono_tramp_info_create ("restore_context", start, code - start, ji, unwind_ops);
+
+	return start;
+}
+
+gpointer
+mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot)
+{
+	guint8 *code;
+	guint8* start;
+	int size, offset, gregs_offset, fregs_offset, ctx_offset, num_fregs, frame_size;
+	MonoJumpInfo *ji = NULL;
+	GSList *unwind_ops = NULL;
+
+	size = 512;
+	start = code = mono_global_codeman_reserve (size);
+
+	/* Compute stack frame size and offsets */
+	offset = 0;
+	/* frame block */
+	offset += 2 * 8;
+	/* gregs */
+	gregs_offset = offset;
+	offset += 32 * 8;
+	/* fregs */
+	num_fregs = 8;
+	fregs_offset = offset;
+	offset += num_fregs * 8;
+	ctx_offset = offset;
+	offset += 8;
+	frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT);
+
+	/*
+	 * We are being called from C code, ctx is in r0, the address to call is in r1.
+	 * We need to save state, restore ctx, make the call, then restore the previous state,
+	 * returning the value returned by the call.
+	 */
+
+	/* Setup a frame */
+	arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -frame_size);
+	arm_movspx (code, ARMREG_FP, ARMREG_SP);
+
+	/* Save ctx */
+	arm_strx (code, ARMREG_R0, ARMREG_FP, ctx_offset);
+	/* Save gregs */
+	code = mono_arm_emit_store_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS | (1 << ARMREG_FP), ARMREG_FP, gregs_offset);
+	/* No need to save/restore fregs, since we don't currently use them */
+
+	/* Load regs from ctx */
+	code = mono_arm_emit_load_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, regs));
+	/* Load fp */
+	arm_ldrx (code, ARMREG_FP, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8));
+
+	/* Make the call */
+	arm_blrx (code, ARMREG_R1);
+	/* For filters, the result is in R0 */
+
+	/* Restore fp */
+	arm_ldrx (code, ARMREG_FP, ARMREG_SP, gregs_offset + (ARMREG_FP * 8));
+	/* Load ctx */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_FP, ctx_offset);
+	/* Save registers back to ctx */
+	/* This isn't strictly necessary since we don't allocate variables used in eh clauses to registers */
+	code = mono_arm_emit_store_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoContext, regs));
+
+	/* Restore regs */
+	code = mono_arm_emit_load_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS, ARMREG_FP, gregs_offset);
+	/* Destroy frame */
+	code = mono_arm_emit_destroy_frame (code, frame_size, (1 << ARMREG_IP0));
+	arm_retx (code, ARMREG_LR);
+
+	g_assert ((code - start) < size);
+	mono_arch_flush_icache (start, code - start);
+	mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL);
+
+	if (info)
+		*info = mono_tramp_info_create ("call_filter", start, code - start, ji, unwind_ops);
+
+	return start;
+}
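+
+/*
+ * A sketch of the call_filter frame built above, assuming
+ * MONO_ARCH_FRAME_ALIGNMENT is 16 (the offsets follow from the code):
+ *
+ *   fp + 0    saved fp/lr pair  (2 * 8)
+ *   fp + 16   gregs             (32 * 8)
+ *   fp + 272  fregs             (8 * 8, currently unused)
+ *   fp + 336  saved ctx pointer
+ *
+ * giving a frame_size of 352 bytes after alignment.
+ */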
+
+static gpointer 
+get_throw_trampoline (int size, gboolean corlib, gboolean rethrow, gboolean llvm, gboolean resume_unwind, const char *tramp_name, MonoTrampInfo **info, gboolean aot)
+{
+	guint8 *start, *code;
+	MonoJumpInfo *ji = NULL;
+	GSList *unwind_ops = NULL;
+	int i, offset, gregs_offset, fregs_offset, frame_size, num_fregs;
+
+	code = start = mono_global_codeman_reserve (size);
+
+	/* We are being called by JITted code, the exception object/type token is in R0 */
+
+	/* Compute stack frame size and offsets */
+	offset = 0;
+	/* frame block */
+	offset += 2 * 8;
+	/* gregs */
+	gregs_offset = offset;
+	offset += 32 * 8;
+	/* fregs */
+	num_fregs = 8;
+	fregs_offset = offset;
+	offset += num_fregs * 8;
+	frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT);
+
+	/* Setup a frame */
+	arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -frame_size);
+	arm_movspx (code, ARMREG_FP, ARMREG_SP);
+
+	/* Save gregs */
+	code = mono_arm_emit_store_regarray (code, 0xffffffff, ARMREG_FP, gregs_offset);
+	if (corlib && !llvm)
+		/* The real LR is in R1 */
+		arm_strx (code, ARMREG_R1, ARMREG_FP, gregs_offset + (ARMREG_LR * 8));
+	/* Save fp/sp */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_FP, 0);
+	arm_strx (code, ARMREG_IP0, ARMREG_FP, gregs_offset + (ARMREG_FP * 8));
+	arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, frame_size);
+	arm_strx (code, ARMREG_IP0, ARMREG_FP, gregs_offset + (ARMREG_SP * 8));	
+	/* Save fregs */
+	for (i = 0; i < num_fregs; ++i)
+		arm_strfpx (code, ARMREG_D8 + i, ARMREG_FP, fregs_offset + (i * 8));
+
+	/* Call the C trampoline function */
+	/* Arg1 =  exception object/type token */
+	arm_movx (code, ARMREG_R0, ARMREG_R0);
+	/* Arg2 = caller ip */
+	if (corlib) {
+		if (llvm)
+			arm_ldrx (code, ARMREG_R1, ARMREG_FP, gregs_offset + (ARMREG_LR * 8));
+		else
+			arm_movx (code, ARMREG_R1, ARMREG_R1);
+	} else {
+		arm_ldrx (code, ARMREG_R1, ARMREG_FP, 8);
+	}
+	/* Arg 3 = gregs */
+	arm_addx_imm (code, ARMREG_R2, ARMREG_FP, gregs_offset);
+	/* Arg 4 = fregs */
+	arm_addx_imm (code, ARMREG_R3, ARMREG_FP, fregs_offset);
+	/* Arg 5 = corlib */
+	arm_movzx (code, ARMREG_R4, corlib ? 1 : 0, 0);
+	/* Arg 6 = rethrow */
+	arm_movzx (code, ARMREG_R5, rethrow ? 1 : 0, 0);
+	/* Call the function */
+	if (aot) {
+		const char *icall_name;
+
+		if (resume_unwind)
+			icall_name = "mono_arm_resume_unwind";
+		else
+			icall_name = "mono_arm_throw_exception";
+
+		code = mono_arm_emit_aotconst (&ji, code, start, ARMREG_LR, MONO_PATCH_INFO_JIT_ICALL_ADDR, icall_name);
+	} else {
+		gpointer icall_func;
+
+		if (resume_unwind)
+			icall_func = mono_arm_resume_unwind;
+		else
+			icall_func = mono_arm_throw_exception;
+
+		code = mono_arm_emit_imm64 (code, ARMREG_LR, (guint64)icall_func);
+	}
+	arm_blrx (code, ARMREG_LR);
+	/* This shouldn't return */
+	arm_brk (code, 0x0);
+
+	g_assert ((code - start) < size);
+	mono_arch_flush_icache (start, code - start);
+	mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL);
+
+	if (info)
+		*info = mono_tramp_info_create (tramp_name, start, code - start, ji, unwind_ops);
+
+	return start;
+}
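+
+/*
+ * The register setup above matches the C signature of
+ * mono_arm_throw_exception ()/mono_arm_resume_unwind () further down:
+ *   r0 = exception object or type token, r1 = caller pc,
+ *   r2 = pointer to the saved gregs, r3 = pointer to the saved fregs,
+ *   r4 = corlib flag, r5 = rethrow flag.
+ */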
+
+gpointer 
+mono_arch_get_throw_exception (MonoTrampInfo **info, gboolean aot)
+{
+	return get_throw_trampoline (256, FALSE, FALSE, FALSE, FALSE, "throw_exception", info, aot);
+}
+
+gpointer
+mono_arch_get_rethrow_exception (MonoTrampInfo **info, gboolean aot)
+{
+	return get_throw_trampoline (256, FALSE, TRUE, FALSE, FALSE, "rethrow_exception", info, aot);
+}
+
+gpointer 
+mono_arch_get_throw_corlib_exception (MonoTrampInfo **info, gboolean aot)
+{
+	return get_throw_trampoline (256, TRUE, FALSE, FALSE, FALSE, "throw_corlib_exception", info, aot);
+}
+
+GSList*
+mono_arm_get_exception_trampolines (gboolean aot)
+{
+	MonoTrampInfo *info;
+	GSList *tramps = NULL;
+
+	/* LLVM uses the normal trampolines, but with a different name */
+	get_throw_trampoline (256, TRUE, FALSE, FALSE, FALSE, "llvm_throw_corlib_exception_trampoline", &info, aot);
+	tramps = g_slist_prepend (tramps, info);
+	
+	get_throw_trampoline (256, TRUE, FALSE, TRUE, FALSE, "llvm_throw_corlib_exception_abs_trampoline", &info, aot);
+	tramps = g_slist_prepend (tramps, info);
+
+	get_throw_trampoline (256, FALSE, FALSE, FALSE, TRUE, "llvm_resume_unwind_trampoline", &info, aot);
+	tramps = g_slist_prepend (tramps, info);
+
+	return tramps;
+}
+
+#else /* DISABLE_JIT */
+
+gpointer
+mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer
+mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer 
+mono_arch_get_throw_exception (MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer
+mono_arch_get_rethrow_exception (MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer 
+mono_arch_get_throw_corlib_exception (MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}	
+
+GSList*
+mono_arm_get_exception_trampolines (gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+#endif /* !DISABLE_JIT */
+
+void
+mono_arch_exceptions_init (void)
+{
+	guint8 *tramp;
+	GSList *tramps, *l;
+
+	if (mono_aot_only) {
+		tramp = mono_aot_get_trampoline ("llvm_throw_corlib_exception_trampoline");
+		mono_register_jit_icall (tramp, "llvm_throw_corlib_exception_trampoline", NULL, TRUE);
+		tramp = mono_aot_get_trampoline ("llvm_throw_corlib_exception_abs_trampoline");
+		mono_register_jit_icall (tramp, "llvm_throw_corlib_exception_abs_trampoline", NULL, TRUE);
+		tramp = mono_aot_get_trampoline ("llvm_resume_unwind_trampoline");
+		mono_register_jit_icall (tramp, "llvm_resume_unwind_trampoline", NULL, TRUE);
+	} else {
+		tramps = mono_arm_get_exception_trampolines (FALSE);
+		for (l = tramps; l; l = l->next) {
+			MonoTrampInfo *info = l->data;
+
+			mono_register_jit_icall (info->code, g_strdup (info->name), NULL, TRUE);
+			mono_tramp_info_register (info);
+		}
+		g_slist_free (tramps);
+	}
+}
+
+/*
+ * mono_arm_throw_exception:
+ *
+ *   This function is called by the exception trampolines.
+ * FP_REGS points to the 8 callee-saved fp regs.
+ */
+void
+mono_arm_throw_exception (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow)
+{
+	MonoContext ctx;
+	MonoObject *exc = NULL;
+	guint32 ex_token_index, ex_token;
+
+	if (!corlib)
+		exc = arg;
+	else {
+		ex_token_index = (guint64)arg;
+		ex_token = MONO_TOKEN_TYPE_DEF | ex_token_index;
+		exc = (MonoObject*)mono_exception_from_token (mono_defaults.corlib, ex_token);
+	}
+
+	/* Adjust pc so it points into the call instruction */
+	pc -= 4;
+
+	/* Initialize a ctx based on the arguments */
+	memset (&ctx, 0, sizeof (MonoContext));
+	memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32);
+	memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8);
+	ctx.pc = pc;
+
+	if (mono_object_isinst (exc, mono_defaults.exception_class)) {
+		MonoException *mono_ex = (MonoException*)exc;
+		if (!rethrow)
+			mono_ex->stack_trace = NULL;
+	}
+
+	mono_handle_exception (&ctx, exc);
+
+	mono_restore_context (&ctx);
+}
+
+void
+mono_arm_resume_unwind (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow)
+{
+	MonoContext ctx;
+
+	/* Adjust pc so it points into the call instruction */
+	pc -= 4;
+
+	/* Initialize a ctx based on the arguments */
+	memset (&ctx, 0, sizeof (MonoContext));
+	memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32);
+	memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8);
+	ctx.pc = pc;
+
+	mono_resume_unwind (&ctx);
+}
+
+/* 
+ * mono_arch_find_jit_info:
+ *
+ * See exceptions-amd64.c for docs.
+ */
+gboolean
+mono_arch_find_jit_info (MonoDomain *domain, MonoJitTlsData *jit_tls, 
+							 MonoJitInfo *ji, MonoContext *ctx, 
+							 MonoContext *new_ctx, MonoLMF **lmf,
+							 mgreg_t **save_locations,
+							 StackFrameInfo *frame)
+{
+	gpointer ip = MONO_CONTEXT_GET_IP (ctx);
+
+	memset (frame, 0, sizeof (StackFrameInfo));
+	frame->ji = ji;
+
+	*new_ctx = *ctx;
+
+	if (ji != NULL) {
+		mgreg_t regs [MONO_MAX_IREGS + 8 + 1];
+		guint8 *cfa;
+		guint32 unwind_info_len;
+		guint8 *unwind_info;
+
+		frame->type = FRAME_TYPE_MANAGED;
+
+		unwind_info = mono_jinfo_get_unwind_info (ji, &unwind_info_len);
+
+		memcpy (regs, &new_ctx->regs, sizeof (mgreg_t) * 32);
+		/* v8..v15 are callee-saved */
+		memcpy (regs + MONO_MAX_IREGS, &(new_ctx->fregs [8]), sizeof (mgreg_t) * 8);
+
+		mono_unwind_frame (unwind_info, unwind_info_len, ji->code_start, 
+						   (guint8*)ji->code_start + ji->code_size,
+						   ip, NULL, regs, MONO_MAX_IREGS + 8,
+						   save_locations, MONO_MAX_IREGS, &cfa);
+
+		memcpy (&new_ctx->regs, regs, sizeof (mgreg_t) * 32);
+		memcpy (&(new_ctx->fregs [8]), regs + MONO_MAX_IREGS, sizeof (mgreg_t) * 8);
+
+		new_ctx->pc = regs [ARMREG_LR];
+		new_ctx->regs [ARMREG_SP] = (mgreg_t)cfa;
+
+		if (*lmf && (*lmf)->gregs [MONO_ARCH_LMF_REG_SP] && (MONO_CONTEXT_GET_SP (ctx) >= (gpointer)(*lmf)->gregs [MONO_ARCH_LMF_REG_SP])) {
+			/* remove any unused lmf */
+			*lmf = (gpointer)(((gsize)(*lmf)->previous_lmf) & ~3);
+		}
+
+		/* we subtract 1 so that the IP points into the call instruction */
+		new_ctx->pc--;
+
+		return TRUE;
+	} else if (*lmf) {
+		if (((gsize)(*lmf)->previous_lmf) & 2) {
+			/* 
+			 * This LMF entry is created by the soft debug code to mark transitions to
+			 * managed code done during invokes.
+			 */
+			MonoLMFExt *ext = (MonoLMFExt*)(*lmf);
+
+			g_assert (ext->debugger_invoke);
+
+			memcpy (new_ctx, &ext->ctx, sizeof (MonoContext));
+
+			*lmf = (gpointer)(((gsize)(*lmf)->previous_lmf) & ~3);
+
+			frame->type = FRAME_TYPE_DEBUGGER_INVOKE;
+
+			return TRUE;
+		}
+
+		frame->type = FRAME_TYPE_MANAGED_TO_NATIVE;
+
+		ji = mini_jit_info_table_find (domain, (gpointer)(*lmf)->pc, NULL);
+		if (!ji)
+			return FALSE;
+
+		g_assert (MONO_ARCH_LMF_REGS == ((0x3ff << 19) | (1 << ARMREG_FP) | (1 << ARMREG_SP)));
+		memcpy (&new_ctx->regs [ARMREG_R19], &(*lmf)->gregs [0], sizeof (mgreg_t) * 10);
+		new_ctx->regs [ARMREG_FP] = (*lmf)->gregs [MONO_ARCH_LMF_REG_FP];
+		new_ctx->regs [ARMREG_SP] = (*lmf)->gregs [MONO_ARCH_LMF_REG_SP];
+		new_ctx->pc = (*lmf)->pc;
+
+		/* we subtract 1 so that the IP points into the call instruction */
+		new_ctx->pc--;
+
+		*lmf = (gpointer)(((gsize)(*lmf)->previous_lmf) & ~3);
+
+		return TRUE;
+	}
+
+	return FALSE;
+}
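+
+/*
+ * Note on the (& ~3) masking above: the low two bits of previous_lmf are
+ * used as tags, bit 1 marking an LMF extension created by the soft
+ * debugger (the MonoLMFExt case), so they have to be stripped before the
+ * value is usable as a pointer again.
+ */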
+
+void
+mono_arch_sigctx_to_monoctx (void *sigctx, MonoContext *mctx)
+{
+	mono_sigctx_to_monoctx (sigctx, mctx);
+}
+
+void
+mono_arch_monoctx_to_sigctx (MonoContext *mctx, void *sigctx)
+{
+	mono_monoctx_to_sigctx (mctx, sigctx);
+}
+
+/*
+ * handle_signal_exception:
+ *
+ *   Called when execution resumes from a signal handler.
+ */
+static void
+handle_signal_exception (gpointer obj)
+{
+	MonoJitTlsData *jit_tls = mono_native_tls_get_value (mono_jit_tls_id);
+	MonoContext ctx;
+
+	memcpy (&ctx, &jit_tls->ex_ctx, sizeof (MonoContext));
+
+	mono_handle_exception (&ctx, obj);
+
+	mono_restore_context (&ctx);
+}
+
+/*
+ * This is the function called from the signal handler.
+ */
+gboolean
+mono_arch_handle_exception (void *ctx, gpointer obj)
+{
+#if defined(MONO_CROSS_COMPILE)
+	g_assert_not_reached ();
+#else
+	MonoJitTlsData *jit_tls;
+	void *sigctx = ctx;
+
+	/*
+	 * Resume into the normal stack and handle the exception there.
+	 */
+	jit_tls = mono_native_tls_get_value (mono_jit_tls_id);
+
+	/* Pass the ctx parameter in TLS */
+	mono_arch_sigctx_to_monoctx (sigctx, &jit_tls->ex_ctx);
+	/* The others in registers */
+	UCONTEXT_REG_R0 (sigctx) = (gsize)obj;
+
+	UCONTEXT_REG_PC (sigctx) = (gsize)handle_signal_exception;
+	UCONTEXT_REG_SP (sigctx) = UCONTEXT_REG_SP (sigctx) - MONO_ARCH_REDZONE_SIZE;
+#endif
+
+	return TRUE;
+}
+
+gpointer
+mono_arch_ip_from_context (void *sigctx)
+{
+#ifdef MONO_CROSS_COMPILE
+	g_assert_not_reached ();
+	return NULL;
+#else
+	return (gpointer)UCONTEXT_REG_PC (sigctx);
+#endif
+}
+
+void
+mono_arch_setup_async_callback (MonoContext *ctx, void (*async_cb)(void *fun), gpointer user_data)
+{
+	NOT_IMPLEMENTED;
+}
+
+/*
+ * mono_arch_setup_resume_sighandler_ctx:
+ *
+ *   Setup CTX so execution continues at FUNC.
+ */
+void
+mono_arch_setup_resume_sighandler_ctx (MonoContext *ctx, gpointer func)
+{
+	MONO_CONTEXT_SET_IP (ctx, func);
+}
diff --git a/mono/mini/mini-arm64.c b/mono/mini/mini-arm64.c
index 517f6c6..372cb83 100644
--- a/mono/mini/mini-arm64.c
+++ b/mono/mini/mini-arm64.c
@@ -1 +1,5202 @@
-#include "../../../mono-extensions/mono/mini/mini-arm64.c"
+/*
+ * mini-arm64.c: ARM64 backend for the Mono code generator
+ *
+ * Copyright 2013 Xamarin, Inc (http://www.xamarin.com)
+ * 
+ * Based on mini-arm.c:
+ *
+ * Authors:
+ *   Paolo Molaro (lupus at ximian.com)
+ *   Dietmar Maurer (dietmar at ximian.com)
+ *
+ * (C) 2003 Ximian, Inc.
+ * Copyright 2003-2011 Novell, Inc (http://www.novell.com)
+ * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
+ */
+
+#include "mini.h"
+#include "cpu-arm64.h"
+#include "ir-emit.h"
+
+#include <mono/arch/arm64/arm64-codegen.h>
+#include <mono/utils/mono-mmap.h>
+#include <mono/utils/mono-memory-model.h>
+#include <mono/metadata/abi-details.h>
+
+/*
+ * Documentation:
+ *
+ * - ARM(R) Architecture Reference Manual, ARMv8, for ARMv8-A architecture profile (DDI0487A_a_armv8_arm.pdf)
+ * - Procedure Call Standard for the ARM 64-bit Architecture (AArch64) (IHI0055B_aapcs64.pdf)
+ * - ELF for the ARM 64-bit Architecture (IHI0056B_aaelf64.pdf)
+ *
+ * Register usage:
+ * - ip0/ip1/lr are used as temporary registers
+ * - r27 is used as the rgctx/imt register
+ * - r28 is used to access arguments passed on the stack
+ * - d15/d16 are used as fp temporary registers
+ */
+
+#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
+
+#define FP_TEMP_REG ARMREG_D16
+#define FP_TEMP_REG2 ARMREG_D15
+
+#define THUNK_SIZE (4 * 4)
+
+/* The single step trampoline */
+static gpointer ss_trampoline;
+
+/* The breakpoint trampoline */
+static gpointer bp_trampoline;
+
+static gboolean ios_abi;
+
+static __attribute__((warn_unused_result)) guint8* emit_load_regset (guint8 *code, guint64 regs, int basereg, int offset);
+
+const char*
+mono_arch_regname (int reg)
+{
+	static const char * rnames[] = {
+		"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
+		"r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19",
+		"r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "fp",
+		"lr", "sp"
+	};
+	if (reg >= 0 && reg < 32)
+		return rnames [reg];
+	return "unknown";
+}
+
+const char*
+mono_arch_fregname (int reg)
+{
+	static const char * rnames[] = {
+		"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9",
+		"d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19",
+		"d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
+		"d30", "d31"
+	};
+	if (reg >= 0 && reg < 32)
+		return rnames [reg];
+	return "unknown fp";
+}
+
+int
+mono_arch_get_argument_info (MonoGenericSharingContext *gsctx, MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
+{
+	NOT_IMPLEMENTED;
+	return 0;
+}
+
+#define MAX_ARCH_DELEGATE_PARAMS 7
+
+static gpointer
+get_delegate_invoke_impl (gboolean has_target, int param_count, guint32 *code_size)
+{
+	guint8 *code, *start;
+
+	if (has_target) {
+		start = code = mono_global_codeman_reserve (12);
+
+		/* Replace the this argument with the target */
+		arm_ldrx (code, ARMREG_IP0, ARMREG_R0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
+		arm_ldrx (code, ARMREG_R0, ARMREG_R0, MONO_STRUCT_OFFSET (MonoDelegate, target));
+		arm_brx (code, ARMREG_IP0);
+
+		g_assert ((code - start) <= 12);
+
+		mono_arch_flush_icache (start, 12);
+	} else {
+		int size, i;
+
+		size = 8 + param_count * 4;
+		start = code = mono_global_codeman_reserve (size);
+
+		arm_ldrx (code, ARMREG_IP0, ARMREG_R0, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
+		/* slide down the arguments */
+		for (i = 0; i < param_count; ++i)
+			arm_movx (code, i, i + 1);
+		arm_brx (code, ARMREG_IP0);
+
+		g_assert ((code - start) <= size);
+
+		mono_arch_flush_icache (start, size);
+	}
+
+	if (code_size)
+		*code_size = code - start;
+
+	return start;
+}
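+
+/*
+ * Sketch of what the no-target case above emits for param_count == 2
+ * (derived from the loop; the has_target case instead replaces 'this'
+ * with the delegate's target):
+ *   ldr ip0, [x0, #MONO_STRUCT_OFFSET (MonoDelegate, method_ptr)]
+ *   mov x0, x1      ; slide the arguments down over 'this'
+ *   mov x1, x2
+ *   br  ip0
+ */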
+
+/*
+ * mono_arch_get_delegate_invoke_impls:
+ *
+ *   Return a list of MonoAotTrampInfo structures for the delegate invoke impl
+ * trampolines.
+ */
+GSList*
+mono_arch_get_delegate_invoke_impls (void)
+{
+	GSList *res = NULL;
+	guint8 *code;
+	guint32 code_len;
+	int i;
+	char *tramp_name;
+
+	code = get_delegate_invoke_impl (TRUE, 0, &code_len);
+	res = g_slist_prepend (res, mono_tramp_info_create ("delegate_invoke_impl_has_target", code, code_len, NULL, NULL));
+
+	for (i = 0; i <= MAX_ARCH_DELEGATE_PARAMS; ++i) {
+		code = get_delegate_invoke_impl (FALSE, i, &code_len);
+		tramp_name = g_strdup_printf ("delegate_invoke_impl_target_%d", i);
+		res = g_slist_prepend (res, mono_tramp_info_create (tramp_name, code, code_len, NULL, NULL));
+		g_free (tramp_name);
+	}
+
+	return res;
+}
+
+gpointer
+mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
+{
+	guint8 *code, *start;
+
+	/*
+	 * vtypes are returned in registers, or using the dedicated r8 register, so
+	 * they can be supported by delegate invokes.
+	 */
+
+	if (has_target) {
+		static guint8* cached = NULL;
+
+		if (cached)
+			return cached;
+
+		if (mono_aot_only)
+			start = mono_aot_get_trampoline ("delegate_invoke_impl_has_target");
+		else
+			start = get_delegate_invoke_impl (TRUE, 0, NULL);
+		mono_memory_barrier ();
+		cached = start;
+		return cached;
+	} else {
+		static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
+		int i;
+
+		if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
+			return NULL;
+		for (i = 0; i < sig->param_count; ++i)
+			if (!mono_is_regsize_var (sig->params [i]))
+				return NULL;
+
+		code = cache [sig->param_count];
+		if (code)
+			return code;
+
+		if (mono_aot_only) {
+			char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
+			start = mono_aot_get_trampoline (name);
+			g_free (name);
+		} else {
+			start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL);
+		}
+		mono_memory_barrier ();
+		cache [sig->param_count] = start;
+		return start;
+	}
+
+	return NULL;
+}
+
+gpointer
+mono_arch_get_delegate_virtual_invoke_impl (MonoMethodSignature *sig, MonoMethod *method, int offset, gboolean load_imt_reg)
+{
+	return NULL;
+}
+
+gpointer
+mono_arch_get_this_arg_from_call (mgreg_t *regs, guint8 *code)
+{
+	return (gpointer)regs [ARMREG_R0];
+}
+
+void
+mono_arch_cpu_init (void)
+{
+}
+
+void
+mono_arch_init (void)
+{
+	mono_aot_register_jit_icall ("mono_arm_throw_exception", mono_arm_throw_exception);
+	mono_aot_register_jit_icall ("mono_arm_resume_unwind", mono_arm_resume_unwind);
+
+	if (!mono_aot_only)
+		bp_trampoline = mini_get_breakpoint_trampoline ();
+
+#if defined(TARGET_IOS)
+	ios_abi = TRUE;
+#endif
+}
+
+void
+mono_arch_cleanup (void)
+{
+}
+
+guint32
+mono_arch_cpu_optimizations (guint32 *exclude_mask)
+{
+	*exclude_mask = 0;
+	return 0;
+}
+
+guint32
+mono_arch_cpu_enumerate_simd_versions (void)
+{
+	return 0;
+}
+
+void
+mono_arch_register_lowlevel_calls (void)
+{
+}
+
+void
+mono_arch_finish_init (void)
+{
+}
+
+/* The maximum length is 2 instructions */
+static guint8*
+emit_imm (guint8 *code, int dreg, int imm)
+{
+	// FIXME: Optimize this
+	if (imm < 0) {
+		gint64 limm = imm;
+		arm_movnx (code, dreg, (~limm) & 0xffff, 0);
+		arm_movkx (code, dreg, (limm >> 16) & 0xffff, 16);
+	} else {
+		arm_movzx (code, dreg, imm & 0xffff, 0);
+		if (imm >> 16)
+			arm_movkx (code, dreg, (imm >> 16) & 0xffff, 16);
+	}
+
+	return code;
+}
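+
+/*
+ * For example (a sketch of the sequences above), imm = 0x12345678 becomes
+ *   movz dreg, #0x5678
+ *   movk dreg, #0x1234, lsl #16
+ * while a negative imm such as -2 becomes
+ *   movn dreg, #0x0001            ; dreg = ~1 = 0xfffffffffffffffe
+ *   movk dreg, #0xffff, lsl #16
+ */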
+
+/* The maximum length is 4 instructions */
+static guint8*
+emit_imm64 (guint8 *code, int dreg, guint64 imm)
+{
+	// FIXME: Optimize this
+	arm_movzx (code, dreg, imm & 0xffff, 0);
+	if ((imm >> 16) & 0xffff)
+		arm_movkx (code, dreg, (imm >> 16) & 0xffff, 16);
+	if ((imm >> 32) & 0xffff)
+		arm_movkx (code, dreg, (imm >> 32) & 0xffff, 32);
+	if ((imm >> 48) & 0xffff)
+		arm_movkx (code, dreg, (imm >> 48) & 0xffff, 48);
+
+	return code;
+}
+
+guint8*
+mono_arm_emit_imm64 (guint8 *code, int dreg, gint64 imm)
+{
+	return emit_imm64 (code, dreg, imm);
+}
+
+/*
+ * emit_imm64_template:
+ *
+ *   Emit a patchable code sequence for constructing a 64 bit immediate.
+ */
+static guint8*
+emit_imm64_template (guint8 *code, int dreg)
+{
+	arm_movzx (code, dreg, 0, 0);
+	arm_movkx (code, dreg, 0, 16);
+	arm_movkx (code, dreg, 0, 32);
+	arm_movkx (code, dreg, 0, 48);
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_addw_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	if (!arm_is_arith_imm (imm)) {
+		code = emit_imm (code, ARMREG_LR, imm);
+		arm_addw (code, dreg, sreg, ARMREG_LR);
+	} else {
+		arm_addw_imm (code, dreg, sreg, imm);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_addx_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	if (!arm_is_arith_imm (imm)) {
+		code = emit_imm (code, ARMREG_LR, imm);
+		arm_addx (code, dreg, sreg, ARMREG_LR);
+	} else {
+		arm_addx_imm (code, dreg, sreg, imm);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_subw_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	if (!arm_is_arith_imm (imm)) {
+		code = emit_imm (code, ARMREG_LR, imm);
+		arm_subw (code, dreg, sreg, ARMREG_LR);
+	} else {
+		arm_subw_imm (code, dreg, sreg, imm);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_subx_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	if (!arm_is_arith_imm (imm)) {
+		code = emit_imm (code, ARMREG_LR, imm);
+		arm_subx (code, dreg, sreg, ARMREG_LR);
+	} else {
+		arm_subx_imm (code, dreg, sreg, imm);
+	}
+	return code;
+}
+
+/* Emit sp+=imm. Clobbers ip0/ip1 */
+static inline __attribute__((warn_unused_result)) guint8*
+emit_addx_sp_imm (guint8 *code, int imm)
+{
+	code = emit_imm (code, ARMREG_IP0, imm);
+	arm_movspx (code, ARMREG_IP1, ARMREG_SP);
+	arm_addx (code, ARMREG_IP1, ARMREG_IP1, ARMREG_IP0);
+	arm_movspx (code, ARMREG_SP, ARMREG_IP1);
+	return code;
+}
+
+/* Emit sp-=imm. Clobbers ip0/ip1 */
+static inline __attribute__((warn_unused_result)) guint8*
+emit_subx_sp_imm (guint8 *code, int imm)
+{
+	code = emit_imm (code, ARMREG_IP0, imm);
+	arm_movspx (code, ARMREG_IP1, ARMREG_SP);
+	arm_subx (code, ARMREG_IP1, ARMREG_IP1, ARMREG_IP0);
+	arm_movspx (code, ARMREG_SP, ARMREG_IP1);
+	return code;
+}
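+
+/*
+ * The detour through ip0/ip1 above is needed because the shifted-register
+ * forms of add/sub treat register number 31 as xzr rather than sp, so sp
+ * can only be moved via the mov (to/from SP) alias and then adjusted in a
+ * general purpose register.
+ */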
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_andw_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	// FIXME:
+	code = emit_imm (code, ARMREG_LR, imm);
+	arm_andw (code, dreg, sreg, ARMREG_LR);
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_andx_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	// FIXME:
+	code = emit_imm (code, ARMREG_LR, imm);
+	arm_andx (code, dreg, sreg, ARMREG_LR);
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_orrw_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	// FIXME:
+	code = emit_imm (code, ARMREG_LR, imm);
+	arm_orrw (code, dreg, sreg, ARMREG_LR);
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_orrx_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	// FIXME:
+	code = emit_imm (code, ARMREG_LR, imm);
+	arm_orrx (code, dreg, sreg, ARMREG_LR);
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_eorw_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	// FIXME:
+	code = emit_imm (code, ARMREG_LR, imm);
+	arm_eorw (code, dreg, sreg, ARMREG_LR);
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_eorx_imm (guint8 *code, int dreg, int sreg, int imm)
+{
+	// FIXME:
+	code = emit_imm (code, ARMREG_LR, imm);
+	arm_eorx (code, dreg, sreg, ARMREG_LR);
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_cmpw_imm (guint8 *code, int sreg, int imm)
+{
+	if (imm == 0) {
+		arm_cmpw (code, sreg, ARMREG_RZR);
+	} else {
+		// FIXME:
+		code = emit_imm (code, ARMREG_LR, imm);
+		arm_cmpw (code, sreg, ARMREG_LR);
+	}
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_cmpx_imm (guint8 *code, int sreg, int imm)
+{
+	if (imm == 0) {
+		arm_cmpx (code, sreg, ARMREG_RZR);
+	} else {
+		// FIXME:
+		code = emit_imm (code, ARMREG_LR, imm);
+		arm_cmpx (code, sreg, ARMREG_LR);
+	}
+
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_strb (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_strb_imm (imm)) {
+		arm_strb (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_strb_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_strh (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_strh_imm (imm)) {
+		arm_strh (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_strh_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_strw (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_strw_imm (imm)) {
+		arm_strw (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_strw_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_strfpw (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_strw_imm (imm)) {
+		arm_strfpw (code, rt, rn, imm);
+	} else {
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
+		arm_strfpw (code, rt, ARMREG_IP0, 0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_strfpx (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_strx_imm (imm)) {
+		arm_strfpx (code, rt, rn, imm);
+	} else {
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
+		arm_strfpx (code, rt, ARMREG_IP0, 0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_strx (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_strx_imm (imm)) {
+		arm_strx (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_strx_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrb (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 1)) {
+		arm_ldrb (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_ldrb_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrsbx (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 1)) {
+		arm_ldrsbx (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_ldrsbx_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrh (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 2)) {
+		arm_ldrh (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_ldrh_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrshx (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 2)) {
+		arm_ldrshx (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_ldrshx_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrswx (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 4)) {
+		arm_ldrswx (code, rt, rn, imm);
+	} else {
+		g_assert (rt != ARMREG_IP0);
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_ldrswx_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrw (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 4)) {
+		arm_ldrw (code, rt, rn, imm);
+	} else {
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_ldrw_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrx (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 8)) {
+		arm_ldrx (code, rt, rn, imm);
+	} else {
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_ldrx_reg (code, rt, rn, ARMREG_IP0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrfpw (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 4)) {
+		arm_ldrfpw (code, rt, rn, imm);
+	} else {
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
+		arm_ldrfpw (code, rt, ARMREG_IP0, 0);
+	}
+	return code;
+}
+
+static inline __attribute__((warn_unused_result)) guint8*
+emit_ldrfpx (guint8 *code, int rt, int rn, int imm)
+{
+	if (arm_is_pimm12_scaled (imm, 8)) {
+		arm_ldrfpx (code, rt, rn, imm);
+	} else {
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
+		arm_ldrfpx (code, rt, ARMREG_IP0, 0);
+	}
+	return code;
+}
+
+guint8*
+mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm)
+{
+	return emit_ldrx (code, rt, rn, imm);
+}
+
+static guint8*
+emit_call (MonoCompile *cfg, guint8* code, guint32 patch_type, gconstpointer data)
+{
+	/*
+	mono_add_patch_info_rel (cfg, code - cfg->native_code, patch_type, data, MONO_R_ARM64_IMM);
+	code = emit_imm64_template (code, ARMREG_LR);
+	arm_blrx (code, ARMREG_LR);
+	*/
+	mono_add_patch_info_rel (cfg, code - cfg->native_code, patch_type, data, MONO_R_ARM64_BL);
+	arm_bl (code, code);
+	cfg->thunk_area += THUNK_SIZE;
+	return code;
+}
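+
+/*
+ * A direct bl only reaches +/-128MB, so emit_call () reserves THUNK_SIZE
+ * bytes of thunk space per call site; if the final target turns out to be
+ * out of range, arm_patch_full () below redirects the bl through one of
+ * these thunks (see emit_thunk ()/create_thunk ()).
+ */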
+
+static guint8*
+emit_aotconst_full (MonoCompile *cfg, MonoJumpInfo **ji, guint8 *code, guint8 *start, int dreg, guint32 patch_type, gconstpointer data)
+{
+	if (cfg)
+		mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
+	else
+		*ji = mono_patch_info_list_prepend (*ji, code - start, patch_type, data);
+	/* See arch_emit_got_access () in aot-compiler.c */
+	arm_ldrx_lit (code, dreg, 0);
+	arm_nop (code);
+	arm_nop (code);
+	return code;
+}
+
+static guint8*
+emit_aotconst (MonoCompile *cfg, guint8 *code, int dreg, guint32 patch_type, gconstpointer data)
+{
+	return emit_aotconst_full (cfg, NULL, code, NULL, dreg, patch_type, data);
+}
+
+/*
+ * mono_arm_emit_aotconst:
+ *
+ *   Emit code to load an AOT constant into DREG. Usable from trampolines.
+ */
+guint8*
+mono_arm_emit_aotconst (gpointer ji, guint8 *code, guint8 *code_start, int dreg, guint32 patch_type, gconstpointer data)
+{
+	return emit_aotconst_full (NULL, (MonoJumpInfo**)ji, code, code_start, dreg, patch_type, data);
+}
+
+static guint8*
+emit_tls_get (guint8 *code, int dreg, int tls_offset)
+{
+	arm_mrs (code, dreg, ARM_MRS_REG_TPIDR_EL0);
+	if (tls_offset < 256) {
+		arm_ldrx (code, dreg, dreg, tls_offset);
+	} else {
+		code = emit_addx_imm (code, dreg, dreg, tls_offset);
+		arm_ldrx (code, dreg, dreg, 0);
+	}
+	return code;
+}
+
+static guint8*
+emit_tls_get_reg (guint8 *code, int dreg, int offset_reg)
+{
+	g_assert (offset_reg != ARMREG_IP0);
+	arm_mrs (code, ARMREG_IP0, ARM_MRS_REG_TPIDR_EL0);
+	arm_ldrx_reg (code, dreg, ARMREG_IP0, offset_reg);
+	return code;
+}
+
+static guint8*
+emit_tls_set (guint8 *code, int sreg, int tls_offset)
+{
+	int tmpreg = ARMREG_IP0;
+
+	g_assert (sreg != tmpreg);
+	arm_mrs (code, tmpreg, ARM_MRS_REG_TPIDR_EL0);
+	if (tls_offset < 256) {
+		arm_strx (code, sreg, tmpreg, tls_offset);
+	} else {
+		code = emit_addx_imm (code, tmpreg, tmpreg, tls_offset);
+		arm_strx (code, sreg, tmpreg, 0);
+	}
+	return code;
+}
+
+
+static guint8*
+emit_tls_set_reg (guint8 *code, int sreg, int offset_reg)
+{
+	int tmpreg = ARMREG_IP0;
+
+	g_assert (sreg != tmpreg);
+	arm_mrs (code, tmpreg, ARM_MRS_REG_TPIDR_EL0);
+	arm_strx_reg (code, sreg, tmpreg, offset_reg);
+	return code;
+}
+
+/*
+ * Emits
+ * - mov sp, fp
+ * - ldp fp, lr, [sp], #stack_offset
+ * Clobbers TEMP_REGS.
+ */
+__attribute__((warn_unused_result)) guint8*
+mono_arm_emit_destroy_frame (guint8 *code, int stack_offset, guint64 temp_regs)
+{
+	arm_movspx (code, ARMREG_SP, ARMREG_FP);
+
+	if (arm_is_ldpx_imm (stack_offset)) {
+		arm_ldpx_post (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, stack_offset);
+	} else {
+		arm_ldpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0);
+		/* sp += stack_offset */
+		g_assert (temp_regs & (1 << ARMREG_IP0));
+		if (temp_regs & (1 << ARMREG_IP1)) {
+			code = emit_addx_sp_imm (code, stack_offset);
+		} else {
+			int imm = stack_offset;
+
+			/* Can't use addx_sp_imm () since we can't clobber ip0/ip1 */
+			arm_addx_imm (code, ARMREG_IP0, ARMREG_SP, 0);
+			while (imm > 256) {
+				arm_addx_imm (code, ARMREG_IP0, ARMREG_IP0, 256);
+				imm -= 256;
+			}
+			arm_addx_imm (code, ARMREG_SP, ARMREG_IP0, imm);
+		}
+	}
+	return code;
+}
+
+#define is_call_imm(diff) ((gint)(diff) >= -33554432 && (gint)(diff) <= 33554431)
+
+static guint8*
+emit_thunk (guint8 *code, gconstpointer target)
+{
+	guint8 *p = code;
+
+	arm_ldrx_lit (code, ARMREG_IP0, code + 8);
+	arm_brx (code, ARMREG_IP0);
+	*(guint64*)code = (guint64)target;
+	code += sizeof (guint64);
+
+	mono_arch_flush_icache (p, code - p);
+	return code;
+}
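+
+/*
+ * The thunk emitted above is THUNK_SIZE (16) bytes:
+ *   ldr ip0, #8    ; load the literal placed after the branch
+ *   br  ip0
+ *   .quad target
+ * create_thunk () below relies on this layout: a zero first word marks a
+ * free slot and the guint64 at offset 8 is the current target.
+ */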
+
+static gpointer
+create_thunk (MonoCompile *cfg, MonoDomain *domain, guchar *code, const guchar *target)
+{
+	MonoJitInfo *ji;
+	MonoThunkJitInfo *info;
+	guint8 *thunks, *p;
+	int thunks_size;
+	guint8 *orig_target;
+	guint8 *target_thunk;
+
+	if (!domain)
+		domain = mono_domain_get ();
+
+	if (cfg) {
+		/*
+		 * This can be called multiple times during JITting;
+		 * save the current position in cfg->arch to avoid
+		 * doing an O(n^2) search.
+		 */
+		if (!cfg->arch.thunks) {
+			cfg->arch.thunks = cfg->thunks;
+			cfg->arch.thunks_size = cfg->thunk_area;
+		}
+		thunks = cfg->arch.thunks;
+		thunks_size = cfg->arch.thunks_size;
+		if (!thunks_size) {
+			g_print ("thunk failed %p->%p, thunk space=%d method %s", code, target, thunks_size, mono_method_full_name (cfg->method, TRUE));
+			g_assert_not_reached ();
+		}
+
+		g_assert (*(guint32*)thunks == 0);
+		emit_thunk (thunks, target);
+
+		cfg->arch.thunks += THUNK_SIZE;
+		cfg->arch.thunks_size -= THUNK_SIZE;
+
+		return thunks;
+	} else {
+		ji = mini_jit_info_table_find (domain, (char*)code, NULL);
+		g_assert (ji);
+		info = mono_jit_info_get_thunk_info (ji);
+		g_assert (info);
+
+		thunks = (guint8*)ji->code_start + info->thunks_offset;
+		thunks_size = info->thunks_size;
+
+		orig_target = mono_arch_get_call_target (code + 4);
+
+		mono_domain_lock (domain);
+
+		target_thunk = NULL;
+		if (orig_target >= thunks && orig_target < thunks + thunks_size) {
+			/* The call already points to a thunk, because of trampolines etc. */
+			target_thunk = orig_target;
+		} else {
+			for (p = thunks; p < thunks + thunks_size; p += THUNK_SIZE) {
+				if (((guint32*)p) [0] == 0) {
+					/* Free entry */
+					target_thunk = p;
+					break;
+				} else if (((guint64*)p) [1] == (guint64)target) {
+					/* Thunk already points to target */
+					target_thunk = p;
+					break;
+				}
+			}
+		}
+
+		//printf ("THUNK: %p %p %p\n", code, target, target_thunk);
+
+		if (!target_thunk) {
+			mono_domain_unlock (domain);
+			g_print ("thunk failed %p->%p, thunk space=%d method %s", code, target, thunks_size, cfg ? mono_method_full_name (cfg->method, TRUE) : mono_method_full_name (jinfo_get_method (ji), TRUE));
+			g_assert_not_reached ();
+		}
+
+		emit_thunk (target_thunk, target);
+
+		mono_domain_unlock (domain);
+
+		return target_thunk;
+	}
+}
+
+static void
+arm_patch_full (MonoCompile *cfg, MonoDomain *domain, guint8 *code, guint8 *target, int relocation)
+{
+	switch (relocation) {
+	case MONO_R_ARM64_B:
+		arm_b (code, target);
+		break;
+	case MONO_R_ARM64_BCC: {
+		int cond;
+
+		cond = arm_get_bcc_cond (code);
+		arm_bcc (code, cond, target);
+		break;
+	}
+	case MONO_R_ARM64_CBZ:
+		arm_set_cbz_target (code, target);
+		break;
+	case MONO_R_ARM64_IMM: {
+		guint64 imm = (guint64)target;
+		int dreg;
+
+		/* emit_imm64_template () */
+		dreg = arm_get_movzx_rd (code);
+		arm_movzx (code, dreg, imm & 0xffff, 0);
+		arm_movkx (code, dreg, (imm >> 16) & 0xffff, 16);
+		arm_movkx (code, dreg, (imm >> 32) & 0xffff, 32);
+		arm_movkx (code, dreg, (imm >> 48) & 0xffff, 48);
+		break;
+	}
+	case MONO_R_ARM64_BL:
+		if (arm_is_bl_disp (code, target)) {
+			arm_bl (code, target);
+		} else {
+			gpointer thunk;
+
+			thunk = create_thunk (cfg, domain, code, target);
+			g_assert (arm_is_bl_disp (code, thunk));
+			arm_bl (code, thunk);			
+		}
+		break;
+	default:
+		g_assert_not_reached ();
+	}
+}
+
+static void
+arm_patch_rel (guint8 *code, guint8 *target, int relocation)
+{
+	arm_patch_full (NULL, NULL, code, target, relocation);
+}
+
+void
+mono_arm_patch (guint8 *code, guint8 *target, int relocation)
+{
+	arm_patch_rel (code, target, relocation);
+}
+
+void
+mono_arch_patch_code_new (MonoCompile *cfg, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gpointer target)
+{
+	guint8 *ip;
+
+	ip = ji->ip.i + code;
+
+	switch (ji->type) {
+	case MONO_PATCH_INFO_METHOD_JUMP:
+		/* ji->relocation is not set by the caller */
+		arm_patch_rel (ip, (guint8*)target, MONO_R_ARM64_B);
+		break;
+	default:
+		arm_patch_full (cfg, domain, ip, (guint8*)target, ji->relocation);
+		break;
+	}
+}
+
+void
+mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
+{
+}
+
+void
+mono_arch_flush_register_windows (void)
+{
+}
+
+MonoMethod*
+mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
+{
+	return (gpointer)regs [MONO_ARCH_RGCTX_REG];
+}
+
+MonoVTable*
+mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
+{
+	return (gpointer)regs [MONO_ARCH_RGCTX_REG];
+}
+
+mgreg_t
+mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
+{
+	return ctx->regs [reg];
+}
+
+void
+mono_arch_context_set_int_reg (MonoContext *ctx, int reg, mgreg_t val)
+{
+	ctx->regs [reg] = val;
+}
+
+/*
+ * mono_arch_set_target:
+ *
+ *   Set the target architecture the JIT backend should generate code for, in the form
+ * of a GNU target triplet. Only used in AOT mode.
+ */
+void
+mono_arch_set_target (char *mtriple)
+{
+	if (strstr (mtriple, "darwin") || strstr (mtriple, "ios")) {
+		ios_abi = TRUE;
+	}
+}
+
+static void
+add_general (CallInfo *cinfo, ArgInfo *ainfo, int size, gboolean sign)
+{
+	if (cinfo->gr >= PARAM_REGS) {
+		ainfo->storage = ArgOnStack;
+		if (ios_abi) {
+			/* Assume size == align */
+			cinfo->stack_usage = ALIGN_TO (cinfo->stack_usage, size);
+			ainfo->offset = cinfo->stack_usage;
+			ainfo->slot_size = size;
+			ainfo->sign = sign;
+			cinfo->stack_usage += size;
+		} else {
+			ainfo->offset = cinfo->stack_usage;
+			ainfo->slot_size = 8;
+			ainfo->sign = FALSE;
+			/* Put arguments into 8 byte aligned stack slots */
+			cinfo->stack_usage += 8;
+		}
+	} else {
+		ainfo->storage = ArgInIReg;
+		ainfo->reg = cinfo->gr;
+		cinfo->gr ++;
+	}
+}
+
+static void
+add_fp (CallInfo *cinfo, ArgInfo *ainfo, gboolean single)
+{
+	int size = single ? 4 : 8;
+
+	if (cinfo->fr >= FP_PARAM_REGS) {
+		ainfo->storage = single ? ArgOnStackR4 : ArgOnStackR8;
+		if (ios_abi) {
+			cinfo->stack_usage = ALIGN_TO (cinfo->stack_usage, size);
+			ainfo->offset = cinfo->stack_usage;
+			ainfo->slot_size = size;
+			cinfo->stack_usage += size;
+		} else {
+			ainfo->offset = cinfo->stack_usage;
+			ainfo->slot_size = 8;
+			/* Put arguments into 8 byte aligned stack slots */
+			cinfo->stack_usage += 8;
+		}
+	} else {
+		if (single)
+			ainfo->storage = ArgInFRegR4;
+		else
+			ainfo->storage = ArgInFReg;
+		ainfo->reg = cinfo->fr;
+		cinfo->fr ++;
+	}
+}
+
+static gboolean
+is_hfa (MonoType *t, int *out_nfields, int *out_esize, int *field_offsets)
+{
+	MonoClass *klass;
+	gpointer iter;
+	MonoClassField *field;
+	MonoType *ftype, *prev_ftype = NULL;
+	int i, nfields = 0;
+
+	klass = mono_class_from_mono_type (t);
+	iter = NULL;
+	while ((field = mono_class_get_fields (klass, &iter))) {
+		if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
+			continue;
+		ftype = mono_field_get_type (field);
+		ftype = mini_type_get_underlying_type (NULL, ftype);
+
+		if (MONO_TYPE_ISSTRUCT (ftype)) {
+			int nested_nfields, nested_esize;
+			int nested_field_offsets [16];
+
+			if (!is_hfa (ftype, &nested_nfields, &nested_esize, nested_field_offsets))
+				return FALSE;
+			if (nested_esize == 4)
+				ftype = &mono_defaults.single_class->byval_arg;
+			else
+				ftype = &mono_defaults.double_class->byval_arg;
+			if (prev_ftype && prev_ftype->type != ftype->type)
+				return FALSE;
+			prev_ftype = ftype;
+			for (i = 0; i < nested_nfields; ++i) {
+				if (nfields + i < 4)
+					field_offsets [nfields + i] = field->offset - sizeof (MonoObject) + nested_field_offsets [i];
+			}
+			nfields += nested_nfields;
+		} else {
+			if (!(!ftype->byref && (ftype->type == MONO_TYPE_R4 || ftype->type == MONO_TYPE_R8)))
+				return FALSE;
+			if (prev_ftype && prev_ftype->type != ftype->type)
+				return FALSE;
+			prev_ftype = ftype;
+			if (nfields < 4)
+				field_offsets [nfields] = field->offset - sizeof (MonoObject);
+			nfields ++;
+		}
+	}
+	if (nfields == 0 || nfields > 4)
+		return FALSE;
+	*out_nfields = nfields;
+	*out_esize = prev_ftype->type == MONO_TYPE_R4 ? 4 : 8;
+	return TRUE;
+}
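+
+/*
+ * Examples of the classification above (a sketch):
+ *   struct { float x, y; }           -> HFA, nfields == 2, esize == 4
+ *   struct { double a; double b; }   -> HFA, nfields == 2, esize == 8
+ *   struct { float x; double y; }    -> not an HFA (mixed element types)
+ *   five or more float fields        -> not an HFA (nfields > 4)
+ */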
+
+static void
+add_valuetype (CallInfo *cinfo, MonoGenericSharingContext *gsctx, ArgInfo *ainfo, MonoType *t)
+{
+	int i, size, align_size, nregs, nfields, esize;
+	int field_offsets [16];
+	guint32 align;
+
+	size = mini_type_stack_size_full (gsctx, t, &align, FALSE);
+	align_size = ALIGN_TO (size, 8);
+
+	nregs = align_size / 8;
+	if (is_hfa (t, &nfields, &esize, field_offsets)) {
+		/*
+		 * The struct might include nested float structs aligned at 8,
+		 * so we need to keep track of the offsets of the individual fields.
+		 */
+		if (cinfo->fr + nfields <= FP_PARAM_REGS) {
+			ainfo->storage = ArgHFA;
+			ainfo->reg = cinfo->fr;
+			ainfo->nregs = nfields;
+			ainfo->size = size;
+			ainfo->esize = esize;
+			for (i = 0; i < nfields; ++i)
+				ainfo->foffsets [i] = field_offsets [i];
+			cinfo->fr += ainfo->nregs;
+		} else {
+			cinfo->fr = FP_PARAM_REGS;
+			size = ALIGN_TO (size, 8);
+			ainfo->storage = ArgVtypeOnStack;
+			ainfo->offset = cinfo->stack_usage;
+			ainfo->size = size;
+			ainfo->hfa = TRUE;
+			ainfo->nregs = nfields;
+			ainfo->esize = esize;
+			cinfo->stack_usage += size;
+		}
+		return;
+	}
+
+	if (align_size > 16) {
+		ainfo->storage = ArgVtypeByRef;
+		ainfo->size = size;
+		return;
+	}
+
+	if (cinfo->gr + nregs > PARAM_REGS) {
+		size = ALIGN_TO (size, 8);
+		ainfo->storage = ArgVtypeOnStack;
+		ainfo->offset = cinfo->stack_usage;
+		ainfo->size = size;
+		cinfo->stack_usage += size;
+		cinfo->gr = PARAM_REGS;
+	} else {
+		ainfo->storage = ArgVtypeInIRegs;
+		ainfo->reg = cinfo->gr;
+		ainfo->nregs = nregs;
+		ainfo->size = size;
+		cinfo->gr += nregs;
+	}
+}
+
+static void
+add_param (CallInfo *cinfo, MonoGenericSharingContext *gsctx, ArgInfo *ainfo, MonoType *t)
+{
+	MonoType *ptype;
+
+	ptype = mini_type_get_underlying_type (gsctx, t);
+	switch (ptype->type) {
+	case MONO_TYPE_I1:
+		add_general (cinfo, ainfo, 1, TRUE);
+		break;
+	case MONO_TYPE_BOOLEAN:
+	case MONO_TYPE_U1:
+		add_general (cinfo, ainfo, 1, FALSE);
+		break;
+	case MONO_TYPE_I2:
+		add_general (cinfo, ainfo, 2, TRUE);
+		break;
+	case MONO_TYPE_U2:
+	case MONO_TYPE_CHAR:
+		add_general (cinfo, ainfo, 2, FALSE);
+		break;
+	case MONO_TYPE_I4:
+		add_general (cinfo, ainfo, 4, TRUE);
+		break;
+	case MONO_TYPE_U4:
+		add_general (cinfo, ainfo, 4, FALSE);
+		break;
+	case MONO_TYPE_I:
+	case MONO_TYPE_U:
+	case MONO_TYPE_PTR:
+	case MONO_TYPE_FNPTR:
+	case MONO_TYPE_CLASS:
+	case MONO_TYPE_OBJECT:
+	case MONO_TYPE_SZARRAY:
+	case MONO_TYPE_ARRAY:
+	case MONO_TYPE_STRING:
+	case MONO_TYPE_U8:
+	case MONO_TYPE_I8:
+		add_general (cinfo, ainfo, 8, FALSE);
+		break;
+	case MONO_TYPE_R8:
+		add_fp (cinfo, ainfo, FALSE);
+		break;
+	case MONO_TYPE_R4:
+		add_fp (cinfo, ainfo, TRUE);
+		break;
+	case MONO_TYPE_VALUETYPE:
+	case MONO_TYPE_TYPEDBYREF:
+		add_valuetype (cinfo, gsctx, ainfo, ptype);
+		break;
+	case MONO_TYPE_VOID:
+		ainfo->storage = ArgNone;
+		break;
+	case MONO_TYPE_GENERICINST:
+		if (!mono_type_generic_inst_is_valuetype (ptype)) {
+			add_general (cinfo, ainfo, 8, FALSE);
+		} else if (mini_is_gsharedvt_variable_type_gsctx (gsctx, ptype)) {
+			/*
+			 * Treat gsharedvt arguments as large vtypes
+			 */
+			ainfo->storage = ArgVtypeByRef;
+			ainfo->gsharedvt = TRUE;
+		} else {
+			add_valuetype (cinfo, gsctx, ainfo, ptype);
+		}
+		break;
+	case MONO_TYPE_VAR:
+	case MONO_TYPE_MVAR:
+		g_assert (mini_is_gsharedvt_type_gsctx (gsctx, ptype));
+		ainfo->storage = ArgVtypeByRef;
+		ainfo->gsharedvt = TRUE;
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+}
+
+/*
+ * get_call_info:
+ *
+ *  Obtain information about a call according to the calling convention.
+ */
+static CallInfo*
+get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig)
+{
+	CallInfo *cinfo;
+	ArgInfo *ainfo;
+	int n, pstart, pindex;
+
+	n = sig->hasthis + sig->param_count;
+
+	if (mp)
+		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
+	else
+		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
+
+	cinfo->nargs = n;
+
+	/* Return value */
+	add_param (cinfo, gsctx, &cinfo->ret, sig->ret);
+	if (cinfo->ret.storage == ArgVtypeByRef)
+		cinfo->ret.reg = ARMREG_R8;
+	/* Reset state */
+	cinfo->gr = 0;
+	cinfo->fr = 0;
+	cinfo->stack_usage = 0;
+
+	/* Parameters */
+	if (sig->hasthis)
+		add_general (cinfo, cinfo->args + 0, 8, FALSE);
+	pstart = 0;
+	for (pindex = pstart; pindex < sig->param_count; ++pindex) {
+		ainfo = cinfo->args + sig->hasthis + pindex;
+
+		if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos)) {
+			/* Prevent implicit arguments and sig_cookie from
+			   being passed in registers */
+			cinfo->gr = PARAM_REGS;
+			cinfo->fr = FP_PARAM_REGS;
+			/* Emit the signature cookie just before the implicit arguments */
+			add_param (cinfo, gsctx, &cinfo->sig_cookie, &mono_defaults.int_class->byval_arg);
+		}
+
+		add_param (cinfo, gsctx, ainfo, sig->params [pindex]);
+		if (ainfo->storage == ArgVtypeByRef) {
+			/* Pass the argument address in the next register */
+			if (cinfo->gr >= PARAM_REGS) {
+				ainfo->storage = ArgVtypeByRefOnStack;
+				ainfo->offset = cinfo->stack_usage;
+				cinfo->stack_usage += 8;
+			} else {
+				ainfo->reg = cinfo->gr;
+				cinfo->gr ++;
+			}
+		}
+	}
+
+	/* Handle the case where there are no implicit arguments */
+	if ((sig->call_convention == MONO_CALL_VARARG) && (pindex == sig->sentinelpos)) {
+		/* Prevent implicit arguments and sig_cookie from
+		   being passed in registers */
+		cinfo->gr = PARAM_REGS;
+		cinfo->fr = FP_PARAM_REGS;
+		/* Emit the signature cookie just before the implicit arguments */
+		add_param (cinfo, gsctx, &cinfo->sig_cookie, &mono_defaults.int_class->byval_arg);
+	}
+
+	cinfo->stack_usage = ALIGN_TO (cinfo->stack_usage, MONO_ARCH_FRAME_ALIGNMENT);
+
+	return cinfo;
+}
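+
+/*
+ * Sketch of the assignment performed above for a signature like
+ * (int, double, 24-byte non-HFA struct): the int goes to r0 (ArgInIReg),
+ * the double to d0 (ArgInFReg), and the struct, whose aligned size
+ * exceeds 16 bytes, is passed by reference (ArgVtypeByRef) with its
+ * address in the next free integer register, r1.
+ */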
+
+typedef struct {
+	MonoMethodSignature *sig;
+	CallInfo *cinfo;
+	MonoType *rtype;
+	MonoType **param_types;
+	int n_fpargs, n_fpret;
+} ArchDynCallInfo;
+
+static gboolean
+dyn_call_supported (CallInfo *cinfo, MonoMethodSignature *sig)
+{
+	int i;
+
+	if (sig->hasthis + sig->param_count > PARAM_REGS + DYN_CALL_STACK_ARGS)
+		return FALSE;
+
+	// FIXME: Add more cases
+	switch (cinfo->ret.storage) {
+	case ArgNone:
+	case ArgInIReg:
+	case ArgInFReg:
+	case ArgInFRegR4:
+	case ArgVtypeByRef:
+		break;
+	case ArgVtypeInIRegs:
+		if (cinfo->ret.nregs > 2)
+			return FALSE;
+		break;
+	case ArgHFA:
+		break;
+	default:
+		return FALSE;
+	}
+
+	for (i = 0; i < cinfo->nargs; ++i) {
+		ArgInfo *ainfo = &cinfo->args [i];
+
+		switch (ainfo->storage) {
+		case ArgInIReg:
+		case ArgVtypeInIRegs:
+		case ArgInFReg:
+		case ArgInFRegR4:
+		case ArgHFA:
+		case ArgVtypeByRef:
+			break;
+		case ArgOnStack:
+			if (ainfo->offset >= DYN_CALL_STACK_ARGS * sizeof (mgreg_t))
+				return FALSE;
+			break;
+		default:
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+MonoDynCallInfo*
+mono_arch_dyn_call_prepare (MonoMethodSignature *sig)
+{
+	ArchDynCallInfo *info;
+	CallInfo *cinfo;
+	int i;
+
+	cinfo = get_call_info (NULL, NULL, sig);
+
+	if (!dyn_call_supported (cinfo, sig)) {
+		g_free (cinfo);
+		return NULL;
+	}
+
+	info = g_new0 (ArchDynCallInfo, 1);
+	// FIXME: Preprocess the info to speed up start_dyn_call ()
+	info->sig = sig;
+	info->cinfo = cinfo;
+	info->rtype = mini_replace_type (sig->ret);
+	info->param_types = g_new0 (MonoType*, sig->param_count);
+	for (i = 0; i < sig->param_count; ++i)
+		info->param_types [i] = mini_replace_type (sig->params [i]);
+
+	switch (cinfo->ret.storage) {
+	case ArgInFReg:
+	case ArgInFRegR4:
+		info->n_fpret = 1;
+		break;
+	case ArgHFA:
+		info->n_fpret = cinfo->ret.nregs;
+		break;
+	default:
+		break;
+	}
+	
+	return (MonoDynCallInfo*)info;
+}
+
+void
+mono_arch_dyn_call_free (MonoDynCallInfo *info)
+{
+	ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;
+
+	g_free (ainfo->cinfo);
+	g_free (ainfo->param_types);
+	g_free (ainfo);
+}
+
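+/*
+ * The dynamic call code stores every fp argument and result as a full 8 byte
+ * double in DynCallArgs.fpregs, so R4 values have to be bitcast to and from
+ * R8. Note that these helpers rely on type punning through pointers instead
+ * of a strictly conforming memcpy () based bitcast.
+ */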
+static double
+bitcast_r4_to_r8 (float f)
+{
+	float *p = &f;
+
+	return *(double*)p;
+}
+
+static float
+bitcast_r8_to_r4 (double f)
+{
+	double *p = &f;
+
+	return *(float*)p;
+}
+
+void
+mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf, int buf_len)
+{
+	ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
+	DynCallArgs *p = (DynCallArgs*)buf;
+	int aindex, arg_index, greg, i, pindex;
+	MonoMethodSignature *sig = dinfo->sig;
+	CallInfo *cinfo = dinfo->cinfo;
+	int buffer_offset = 0;
+
+	g_assert (buf_len >= sizeof (DynCallArgs));
+
+	p->res = 0;
+	p->ret = ret;
+	p->n_fpargs = dinfo->n_fpargs;
+	p->n_fpret = dinfo->n_fpret;
+
+	arg_index = 0;
+	greg = 0;
+	pindex = 0;
+
+	if (sig->hasthis)
+		p->regs [greg ++] = (mgreg_t)*(args [arg_index ++]);
+
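+	/* Vtypes returned by reference expect the address of the caller allocated buffer in x8 */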
+	if (cinfo->ret.storage == ArgVtypeByRef)
+		p->regs [ARMREG_R8] = (mgreg_t)ret;
+
+	for (aindex = pindex; aindex < sig->param_count; aindex++) {
+		MonoType *t = dinfo->param_types [aindex];
+		gpointer *arg = args [arg_index ++];
+		ArgInfo *ainfo = &cinfo->args [aindex + sig->hasthis];
+		int slot = -1;
+
+		if (ainfo->storage == ArgOnStack) {
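+			/* Stack arguments are stored after the PARAM_REGS argument registers and the x8 slot in p->regs, hence the + 1 */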
+			slot = PARAM_REGS + 1 + (ainfo->offset / sizeof (mgreg_t));
+		} else {
+			slot = ainfo->reg;
+		}
+
+		if (t->byref) {
+			p->regs [slot] = (mgreg_t)*arg;
+			continue;
+		}
+
+		if (ios_abi && ainfo->storage == ArgOnStack) {
+			guint8 *stack_arg = (guint8*)&(p->regs [PARAM_REGS + 1]) + ainfo->offset;
+			gboolean handled = TRUE;
+
+			/* Special case arguments smaller than 1 machine word */
+			switch (t->type) {
+			case MONO_TYPE_BOOLEAN:
+			case MONO_TYPE_U1:
+				*(guint8*)stack_arg = *(guint8*)arg;
+				break;
+			case MONO_TYPE_I1:
+				*(gint8*)stack_arg = *(gint8*)arg;
+				break;
+			case MONO_TYPE_U2:
+			case MONO_TYPE_CHAR:
+				*(guint16*)stack_arg = *(guint16*)arg;
+				break;
+			case MONO_TYPE_I2:
+				*(gint16*)stack_arg = *(gint16*)arg;
+				break;
+			case MONO_TYPE_I4:
+				*(gint32*)stack_arg = *(gint32*)arg;
+				break;
+			case MONO_TYPE_U4:
+				*(guint32*)stack_arg = *(guint32*)arg;
+				break;
+			default:
+				handled = FALSE;
+				break;
+			}
+			if (handled)
+				continue;
+		}
+
+		switch (t->type) {
+		case MONO_TYPE_STRING:
+		case MONO_TYPE_CLASS:
+		case MONO_TYPE_ARRAY:
+		case MONO_TYPE_SZARRAY:
+		case MONO_TYPE_OBJECT:
+		case MONO_TYPE_PTR:
+		case MONO_TYPE_I:
+		case MONO_TYPE_U:
+		case MONO_TYPE_I8:
+		case MONO_TYPE_U8:
+			p->regs [slot] = (mgreg_t)*arg;
+			break;
+		case MONO_TYPE_BOOLEAN:
+		case MONO_TYPE_U1:
+			p->regs [slot] = *(guint8*)arg;
+			break;
+		case MONO_TYPE_I1:
+			p->regs [slot] = *(gint8*)arg;
+			break;
+		case MONO_TYPE_I2:
+			p->regs [slot] = *(gint16*)arg;
+			break;
+		case MONO_TYPE_U2:
+		case MONO_TYPE_CHAR:
+			p->regs [slot] = *(guint16*)arg;
+			break;
+		case MONO_TYPE_I4:
+			p->regs [slot] = *(gint32*)arg;
+			break;
+		case MONO_TYPE_U4:
+			p->regs [slot] = *(guint32*)arg;
+			break;
+		case MONO_TYPE_R4:
+			p->fpregs [ainfo->reg] = bitcast_r4_to_r8 (*(float*)arg);
+			p->n_fpargs ++;
+			break;
+		case MONO_TYPE_R8:
+			p->fpregs [ainfo->reg] = *(double*)arg;
+			p->n_fpargs ++;
+			break;
+		case MONO_TYPE_GENERICINST:
+			if (MONO_TYPE_IS_REFERENCE (t)) {
+				p->regs [slot] = (mgreg_t)*arg;
+				break;
+			} else {
+				if (t->type == MONO_TYPE_GENERICINST && mono_class_is_nullable (mono_class_from_mono_type (t))) {
+					MonoClass *klass = mono_class_from_mono_type (t);
+					guint8 *nullable_buf;
+					int size;
+
+					/*
+					 * Use p->buffer as a temporary buffer since the data needs to be available after this call
+					 * if the nullable param is passed by ref.
+					 */
+					size = mono_class_value_size (klass, NULL);
+					nullable_buf = p->buffer + buffer_offset;
+					buffer_offset += size;
+					g_assert (buffer_offset <= 256);
+
+					/* The argument pointed to by arg is either a boxed vtype or null */
+					mono_nullable_init (nullable_buf, (MonoObject*)arg, klass);
+
+					arg = (gpointer*)nullable_buf;
+					/* Fall through */
+				} else {
+					/* Fall through */
+				}
+			}
+		case MONO_TYPE_VALUETYPE:
+			switch (ainfo->storage) {
+			case ArgVtypeInIRegs:
+				for (i = 0; i < ainfo->nregs; ++i)
+					p->regs [slot ++] = ((mgreg_t*)arg) [i];
+				break;
+			case ArgHFA:
+				if (ainfo->esize == 4) {
+					for (i = 0; i < ainfo->nregs; ++i)
+						p->fpregs [ainfo->reg + i] = bitcast_r4_to_r8 (((float*)arg) [ainfo->foffsets [i] / 4]);
+				} else {
+					for (i = 0; i < ainfo->nregs; ++i)
+						p->fpregs [ainfo->reg + i] = ((double*)arg) [ainfo->foffsets [i] / 8];
+				}
+				p->n_fpargs += ainfo->nregs;
+				break;
+			case ArgVtypeByRef:
+				p->regs [slot] = (mgreg_t)arg;
+				break;
+			default:
+				g_assert_not_reached ();
+				break;
+			}
+			break;
+		default:
+			g_assert_not_reached ();
+		}
+	}
+}
+
+void
+mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
+{
+	ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;
+	CallInfo *cinfo = ainfo->cinfo;
+	DynCallArgs *args = (DynCallArgs*)buf;
+	MonoType *ptype = ainfo->rtype;
+	guint8 *ret = args->ret;
+	mgreg_t res = args->res;
+	mgreg_t res2 = args->res2;
+	int i;
+
+	if (cinfo->ret.storage == ArgVtypeByRef)
+		return;
+
+	switch (ptype->type) {
+	case MONO_TYPE_VOID:
+		*(gpointer*)ret = NULL;
+		break;
+	case MONO_TYPE_STRING:
+	case MONO_TYPE_CLASS:
+	case MONO_TYPE_ARRAY:
+	case MONO_TYPE_SZARRAY:
+	case MONO_TYPE_OBJECT:
+	case MONO_TYPE_I:
+	case MONO_TYPE_U:
+	case MONO_TYPE_PTR:
+		*(gpointer*)ret = (gpointer)res;
+		break;
+	case MONO_TYPE_I1:
+		*(gint8*)ret = res;
+		break;
+	case MONO_TYPE_U1:
+	case MONO_TYPE_BOOLEAN:
+		*(guint8*)ret = res;
+		break;
+	case MONO_TYPE_I2:
+		*(gint16*)ret = res;
+		break;
+	case MONO_TYPE_U2:
+	case MONO_TYPE_CHAR:
+		*(guint16*)ret = res;
+		break;
+	case MONO_TYPE_I4:
+		*(gint32*)ret = res;
+		break;
+	case MONO_TYPE_U4:
+		*(guint32*)ret = res;
+		break;
+	case MONO_TYPE_I8:
+	case MONO_TYPE_U8:
+		*(guint64*)ret = res;
+		break;
+	case MONO_TYPE_R4:
+		*(float*)ret = bitcast_r8_to_r4 (args->fpregs [0]);
+		break;
+	case MONO_TYPE_R8:
+		*(double*)ret = args->fpregs [0];
+		break;
+	case MONO_TYPE_GENERICINST:
+		if (MONO_TYPE_IS_REFERENCE (ptype)) {
+			*(gpointer*)ret = (gpointer)res;
+			break;
+		} else {
+			/* Fall through */
+		}
+	case MONO_TYPE_VALUETYPE:
+		switch (ainfo->cinfo->ret.storage) {
+		case ArgVtypeInIRegs:
+			*(mgreg_t*)ret = res;
+			if (ainfo->cinfo->ret.nregs > 1)
+				((mgreg_t*)ret) [1] = res2;
+			break;
+		case ArgHFA:
+			/* Use the same area for returning fp values */
+			if (cinfo->ret.esize == 4) {
+				for (i = 0; i < cinfo->ret.nregs; ++i)
+					((float*)ret) [cinfo->ret.foffsets [i] / 4] = bitcast_r8_to_r4 (args->fpregs [i]);
+			} else {
+				for (i = 0; i < cinfo->ret.nregs; ++i)
+					((double*)ret) [cinfo->ret.foffsets [i] / 8] = args->fpregs [i];
+			}
+			break;
+		default:
+			g_assert_not_reached ();
+			break;
+		}
+		break;
+	default:
+		g_assert_not_reached ();
+	}
+}
+
+#if __APPLE__
+void sys_icache_invalidate (void *start, size_t len);
+#endif
+
+void
+mono_arch_flush_icache (guint8 *code, gint size)
+{
+#ifndef MONO_CROSS_COMPILE
+#if __APPLE__
+	sys_icache_invalidate (code, size);
+#else
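+	/* __clear_cache () is the gcc/clang builtin for flushing the instruction cache over a range */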
+	__clear_cache (code, code + size);
+#endif
+#endif
+}
+
+#ifndef DISABLE_JIT
+
+gboolean
+mono_arch_opcode_needs_emulation (MonoCompile *cfg, int opcode)
+{
+	NOT_IMPLEMENTED;
+	return FALSE;
+}
+
+GList *
+mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
+{
+	GList *vars = NULL;
+	int i;
+
+	for (i = 0; i < cfg->num_varinfo; i++) {
+		MonoInst *ins = cfg->varinfo [i];
+		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
+
+		/* unused vars */
+		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
+			continue;
+
+		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
+		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
+			continue;
+
+		if (mono_is_regsize_var (ins->inst_vtype)) {
+			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
+			g_assert (i == vmv->idx);
+			vars = g_list_prepend (vars, vmv);
+		}
+	}
+
+	vars = mono_varlist_sort (cfg, vars, 0);
+
+	return vars;
+}
+
+GList *
+mono_arch_get_global_int_regs (MonoCompile *cfg)
+{
+	GList *regs = NULL;
+	int i;
+
+	/* r28 is reserved for cfg->arch.args_reg */
+	/* r27 is reserved for the imt argument */
+	for (i = ARMREG_R19; i <= ARMREG_R26; ++i)
+		regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
+
+	return regs;
+}
+
+guint32
+mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
+{
+	MonoInst *ins = cfg->varinfo [vmv->idx];
+
+	if (ins->opcode == OP_ARG)
+		return 1;
+	else
+		return 2;
+}
+
+void
+mono_arch_create_vars (MonoCompile *cfg)
+{
+	MonoMethodSignature *sig;
+	CallInfo *cinfo;
+
+	sig = mono_method_signature (cfg->method);
+	if (!cfg->arch.cinfo)
+		cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
+	cinfo = cfg->arch.cinfo;
+
+	if (cinfo->ret.storage == ArgVtypeByRef) {
+		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+		cfg->vret_addr->flags |= MONO_INST_VOLATILE;
+	}
+
+	if (cfg->gen_sdb_seq_points) {
+		MonoInst *ins;
+
+		if (cfg->compile_aot) {
+			ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+			ins->flags |= MONO_INST_VOLATILE;
+			cfg->arch.seq_point_info_var = ins;
+		}
+
+		ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+		ins->flags |= MONO_INST_VOLATILE;
+		cfg->arch.ss_tramp_var = ins;
+
+		ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+		ins->flags |= MONO_INST_VOLATILE;
+		cfg->arch.bp_tramp_var = ins;
+	}
+
+	if (cfg->method->save_lmf) {
+		cfg->create_lmf_var = TRUE;
+		cfg->lmf_ir = TRUE;
+#ifndef TARGET_MACH
+		cfg->lmf_ir_mono_lmf = TRUE;
+#endif
+	}
+}
+
+void
+mono_arch_allocate_vars (MonoCompile *cfg)
+{
+	MonoMethodSignature *sig;
+	MonoInst *ins;
+	CallInfo *cinfo;
+	ArgInfo *ainfo;
+	int i, offset, size, align;
+	guint32 locals_stack_size, locals_stack_align;
+	gint32 *offsets;
+
+	/*
+	 * Allocate arguments and locals to either register (OP_REGVAR) or to a stack slot (OP_REGOFFSET).
+	 * Compute cfg->stack_offset and update cfg->used_int_regs.
+	 */
+
+	sig = mono_method_signature (cfg->method);
+
+	if (!cfg->arch.cinfo)
+		cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
+	cinfo = cfg->arch.cinfo;
+
+	/*
+	 * The ARM64 ABI always uses a frame pointer.
+	 * The instruction set prefers positive offsets, so fp points to the bottom of the
+	 * frame, and stack slots are at positive offsets.
+	 * If some arguments are received on the stack, their offsets relative to fp
+	 * cannot be computed right now because the stack frame might grow due to spilling
+	 * done by the local register allocator. To solve this, we reserve a register
+	 * which points to them.
+	 * The stack frame looks like this:
+	 * args_reg -> <bottom of parent frame>
+	 *             <locals etc>
+	 *       fp -> <saved fp+lr>
+	 *       sp -> <localloc/params area>
+	 */
+	cfg->frame_reg = ARMREG_FP;
+	cfg->flags |= MONO_CFG_HAS_SPILLUP;
+	offset = 0;
+
+	/* Saved fp+lr */
+	offset += 16;
+
+	if (cinfo->stack_usage) {
+		g_assert (!(cfg->used_int_regs & (1 << ARMREG_R28)));
+		cfg->arch.args_reg = ARMREG_R28;
+		cfg->used_int_regs |= 1 << ARMREG_R28;
+	}
+
+	if (cfg->method->save_lmf) {
+		/* The LMF var is allocated normally */
+	} else {
+		/* Callee saved regs */
+		cfg->arch.saved_gregs_offset = offset;
+		for (i = 0; i < 32; ++i)
+			if ((MONO_ARCH_CALLEE_SAVED_REGS & (1 << i)) && (cfg->used_int_regs & (1 << i)))
+				offset += 8;
+	}
+
+	/* Return value */
+	switch (cinfo->ret.storage) {
+	case ArgNone:
+		break;
+	case ArgInIReg:
+	case ArgInFReg:
+	case ArgInFRegR4:
+		cfg->ret->opcode = OP_REGVAR;
+		cfg->ret->dreg = cinfo->ret.reg;
+		break;
+	case ArgVtypeInIRegs:
+	case ArgHFA:
+		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
+		cfg->ret->opcode = OP_REGOFFSET;
+		cfg->ret->inst_basereg = cfg->frame_reg;
+		cfg->ret->inst_offset = offset;
+		if (cinfo->ret.storage == ArgHFA)
+			// FIXME:
+			offset += 64;
+		else
+			offset += 16;
+		break;
+	case ArgVtypeByRef:
+		/* This variable will be initialized in the prolog from R8 */
+		cfg->vret_addr->opcode = OP_REGOFFSET;
+		cfg->vret_addr->inst_basereg = cfg->frame_reg;
+		cfg->vret_addr->inst_offset = offset;
+		offset += 8;
+		if (G_UNLIKELY (cfg->verbose_level > 1)) {
+			printf ("vret_addr =");
+			mono_print_ins (cfg->vret_addr);
+		}
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+
+	/* Arguments */
+	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
+		ainfo = cinfo->args + i;
+
+		ins = cfg->args [i];
+		if (ins->opcode == OP_REGVAR)
+			continue;
+
+		ins->opcode = OP_REGOFFSET;
+		ins->inst_basereg = cfg->frame_reg;
+
+		switch (ainfo->storage) {
+		case ArgInIReg:
+		case ArgInFReg:
+		case ArgInFRegR4:
+			// FIXME: Use nregs/size
+			/* These will be copied to the stack in the prolog */
+			ins->inst_offset = offset;
+			offset += 8;
+			break;
+		case ArgOnStack:
+		case ArgOnStackR4:
+		case ArgOnStackR8:
+		case ArgVtypeOnStack:
+			/* These are in the parent frame */
+			g_assert (cfg->arch.args_reg);
+			ins->inst_basereg = cfg->arch.args_reg;
+			ins->inst_offset = ainfo->offset;
+			break;
+		case ArgVtypeInIRegs:
+		case ArgHFA:
+			ins->opcode = OP_REGOFFSET;
+			ins->inst_basereg = cfg->frame_reg;
+			/* These arguments are saved to the stack in the prolog */
+			ins->inst_offset = offset;
+			if (cfg->verbose_level >= 2)
+				printf ("arg %d allocated to %s+0x%0x.\n", i, mono_arch_regname (ins->inst_basereg), (int)ins->inst_offset);
+			if (ainfo->storage == ArgHFA)
+				// FIXME:
+				offset += 64;
+			else
+				offset += 16;
+			break;
+		case ArgVtypeByRefOnStack: {
+			MonoInst *vtaddr;
+
+			if (ainfo->gsharedvt) {
+				ins->opcode = OP_REGOFFSET;
+				ins->inst_basereg = cfg->arch.args_reg;
+				ins->inst_offset = ainfo->offset;
+				break;
+			}
+
+			/* The vtype address is in the parent frame */
+			g_assert (cfg->arch.args_reg);
+			MONO_INST_NEW (cfg, vtaddr, 0);
+			vtaddr->opcode = OP_REGOFFSET;
+			vtaddr->inst_basereg = cfg->arch.args_reg;
+			vtaddr->inst_offset = ainfo->offset;
+
+			/* Need an indirection */
+			ins->opcode = OP_VTARG_ADDR;
+			ins->inst_left = vtaddr;
+			break;
+		}
+		case ArgVtypeByRef: {
+			MonoInst *vtaddr;
+
+			if (ainfo->gsharedvt) {
+				ins->opcode = OP_REGOFFSET;
+				ins->inst_basereg = cfg->frame_reg;
+				ins->inst_offset = offset;
+				offset += 8;
+				break;
+			}
+
+			/* The vtype address is in a register, will be copied to the stack in the prolog */
+			MONO_INST_NEW (cfg, vtaddr, 0);
+			vtaddr->opcode = OP_REGOFFSET;
+			vtaddr->inst_basereg = cfg->frame_reg;
+			vtaddr->inst_offset = offset;
+			offset += 8;
+
+			/* Need an indirection */
+			ins->opcode = OP_VTARG_ADDR;
+			ins->inst_left = vtaddr;
+			break;
+		}
+		default:
+			g_assert_not_reached ();
+			break;
+		}
+	}
+
+	/* Allocate these first so they have a small offset, OP_SEQ_POINT depends on this */
+	// FIXME: Allocate these to registers
+	ins = cfg->arch.seq_point_info_var;
+	if (ins) {
+		size = 8;
+		align = 8;
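+		/* Round offset up to the next multiple of align */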
+		offset += align - 1;
+		offset &= ~(align - 1);
+		ins->opcode = OP_REGOFFSET;
+		ins->inst_basereg = cfg->frame_reg;
+		ins->inst_offset = offset;
+		offset += size;
+	}
+	ins = cfg->arch.ss_tramp_var;
+	if (ins) {
+		size = 8;
+		align = 8;
+		offset += align - 1;
+		offset &= ~(align - 1);
+		ins->opcode = OP_REGOFFSET;
+		ins->inst_basereg = cfg->frame_reg;
+		ins->inst_offset = offset;
+		offset += size;
+	}
+	ins = cfg->arch.bp_tramp_var;
+	if (ins) {
+		size = 8;
+		align = 8;
+		offset += align - 1;
+		offset &= ~(align - 1);
+		ins->opcode = OP_REGOFFSET;
+		ins->inst_basereg = cfg->frame_reg;
+		ins->inst_offset = offset;
+		offset += size;
+	}
+
+	/* Locals */
+	offsets = mono_allocate_stack_slots (cfg, FALSE, &locals_stack_size, &locals_stack_align);
+	if (locals_stack_align)
+		offset = ALIGN_TO (offset, locals_stack_align);
+
+	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
+		if (offsets [i] != -1) {
+			ins = cfg->varinfo [i];
+			ins->opcode = OP_REGOFFSET;
+			ins->inst_basereg = cfg->frame_reg;
+			ins->inst_offset = offset + offsets [i];
+			//printf ("allocated local %d to ", i); mono_print_tree_nl (ins);
+		}
+	}
+	offset += locals_stack_size;
+
+	offset = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT);
+
+	cfg->stack_offset = offset;
+}
+
+#ifdef ENABLE_LLVM
+LLVMCallInfo*
+mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
+{
+	int i, n;
+	CallInfo *cinfo;
+	ArgInfo *ainfo;
+	LLVMCallInfo *linfo;
+
+	n = sig->param_count + sig->hasthis;
+
+	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
+
+	linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));
+
+	switch (cinfo->ret.storage) {
+	case ArgInIReg:
+	case ArgInFReg:
+	case ArgInFRegR4:
+	case ArgNone:
+		break;
+	case ArgVtypeByRef:
+		linfo->ret.storage = LLVMArgVtypeByRef;
+		break;
+		//
+		// FIXME: This doesn't work yet since the llvm backend represents these types as an i8
+		// array which is returned in int regs
+		//
+	case ArgHFA:
+		linfo->ret.storage = LLVMArgFpStruct;
+		linfo->ret.nslots = cinfo->ret.nregs;
+		linfo->ret.esize = cinfo->ret.esize;
+		break;
+	case ArgVtypeInIRegs:
+		/* LLVM models this by returning an int */
+		linfo->ret.storage = LLVMArgVtypeAsScalar;
+		linfo->ret.nslots = cinfo->ret.nregs;
+		linfo->ret.esize = cinfo->ret.esize;
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+
+	for (i = 0; i < n; ++i) {
+		LLVMArgInfo *lainfo = &linfo->args [i];
+
+		ainfo = cinfo->args + i;
+
+		lainfo->storage = LLVMArgNone;
+
+		switch (ainfo->storage) {
+		case ArgInIReg:
+		case ArgInFReg:
+		case ArgInFRegR4:
+		case ArgOnStack:
+		case ArgOnStackR4:
+		case ArgOnStackR8:
+			lainfo->storage = LLVMArgInIReg;
+			break;
+		case ArgVtypeByRef:
+		case ArgVtypeByRefOnStack:
+			lainfo->storage = LLVMArgVtypeByRef;
+			break;
+		case ArgHFA: {
+			int j;
+
+			lainfo->storage = LLVMArgAsFpArgs;
+			lainfo->nslots = ainfo->nregs;
+			lainfo->esize = ainfo->esize;
+			for (j = 0; j < ainfo->nregs; ++j)
+				lainfo->pair_storage [j] = LLVMArgInFPReg;
+			break;
+		}
+		case ArgVtypeInIRegs:
+			lainfo->storage = LLVMArgAsIArgs;
+			lainfo->nslots = ainfo->nregs;
+			break;
+		case ArgVtypeOnStack:
+			if (ainfo->hfa) {
+				int j;
+				/* Same as above */
+				lainfo->storage = LLVMArgAsFpArgs;
+				lainfo->nslots = ainfo->nregs;
+				lainfo->esize = ainfo->esize;
+				for (j = 0; j < ainfo->nregs; ++j)
+					lainfo->pair_storage [j] = LLVMArgInFPReg;
+			} else {
+				lainfo->storage = LLVMArgAsIArgs;
+				lainfo->nslots = ainfo->size / 8;
+			}
+			break;
+		default:
+			g_assert_not_reached ();
+			break;
+		}
+	}
+
+	return linfo;
+}
+#endif
+
+static void
+add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *arg)
+{
+	MonoInst *ins;
+
+	switch (storage) {
+	case ArgInIReg:
+		MONO_INST_NEW (cfg, ins, OP_MOVE);
+		ins->dreg = mono_alloc_ireg_copy (cfg, arg->dreg);
+		ins->sreg1 = arg->dreg;
+		MONO_ADD_INS (cfg->cbb, ins);
+		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE);
+		break;
+	case ArgInFReg:
+		MONO_INST_NEW (cfg, ins, OP_FMOVE);
+		ins->dreg = mono_alloc_freg (cfg);
+		ins->sreg1 = arg->dreg;
+		MONO_ADD_INS (cfg->cbb, ins);
+		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
+		break;
+	case ArgInFRegR4:
+		if (COMPILE_LLVM (cfg))
+			MONO_INST_NEW (cfg, ins, OP_FMOVE);
+		else if (cfg->r4fp)
+			MONO_INST_NEW (cfg, ins, OP_RMOVE);
+		else
+			MONO_INST_NEW (cfg, ins, OP_ARM_SETFREG_R4);
+		ins->dreg = mono_alloc_freg (cfg);
+		ins->sreg1 = arg->dreg;
+		MONO_ADD_INS (cfg->cbb, ins);
+		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+}
+
+static void
+emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
+{
+	MonoMethodSignature *tmp_sig;
+	int sig_reg;
+
+	if (call->tail_call)
+		NOT_IMPLEMENTED;
+
+	g_assert (cinfo->sig_cookie.storage == ArgOnStack);
+			
+	/*
+	 * mono_ArgIterator_Setup assumes the signature cookie is 
+	 * passed first and all the arguments which were before it are
+	 * passed on the stack after the signature. So compensate by 
+	 * passing a different signature.
+	 */
+	tmp_sig = mono_metadata_signature_dup (call->signature);
+	tmp_sig->param_count -= call->signature->sentinelpos;
+	tmp_sig->sentinelpos = 0;
+	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
+
+	sig_reg = mono_alloc_ireg (cfg);
+	MONO_EMIT_NEW_SIGNATURECONST (cfg, sig_reg, tmp_sig);
+
+	MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, cinfo->sig_cookie.offset, sig_reg);
+}
+
+void
+mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
+{
+	MonoMethodSignature *sig;
+	MonoInst *arg, *vtarg;
+	CallInfo *cinfo;
+	ArgInfo *ainfo;
+	int i;
+
+	sig = call->signature;
+
+	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
+
+	switch (cinfo->ret.storage) {
+	case ArgVtypeInIRegs:
+	case ArgHFA:
+		/*
+		 * The vtype is returned in registers, save the return area address in a local, and save the vtype into
+		 * the location pointed to by it after the call in emit_move_return_value ().
+		 */
+		if (!cfg->arch.vret_addr_loc) {
+			cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+			/* Prevent it from being register allocated or optimized away */
+			((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
+		}
+
+		MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
+		break;
+	case ArgVtypeByRef:
+		/* Pass the vtype return address in R8 */
+		MONO_INST_NEW (cfg, vtarg, OP_MOVE);
+		vtarg->sreg1 = call->vret_var->dreg;
+		vtarg->dreg = mono_alloc_preg (cfg);
+		MONO_ADD_INS (cfg->cbb, vtarg);
+
+		mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
+		break;
+	default:
+		break;
+	}
+
+	for (i = 0; i < cinfo->nargs; ++i) {
+		ainfo = cinfo->args + i;
+		arg = call->args [i];
+
+		if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
+			/* Emit the signature cookie just before the implicit arguments */
+			emit_sig_cookie (cfg, call, cinfo);
+		}
+
+		switch (ainfo->storage) {
+		case ArgInIReg:
+		case ArgInFReg:
+		case ArgInFRegR4:
+			add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, arg);
+			break;
+		case ArgOnStack:
+			switch (ainfo->slot_size) {
+			case 8:
+				MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg);
+				break;
+			case 4:
+				MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg);
+				break;
+			case 2:
+				MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI2_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg);
+				break;
+			case 1:
+				MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg);
+				break;
+			default:
+				g_assert_not_reached ();
+				break;
+			}
+			break;
+		case ArgOnStackR8:
+			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg);
+			break;
+		case ArgOnStackR4:
+			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ARMREG_SP, ainfo->offset, arg->dreg);
+			break;
+		case ArgVtypeInIRegs:
+		case ArgVtypeByRef:
+		case ArgVtypeByRefOnStack:
+		case ArgVtypeOnStack:
+		case ArgHFA: {
+			MonoInst *ins;
+			guint32 align;
+			guint32 size;
+
+			size = mono_class_value_size (arg->klass, &align);
+
+			MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
+			ins->sreg1 = arg->dreg;
+			ins->klass = arg->klass;
+			ins->backend.size = size;
+			ins->inst_p0 = call;
+			ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
+			memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo));
+			MONO_ADD_INS (cfg->cbb, ins);
+			break;
+		}
+		default:
+			g_assert_not_reached ();
+			break;
+		}
+	}
+
+	/* Handle the case where there are no implicit arguments */
+	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (cinfo->nargs == sig->sentinelpos))
+		emit_sig_cookie (cfg, call, cinfo);
+
+	call->call_info = cinfo;
+	call->stack_usage = cinfo->stack_usage;
+}
+
+void
+mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
+{
+	MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
+	ArgInfo *ainfo = ins->inst_p1;
+	MonoInst *load;
+	int i;
+
+	if (ins->backend.size == 0 && !ainfo->gsharedvt)
+		return;
+
+	switch (ainfo->storage) {
+	case ArgVtypeInIRegs:
+		for (i = 0; i < ainfo->nregs; ++i) {
+			// FIXME: Smaller sizes
+			MONO_INST_NEW (cfg, load, OP_LOADI8_MEMBASE);
+			load->dreg = mono_alloc_ireg (cfg);
+			load->inst_basereg = src->dreg;
+			load->inst_offset = i * sizeof(mgreg_t);
+			MONO_ADD_INS (cfg->cbb, load);
+			add_outarg_reg (cfg, call, ArgInIReg, ainfo->reg + i, load);
+		}
+		break;
+	case ArgHFA:
+		for (i = 0; i < ainfo->nregs; ++i) {
+			if (ainfo->esize == 4)
+				MONO_INST_NEW (cfg, load, OP_LOADR4_MEMBASE);
+			else
+				MONO_INST_NEW (cfg, load, OP_LOADR8_MEMBASE);
+			load->dreg = mono_alloc_freg (cfg);
+			load->inst_basereg = src->dreg;
+			load->inst_offset = ainfo->foffsets [i];
+			MONO_ADD_INS (cfg->cbb, load);
+			add_outarg_reg (cfg, call, ainfo->esize == 4 ? ArgInFRegR4 : ArgInFReg, ainfo->reg + i, load);
+		}
+		break;
+	case ArgVtypeByRef:
+	case ArgVtypeByRefOnStack: {
+		MonoInst *vtaddr, *load, *arg;
+
+		/* Pass the vtype address in a reg/on the stack */
+		if (ainfo->gsharedvt) {
+			load = src;
+		} else {
+			/* Make a copy of the argument */
+			vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);
+
+			MONO_INST_NEW (cfg, load, OP_LDADDR);
+			load->inst_p0 = vtaddr;
+			vtaddr->flags |= MONO_INST_INDIRECT;
+			load->type = STACK_MP;
+			load->klass = vtaddr->klass;
+			load->dreg = mono_alloc_ireg (cfg);
+			MONO_ADD_INS (cfg->cbb, load);
+			mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, ainfo->size, 8);
+		}
+
+		if (ainfo->storage == ArgVtypeByRef) {
+			MONO_INST_NEW (cfg, arg, OP_MOVE);
+			arg->dreg = mono_alloc_preg (cfg);
+			arg->sreg1 = load->dreg;
+			MONO_ADD_INS (cfg->cbb, arg);
+			add_outarg_reg (cfg, call, ArgInIReg, ainfo->reg, arg);
+		} else {
+			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ARMREG_SP, ainfo->offset, load->dreg);
+		}
+		break;
+	}
+	case ArgVtypeOnStack:
+		for (i = 0; i < ainfo->size / 8; ++i) {
+			MONO_INST_NEW (cfg, load, OP_LOADI8_MEMBASE);
+			load->dreg = mono_alloc_ireg (cfg);
+			load->inst_basereg = src->dreg;
+			load->inst_offset = i * 8;
+			MONO_ADD_INS (cfg->cbb, load);
+			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, ARMREG_SP, ainfo->offset + (i * 8), load->dreg);
+		}
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+}
+
+void
+mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
+{
+	MonoMethodSignature *sig;
+	CallInfo *cinfo;
+
+	sig = mono_method_signature (cfg->method);
+	if (!cfg->arch.cinfo)
+		cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
+	cinfo = cfg->arch.cinfo;
+
+	switch (cinfo->ret.storage) {
+	case ArgNone:
+		break;
+	case ArgInIReg:
+		MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
+		break;
+	case ArgInFReg:
+		MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
+		break;
+	case ArgInFRegR4:
+		if (COMPILE_LLVM (cfg))
+			MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
+		else if (cfg->r4fp)
+			MONO_EMIT_NEW_UNALU (cfg, OP_RMOVE, cfg->ret->dreg, val->dreg);
+		else
+			MONO_EMIT_NEW_UNALU (cfg, OP_ARM_SETFREG_R4, cfg->ret->dreg, val->dreg);
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+}
+
+gboolean
+mono_arch_tail_call_supported (MonoCompile *cfg, MonoMethodSignature *caller_sig, MonoMethodSignature *callee_sig)
+{
+	CallInfo *c1, *c2;
+	gboolean res;
+
+	if (cfg->compile_aot && !cfg->full_aot)
+		/* OP_TAILCALL doesn't work with AOT */
+		return FALSE;
+
+	c1 = get_call_info (NULL, NULL, caller_sig);
+	c2 = get_call_info (NULL, NULL, callee_sig);
+	res = TRUE;
+	// FIXME: Relax these restrictions
+	if (c1->stack_usage != 0)
+		res = FALSE;
+	if (c1->stack_usage != c2->stack_usage)
+		res = FALSE;
+	if ((c1->ret.storage != ArgNone && c1->ret.storage != ArgInIReg) || c1->ret.storage != c2->ret.storage)
+		res = FALSE;
+
+	g_free (c1);
+	g_free (c2);
+
+	return res;
+}
+
+gboolean 
+mono_arch_is_inst_imm (gint64 imm)
+{
+	return (imm >= -((gint64)1<<31) && imm <= (((gint64)1<<31)-1));
+}
+
+void*
+mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
+{
+	NOT_IMPLEMENTED;
+	return NULL;
+}
+
+void*
+mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
+{
+	NOT_IMPLEMENTED;
+	return NULL;
+}
+
+void
+mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+	//NOT_IMPLEMENTED;
+}
+
+void
+mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+	//NOT_IMPLEMENTED;
+}
+
+#define ADD_NEW_INS(cfg,dest,op) do {	\
+		MONO_INST_NEW ((cfg), (dest), (op)); \
+		mono_bblock_insert_before_ins (bb, ins, (dest)); \
+	} while (0)
+
+void
+mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+	MonoInst *ins, *temp, *last_ins = NULL;
+
+	MONO_BB_FOR_EACH_INS (bb, ins) {
+		switch (ins->opcode) {
+		case OP_SBB:
+		case OP_ISBB:
+		case OP_SUBCC:
+		case OP_ISUBCC:
+			if (ins->next && (ins->next->opcode == OP_COND_EXC_C || ins->next->opcode == OP_COND_EXC_IC))
+				/* ARM sets the C flag to 1 if there was _no_ overflow */
+				ins->next->opcode = OP_COND_EXC_NC;
+			break;
+		case OP_IDIV_IMM:
+		case OP_IREM_IMM:
+		case OP_IDIV_UN_IMM:
+		case OP_IREM_UN_IMM:
+		case OP_LREM_IMM:
+			mono_decompose_op_imm (cfg, bb, ins);
+			break;
+		case OP_LOCALLOC_IMM:
+			if (ins->inst_imm > 32) {
+				ADD_NEW_INS (cfg, temp, OP_ICONST);
+				temp->inst_c0 = ins->inst_imm;
+				temp->dreg = mono_alloc_ireg (cfg);
+				ins->sreg1 = temp->dreg;
+				ins->opcode = mono_op_imm_to_op (ins->opcode);
+			}
+			break;
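+		/* Fuse a compare against zero followed by a conditional branch into a single cbz/cbnz */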
+		case OP_ICOMPARE_IMM:
+			if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_IBEQ) {
+				ins->next->opcode = OP_ARM64_CBZW;
+				ins->next->sreg1 = ins->sreg1;
+				NULLIFY_INS (ins);
+			} else if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_IBNE_UN) {
+				ins->next->opcode = OP_ARM64_CBNZW;
+				ins->next->sreg1 = ins->sreg1;
+				NULLIFY_INS (ins);
+			}
+			break;
+		case OP_LCOMPARE_IMM:
+		case OP_COMPARE_IMM:
+			if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_LBEQ) {
+				ins->next->opcode = OP_ARM64_CBZX;
+				ins->next->sreg1 = ins->sreg1;
+				NULLIFY_INS (ins);
+			} else if (ins->inst_imm == 0 && ins->next && ins->next->opcode == OP_LBNE_UN) {
+				ins->next->opcode = OP_ARM64_CBNZX;
+				ins->next->sreg1 = ins->sreg1;
+				NULLIFY_INS (ins);
+			}
+			break;
+		case OP_FCOMPARE: {
+			gboolean swap = FALSE;
+			int reg;
+
+			if (!ins->next) {
+				/* Optimized away */
+				NULLIFY_INS (ins);
+				break;
+			}
+
+			/*
+			 * FP compares with unordered operands set the flags
+			 * to NZCV=0011, which matches some non-unordered compares
+			 * as well, like LE, so we have to swap the operands.
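+			 * Rewriting "a < b" as "b > a" is safe because ARMCOND_GT does
+			 * not match the unordered flag pattern, so NaN operands compare
+			 * false as required.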
+			 */
+			switch (ins->next->opcode) {
+			case OP_FBLT:
+				ins->next->opcode = OP_FBGT;
+				swap = TRUE;
+				break;
+			case OP_FBLE:
+				ins->next->opcode = OP_FBGE;
+				swap = TRUE;
+				break;
+			default:
+				break;
+			}
+			if (swap) {
+				reg = ins->sreg1;
+				ins->sreg1 = ins->sreg2;
+				ins->sreg2 = reg;
+			}
+			break;
+		}
+		default:
+			break;
+		}
+
+		last_ins = ins;
+	}
+	bb->last_ins = last_ins;
+	bb->max_vreg = cfg->next_vreg;
+}
+
+void
+mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
+{
+}
+
+static int
+opcode_to_armcond (int opcode)
+{
+	switch (opcode) {
+	case OP_IBEQ:
+	case OP_LBEQ:
+	case OP_FBEQ:
+	case OP_CEQ:
+	case OP_ICEQ:
+	case OP_LCEQ:
+	case OP_FCEQ:
+	case OP_RCEQ:
+	case OP_COND_EXC_IEQ:
+	case OP_COND_EXC_EQ:
+		return ARMCOND_EQ;
+	case OP_IBGE:
+	case OP_LBGE:
+	case OP_FBGE:
+	case OP_ICGE:
+	case OP_FCGE:
+	case OP_RCGE:
+		return ARMCOND_GE;
+	case OP_IBGT:
+	case OP_LBGT:
+	case OP_FBGT:
+	case OP_CGT:
+	case OP_ICGT:
+	case OP_LCGT:
+	case OP_FCGT:
+	case OP_RCGT:
+	case OP_COND_EXC_IGT:
+	case OP_COND_EXC_GT:
+		return ARMCOND_GT;
+	case OP_IBLE:
+	case OP_LBLE:
+	case OP_FBLE:
+	case OP_ICLE:
+	case OP_FCLE:
+	case OP_RCLE:
+		return ARMCOND_LE;
+	case OP_IBLT:
+	case OP_LBLT:
+	case OP_FBLT:
+	case OP_CLT:
+	case OP_ICLT:
+	case OP_LCLT:
+	case OP_COND_EXC_ILT:
+	case OP_COND_EXC_LT:
+		return ARMCOND_LT;
+	case OP_IBNE_UN:
+	case OP_LBNE_UN:
+	case OP_FBNE_UN:
+	case OP_ICNEQ:
+	case OP_FCNEQ:
+	case OP_RCNEQ:
+	case OP_COND_EXC_INE_UN:
+	case OP_COND_EXC_NE_UN:
+		return ARMCOND_NE;
+	case OP_IBGE_UN:
+	case OP_LBGE_UN:
+	case OP_FBGE_UN:
+	case OP_ICGE_UN:
+	case OP_COND_EXC_IGE_UN:
+	case OP_COND_EXC_GE_UN:
+		return ARMCOND_HS;
+	case OP_IBGT_UN:
+	case OP_LBGT_UN:
+	case OP_FBGT_UN:
+	case OP_CGT_UN:
+	case OP_ICGT_UN:
+	case OP_LCGT_UN:
+	case OP_FCGT_UN:
+	case OP_RCGT_UN:
+	case OP_COND_EXC_IGT_UN:
+	case OP_COND_EXC_GT_UN:
+		return ARMCOND_HI;
+	case OP_IBLE_UN:
+	case OP_LBLE_UN:
+	case OP_FBLE_UN:
+	case OP_ICLE_UN:
+	case OP_COND_EXC_ILE_UN:
+	case OP_COND_EXC_LE_UN:
+		return ARMCOND_LS;
+	case OP_IBLT_UN:
+	case OP_LBLT_UN:
+	case OP_FBLT_UN:
+	case OP_CLT_UN:
+	case OP_ICLT_UN:
+	case OP_LCLT_UN:
+	case OP_COND_EXC_ILT_UN:
+	case OP_COND_EXC_LT_UN:
+		return ARMCOND_LO;
+		/*
+		 * FCMP sets the NZCV condition bits as follows:
+		 * eq = 0110
+		 * < = 1000
+		 * > = 0010
+		 * unordered = 0011
+		 * ARMCOND_LT is N != V, so it matches unordered too, which is why
+		 * fclt and fclt_un need to be special cased.
+		 */
+	case OP_FCLT:
+	case OP_RCLT:
+		/* N==1 */
+		return ARMCOND_MI;
+	case OP_FCLT_UN:
+	case OP_RCLT_UN:
+		return ARMCOND_LT;
+	case OP_COND_EXC_C:
+	case OP_COND_EXC_IC:
+		return ARMCOND_CS;
+	case OP_COND_EXC_OV:
+	case OP_COND_EXC_IOV:
+		return ARMCOND_VS;
+	case OP_COND_EXC_NC:
+	case OP_COND_EXC_INC:
+		return ARMCOND_CC;
+	case OP_COND_EXC_NO:
+	case OP_COND_EXC_INO:
+		return ARMCOND_VC;
+	default:
+		printf ("%s\n", mono_inst_name (opcode));
+		g_assert_not_reached ();
+		return -1;
+	}
+}
+
+/* This clobbers LR */
+static inline __attribute__((warn_unused_result)) guint8*
+emit_cond_exc (MonoCompile *cfg, guint8 *code, int opcode, const char *exc_name)
+{
+	int cond;
+
+	cond = opcode_to_armcond (opcode);
+	/* Capture PC */
+	arm_adrx (code, ARMREG_IP1, code);
+	mono_add_patch_info_rel (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, exc_name, MONO_R_ARM64_BCC);
+	arm_bcc (code, cond, 0);
+	return code;
+}
+
+static guint8*
+emit_move_return_value (MonoCompile *cfg, guint8 * code, MonoInst *ins)
+{
+	CallInfo *cinfo;
+	MonoCallInst *call;
+
+	call = (MonoCallInst*)ins;
+	cinfo = call->call_info;
+	g_assert (cinfo);
+	switch (cinfo->ret.storage) {
+	case ArgNone:
+		break;
+	case ArgInIReg:
+		/* LLVM compiled code might only set the bottom bits */
+		if (call->signature && mini_get_underlying_type (cfg, call->signature->ret)->type == MONO_TYPE_I4)
+			arm_sxtwx (code, call->inst.dreg, cinfo->ret.reg);
+		else if (call->inst.dreg != cinfo->ret.reg)
+			arm_movx (code, call->inst.dreg, cinfo->ret.reg);
+		break;
+	case ArgInFReg:
+		if (call->inst.dreg != cinfo->ret.reg)
+			arm_fmovd (code, call->inst.dreg, cinfo->ret.reg);
+		break;
+	case ArgInFRegR4:
+		if (cfg->r4fp)
+			arm_fmovs (code, call->inst.dreg, cinfo->ret.reg);
+		else
+			arm_fcvt_sd (code, call->inst.dreg, cinfo->ret.reg);
+		break;
+	case ArgVtypeInIRegs: {
+		MonoInst *loc = cfg->arch.vret_addr_loc;
+		int i;
+
+		/* Load the destination address */
+		g_assert (loc && loc->opcode == OP_REGOFFSET);
+		code = emit_ldrx (code, ARMREG_LR, loc->inst_basereg, loc->inst_offset);
+		for (i = 0; i < cinfo->ret.nregs; ++i)
+			arm_strx (code, cinfo->ret.reg + i, ARMREG_LR, i * 8);
+		break;
+	}
+	case ArgHFA: {
+		MonoInst *loc = cfg->arch.vret_addr_loc;
+		int i;
+
+		/* Load the destination address */
+		g_assert (loc && loc->opcode == OP_REGOFFSET);
+		code = emit_ldrx (code, ARMREG_LR, loc->inst_basereg, loc->inst_offset);
+		for (i = 0; i < cinfo->ret.nregs; ++i) {
+			if (cinfo->ret.esize == 4)
+				arm_strfpw (code, cinfo->ret.reg + i, ARMREG_LR, cinfo->ret.foffsets [i]);
+			else
+				arm_strfpx (code, cinfo->ret.reg + i, ARMREG_LR, cinfo->ret.foffsets [i]);
+		}
+		break;
+	}
+	case ArgVtypeByRef:
+		break;
+	default:
+		g_assert_not_reached ();
+		break;
+	}
+	return code;
+}
+
+/*
+ * emit_branch_island:
+ *
+ *   Emit a branch island for the conditional branches from cfg->native_code + start_offset to code.
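+ *
+ *   Conditional branches (bcc/cbz) only have a +/-1 MB range on arm64, so
+ *   branches out of very large methods are redirected to nearby unconditional
+ *   b instructions, which have a +/-128 MB range.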
+ */
+static guint8*
+emit_branch_island (MonoCompile *cfg, guint8 *code, int start_offset)
+{
+	MonoJumpInfo *ji;
+	int offset, island_size;
+
+	/* Iterate over the patch infos added so far by this bb */
+	island_size = 0;
+	for (ji = cfg->patch_info; ji; ji = ji->next) {
+		if (ji->ip.i < start_offset)
+			/* The patch infos are in reverse order, so this means the end */
+			break;
+		if (ji->relocation == MONO_R_ARM64_BCC || ji->relocation == MONO_R_ARM64_CBZ)
+			island_size += 4;
+	}
+
+	if (island_size) {
+		offset = code - cfg->native_code;
+		if (offset > (cfg->code_size - island_size - 16)) {
+			cfg->code_size *= 2;
+			cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+			code = cfg->native_code + offset;
+		}
+
+		/* Branch over the island */
+		arm_b (code, code + 4 + island_size);
+
+		for (ji = cfg->patch_info; ji; ji = ji->next) {
+			if (ji->ip.i < start_offset)
+				break;
+			if (ji->relocation == MONO_R_ARM64_BCC || ji->relocation == MONO_R_ARM64_CBZ) {
+				/* Rewrite the cond branch so it branches to an unconditional branch in the branch island */
+				arm_patch_rel (cfg->native_code + ji->ip.i, code, ji->relocation);
+				/* Rewrite the patch so it points to the unconditional branch */
+				ji->ip.i = code - cfg->native_code;
+				ji->relocation = MONO_R_ARM64_B;
+				arm_b (code, code);
+			}
+		}
+	}
+	return code;
+}
+
+void
+mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+	MonoInst *ins;
+	MonoCallInst *call;
+	guint offset;
+	guint8 *code = cfg->native_code + cfg->code_len;
+	int start_offset, max_len, dreg, sreg1, sreg2;
+	mgreg_t imm;
+
+	if (cfg->verbose_level > 2)
+		g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
+
+	start_offset = code - cfg->native_code;
+
+	MONO_BB_FOR_EACH_INS (bb, ins) {
+		offset = code - cfg->native_code;
+
+		max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
+
+		if (offset > (cfg->code_size - max_len - 16)) {
+			cfg->code_size *= 2;
+			cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+			code = cfg->native_code + offset;
+		}
+
+		if (G_UNLIKELY (cfg->arch.cond_branch_islands && offset - start_offset > 4 * 0x1ffff)) {
+			/* Emit a branch island for large basic blocks */
+			code = emit_branch_island (cfg, code, start_offset);
+			offset = code - cfg->native_code;
+			start_offset = offset;
+		}
+
+		mono_debug_record_line_number (cfg, ins, offset);
+
+		dreg = ins->dreg;
+		sreg1 = ins->sreg1;
+		sreg2 = ins->sreg2;
+		imm = ins->inst_imm;
+
+		switch (ins->opcode) {
+		case OP_ICONST:
+			code = emit_imm (code, dreg, ins->inst_c0);
+			break;
+		case OP_I8CONST:
+			code = emit_imm64 (code, dreg, ins->inst_c0);
+			break;
+		case OP_MOVE:
+			if (dreg != sreg1)
+				arm_movx (code, dreg, sreg1);
+			break;
+		case OP_NOP:
+		case OP_RELAXED_NOP:
+			break;
+		case OP_JUMP_TABLE:
+			mono_add_patch_info_rel (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0, MONO_R_ARM64_IMM);
+			code = emit_imm64_template (code, dreg);
+			break;
+		case OP_BREAK:
+			/*
+			 * gdb does not like encountering the hw breakpoint ins in the debugged code.
+			 * So instead of emitting a trap, we emit a call to a C function and place a
+			 * breakpoint there.
+			 */
+			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_break");
+			break;
+		case OP_LOCALLOC: {
+			guint8 *buf [16];
+
+			arm_addx_imm (code, ARMREG_IP0, sreg1, (MONO_ARCH_FRAME_ALIGNMENT - 1));
+			// FIXME: andx_imm doesn't work yet
+			code = emit_imm (code, ARMREG_IP1, -MONO_ARCH_FRAME_ALIGNMENT);
+			arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1);
+			//arm_andx_imm (code, ARMREG_IP0, sreg1, - MONO_ARCH_FRAME_ALIGNMENT);
+			arm_movspx (code, ARMREG_IP1, ARMREG_SP);
+			arm_subx (code, ARMREG_IP1, ARMREG_IP1, ARMREG_IP0);
+			arm_movspx (code, ARMREG_SP, ARMREG_IP1);
+
+			/* Init */
+			/* ip1 = pointer, ip0 = end */
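+			/* Zero the block 16 bytes at a time by storing the zero register pair */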
+			arm_addx (code, ARMREG_IP0, ARMREG_IP1, ARMREG_IP0);
+			buf [0] = code;
+			arm_cmpx (code, ARMREG_IP1, ARMREG_IP0);
+			buf [1] = code;
+			arm_bcc (code, ARMCOND_EQ, 0);
+			arm_stpx (code, ARMREG_RZR, ARMREG_RZR, ARMREG_IP1, 0);
+			arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 16);
+			arm_b (code, buf [0]);
+			arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC);
+
+			arm_movspx (code, dreg, ARMREG_SP);
+			if (cfg->param_area)
+				code = emit_subx_sp_imm (code, cfg->param_area);
+			break;
+		}
+		case OP_LOCALLOC_IMM: {
+			int imm, offset;
+
+			imm = ALIGN_TO (ins->inst_imm, MONO_ARCH_FRAME_ALIGNMENT);
+			g_assert (arm_is_arith_imm (imm));
+			arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm);
+
+			/* Init */
+			g_assert (MONO_ARCH_FRAME_ALIGNMENT == 16);
+			offset = 0;
+			while (offset < imm) {
+				arm_stpx (code, ARMREG_RZR, ARMREG_RZR, ARMREG_SP, offset);
+				offset += 16;
+			}
+			arm_movspx (code, dreg, ARMREG_SP);
+			if (cfg->param_area)
+				code = emit_subx_sp_imm (code, cfg->param_area);
+			break;
+		}
+		case OP_AOTCONST:
+			code = emit_aotconst (cfg, code, dreg, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
+			break;
+		case OP_OBJC_GET_SELECTOR:
+			mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_OBJC_SELECTOR_REF, ins->inst_p0);
+			/* See arch_emit_objc_selector_ref () in aot-compiler.c */
+			arm_ldrx_lit (code, ins->dreg, 0);
+			arm_nop (code);
+			arm_nop (code);
+			break;
+		case OP_SEQ_POINT: {
+			MonoInst *info_var = cfg->arch.seq_point_info_var;
+
+			/*
+			 * For AOT, we use one got slot per method, which will point to a
+			 * SeqPointInfo structure, containing all the information required
+			 * by the code below.
+			 */
+			if (cfg->compile_aot) {
+				g_assert (info_var);
+				g_assert (info_var->opcode == OP_REGOFFSET);
+			}
+
+			if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
+				MonoInst *var = cfg->arch.ss_tramp_var;
+
+				g_assert (var);
+				g_assert (var->opcode == OP_REGOFFSET);
+				/* Load ss_tramp_var */
+				/* This is equal to &ss_trampoline */
+				arm_ldrx (code, ARMREG_IP1, var->inst_basereg, var->inst_offset);
+				/* Load the trampoline address */
+				arm_ldrx (code, ARMREG_IP1, ARMREG_IP1, 0);
+				/* Call it if it is non-null */
+				arm_cbzx (code, ARMREG_IP1, code + 8);
+				arm_blrx (code, ARMREG_IP1);
+			}
+
+			mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
+
+			if (cfg->compile_aot) {
+				guint32 offset = code - cfg->native_code;
+				guint32 val;
+
+				arm_ldrx (code, ARMREG_IP1, info_var->inst_basereg, info_var->inst_offset);
+				/* Add the offset */
+				val = ((offset / 4) * sizeof (guint8*)) + MONO_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
+				/* Load the info->bp_addrs [offset], which is either 0 or the address of the bp trampoline */
+				code = emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, val);
+				/* Skip the call if it is 0 */
+				arm_cbzx (code, ARMREG_IP1, code + 8);
+				/* Call the breakpoint trampoline */
+				arm_blrx (code, ARMREG_IP1);
+			} else {
+				MonoInst *var = cfg->arch.bp_tramp_var;
+
+				g_assert (var);
+				g_assert (var->opcode == OP_REGOFFSET);
+				/* Load the address of the bp trampoline into IP0 */
+				arm_ldrx (code, ARMREG_IP0, var->inst_basereg, var->inst_offset);
+				/* 
+				 * A placeholder for a possible breakpoint inserted by
+				 * mono_arch_set_breakpoint ().
+				 */
+				arm_nop (code);
+			}
+			break;
+		}
+
+			/* BRANCH */
+		case OP_BR:
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_B);
+			arm_b (code, code);
+			break;
+		case OP_BR_REG:
+			arm_brx (code, sreg1);
+			break;
+		case OP_IBEQ:
+		case OP_IBGE:
+		case OP_IBGT:
+		case OP_IBLE:
+		case OP_IBLT:
+		case OP_IBNE_UN:
+		case OP_IBGE_UN:
+		case OP_IBGT_UN:
+		case OP_IBLE_UN:
+		case OP_IBLT_UN:
+		case OP_LBEQ:
+		case OP_LBGE:
+		case OP_LBGT:
+		case OP_LBLE:
+		case OP_LBLT:
+		case OP_LBNE_UN:
+		case OP_LBGE_UN:
+		case OP_LBGT_UN:
+		case OP_LBLE_UN:
+		case OP_LBLT_UN:
+		case OP_FBEQ:
+		case OP_FBNE_UN:
+		case OP_FBLT:
+		case OP_FBGT:
+		case OP_FBGT_UN:
+		case OP_FBLE:
+		case OP_FBGE:
+		case OP_FBGE_UN: {
+			int cond;
+
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+			cond = opcode_to_armcond (ins->opcode);
+			arm_bcc (code, cond, 0);
+			break;
+		}
+		case OP_FBLT_UN:
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+			/* For fp compares, ARMCOND_LT is lt or unordered */
+			arm_bcc (code, ARMCOND_LT, 0);
+			break;
+		case OP_FBLE_UN:
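+			/* "le or unordered" is eq || (lt or unordered), so two branches are emitted */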
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+			arm_bcc (code, ARMCOND_EQ, 0);
+			offset = code - cfg->native_code;
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_BCC);
+			/* For fp compares, ARMCOND_LT is lt or unordered */
+			arm_bcc (code, ARMCOND_LT, 0);
+			break;
+		case OP_ARM64_CBZW:
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
+			arm_cbzw (code, sreg1, 0);
+			break;
+		case OP_ARM64_CBZX:
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
+			arm_cbzx (code, sreg1, 0);
+			break;
+		case OP_ARM64_CBNZW:
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
+			arm_cbnzw (code, sreg1, 0);
+			break;
+		case OP_ARM64_CBNZX:
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
+			arm_cbnzx (code, sreg1, 0);
+			break;
+			/* ALU */
+		case OP_IADD:
+			arm_addw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LADD:
+			arm_addx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_ISUB:
+			arm_subw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LSUB:
+			arm_subx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_IAND:
+			arm_andw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LAND:
+			arm_andx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_IOR:
+			arm_orrw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LOR:
+			arm_orrx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_IXOR:
+			arm_eorw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LXOR:
+			arm_eorx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_INEG:
+			arm_negw (code, dreg, sreg1);
+			break;
+		case OP_LNEG:
+			arm_negx (code, dreg, sreg1);
+			break;
+		case OP_INOT:
+			arm_mvnw (code, dreg, sreg1);
+			break;
+		case OP_LNOT:
+			arm_mvnx (code, dreg, sreg1);
+			break;
+		case OP_IADDCC:
+			arm_addsw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_ADDCC:
+		case OP_LADDCC:
+			arm_addsx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_ISUBCC:
+			arm_subsw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LSUBCC:
+		case OP_SUBCC:
+			arm_subsx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_ICOMPARE:
+			arm_cmpw (code, sreg1, sreg2);
+			break;
+		case OP_COMPARE:
+		case OP_LCOMPARE:
+			arm_cmpx (code, sreg1, sreg2);
+			break;
+		case OP_IADD_IMM:
+			code = emit_addw_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_LADD_IMM:
+		case OP_ADD_IMM:
+			code = emit_addx_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_ISUB_IMM:
+			code = emit_subw_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_LSUB_IMM:
+			code = emit_subx_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_IAND_IMM:
+			code = emit_andw_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_LAND_IMM:
+		case OP_AND_IMM:
+			code = emit_andx_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_IOR_IMM:
+			code = emit_orrw_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_LOR_IMM:
+			code = emit_orrx_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_IXOR_IMM:
+			code = emit_eorw_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_LXOR_IMM:
+			code = emit_eorx_imm (code, dreg, sreg1, imm);
+			break;
+		case OP_ICOMPARE_IMM:
+			code = emit_cmpw_imm (code, sreg1, imm);
+			break;
+		case OP_LCOMPARE_IMM:
+		case OP_COMPARE_IMM:
+			if (imm == 0) {
+				arm_cmpx (code, sreg1, ARMREG_RZR);
+			} else {
+				// FIXME: 32 vs 64 bit issues for 0xffffffff
+				code = emit_imm64 (code, ARMREG_LR, imm);
+				arm_cmpx (code, sreg1, ARMREG_LR);
+			}
+			break;
+		case OP_ISHL:
+			arm_lslvw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LSHL:
+			arm_lslvx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_ISHR:
+			arm_asrvw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LSHR:
+			arm_asrvx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_ISHR_UN:
+			arm_lsrvw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LSHR_UN:
+			arm_lsrvx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_ISHL_IMM:
+			if (imm == 0)
+				arm_movx (code, dreg, sreg1);
+			else
+				arm_lslw (code, dreg, sreg1, imm);
+			break;
+		case OP_LSHL_IMM:
+			if (imm == 0)
+				arm_movx (code, dreg, sreg1);
+			else
+				arm_lslx (code, dreg, sreg1, imm);
+			break;
+		case OP_ISHR_IMM:
+			if (imm == 0)
+				arm_movx (code, dreg, sreg1);
+			else
+				arm_asrw (code, dreg, sreg1, imm);
+			break;
+		case OP_LSHR_IMM:
+		case OP_SHR_IMM:
+			if (imm == 0)
+				arm_movx (code, dreg, sreg1);
+			else
+				arm_asrx (code, dreg, sreg1, imm);
+			break;
+		case OP_ISHR_UN_IMM:
+			if (imm == 0)
+				arm_movx (code, dreg, sreg1);
+			else
+				arm_lsrw (code, dreg, sreg1, imm);
+			break;
+		case OP_SHR_UN_IMM:
+		case OP_LSHR_UN_IMM:
+			if (imm == 0)
+				arm_movx (code, dreg, sreg1);
+			else
+				arm_lsrx (code, dreg, sreg1, imm);
+			break;
+
+			/* 64BIT ALU */
+		case OP_SEXT_I4:
+			arm_sxtwx (code, dreg, sreg1);
+			break;
+		case OP_ZEXT_I4:
+			/* Clean out the upper word */
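+			/* A 32 bit register move implicitly zeroes the upper 32 bits of the destination */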
+			arm_movw (code, dreg, sreg1);
+			break;
+		case OP_SHL_IMM:
+			arm_lslx (code, dreg, sreg1, imm);
+			break;
+
+			/* MULTIPLY/DIVISION */
+		case OP_IDIV:
+		case OP_IREM:
+			// FIXME: Optimize this
+			/* Check for zero */
+			arm_cmpx_imm (code, sreg2, 0);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException");
+			/* Check for INT_MIN/-1 */
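+			/* cset both conditions and AND them: ip0 is 1 iff sreg1 == INT_MIN && sreg2 == -1 */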
+			code = emit_imm (code, ARMREG_IP0, 0x80000000);
+			arm_cmpx (code, sreg1, ARMREG_IP0);
+			arm_cset (code, ARMCOND_EQ, ARMREG_IP1);
+			code = emit_imm (code, ARMREG_IP0, 0xffffffff);
+			arm_cmpx (code, sreg2, ARMREG_IP0);
+			arm_cset (code, ARMCOND_EQ, ARMREG_IP0);
+			arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1);
+			arm_cmpx_imm (code, ARMREG_IP0, 1);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "OverflowException");
+			if (ins->opcode == OP_IREM) {
+				arm_sdivw (code, ARMREG_LR, sreg1, sreg2);
+				arm_msubw (code, dreg, ARMREG_LR, sreg2, sreg1);
+			} else {
+				arm_sdivw (code, dreg, sreg1, sreg2);
+			}
+			break;
+		case OP_IDIV_UN:
+			arm_cmpx_imm (code, sreg2, 0);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException");
+			arm_udivw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_IREM_UN:
+			arm_cmpx_imm (code, sreg2, 0);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException");
+			arm_udivw (code, ARMREG_LR, sreg1, sreg2);
+			arm_msubw (code, dreg, ARMREG_LR, sreg2, sreg1);
+			break;
+		case OP_LDIV:
+		case OP_LREM:
+			// FIXME: Optimize this
+			/* Check for zero */
+			arm_cmpx_imm (code, sreg2, 0);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException");
+			/* Check for INT64_MIN/-1 */
+			code = emit_imm64 (code, ARMREG_IP0, 0x8000000000000000);
+			arm_cmpx (code, sreg1, ARMREG_IP0);
+			arm_cset (code, ARMCOND_EQ, ARMREG_IP1);
+			code = emit_imm64 (code, ARMREG_IP0, 0xffffffffffffffff);
+			arm_cmpx (code, sreg2, ARMREG_IP0);
+			arm_cset (code, ARMCOND_EQ, ARMREG_IP0);
+			arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1);
+			arm_cmpx_imm (code, ARMREG_IP0, 1);
+			/* 64 bit uses ArithmeticException */
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "ArithmeticException");
+			if (ins->opcode == OP_LREM) {
+				arm_sdivx (code, ARMREG_LR, sreg1, sreg2);
+				arm_msubx (code, dreg, ARMREG_LR, sreg2, sreg1);
+			} else {
+				arm_sdivx (code, dreg, sreg1, sreg2);
+			}
+			break;
+		case OP_LDIV_UN:
+			arm_cmpx_imm (code, sreg2, 0);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException");
+			arm_udivx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LREM_UN:
+			arm_cmpx_imm (code, sreg2, 0);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_IEQ, "DivideByZeroException");
+			arm_udivx (code, ARMREG_LR, sreg1, sreg2);
+			arm_msubx (code, dreg, ARMREG_LR, sreg2, sreg1);
+			break;
+		case OP_IMUL:
+			arm_mulw (code, dreg, sreg1, sreg2);
+			break;
+		case OP_LMUL:
+			arm_mulx (code, dreg, sreg1, sreg2);
+			break;
+		case OP_IMUL_IMM:
+			code = emit_imm (code, ARMREG_LR, imm);
+			arm_mulw (code, dreg, sreg1, ARMREG_LR);
+			break;
+		case OP_MUL_IMM:
+		case OP_LMUL_IMM:
+			code = emit_imm (code, ARMREG_LR, imm);
+			arm_mulx (code, dreg, sreg1, ARMREG_LR);
+			break;
+
+			/* CONVERSIONS */
+		case OP_ICONV_TO_I1:
+		case OP_LCONV_TO_I1:
+			arm_sxtbx (code, dreg, sreg1);
+			break;
+		case OP_ICONV_TO_I2:
+		case OP_LCONV_TO_I2:
+			arm_sxthx (code, dreg, sreg1);
+			break;
+		case OP_ICONV_TO_U1:
+		case OP_LCONV_TO_U1:
+			arm_uxtbw (code, dreg, sreg1);
+			break;
+		case OP_ICONV_TO_U2:
+		case OP_LCONV_TO_U2:
+			arm_uxthw (code, dreg, sreg1);
+			break;
+
+			/* CSET */
+		case OP_CEQ:
+		case OP_ICEQ:
+		case OP_LCEQ:
+		case OP_CLT:
+		case OP_ICLT:
+		case OP_LCLT:
+		case OP_CGT:
+		case OP_ICGT:
+		case OP_LCGT:
+		case OP_CLT_UN:
+		case OP_ICLT_UN:
+		case OP_LCLT_UN:
+		case OP_CGT_UN:
+		case OP_ICGT_UN:
+		case OP_LCGT_UN:
+		case OP_ICNEQ:
+		case OP_ICGE:
+		case OP_ICLE:
+		case OP_ICGE_UN:
+		case OP_ICLE_UN: {
+			int cond;
+
+			cond = opcode_to_armcond (ins->opcode);
+			arm_cset (code, cond, dreg);
+			break;
+		}
+		case OP_FCEQ:
+		case OP_FCLT:
+		case OP_FCLT_UN:
+		case OP_FCGT:
+		case OP_FCGT_UN:
+		case OP_FCNEQ:
+		case OP_FCLE:
+		case OP_FCGE: {
+			int cond;
+
+			cond = opcode_to_armcond (ins->opcode);
+			arm_fcmpd (code, sreg1, sreg2);
+			arm_cset (code, cond, dreg);
+			break;
+		}
+
+			/* MEMORY */
+		case OP_LOADI1_MEMBASE:
+			code = emit_ldrsbx (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_LOADU1_MEMBASE:
+			code = emit_ldrb (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_LOADI2_MEMBASE:
+			code = emit_ldrshx (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_LOADU2_MEMBASE:
+			code = emit_ldrh (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_LOADI4_MEMBASE:
+			code = emit_ldrswx (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_LOADU4_MEMBASE:
+			code = emit_ldrw (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_LOAD_MEMBASE:
+		case OP_LOADI8_MEMBASE:
+			code = emit_ldrx (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_STOREI1_MEMBASE_IMM:
+		case OP_STOREI2_MEMBASE_IMM:
+		case OP_STOREI4_MEMBASE_IMM:
+		case OP_STORE_MEMBASE_IMM:
+		case OP_STOREI8_MEMBASE_IMM: {
+			int immreg;
+
+			if (imm != 0) {
+				code = emit_imm (code, ARMREG_LR, imm);
+				immreg = ARMREG_LR;
+			} else {
+				immreg = ARMREG_RZR;
+			}
+
+			switch (ins->opcode) {
+			case OP_STOREI1_MEMBASE_IMM:
+				code = emit_strb (code, immreg, ins->inst_destbasereg, ins->inst_offset);
+				break;
+			case OP_STOREI2_MEMBASE_IMM:
+				code = emit_strh (code, immreg, ins->inst_destbasereg, ins->inst_offset);
+				break;
+			case OP_STOREI4_MEMBASE_IMM:
+				code = emit_strw (code, immreg, ins->inst_destbasereg, ins->inst_offset);
+				break;
+			case OP_STORE_MEMBASE_IMM:
+			case OP_STOREI8_MEMBASE_IMM:
+				code = emit_strx (code, immreg, ins->inst_destbasereg, ins->inst_offset);
+				break;
+			default:
+				g_assert_not_reached ();
+				break;
+			}
+			break;
+		}
+		case OP_STOREI1_MEMBASE_REG:
+			code = emit_strb (code, sreg1, ins->inst_destbasereg, ins->inst_offset);
+			break;
+		case OP_STOREI2_MEMBASE_REG:
+			code = emit_strh (code, sreg1, ins->inst_destbasereg, ins->inst_offset);
+			break;
+		case OP_STOREI4_MEMBASE_REG:
+			code = emit_strw (code, sreg1, ins->inst_destbasereg, ins->inst_offset);
+			break;
+		case OP_STORE_MEMBASE_REG:
+		case OP_STOREI8_MEMBASE_REG:
+			code = emit_strx (code, sreg1, ins->inst_destbasereg, ins->inst_offset);
+			break;
+
+		case OP_TLS_GET:
+			code = emit_tls_get (code, dreg, ins->inst_offset);
+			break;
+		case OP_TLS_GET_REG:
+			code = emit_tls_get_reg (code, dreg, sreg1);
+			break;
+		case OP_TLS_SET:
+			code = emit_tls_set (code, sreg1, ins->inst_offset);
+			break;
+		case OP_TLS_SET_REG:
+			code = emit_tls_set_reg (code, sreg1, sreg2);
+			break;
+
+			/* Atomic */
+		case OP_MEMORY_BARRIER:
+			arm_dmb (code, 0);
+			break;
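+		/*
+		 * The atomic RMW opcodes below all use the standard AArch64
+		 * exclusive-monitor retry loop; for OP_ATOMIC_ADD_I4 the emitted
+		 * sequence is roughly (ip0 = x16, ip1 = x17):
+		 *
+		 *   loop: ldaxr w16, [Xn]       ; load-acquire exclusive
+		 *         add   x16, x16, Xm
+		 *         stlxr w17, w16, [Xn]  ; w17 == 0 iff the store succeeded
+		 *         cbnz  w17, loop       ; retry if another core intervened
+		 */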
+		case OP_ATOMIC_ADD_I4: {
+			guint8 *buf [16];
+
+			buf [0] = code;
+			arm_ldaxrw (code, ARMREG_IP0, sreg1);
+			arm_addx (code, ARMREG_IP0, ARMREG_IP0, sreg2);
+			arm_stlxrw (code, ARMREG_IP1, ARMREG_IP0, sreg1);
+			arm_cbnzw (code, ARMREG_IP1, buf [0]);
+
+			arm_movx (code, dreg, ARMREG_IP0);
+			break;
+		}
+		case OP_ATOMIC_ADD_I8: {
+			guint8 *buf [16];
+
+			buf [0] = code;
+			arm_ldaxrx (code, ARMREG_IP0, sreg1);
+			arm_addx (code, ARMREG_IP0, ARMREG_IP0, sreg2);
+			arm_stlxrx (code, ARMREG_IP1, ARMREG_IP0, sreg1);
+			arm_cbnzx (code, ARMREG_IP1, buf [0]);
+
+			arm_movx (code, dreg, ARMREG_IP0);
+			break;
+		}
+		case OP_ATOMIC_EXCHANGE_I4: {
+			guint8 *buf [16];
+
+			buf [0] = code;
+			arm_ldaxrw (code, ARMREG_IP0, sreg1);
+			arm_stlxrw (code, ARMREG_IP1, sreg2, sreg1);
+			arm_cbnzw (code, ARMREG_IP1, buf [0]);
+
+			arm_movx (code, dreg, ARMREG_IP0);
+			break;
+		}
+		case OP_ATOMIC_EXCHANGE_I8: {
+			guint8 *buf [16];
+
+			buf [0] = code;
+			arm_ldaxrx (code, ARMREG_IP0, sreg1);
+			arm_stlxrx (code, ARMREG_IP1, sreg2, sreg1);
+			arm_cbnzw (code, ARMREG_IP1, buf [0]);
+
+			arm_movx (code, dreg, ARMREG_IP0);
+			break;
+		}
+		case OP_ATOMIC_CAS_I4: {
+			guint8 *buf [16];
+
+			/* sreg2 is the value, sreg3 is the comparand */
+			buf [0] = code;
+			arm_ldaxrw (code, ARMREG_IP0, sreg1);
+			arm_cmpw (code, ARMREG_IP0, ins->sreg3);
+			buf [1] = code;
+			arm_bcc (code, ARMCOND_NE, 0);
+			arm_stlxrw (code, ARMREG_IP1, sreg2, sreg1);
+			arm_cbnzw (code, ARMREG_IP1, buf [0]);
+			arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC);
+
+			arm_movx (code, dreg, ARMREG_IP0);
+			break;
+		}
+		case OP_ATOMIC_CAS_I8: {
+			guint8 *buf [16];
+
+			buf [0] = code;
+			arm_ldaxrx (code, ARMREG_IP0, sreg1);
+			arm_cmpx (code, ARMREG_IP0, ins->sreg3);
+			buf [1] = code;
+			arm_bcc (code, ARMCOND_NE, 0);
+			arm_stlxrx (code, ARMREG_IP1, sreg2, sreg1);
+			arm_cbnzw (code, ARMREG_IP1, buf [0]);
+			arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC);
+
+			arm_movx (code, dreg, ARMREG_IP0);
+			break;
+		}
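+		/*
+		 * In the CAS cases above the forward bcc skips the store when the loaded
+		 * value differs from the comparand (sreg3), so a failed compare stores
+		 * nothing; either way dreg receives the value that was witnessed in
+		 * memory, which is what compare-exchange must return.
+		 */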
+		case OP_ATOMIC_LOAD_I1: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarb (code, ins->dreg, ARMREG_LR);
+			arm_sxtbx (code, ins->dreg, ins->dreg);
+			break;
+		}
+		case OP_ATOMIC_LOAD_U1: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarb (code, ins->dreg, ARMREG_LR);
+			arm_uxtbx (code, ins->dreg, ins->dreg);
+			break;
+		}
+		case OP_ATOMIC_LOAD_I2: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarh (code, ins->dreg, ARMREG_LR);
+			arm_sxthx (code, ins->dreg, ins->dreg);
+			break;
+		}
+		case OP_ATOMIC_LOAD_U2: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarh (code, ins->dreg, ARMREG_LR);
+			arm_uxthx (code, ins->dreg, ins->dreg);
+			break;
+		}
+		case OP_ATOMIC_LOAD_I4: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarw (code, ins->dreg, ARMREG_LR);
+			arm_sxtwx (code, ins->dreg, ins->dreg);
+			break;
+		}
+		case OP_ATOMIC_LOAD_U4: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarw (code, ins->dreg, ARMREG_LR);
+			arm_movw (code, ins->dreg, ins->dreg); /* Clear upper half of the register. */
+			break;
+		}
+		case OP_ATOMIC_LOAD_I8:
+		case OP_ATOMIC_LOAD_U8: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarx (code, ins->dreg, ARMREG_LR);
+			break;
+		}
+		case OP_ATOMIC_LOAD_R4: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			if (cfg->r4fp) {
+				arm_ldarw (code, ARMREG_LR, ARMREG_LR);
+				arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR);
+			} else {
+				arm_ldarw (code, ARMREG_LR, ARMREG_LR);
+				arm_fmov_rx_to_double (code, FP_TEMP_REG, ARMREG_LR);
+				arm_fcvt_sd (code, ins->dreg, FP_TEMP_REG);
+			}
+			break;
+		}
+		case OP_ATOMIC_LOAD_R8: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+			arm_ldarx (code, ARMREG_LR, ARMREG_LR);
+			arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR);
+			break;
+		}
+		case OP_ATOMIC_STORE_I1:
+		case OP_ATOMIC_STORE_U1: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
+			arm_stlrb (code, ARMREG_LR, ins->sreg1);
+			break;
+		}
+		case OP_ATOMIC_STORE_I2:
+		case OP_ATOMIC_STORE_U2: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
+			arm_stlrh (code, ARMREG_LR, ins->sreg1);
+			break;
+		}
+		case OP_ATOMIC_STORE_I4:
+		case OP_ATOMIC_STORE_U4: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
+			arm_stlrw (code, ARMREG_LR, ins->sreg1);
+			break;
+		}
+		case OP_ATOMIC_STORE_I8:
+		case OP_ATOMIC_STORE_U8: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
+			arm_stlrx (code, ARMREG_LR, ins->sreg1);
+			break;
+		}
+		case OP_ATOMIC_STORE_R4: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
+			if (cfg->r4fp) {
+				arm_fmov_double_to_rx (code, ARMREG_IP0, ins->sreg1);
+				arm_stlrw (code, ARMREG_LR, ARMREG_IP0);
+			} else {
+				arm_fcvt_ds (code, FP_TEMP_REG, ins->sreg1);
+				arm_fmov_double_to_rx (code, ARMREG_IP0, FP_TEMP_REG);
+				arm_stlrw (code, ARMREG_LR, ARMREG_IP0);
+			}
+			break;
+		}
+		case OP_ATOMIC_STORE_R8: {
+			code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
+			arm_fmov_double_to_rx (code, ARMREG_IP0, ins->sreg1);
+			arm_stlrx (code, ARMREG_LR, ARMREG_IP0);
+			break;
+		}
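+		/*
+		 * The atomic load/store cases above materialize basereg + offset into
+		 * lr first because ldar/stlr only accept a plain base register, with
+		 * no immediate offset.
+		 */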
+
+			/* FP */
+		case OP_R8CONST: {
+			guint64 imm = *(guint64*)ins->inst_p0;
+
+			if (imm == 0) {
+				arm_fmov_rx_to_double (code, dreg, ARMREG_RZR);
+			} else {
+				code = emit_imm64 (code, ARMREG_LR, imm);
+				arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR);
+			}
+			break;
+		}
+		case OP_R4CONST: {
+			guint64 imm = *(guint32*)ins->inst_p0;
+
+			code = emit_imm64 (code, ARMREG_LR, imm);
+			if (cfg->r4fp) {
+				arm_fmov_rx_to_double (code, dreg, ARMREG_LR);
+			} else {
+				arm_fmov_rx_to_double (code, FP_TEMP_REG, ARMREG_LR);
+				arm_fcvt_sd (code, dreg, FP_TEMP_REG);
+			}
+			break;
+		}
+		case OP_LOADR8_MEMBASE:
+			code = emit_ldrfpx (code, dreg, ins->inst_basereg, ins->inst_offset);
+			break;
+		case OP_LOADR4_MEMBASE:
+			if (cfg->r4fp) {
+				code = emit_ldrfpw (code, dreg, ins->inst_basereg, ins->inst_offset);
+			} else {
+				code = emit_ldrfpw (code, FP_TEMP_REG, ins->inst_basereg, ins->inst_offset);
+				arm_fcvt_sd (code, dreg, FP_TEMP_REG);
+			}
+			break;
+		case OP_STORER8_MEMBASE_REG:
+			code = emit_strfpx (code, sreg1, ins->inst_destbasereg, ins->inst_offset);
+			break;
+		case OP_STORER4_MEMBASE_REG:
+			if (cfg->r4fp) {
+				code = emit_strfpw (code, sreg1, ins->inst_destbasereg, ins->inst_offset);
+			} else {
+				arm_fcvt_ds (code, FP_TEMP_REG, sreg1);
+				code = emit_strfpw (code, FP_TEMP_REG, ins->inst_destbasereg, ins->inst_offset);
+			}
+			break;
+		case OP_FMOVE:
+			if (dreg != sreg1)
+				arm_fmovd (code, dreg, sreg1);
+			break;
+		case OP_RMOVE:
+			if (dreg != sreg1)
+				arm_fmovs (code, dreg, sreg1);
+			break;
+		case OP_MOVE_F_TO_I4:
+			if (cfg->r4fp) {
+				arm_fmov_double_to_rx (code, ins->dreg, ins->sreg1);
+			} else {
+				arm_fcvt_ds (code, ins->dreg, ins->sreg1);
+				arm_fmov_double_to_rx (code, ins->dreg, ins->dreg);
+			}
+			break;
+		case OP_MOVE_I4_TO_F:
+			if (cfg->r4fp) {
+				arm_fmov_rx_to_double (code, ins->dreg, ins->sreg1);
+			} else {
+				arm_fmov_rx_to_double (code, ins->dreg, ins->sreg1);
+				arm_fcvt_sd (code, ins->dreg, ins->dreg);
+			}
+			break;
+		case OP_MOVE_F_TO_I8:
+			arm_fmov_double_to_rx (code, ins->dreg, ins->sreg1);
+			break;
+		case OP_MOVE_I8_TO_F:
+			arm_fmov_rx_to_double (code, ins->dreg, ins->sreg1);
+			break;
+		case OP_FCOMPARE:
+			arm_fcmpd (code, sreg1, sreg2);
+			break;
+		case OP_RCOMPARE:
+			arm_fcmps (code, sreg1, sreg2);
+			break;
+		case OP_FCONV_TO_I1:
+			arm_fcvtzs_dx (code, dreg, sreg1);
+			arm_sxtbx (code, dreg, dreg);
+			break;
+		case OP_FCONV_TO_U1:
+			arm_fcvtzu_dx (code, dreg, sreg1);
+			arm_uxtbw (code, dreg, dreg);
+			break;
+		case OP_FCONV_TO_I2:
+			arm_fcvtzs_dx (code, dreg, sreg1);
+			arm_sxthx (code, dreg, dreg);
+			break;
+		case OP_FCONV_TO_U2:
+			arm_fcvtzu_dx (code, dreg, sreg1);
+			arm_uxthw (code, dreg, dreg);
+			break;
+		case OP_FCONV_TO_I4:
+			arm_fcvtzs_dx (code, dreg, sreg1);
+			arm_sxtwx (code, dreg, dreg);
+			break;
+		case OP_FCONV_TO_U4:
+			arm_fcvtzu_dx (code, dreg, sreg1);
+			break;
+		case OP_FCONV_TO_I8:
+			arm_fcvtzs_dx (code, dreg, sreg1);
+			break;
+		case OP_FCONV_TO_U8:
+			arm_fcvtzu_dx (code, dreg, sreg1);
+			break;
+		case OP_FCONV_TO_R4:
+			if (cfg->r4fp) {
+				arm_fcvt_ds (code, dreg, sreg1);
+			} else {
+				arm_fcvt_ds (code, FP_TEMP_REG, sreg1);
+				arm_fcvt_sd (code, dreg, FP_TEMP_REG);
+			}
+			break;
+		case OP_ICONV_TO_R4:
+			if (cfg->r4fp) {
+				arm_scvtf_rw_to_s (code, dreg, sreg1);
+			} else {
+				arm_scvtf_rw_to_s (code, FP_TEMP_REG, sreg1);
+				arm_fcvt_sd (code, dreg, FP_TEMP_REG);
+			}
+			break;
+		case OP_LCONV_TO_R4:
+			if (cfg->r4fp) {
+				arm_scvtf_rx_to_s (code, dreg, sreg1);
+			} else {
+				arm_scvtf_rx_to_s (code, FP_TEMP_REG, sreg1);
+				arm_fcvt_sd (code, dreg, FP_TEMP_REG);
+			}
+			break;
+		case OP_ICONV_TO_R8:
+			arm_scvtf_rw_to_d (code, dreg, sreg1);
+			break;
+		case OP_LCONV_TO_R8:
+			arm_scvtf_rx_to_d (code, dreg, sreg1);
+			break;
+		case OP_ICONV_TO_R_UN:
+			arm_ucvtf_rw_to_d (code, dreg, sreg1);
+			break;
+		case OP_LCONV_TO_R_UN:
+			arm_ucvtf_rx_to_d (code, dreg, sreg1);
+			break;
+		case OP_FADD:
+			arm_fadd_d (code, dreg, sreg1, sreg2);
+			break;
+		case OP_FSUB:
+			arm_fsub_d (code, dreg, sreg1, sreg2);
+			break;
+		case OP_FMUL:
+			arm_fmul_d (code, dreg, sreg1, sreg2);
+			break;
+		case OP_FDIV:
+			arm_fdiv_d (code, dreg, sreg1, sreg2);
+			break;
+		case OP_FREM:
+			/* Emulated */
+			g_assert_not_reached ();
+			break;
+		case OP_FNEG:
+			arm_fneg_d (code, dreg, sreg1);
+			break;
+		case OP_ARM_SETFREG_R4:
+			arm_fcvt_ds (code, dreg, sreg1);
+			break;
+		case OP_CKFINITE:
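+			/*
+			 * 0x7fefffffffffffff is the bit pattern of the largest finite double
+			 * (DBL_MAX): |x| compares greater than it only for +/-infinity, and
+			 * the unordered self-compare catches NaNs (fcmp sets the V flag on
+			 * an unordered result, hence the OP_COND_EXC_OV/"vs" condition).
+			 */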
+			/* Check for infinity */
+			code = emit_imm64 (code, ARMREG_LR, 0x7fefffffffffffffLL);
+			arm_fmov_rx_to_double (code, FP_TEMP_REG, ARMREG_LR);
+			arm_fabs_d (code, FP_TEMP_REG2, sreg1);
+			arm_fcmpd (code, FP_TEMP_REG2, FP_TEMP_REG);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_GT, "ArithmeticException");
+			/* Check for NaNs */
+			arm_fcmpd (code, FP_TEMP_REG2, FP_TEMP_REG2);
+			code = emit_cond_exc (cfg, code, OP_COND_EXC_OV, "ArithmeticException");
+			arm_fmovd (code, dreg, sreg1);
+			break;
+
+			/* R4 */
+		case OP_RADD:
+			arm_fadd_s (code, dreg, sreg1, sreg2);
+			break;
+		case OP_RSUB:
+			arm_fsub_s (code, dreg, sreg1, sreg2);
+			break;
+		case OP_RMUL:
+			arm_fmul_s (code, dreg, sreg1, sreg2);
+			break;
+		case OP_RDIV:
+			arm_fdiv_s (code, dreg, sreg1, sreg2);
+			break;
+		case OP_RNEG:
+			arm_fneg_s (code, dreg, sreg1);
+			break;
+		case OP_RCONV_TO_I1:
+			arm_fcvtzs_sx (code, dreg, sreg1);
+			arm_sxtbx (code, dreg, dreg);
+			break;
+		case OP_RCONV_TO_U1:
+			arm_fcvtzu_sx (code, dreg, sreg1);
+			arm_uxtbw (code, dreg, dreg);
+			break;
+		case OP_RCONV_TO_I2:
+			arm_fcvtzs_sx (code, dreg, sreg1);
+			arm_sxthx (code, dreg, dreg);
+			break;
+		case OP_RCONV_TO_U2:
+			arm_fcvtzu_sx (code, dreg, sreg1);
+			arm_uxthw (code, dreg, dreg);
+			break;
+		case OP_RCONV_TO_I4:
+			arm_fcvtzs_sx (code, dreg, sreg1);
+			arm_sxtwx (code, dreg, dreg);
+			break;
+		case OP_RCONV_TO_U4:
+			arm_fcvtzu_sx (code, dreg, sreg1);
+			break;
+		case OP_RCONV_TO_I8:
+			arm_fcvtzs_sx (code, dreg, sreg1);
+			break;
+		case OP_RCONV_TO_U8:
+			arm_fcvtzu_sx (code, dreg, sreg1);
+			break;
+		case OP_RCONV_TO_R8:
+			arm_fcvt_sd (code, dreg, sreg1);
+			break;
+		case OP_RCONV_TO_R4:
+			if (dreg != sreg1)
+				arm_fmovs (code, dreg, sreg1);
+			break;
+		case OP_RCEQ:
+		case OP_RCLT:
+		case OP_RCLT_UN:
+		case OP_RCGT:
+		case OP_RCGT_UN:
+		case OP_RCNEQ:
+		case OP_RCLE:
+		case OP_RCGE: {
+			int cond;
+
+			cond = opcode_to_armcond (ins->opcode);
+			arm_fcmps (code, sreg1, sreg2);
+			arm_cset (code, cond, dreg);
+			break;
+		}
+
+			/* CALLS */
+		case OP_VOIDCALL:
+		case OP_CALL:
+		case OP_LCALL:
+		case OP_FCALL:
+		case OP_RCALL:
+		case OP_VCALL2:
+			call = (MonoCallInst*)ins;
+			if (ins->flags & MONO_INST_HAS_METHOD)
+				code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
+			else
+				code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
+			code = emit_move_return_value (cfg, code, ins);
+			break;
+		case OP_VOIDCALL_REG:
+		case OP_CALL_REG:
+		case OP_LCALL_REG:
+		case OP_FCALL_REG:
+		case OP_RCALL_REG:
+		case OP_VCALL2_REG:
+			arm_blrx (code, sreg1);
+			code = emit_move_return_value (cfg, code, ins);
+			break;
+		case OP_VOIDCALL_MEMBASE:
+		case OP_CALL_MEMBASE:
+		case OP_LCALL_MEMBASE:
+		case OP_FCALL_MEMBASE:
+		case OP_RCALL_MEMBASE:
+		case OP_VCALL2_MEMBASE:
+			code = emit_ldrx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset);
+			arm_blrx (code, ARMREG_IP0);
+			code = emit_move_return_value (cfg, code, ins);
+			break;
+		case OP_TAILCALL: {
+			MonoCallInst *call = (MonoCallInst*)ins;
+
+			g_assert (!cfg->method->save_lmf);
+
+			// FIXME: Copy stack arguments
+
+			/* Restore registers */
+			code = emit_load_regset (code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->arch.saved_gregs_offset);
+
+			/* Destroy frame */
+			code = mono_arm_emit_destroy_frame (code, cfg->stack_offset, ((1 << ARMREG_IP0) | (1 << ARMREG_IP1)));
+
+			if (cfg->compile_aot) {
+				/* This is not a PLT patch */
+				code = emit_aotconst (cfg, code, ARMREG_IP0, MONO_PATCH_INFO_METHOD_JUMP, call->method);
+				arm_brx (code, ARMREG_IP0);
+			} else {
+				mono_add_patch_info_rel (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, call->method, MONO_R_ARM64_B);
+				arm_b (code, code);
+			}
+			ins->flags |= MONO_INST_GC_CALLSITE;
+			ins->backend.pc_offset = code - cfg->native_code;
+			break;
+		}
+		case OP_ARGLIST:
+			g_assert (cfg->arch.cinfo);
+			code = emit_addx_imm (code, ARMREG_IP0, cfg->arch.args_reg, ((CallInfo*)cfg->arch.cinfo)->sig_cookie.offset);
+			arm_strx (code, ARMREG_IP0, sreg1, 0);
+			break;
+		case OP_DYN_CALL: {
+			MonoInst *var = cfg->dyn_call_var;
+			guint8 *labels [16];
+			int i;
+
+			/*
+			 * sreg1 points to a DynCallArgs structure initialized by mono_arch_start_dyn_call ().
+			 * sreg2 is the function to call.
+			 */
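+			/*
+			 * DynCallArgs.regs holds x0-x7 in slots 0-7 and x8 (the indirect
+			 * result register) in slot 8, which is why the regarray load below
+			 * uses the 0x1ff mask; stack arguments follow from slot 9 on.
+			 */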
+
+			g_assert (var->opcode == OP_REGOFFSET);
+
+			arm_movx (code, ARMREG_LR, sreg1);
+			arm_movx (code, ARMREG_IP1, sreg2);
+
+			/* Save args buffer */
+			code = emit_strx (code, ARMREG_LR, var->inst_basereg, var->inst_offset);
+
+			/* Set fp argument regs */
+			code = emit_ldrw (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, n_fpargs));
+			arm_cmpw (code, ARMREG_R0, ARMREG_RZR);
+			labels [0] = code;
+			arm_bcc (code, ARMCOND_EQ, 0);
+			for (i = 0; i < 8; ++i)
+				code = emit_ldrfpx (code, ARMREG_D0 + i, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, fpregs) + (i * 8));
+			arm_patch_rel (labels [0], code, MONO_R_ARM64_BCC);
+
+			/* Set stack args */
+			for (i = 0; i < DYN_CALL_STACK_ARGS; ++i) {
+				code = emit_ldrx (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, regs) + ((PARAM_REGS + 1 + i) * sizeof (mgreg_t)));
+				code = emit_strx (code, ARMREG_R0, ARMREG_SP, i * sizeof (mgreg_t));
+			}
+
+			/* Set argument registers + r8 */
+			code = mono_arm_emit_load_regarray (code, 0x1ff, ARMREG_LR, 0);
+
+			/* Make the call */
+			arm_blrx (code, ARMREG_IP1);
+
+			/* Save result */
+			code = emit_ldrx (code, ARMREG_LR, var->inst_basereg, var->inst_offset);
+			arm_strx (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, res));
+			arm_strx (code, ARMREG_R1, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, res2));
+			/* Save fp result */
+			code = emit_ldrw (code, ARMREG_R0, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, n_fpret));
+			arm_cmpw (code, ARMREG_R0, ARMREG_RZR);
+			labels [1] = code;
+			arm_bcc (code, ARMCOND_EQ, 0);
+			for (i = 0; i < 8; ++i)
+				code = emit_strfpx (code, ARMREG_D0 + i, ARMREG_LR, MONO_STRUCT_OFFSET (DynCallArgs, fpregs) + (i * 8));
+			arm_patch_rel (labels [1], code, MONO_R_ARM64_BCC);
+			break;
+		}
+
+		case OP_GENERIC_CLASS_INIT: {
+			static int byte_offset = -1;
+			static guint8 bitmask;
+			guint8 *jump;
+
+			if (byte_offset < 0)
+				mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);
+
+			/* Load vtable->initialized */
+			arm_ldrsbx (code, ARMREG_IP0, sreg1, byte_offset);
+			/* FIXME: No andx_imm yet */
+			code = mono_arm_emit_imm64 (code, ARMREG_IP1, bitmask);
+			arm_andx (code, ARMREG_IP0, ARMREG_IP0, ARMREG_IP1);
+			jump = code;
+			arm_cbnzx (code, ARMREG_IP0, 0);
+
+			/* Slowpath */
+			g_assert (sreg1 == ARMREG_R0);
+
+			/* Call mono_generic_class_init_trampoline () */
+			/* The vtable is still in R0; the generic trampoline code will pass it as the argument to the C function */
+			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
+							  (gpointer)"specific_trampoline_generic_class_init");
+
+			mono_arm_patch (jump, code, MONO_R_ARM64_CBZ);
+			break;
+		}
+
+		case OP_CHECK_THIS:
+			arm_ldrx (code, ARMREG_LR, sreg1, 0);
+			break;
+		case OP_NOT_NULL:
+		case OP_NOT_REACHED:
+		case OP_DUMMY_USE:
+			break;
+		case OP_IL_SEQ_POINT:
+			mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
+			break;
+
+			/* EH */
+		case OP_COND_EXC_C:
+		case OP_COND_EXC_IC:
+		case OP_COND_EXC_OV:
+		case OP_COND_EXC_IOV:
+		case OP_COND_EXC_NC:
+		case OP_COND_EXC_INC:
+		case OP_COND_EXC_NO:
+		case OP_COND_EXC_INO:
+		case OP_COND_EXC_EQ:
+		case OP_COND_EXC_IEQ:
+		case OP_COND_EXC_NE_UN:
+		case OP_COND_EXC_INE_UN:
+		case OP_COND_EXC_ILT:
+		case OP_COND_EXC_LT:
+		case OP_COND_EXC_ILT_UN:
+		case OP_COND_EXC_LT_UN:
+		case OP_COND_EXC_IGT:
+		case OP_COND_EXC_GT:
+		case OP_COND_EXC_IGT_UN:
+		case OP_COND_EXC_GT_UN:
+		case OP_COND_EXC_IGE:
+		case OP_COND_EXC_GE:
+		case OP_COND_EXC_IGE_UN:
+		case OP_COND_EXC_GE_UN:
+		case OP_COND_EXC_ILE:
+		case OP_COND_EXC_LE:
+		case OP_COND_EXC_ILE_UN:
+		case OP_COND_EXC_LE_UN:
+			code = emit_cond_exc (cfg, code, ins->opcode, ins->inst_p1);
+			break;
+		case OP_THROW:
+			if (sreg1 != ARMREG_R0)
+				arm_movx (code, ARMREG_R0, sreg1);
+			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+							  (gpointer)"mono_arch_throw_exception");
+			break;
+		case OP_RETHROW:
+			if (sreg1 != ARMREG_R0)
+				arm_movx (code, ARMREG_R0, sreg1);
+			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+							  (gpointer)"mono_arch_rethrow_exception");
+			break;
+		case OP_CALL_HANDLER:
+			mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_BL);
+			arm_bl (code, 0);
+			cfg->thunk_area += THUNK_SIZE;
+			break;
+		case OP_START_HANDLER: {
+			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+
+			/* Save caller address */
+			code = emit_strx (code, ARMREG_LR, spvar->inst_basereg, spvar->inst_offset);
+
+			/*
+			 * Reserve a param area, see test_0_finally_param_area ().
+			 * This is needed because the param area is not set up when
+			 * we are called from EH code.
+			 */
+			if (cfg->param_area)
+				code = emit_subx_sp_imm (code, cfg->param_area);
+			break;
+		}
+		case OP_ENDFINALLY:
+		case OP_ENDFILTER: {
+			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+
+			if (cfg->param_area)
+				code = emit_addx_sp_imm (code, cfg->param_area);
+
+			if (ins->opcode == OP_ENDFILTER && sreg1 != ARMREG_R0)
+				arm_movx (code, ARMREG_R0, sreg1);
+
+			/* Return to either after the branch in OP_CALL_HANDLER, or to the EH code */
+			code = emit_ldrx (code, ARMREG_LR, spvar->inst_basereg, spvar->inst_offset);
+			arm_brx (code, ARMREG_LR);
+			break;
+		}
+		case OP_GET_EX_OBJ:
+			if (ins->dreg != ARMREG_R0)
+				arm_movx (code, ins->dreg, ARMREG_R0);
+			break;
+
+		default:
+			g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
+			g_assert_not_reached ();
+		}
+
+		if ((cfg->opt & MONO_OPT_BRANCH) && ((code - cfg->native_code - offset) > max_len)) {
+			g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
+				   mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
+			g_assert_not_reached ();
+		}
+	}
+
+	/*
+	 * If the compiled code is too large for the range of a conditional branch
+	 * (the bcc displacement is 19 bits signed, in 4-byte units, i.e. +/-1 MiB),
+	 * insert branch islands between/inside basic blocks.
+	 */
+	if (cfg->arch.cond_branch_islands)
+		code = emit_branch_island (cfg, code, start_offset);
+
+	cfg->code_len = code - cfg->native_code;
+}
+
+static guint8*
+emit_move_args (MonoCompile *cfg, guint8 *code)
+{
+	MonoInst *ins;
+	CallInfo *cinfo;
+	ArgInfo *ainfo;
+	int i, part;
+
+	cinfo = cfg->arch.cinfo;
+	g_assert (cinfo);
+	for (i = 0; i < cinfo->nargs; ++i) {
+		ainfo = cinfo->args + i;
+		ins = cfg->args [i];
+
+		if (ins->opcode == OP_REGVAR) {
+			switch (ainfo->storage) {
+			case ArgInIReg:
+				arm_movx (code, ins->dreg, ainfo->reg);
+				break;
+			case ArgOnStack:
+				switch (ainfo->slot_size) {
+				case 1:
+					if (ainfo->sign)
+						code = emit_ldrsbx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset);
+					else
+						code = emit_ldrb (code, ins->dreg, cfg->arch.args_reg, ainfo->offset);
+					break;
+				case 2:
+					if (ainfo->sign)
+						code = emit_ldrshx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset);
+					else
+						code = emit_ldrh (code, ins->dreg, cfg->arch.args_reg, ainfo->offset);
+					break;
+				case 4:
+					if (ainfo->sign)
+						code = emit_ldrswx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset);
+					else
+						code = emit_ldrw (code, ins->dreg, cfg->arch.args_reg, ainfo->offset);
+					break;
+				default:
+					code = emit_ldrx (code, ins->dreg, cfg->arch.args_reg, ainfo->offset);
+					break;
+				}
+				break;
+			default:
+				g_assert_not_reached ();
+				break;
+			}
+		} else {
+			if (ainfo->storage != ArgVtypeByRef && ainfo->storage != ArgVtypeByRefOnStack)
+				g_assert (ins->opcode == OP_REGOFFSET);
+
+			switch (ainfo->storage) {
+			case ArgInIReg:
+				/* Stack slots for arguments have size 8 */
+				code = emit_strx (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
+				break;
+			case ArgInFReg:
+				code = emit_strfpx (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
+				break;
+			case ArgInFRegR4:
+				code = emit_strfpw (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
+				break;
+			case ArgOnStack:
+			case ArgOnStackR4:
+			case ArgOnStackR8:
+			case ArgVtypeByRefOnStack:
+			case ArgVtypeOnStack:
+				break;
+			case ArgVtypeByRef: {
+				MonoInst *addr_arg = ins->inst_left;
+
+				if (ainfo->gsharedvt) {
+					g_assert (ins->opcode == OP_GSHAREDVT_ARG_REGOFFSET);
+					arm_strx (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
+				} else {
+					g_assert (ins->opcode == OP_VTARG_ADDR);
+					g_assert (addr_arg->opcode == OP_REGOFFSET);
+					arm_strx (code, ainfo->reg, addr_arg->inst_basereg, addr_arg->inst_offset);
+				}
+				break;
+			}
+			case ArgVtypeInIRegs:
+				for (part = 0; part < ainfo->nregs; part ++) {
+					code = emit_strx (code, ainfo->reg + part, ins->inst_basereg, ins->inst_offset + (part * 8));
+				}
+				break;
+			case ArgHFA:
+				for (part = 0; part < ainfo->nregs; part ++) {
+					if (ainfo->esize == 4)
+						code = emit_strfpw (code, ainfo->reg + part, ins->inst_basereg, ins->inst_offset + ainfo->foffsets [part]);
+					else
+						code = emit_strfpx (code, ainfo->reg + part, ins->inst_basereg, ins->inst_offset + ainfo->foffsets [part]);
+				}
+				break;
+			default:
+				g_assert_not_reached ();
+				break;
+			}
+		}
+	}
+
+	return code;
+}
+
+/*
+ * emit_store_regarray:
+ *
+ *   Emit code to store the registers in REGS into the appropriate elements of
+ * the register array at BASEREG+OFFSET.
+ */
+static __attribute__((warn_unused_result)) guint8*
+emit_store_regarray (guint8 *code, guint64 regs, int basereg, int offset)
+{
+	int i;
+
+	for (i = 0; i < 32; ++i) {
+		if (regs & (1 << i)) {
+			if (i + 1 < 32 && (regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) {
+				arm_stpx (code, i, i + 1, basereg, offset + (i * 8));
+				i++;
+			} else if (i == ARMREG_SP) {
+				arm_movspx (code, ARMREG_IP1, ARMREG_SP);
+				arm_strx (code, ARMREG_IP1, basereg, offset + (i * 8));
+			} else {
+				arm_strx (code, i, basereg, offset + (i * 8));
+			}
+		}
+	}
+	return code;
+}
+
+/*
+ * emit_load_regarray:
+ *
+ *   Emit code to load the registers in REGS from the appropriate elements of
+ * the register array at BASEREG+OFFSET.
+ */
+static __attribute__((warn_unused_result)) guint8*
+emit_load_regarray (guint8 *code, guint64 regs, int basereg, int offset)
+{
+	int i;
+
+	for (i = 0; i < 32; ++i) {
+		if (regs & (1 << i)) {
+			if (i + 1 < 32 && (regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) {
+				if (offset + (i * 8) < 500)
+					arm_ldpx (code, i, i + 1, basereg, offset + (i * 8));
+				else {
+					code = emit_ldrx (code, i, basereg, offset + (i * 8));
+					code = emit_ldrx (code, i + 1, basereg, offset + ((i + 1) * 8));
+				}
+				i++;
+			} else if (i == ARMREG_SP) {
+				g_assert_not_reached ();
+			} else {
+				code = emit_ldrx (code, i, basereg, offset + (i * 8));
+			}
+		}
+	}
+	return code;
+}
+
+/*
+ * emit_store_regset:
+ *
+ *   Emit code to store the registers in REGS into consecutive memory locations starting
+ * at BASEREG+OFFSET.
+ */
+static __attribute__((warn_unused_result)) guint8*
+emit_store_regset (guint8 *code, guint64 regs, int basereg, int offset)
+{
+	int i, pos;
+
+	pos = 0;
+	for (i = 0; i < 32; ++i) {
+		if (regs & (1 << i)) {
+			if (i + 1 < 32 && (regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) {
+				arm_stpx (code, i, i + 1, basereg, offset + (pos * 8));
+				i++;
+				pos++;
+			} else if (i == ARMREG_SP) {
+				arm_movspx (code, ARMREG_IP1, ARMREG_SP);
+				arm_strx (code, ARMREG_IP1, basereg, offset + (pos * 8));
+			} else {
+				arm_strx (code, i, basereg, offset + (pos * 8));
+			}
+			pos++;
+		}
+	}
+	return code;
+}
+
+/*
+ * emit_load_regset:
+ *
+ *   Emit code to load the registers in REGS from consecutive memory locations starting
+ * at BASEREG+OFFSET.
+ */
+static __attribute__((warn_unused_result)) guint8*
+emit_load_regset (guint8 *code, guint64 regs, int basereg, int offset)
+{
+	int i, pos;
+
+	pos = 0;
+	for (i = 0; i < 32; ++i) {
+		if (regs & (1 << i)) {
+			if (i + 1 < 32 && (regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) {
+				arm_ldpx (code, i, i + 1, basereg, offset + (pos * 8));
+				i++;
+				pos++;
+			} else if (i == ARMREG_SP) {
+				g_assert_not_reached ();
+			} else {
+				arm_ldrx (code, i, basereg, offset + (pos * 8));
+			}
+			pos++;
+		}
+	}
+	return code;
+}
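+/*
+ * Note the layout difference between the two helper families above: the
+ * "regarray" variants place register i at OFFSET + i*8 (one slot per register
+ * number, as in a full 32-entry register array), while the "regset" variants
+ * pack just the selected registers into consecutive slots. E.g. for
+ * regs == (1 << ARMREG_R19) | (1 << ARMREG_R22), emit_store_regarray writes
+ * to offsets 19*8 and 22*8, while emit_store_regset writes to offsets 0 and 8.
+ */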
+
+__attribute__((warn_unused_result)) guint8*
+mono_arm_emit_load_regarray (guint8 *code, guint64 regs, int basereg, int offset)
+{
+	return emit_load_regarray (code, regs, basereg, offset);
+}
+
+__attribute__((warn_unused_result)) guint8*
+mono_arm_emit_store_regarray (guint8 *code, guint64 regs, int basereg, int offset)
+{
+	return emit_store_regarray (code, regs, basereg, offset);
+}
+
+__attribute__((warn_unused_result)) guint8*
+mono_arm_emit_store_regset (guint8 *code, guint64 regs, int basereg, int offset)
+{
+	return emit_store_regset (code, regs, basereg, offset);
+}
+
+/* Same as emit_store_regset, but emit unwind info too */
+/* CFA_OFFSET is the offset between the CFA and basereg */
+static __attribute__((warn_unused_result)) guint8*
+emit_store_regset_cfa (MonoCompile *cfg, guint8 *code, guint64 regs, int basereg, int offset, int cfa_offset, guint64 no_cfa_regset)
+{
+	int i, j, pos, nregs;
+	guint32 cfa_regset = regs & ~no_cfa_regset;
+
+	pos = 0;
+	for (i = 0; i < 32; ++i) {
+		nregs = 1;
+		if (regs & (1 << i)) {
+			if (i + 1 < 32 && (regs & (1 << (i + 1))) && (i + 1 != ARMREG_SP)) {
+				if (offset < 256) {
+					arm_stpx (code, i, i + 1, basereg, offset + (pos * 8));
+				} else {
+					code = emit_strx (code, i, basereg, offset + (pos * 8));
+					code = emit_strx (code, i + 1, basereg, offset + (pos * 8) + 8);
+				}
+				nregs = 2;
+			} else if (i == ARMREG_SP) {
+				arm_movspx (code, ARMREG_IP1, ARMREG_SP);
+				code = emit_strx (code, ARMREG_IP1, basereg, offset + (pos * 8));
+			} else {
+				code = emit_strx (code, i, basereg, offset + (pos * 8));
+			}
+
+			for (j = 0; j < nregs; ++j) {
+				if (cfa_regset & (1 << (i + j)))
+					mono_emit_unwind_op_offset (cfg, code, i + j, (- cfa_offset) + offset + ((pos + j) * 8));
+			}
+
+			i += nregs - 1;
+			pos += nregs;
+		}
+	}
+	return code;
+}
+
+/*
+ * emit_setup_lmf:
+ *
+ *   Emit code to initialize an LMF structure at LMF_OFFSET.
+ * Clobbers ip0/ip1.
+ */
+static guint8*
+emit_setup_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset, int cfa_offset)
+{
+	/*
+	 * The LMF should contain all the state required to be able to reconstruct the machine state
+	 * at the current point of execution. Since the LMF is only read during EH, only the
+	 * callee saved registers and similar long-lived state need to be saved.
+	 * FIXME: Save callee saved fp regs, JITted code doesn't use them, but native code does, and they
+	 * need to be restored during EH.
+	 */
+
+	/* pc */
+	arm_adrx (code, ARMREG_LR, code);
+	code = emit_strx (code, ARMREG_LR, ARMREG_FP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, pc));
+	/* gregs + fp + sp */
+	/* Don't emit unwind info for sp/fp, they are already handled in the prolog */
+	code = emit_store_regset_cfa (cfg, code, MONO_ARCH_LMF_REGS, ARMREG_FP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, gregs), cfa_offset, (1 << ARMREG_FP) | (1 << ARMREG_SP));
+
+	return code;
+}
+
+guint8 *
+mono_arch_emit_prolog (MonoCompile *cfg)
+{
+	MonoMethod *method = cfg->method;
+	MonoMethodSignature *sig;
+	MonoBasicBlock *bb;
+	guint8 *code;
+	int cfa_offset, max_offset;
+
+	sig = mono_method_signature (method);
+	cfg->code_size = 256 + sig->param_count * 64;
+	code = cfg->native_code = g_malloc (cfg->code_size);
+
+	/* This can be unaligned */
+	cfg->stack_offset = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
+
+	/*
+	 * - Setup frame
+	 */
+	cfa_offset = 0;
+	mono_emit_unwind_op_def_cfa (cfg, code, ARMREG_SP, 0);
+
+	/* Setup frame */
+	if (arm_is_ldpx_imm (-cfg->stack_offset)) {
+		arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -cfg->stack_offset);
+	} else {
+		/* sp -= cfg->stack_offset */
+		/* This clobbers ip0/ip1 */
+		code = emit_subx_sp_imm (code, cfg->stack_offset);
+		arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0);
+	}
+	cfa_offset += cfg->stack_offset;
+	mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
+	mono_emit_unwind_op_offset (cfg, code, ARMREG_FP, (- cfa_offset) + 0);
+	mono_emit_unwind_op_offset (cfg, code, ARMREG_LR, (- cfa_offset) + 8);
+	arm_movspx (code, ARMREG_FP, ARMREG_SP);
+	mono_emit_unwind_op_def_cfa_reg (cfg, code, ARMREG_FP);
+	if (cfg->param_area) {
+		/* The param area is below the frame pointer */
+		code = emit_subx_sp_imm (code, cfg->param_area);
+	}
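+	/*
+	 * The frame now looks roughly like this (higher addresses first):
+	 *
+	 *   fp + cfg->stack_offset: incoming stack arguments (-> args_reg)
+	 *   fp + 16 ...:            locals, saved gregs or LMF
+	 *   fp + 8:                 saved lr
+	 *   fp + 0:                 saved fp            <- fp
+	 *   sp:                     param area (cfg->param_area bytes), if any
+	 */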
+
+	if (cfg->method->save_lmf) {
+		code = emit_setup_lmf (cfg, code, cfg->lmf_var->inst_offset, cfa_offset);
+	} else {
+		/* Save gregs */
+		code = emit_store_regset_cfa (cfg, code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->arch.saved_gregs_offset, cfa_offset, 0);
+	}
+
+	/* Setup args reg */
+	if (cfg->arch.args_reg) {
+		/* The register was already saved above */
+		code = emit_addx_imm (code, cfg->arch.args_reg, ARMREG_FP, cfg->stack_offset);
+	}
+
+	/* Save return area addr received in R8 */
+	if (cfg->vret_addr) {
+		MonoInst *ins = cfg->vret_addr;
+
+		g_assert (ins->opcode == OP_REGOFFSET);
+		code = emit_strx (code, ARMREG_R8, ins->inst_basereg, ins->inst_offset);
+	}
+
+	/* Save mrgctx received in MONO_ARCH_RGCTX_REG */
+	if (cfg->rgctx_var) {
+		MonoInst *ins = cfg->rgctx_var;
+
+		g_assert (ins->opcode == OP_REGOFFSET);
+
+		code = emit_strx (code, MONO_ARCH_RGCTX_REG, ins->inst_basereg, ins->inst_offset); 
+	}
+		
+	/*
+	 * Move arguments to their registers/stack locations.
+	 */
+	code = emit_move_args (cfg, code);
+
+	/* Initialize seq_point_info_var */
+	if (cfg->arch.seq_point_info_var) {
+		MonoInst *ins = cfg->arch.seq_point_info_var;
+
+		/* Initialize the variable from a GOT slot */
+		code = emit_aotconst (cfg, code, ARMREG_IP0, MONO_PATCH_INFO_SEQ_POINT_INFO, cfg->method);
+		g_assert (ins->opcode == OP_REGOFFSET);
+		code = emit_strx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset);
+
+		/* Initialize ss_tramp_var */
+		ins = cfg->arch.ss_tramp_var;
+		g_assert (ins->opcode == OP_REGOFFSET);
+
+		code = emit_ldrx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr));
+		code = emit_strx (code, ARMREG_IP1, ins->inst_basereg, ins->inst_offset);
+	} else {
+		MonoInst *ins;
+
+		if (cfg->arch.ss_tramp_var) {
+			/* Initialize ss_tramp_var */
+			ins = cfg->arch.ss_tramp_var;
+			g_assert (ins->opcode == OP_REGOFFSET);
+
+			code = emit_imm64 (code, ARMREG_IP0, (guint64)&ss_trampoline);
+			code = emit_strx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset);
+		}
+
+		if (cfg->arch.bp_tramp_var) {
+			/* Initialize bp_tramp_var */
+			ins = cfg->arch.bp_tramp_var;
+			g_assert (ins->opcode == OP_REGOFFSET);
+
+			code = emit_imm64 (code, ARMREG_IP0, (guint64)bp_trampoline);
+			code = emit_strx (code, ARMREG_IP0, ins->inst_basereg, ins->inst_offset);
+		}
+	}
+
+	max_offset = 0;
+	if (cfg->opt & MONO_OPT_BRANCH) {
+		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
+			MonoInst *ins;
+			bb->max_offset = max_offset;
+
+			MONO_BB_FOR_EACH_INS (bb, ins) {
+				max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
+			}
+		}
+	}
+	if (max_offset > 0x3ffff * 4)
+		cfg->arch.cond_branch_islands = TRUE;
+
+	return code;
+}
+
+static guint8*
+realloc_code (MonoCompile *cfg, int size)
+{
+	while (cfg->code_len + size > (cfg->code_size - 16)) {
+		cfg->code_size *= 2;
+		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+		cfg->stat_code_reallocs++;
+	}
+	return cfg->native_code + cfg->code_len;
+}
+
+void
+mono_arch_emit_epilog (MonoCompile *cfg)
+{
+	CallInfo *cinfo;
+	int max_epilog_size;
+	guint8 *code;
+	int i;
+
+	max_epilog_size = 16 + 20*4;
+	code = realloc_code (cfg, max_epilog_size);
+
+	if (cfg->method->save_lmf) {
+		code = mono_arm_emit_load_regarray (code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->lmf_var->inst_offset + MONO_STRUCT_OFFSET (MonoLMF, gregs) - (MONO_ARCH_FIRST_LMF_REG * 8));
+	} else {
+		/* Restore gregs */
+		code = emit_load_regset (code, MONO_ARCH_CALLEE_SAVED_REGS & cfg->used_int_regs, ARMREG_FP, cfg->arch.saved_gregs_offset);
+	}
+
+	/* Load returned vtypes into registers if needed */
+	cinfo = cfg->arch.cinfo;
+	switch (cinfo->ret.storage) {
+	case ArgVtypeInIRegs: {
+		MonoInst *ins = cfg->ret;
+
+		for (i = 0; i < cinfo->ret.nregs; ++i)
+			code = emit_ldrx (code, cinfo->ret.reg + i, ins->inst_basereg, ins->inst_offset + (i * 8));
+		break;
+	}
+	case ArgHFA: {
+		MonoInst *ins = cfg->ret;
+
+		for (i = 0; i < cinfo->ret.nregs; ++i) {
+			if (cinfo->ret.esize == 4)
+				code = emit_ldrfpw (code, cinfo->ret.reg + i, ins->inst_basereg, ins->inst_offset + cinfo->ret.foffsets [i]);
+			else
+				code = emit_ldrfpx (code, cinfo->ret.reg + i, ins->inst_basereg, ins->inst_offset + cinfo->ret.foffsets [i]);
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	/* Destroy frame */
+	code = mono_arm_emit_destroy_frame (code, cfg->stack_offset, ((1 << ARMREG_IP0) | (1 << ARMREG_IP1)));
+
+	arm_retx (code, ARMREG_LR);
+
+	g_assert (code - (cfg->native_code + cfg->code_len) < max_epilog_size);
+
+	cfg->code_len = code - cfg->native_code;
+}
+
+void
+mono_arch_emit_exceptions (MonoCompile *cfg)
+{
+	MonoJumpInfo *ji;
+	MonoClass *exc_class;
+	guint8 *code, *ip;
+	guint8* exc_throw_pos [MONO_EXC_INTRINS_NUM];
+	guint8 exc_throw_found [MONO_EXC_INTRINS_NUM];
+	int i, id, size = 0;
+
+	for (i = 0; i < MONO_EXC_INTRINS_NUM; i++) {
+		exc_throw_pos [i] = NULL;
+		exc_throw_found [i] = 0;
+	}
+
+	for (ji = cfg->patch_info; ji; ji = ji->next) {
+		if (ji->type == MONO_PATCH_INFO_EXC) {
+			i = mini_exception_id_by_name (ji->data.target);
+			if (!exc_throw_found [i]) {
+				size += 32;
+				exc_throw_found [i] = TRUE;
+			}
+		}
+	}
+
+	code = realloc_code (cfg, size);
+
+	/* Emit code to raise corlib exceptions */
+	for (ji = cfg->patch_info; ji; ji = ji->next) {
+		if (ji->type != MONO_PATCH_INFO_EXC)
+			continue;
+
+		ip = cfg->native_code + ji->ip.i;
+
+		id = mini_exception_id_by_name (ji->data.target);
+
+		if (exc_throw_pos [id]) {
+			/* ip points to the bcc () in OP_COND_EXC_... */
+			arm_patch_rel (ip, exc_throw_pos [id], ji->relocation);
+			ji->type = MONO_PATCH_INFO_NONE;
+			continue;
+		}
+
+		exc_throw_pos [id] = code;
+		arm_patch_rel (ip, code, ji->relocation);
+
+		/* We are being branched to from the code generated by emit_cond_exc (), the pc is in ip1 */
+
+		/* r0 = type token */
+		exc_class = mono_class_from_name (mono_defaults.corlib, "System", ji->data.name);
+		g_assert (exc_class);
+		code = emit_imm (code, ARMREG_R0, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
+		/* r1 = throw ip */
+		arm_movx (code, ARMREG_R1, ARMREG_IP1);
+		/* Branch to the corlib exception throwing trampoline */
+		ji->ip.i = code - cfg->native_code;
+		ji->type = MONO_PATCH_INFO_INTERNAL_METHOD;
+		ji->data.name = "mono_arch_throw_corlib_exception";
+		ji->relocation = MONO_R_ARM64_BL;
+		arm_bl (code, 0);
+		cfg->thunk_area += THUNK_SIZE;
+	}
+
+	cfg->code_len = code - cfg->native_code;
+
+	g_assert (cfg->code_len < cfg->code_size);
+}
+
+MonoInst*
+mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+{
+	return NULL;
+}
+
+gboolean
+mono_arch_print_tree (MonoInst *tree, int arity)
+{
+	return FALSE;
+}
+
+guint32
+mono_arch_get_patch_offset (guint8 *code)
+{
+	return 0;
+}
+
+gpointer
+mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+						   gpointer fail_tramp)
+{
+	int i, buf_len, imt_reg;
+	guint8 *buf, *code;
+
+#if DEBUG_IMT
+	printf ("building IMT thunk for class %s %s entries %d code size %d code at %p end %p vtable %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable);
+	for (i = 0; i < count; ++i) {
+		MonoIMTCheckItem *item = imt_entries [i];
+		printf ("method %d (%p) %s vtable slot %p is_equals %d chunk size %d\n", i, item->key, item->key->name, &vtable->vtable [item->value.vtable_slot], item->is_equals, item->chunk_size);
+	}
+#endif
+
+	buf_len = 0;
+	for (i = 0; i < count; ++i) {
+		MonoIMTCheckItem *item = imt_entries [i];
+		if (item->is_equals) {
+			gboolean fail_case = !item->check_target_idx && fail_tramp;
+
+			if (item->check_target_idx || fail_case) {
+				if (!item->compare_done || fail_case) {
+					buf_len += 4 * 4 + 4;
+				}
+				buf_len += 4;
+				if (item->has_target_code) {
+					buf_len += 5 * 4;
+				} else {
+					buf_len += 6 * 4;
+				}
+				if (fail_case) {
+					buf_len += 5 * 4;
+				}
+			} else {
+				buf_len += 6 * 4;
+			}
+		} else {
+			buf_len += 6 * 4;
+		}
+	}
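+	/*
+	 * The estimates above count 4-byte instructions: emit_imm64 expands to (up
+	 * to) four movz/movk instructions, so a key compare is 4 * 4 + 4 bytes
+	 * (imm64 + cmp) plus 4 for the bcc, a direct jump is 5 * 4 (imm64 + br) and
+	 * a vtable-slot jump is 6 * 4 (imm64 + ldr + br).
+	 */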
+
+	if (fail_tramp)
+		buf = mono_method_alloc_generic_virtual_thunk (domain, buf_len);
+	else
+		buf = mono_domain_code_reserve (domain, buf_len);
+	code = buf;
+
+	/*
+	 * We are called by JITted code, which passes in the IMT argument in
+	 * MONO_ARCH_RGCTX_REG (r27). We need to preserve all caller saved regs
+	 * except ip0/ip1.
+	 */
+	imt_reg = MONO_ARCH_RGCTX_REG;
+	for (i = 0; i < count; ++i) {
+		MonoIMTCheckItem *item = imt_entries [i];
+
+		item->code_target = code;
+
+		if (item->is_equals) {
+			/*
+			 * Check the imt argument against item->key; if they are equal, jump to
+			 * either item->value.target_code or to vtable [item->value.vtable_slot].
+			 * If fail_tramp is set, jump to it on a mismatch instead.
+			 */
+			gboolean fail_case = !item->check_target_idx && fail_tramp;
+
+			if (item->check_target_idx || fail_case) {
+				/* Compare imt_reg with item->key */
+				if (!item->compare_done || fail_case) {
+					// FIXME: Optimize this
+					code = emit_imm64 (code, ARMREG_IP0, (guint64)item->key);
+					arm_cmpx (code, imt_reg, ARMREG_IP0);
+				}
+				item->jmp_code = code;
+				arm_bcc (code, ARMCOND_NE, 0);
+				/* Jump to target if equals */
+				if (item->has_target_code) {
+					code = emit_imm64 (code, ARMREG_IP0, (guint64)item->value.target_code);
+					arm_brx (code, ARMREG_IP0);
+				} else {
+					guint64 imm = (guint64)&(vtable->vtable [item->value.vtable_slot]);
+
+					code = emit_imm64 (code, ARMREG_IP0, imm);
+					arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0);
+					arm_brx (code, ARMREG_IP0);
+				}
+
+				if (fail_case) {
+					arm_patch_rel (item->jmp_code, code, MONO_R_ARM64_BCC);
+					item->jmp_code = NULL;
+					code = emit_imm64 (code, ARMREG_IP0, (guint64)fail_tramp);
+					arm_brx (code, ARMREG_IP0);
+				}
+			} else {
+				guint64 imm = (guint64)&(vtable->vtable [item->value.vtable_slot]);
+
+				code = emit_imm64 (code, ARMREG_IP0, imm);
+				arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, 0);
+				arm_brx (code, ARMREG_IP0);
+			}
+		} else {
+			code = emit_imm64 (code, ARMREG_IP0, (guint64)item->key);
+			arm_cmpx (code, imt_reg, ARMREG_IP0);
+			item->jmp_code = code;
+			arm_bcc (code, ARMCOND_HS, 0);
+		}
+	}
+	/* Patch the branches */
+	for (i = 0; i < count; ++i) {
+		MonoIMTCheckItem *item = imt_entries [i];
+		if (item->jmp_code && item->check_target_idx)
+			arm_patch_rel (item->jmp_code, imt_entries [item->check_target_idx]->code_target, MONO_R_ARM64_BCC);
+	}
+
+	g_assert ((code - buf) < buf_len);
+
+	mono_arch_flush_icache (buf, code - buf);
+
+	return buf;
+}
+
+GSList *
+mono_arch_get_trampolines (gboolean aot)
+{
+	return mono_arm_get_exception_trampolines (aot);
+}
+
+#else /* DISABLE_JIT */
+
+gpointer
+mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+						   gpointer fail_tramp)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+#endif /* !DISABLE_JIT */
+
+#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
+
+void
+mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+	guint8 *code = ip;
+	guint32 native_offset = ip - (guint8*)ji->code_start;
+
+	if (ji->from_aot) {
+		SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
+
+		g_assert (native_offset % 4 == 0);
+		g_assert (info->bp_addrs [native_offset / 4] == 0);
+		info->bp_addrs [native_offset / 4] = mini_get_breakpoint_trampoline ();
+	} else {
+		/* ip points to an ldrx */
+		code += 4;
+		arm_blrx (code, ARMREG_IP0);
+		mono_arch_flush_icache (ip, code - ip);
+	}
+}
+
+void
+mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+	guint8 *code = ip;
+
+	if (ji->from_aot) {
+		guint32 native_offset = ip - (guint8*)ji->code_start;
+		SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
+
+		g_assert (native_offset % 4 == 0);
+		info->bp_addrs [native_offset / 4] = NULL;
+	} else {
+		/* ip points to an ldrx */
+		code += 4;
+		arm_nop (code);
+		mono_arch_flush_icache (ip, code - ip);
+	}
+}
+
+void
+mono_arch_start_single_stepping (void)
+{
+	ss_trampoline = mini_get_single_step_trampoline ();
+}
+
+void
+mono_arch_stop_single_stepping (void)
+{
+	ss_trampoline = NULL;
+}
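+/*
+ * Toggling single stepping does not require patching JITted code: the method
+ * prolog stores the address of ss_trampoline (or of the AOT SeqPointInfo
+ * ss_tramp_addr slot) into ss_tramp_var, so updating the global above is
+ * enough for code which loads through that pointer at sequence points.
+ */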
+
+gboolean
+mono_arch_is_single_step_event (void *info, void *sigctx)
+{
+	/* We use soft breakpoints on arm64 */
+	return FALSE;
+}
+
+gboolean
+mono_arch_is_breakpoint_event (void *info, void *sigctx)
+{
+	/* We use soft breakpoints on arm64 */
+	return FALSE;
+}
+
+void
+mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji)
+{
+	g_assert_not_reached ();
+}
+
+void
+mono_arch_skip_single_step (MonoContext *ctx)
+{
+	g_assert_not_reached ();
+}
+
+gpointer
+mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
+{
+	SeqPointInfo *info;
+	MonoJitInfo *ji;
+
+	// FIXME: Add a free function
+
+	mono_domain_lock (domain);
+	info = g_hash_table_lookup (domain_jit_info (domain)->arch_seq_points, 
+								code);
+	mono_domain_unlock (domain);
+
+	if (!info) {
+		ji = mono_jit_info_table_find (domain, (char*)code);
+		g_assert (ji);
+
+		info = g_malloc0 (sizeof (SeqPointInfo) + (ji->code_size / 4) * sizeof(guint8*));
+
+		info->ss_tramp_addr = &ss_trampoline;
+
+		mono_domain_lock (domain);
+		g_hash_table_insert (domain_jit_info (domain)->arch_seq_points,
+							 code, info);
+		mono_domain_unlock (domain);
+	}
+
+	return info;
+}
+
+void
+mono_arch_init_lmf_ext (MonoLMFExt *ext, gpointer prev_lmf)
+{
+	ext->lmf.previous_lmf = prev_lmf;
+	/* Mark that this is a MonoLMFExt */
+	ext->lmf.previous_lmf = (gpointer)(((gssize)ext->lmf.previous_lmf) | 2);
+	ext->lmf.gregs [MONO_ARCH_LMF_REG_SP] = (gssize)ext;
+}
+
+#endif /* MONO_ARCH_SOFT_DEBUG_SUPPORTED */
+
+gboolean
+mono_arch_opcode_supported (int opcode)
+{
+	switch (opcode) {
+	case OP_ATOMIC_ADD_I4:
+	case OP_ATOMIC_ADD_I8:
+	case OP_ATOMIC_EXCHANGE_I4:
+	case OP_ATOMIC_EXCHANGE_I8:
+	case OP_ATOMIC_CAS_I4:
+	case OP_ATOMIC_CAS_I8:
+	case OP_ATOMIC_LOAD_I1:
+	case OP_ATOMIC_LOAD_I2:
+	case OP_ATOMIC_LOAD_I4:
+	case OP_ATOMIC_LOAD_I8:
+	case OP_ATOMIC_LOAD_U1:
+	case OP_ATOMIC_LOAD_U2:
+	case OP_ATOMIC_LOAD_U4:
+	case OP_ATOMIC_LOAD_U8:
+	case OP_ATOMIC_LOAD_R4:
+	case OP_ATOMIC_LOAD_R8:
+	case OP_ATOMIC_STORE_I1:
+	case OP_ATOMIC_STORE_I2:
+	case OP_ATOMIC_STORE_I4:
+	case OP_ATOMIC_STORE_I8:
+	case OP_ATOMIC_STORE_U1:
+	case OP_ATOMIC_STORE_U2:
+	case OP_ATOMIC_STORE_U4:
+	case OP_ATOMIC_STORE_U8:
+	case OP_ATOMIC_STORE_R4:
+	case OP_ATOMIC_STORE_R8:
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+CallInfo*
+mono_arch_get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig)
+{
+	return get_call_info (gsctx, mp, sig);
+}
diff --git a/mono/mini/mini-arm64.h b/mono/mini/mini-arm64.h
index a963c75..9241cfd 100644
--- a/mono/mini/mini-arm64.h
+++ b/mono/mini/mini-arm64.h
@@ -1 +1,250 @@
-#include "../../../mono-extensions/mono/mini/mini-arm64.h"
+/*
+ * mini-arm64.h
+ *
+ * Copyright 2013 Xamarin Inc
+ *
+ * Based on mini-arm.h:
+ *
+ * Copyright 2011 Xamarin Inc
+ */
+
+#ifndef __MONO_MINI_ARM64_H__
+#define __MONO_MINI_ARM64_H__
+
+#include <mono/arch/arm64/arm64-codegen.h>
+
+#define MONO_ARCH_CPU_SPEC mono_arm64_cpu_desc
+
+#define MONO_MAX_IREGS 32
+#define MONO_MAX_FREGS 32
+
+#define MONO_CONTEXT_SET_LLVM_EXC_REG(ctx, exc) do { (ctx)->regs [0] = (gsize)exc; } while (0)
+
+#define MONO_INIT_CONTEXT_FROM_FUNC(ctx,func) do {	\
+		MONO_CONTEXT_SET_BP ((ctx), __builtin_frame_address (0));	\
+		MONO_CONTEXT_SET_SP ((ctx), __builtin_frame_address (0));	\
+		MONO_CONTEXT_SET_IP ((ctx), (func));	\
+	} while (0)
+
+#define MONO_ARCH_INIT_TOP_LMF_ENTRY(lmf)
+
+/* Parameters used by the register allocator */
+/* r0..r7, r9..r14 (r15 is the imt/rgctx reg) */
+#define MONO_ARCH_CALLEE_REGS 0xfeff
+/* r19..r28 */
+#define MONO_ARCH_CALLEE_SAVED_REGS (0x3ff << 19)
+
+/* v15/v16 is reserved for a scratch reg */
+#define MONO_ARCH_CALLEE_FREGS 0xfffc00ff
+/* v8..v15 */
+#define MONO_ARCH_CALLEE_SAVED_FREGS 0xff00
+
+#define MONO_ARCH_USE_FPSTACK FALSE
+#define MONO_ARCH_FPSTACK_SIZE 0
+
+#define MONO_ARCH_INST_SREG2_MASK(ins) (0)
+
+#define MONO_ARCH_INST_FIXED_REG(desc) ((desc) == 'a' ? ARMREG_R0 : -1)
+
+#define MONO_ARCH_INST_IS_REGPAIR(desc) (0)
+
+#define MONO_ARCH_INST_IS_FLOAT(desc) ((desc) == 'f')
+
+#define MONO_ARCH_INST_REGPAIR_REG2(desc,hreg1) (-1)
+
+#define MONO_ARCH_USE_FPSTACK FALSE
+
+#define MONO_ARCH_FRAME_ALIGNMENT 16
+
+#define MONO_ARCH_CODE_ALIGNMENT 32
+
+/* callee saved regs + fp + sp */
+#define MONO_ARCH_LMF_REGS ((0x3ff << 19) | (1 << ARMREG_FP) | (1 << ARMREG_SP))
+#define MONO_ARCH_NUM_LMF_REGS (10 + 2)
+#define MONO_ARCH_FIRST_LMF_REG ARMREG_R19
+#define MONO_ARCH_LMF_REG_FP 10
+#define MONO_ARCH_LMF_REG_SP 11
+
+struct MonoLMF {
+	/* 
+	 * If the second lowest bit is set to 1, then this is a MonoLMFExt structure, and
+	 * the other fields are not valid.
+	 */
+	gpointer    previous_lmf;
+	gpointer    lmf_addr;
+	mgreg_t    pc;
+	mgreg_t    gregs [MONO_ARCH_NUM_LMF_REGS];
+};
+
+/* Structure used by the sequence points in AOTed code */
+typedef struct {
+	gpointer ss_trigger_page;
+	gpointer bp_trigger_page;
+	gpointer ss_tramp_addr;
+	guint8* bp_addrs [MONO_ZERO_LEN_ARRAY];
+} SeqPointInfo;
+
+#define PARAM_REGS 8
+#define FP_PARAM_REGS 8
+
+#define DYN_CALL_STACK_ARGS 6
+
+typedef struct {
+	/* The +1 is for r8 */
+	mgreg_t regs [PARAM_REGS + 1 + DYN_CALL_STACK_ARGS];
+	mgreg_t res, res2;
+	guint8 *ret;
+	double fpregs [FP_PARAM_REGS];
+	int n_fpargs, n_fpret;
+	guint8 buffer [256];
+} DynCallArgs;
+
+typedef struct {
+	gpointer cinfo;
+	int saved_gregs_offset;
+	/* Points to arguments received on the stack */
+	int args_reg;
+	gboolean cond_branch_islands;
+	gpointer vret_addr_loc;
+	gpointer seq_point_info_var;
+	gpointer ss_tramp_var;
+	gpointer bp_tramp_var;
+	guint8 *thunks;
+	int thunks_size;
+} MonoCompileArch;
+
+#define MONO_ARCH_EMULATE_FREM 1
+#define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS 1
+#define MONO_ARCH_EMULATE_LONG_MUL_OVF_OPTS 1
+#define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS 1
+#define MONO_ARCH_NEED_DIV_CHECK 1
+#define MONO_ARCH_EMULATE_MUL_OVF 1
+#define MONO_ARCH_HAVE_IMT 1
+#define MONO_ARCH_HAVE_OP_TAIL_CALL 1
+#define MONO_ARCH_THIS_AS_FIRST_ARG 1
+#define MONO_ARCH_RGCTX_REG ARMREG_R15
+#define MONO_ARCH_IMT_REG MONO_ARCH_RGCTX_REG
+#define MONO_ARCH_VTABLE_REG ARMREG_R0
+#define MONO_ARCH_EXC_REG ARMREG_R0
+#define MONO_ARCH_HAVE_XP_UNWIND 1
+#define MONO_ARCH_HAVE_CREATE_DELEGATE_TRAMPOLINE 1
+#define MONO_ARCH_HAVE_GENERALIZED_IMT_THUNK 1
+#define MONO_ARCH_USE_SIGACTION 1
+#define MONO_ARCH_HAVE_SIGCTX_TO_MONOCTX 1
+#define MONO_ARCH_HAVE_CONTEXT_SET_INT_REG 1
+#define MONO_ARCH_GSHARED_SUPPORTED 1
+#define MONO_ARCH_AOT_SUPPORTED 1
+#define MONO_ARCH_LLVM_SUPPORTED 1
+#define MONO_ARCH_HAVE_FULL_AOT_TRAMPOLINES 1
+#define MONO_ARCH_HAVE_EXCEPTIONS_INIT 1
+#define MONO_ARCH_HAVE_GET_TRAMPOLINES 1
+#define MONO_ARCH_DYN_CALL_SUPPORTED 1
+#define MONO_ARCH_DYN_CALL_PARAM_AREA (DYN_CALL_STACK_ARGS * 8)
+#define MONO_ARCH_SOFT_DEBUG_SUPPORTED 1
+#define MONO_ARCH_HAVE_SETUP_RESUME_FROM_SIGNAL_HANDLER_CTX 1
+#define MONO_ARCH_HAVE_GENERAL_RGCTX_LAZY_FETCH_TRAMPOLINE 1
+#ifndef MONO_CROSS_COMPILE
+#define MONO_ARCH_ENABLE_MONO_LMF_VAR 1
+#endif
+#define MONO_ARCH_HAVE_OP_GET_EX_OBJ 1
+#define MONO_ARCH_HAVE_OBJC_GET_SELECTOR 1
+#define MONO_ARCH_HAVE_SDB_TRAMPOLINES 1
+#define MONO_ARCH_HAVE_PATCH_CODE_NEW 1
+#define MONO_ARCH_HAVE_OP_GENERIC_CLASS_INIT 1
+
+#ifdef TARGET_IOS
+
+#define MONO_ARCH_REDZONE_SIZE 128
+
+#else
+
+#define MONO_ARCH_REDZONE_SIZE 0
+#if !defined(__PIC__)
+#define MONO_ARCH_HAVE_TLS_GET 1
+#endif
+#define MONO_ARCH_HAVE_TLS_GET_REG 1
+
+#endif
+
+/* Relocations */
+#define MONO_R_ARM64_B 1
+#define MONO_R_ARM64_BCC 2
+#define MONO_R_ARM64_IMM 3
+#define MONO_R_ARM64_BL 4
+#define MONO_R_ARM64_BL_SHORT 5
+#define MONO_R_ARM64_CBZ 6
+
+typedef enum {
+        ArgInIReg,
+        ArgInFReg,
+        ArgInFRegR4,
+        ArgOnStack,
+        ArgOnStackR8,
+        ArgOnStackR4,
+        /*
+         * Vtype passed in consecutive int registers.
+         * ainfo->reg is the first register,
+         * ainfo->nregs is the number of registers,
+         * ainfo->size is the size of the structure.
+         */
+        ArgVtypeInIRegs,
+        ArgVtypeByRef,
+        ArgVtypeByRefOnStack,
+        ArgVtypeOnStack,
+        ArgHFA,
+        ArgNone
+} ArgStorage;
+
+typedef struct {
+        ArgStorage storage;
+        int reg;
+        /* ArgOnStack */
+        int offset;
+        /* ArgVtypeInIRegs/ArgHFA */
+        int nregs, size;
+        /* ArgHFA */
+        int esize;
+        /* ArgHFA */
+        /* The offsets of the float values inside the arg */
+        guint16 foffsets [4];
+        /* ArgOnStack */
+        int slot_size;
+        gboolean sign;
+        gboolean gsharedvt;
+        gboolean hfa;
+} ArgInfo;
+
+typedef struct {
+        int nargs;
+        int gr, fr, stack_usage;
+        ArgInfo ret;
+        ArgInfo sig_cookie;
+        ArgInfo args [1];
+} CallInfo;
+
+guint8* mono_arm_emit_imm64 (guint8 *code, int dreg, gint64 imm);
+
+guint8* mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm);
+
+guint8* mono_arm_emit_destroy_frame (guint8 *code, int stack_offset, guint64 temp_regs);
+
+guint8* mono_arm_emit_store_regset (guint8 *code, guint64 regs, int basereg, int offset);
+
+guint8* mono_arm_emit_store_regarray (guint8 *code, guint64 regs, int basereg, int offset);
+
+guint8* mono_arm_emit_load_regarray (guint8 *code, guint64 regs, int basereg, int offset);
+
+/* MonoJumpInfo **ji */
+guint8* mono_arm_emit_aotconst (gpointer ji, guint8 *code, guint8 *code_start, int dreg, guint32 patch_type, gconstpointer data);
+
+void mono_arm_patch (guint8 *code, guint8 *target, int relocation);
+
+void mono_arm_throw_exception (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow);
+
+GSList* mono_arm_get_exception_trampolines (gboolean aot);
+
+void mono_arm_resume_unwind (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp_regs, gboolean corlib, gboolean rethrow);
+
+CallInfo* mono_arch_get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig);
+
+#endif /* __MONO_MINI_ARM64_H__ */
diff --git a/mono/mini/tramp-arm64.c b/mono/mini/tramp-arm64.c
index 1650437..376bb2f 100644
--- a/mono/mini/tramp-arm64.c
+++ b/mono/mini/tramp-arm64.c
@@ -1 +1,657 @@
-#include "../../../mono-extensions/mono/mini/tramp-arm64.c"
+/*
+ * tramp-arm64.c: JIT trampoline code for ARM64
+ *
+ * Copyright 2013 Xamarin Inc
+ *
+ * Based on tramp-arm.c:
+ * 
+ * Authors:
+ *   Paolo Molaro (lupus at ximian.com)
+ *
+ * (C) 2001-2003 Ximian, Inc.
+ * Copyright 2003-2011 Novell Inc
+ * Copyright 2011 Xamarin Inc
+ */
+
+#include "mini.h"
+#include "debugger-agent.h"
+
+#include <mono/arch/arm64/arm64-codegen.h>
+#include <mono/metadata/abi-details.h>
+
+#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
+
+void
+mono_arch_patch_callsite (guint8 *method_start, guint8 *code_ptr, guint8 *addr)
+{
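+	/* code_ptr points just past the call site; the bl to patch is the preceding 4-byte instruction */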
+	mono_arm_patch (code_ptr - 4, addr, MONO_R_ARM64_BL);
+	mono_arch_flush_icache (code_ptr - 4, 4);
+}
+
+void
+mono_arch_patch_plt_entry (guint8 *code, gpointer *got, mgreg_t *regs, guint8 *addr)
+{
+	guint32 ins;
+	guint64 slot_addr;
+	int disp;
+
+	/* 
+	 * Decode the address loaded by the PLT entry emitted by arch_emit_plt_entry () in
+	 * aot-compiler.c
+	 */
+
+	/* adrp */
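+	/* The 21-bit page displacement is split into immlo (bits 30:29) and immhi (bits 23:5) */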
+	ins = ((guint32*)code) [0];
+	g_assert (((ins >> 24) & 0x1f) == 0x10);
+	disp = (((ins >> 5) & 0x7ffff) << 2) | ((ins >> 29) & 0x3);
+	/* FIXME: disp is signed */
+	g_assert ((disp >> 20) == 0);
+
+	slot_addr = ((guint64)code + (disp << 12)) & ~0xfff;
+
+	/* add x16, x16, :lo12:got */
+	ins = ((guint32*)code) [1];
+	g_assert (((ins >> 22) & 0x3) == 0);
+	slot_addr += (ins >> 10) & 0xfff;
+
+	/* ldr x16, [x16, <offset>] */
+	ins = ((guint32*)code) [2];
+	g_assert (((ins >> 24) & 0x3f) == 0x39);
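+	/* The 12-bit unsigned ldr offset is scaled by the access size, 8 bytes here */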
+	slot_addr += ((ins >> 10) & 0xfff) * 8;
+
+	g_assert (*(guint64*)slot_addr);
+	*(gpointer*)slot_addr = addr;
+}
+
+void
+mono_arch_nullify_class_init_trampoline (guint8 *code, mgreg_t *regs)
+{
+	mono_arch_patch_callsite (NULL, code, mini_get_nullified_class_init_trampoline ());
+}
+
+guint8*
+mono_arch_get_call_target (guint8 *code)
+{
+	guint32 imm;
+	int disp;
+
+	code -= 4;
+
+	imm = *(guint32*)code;
+	/* Should be a bl */
+	g_assert (((imm >> 31) & 0x1) == 0x1);
+	g_assert (((imm >> 26) & 0x7) == 0x5);
+
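+	/* bl encodes a signed 26-bit offset counted in 4-byte instructions */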
+	disp = (imm & 0x3ffffff);
+	if ((disp >> 25) != 0)
+		/* Negative, sign extend to 32 bits */
+		disp = disp | 0xfc000000;
+
+	return code + (disp * 4);
+}
+
+guint32
+mono_arch_get_plt_info_offset (guint8 *plt_entry, mgreg_t *regs, guint8 *code)
+{
+	/* The offset is stored as the 5th word of the plt entry */
+	return ((guint32*)plt_entry) [4];
+}
+
+#ifndef DISABLE_JIT
+
+guchar*
+mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot)
+{
+	guint8 *code, *buf, *tramp;
+	int i, buf_len, imm;
+	int frame_size, offset, gregs_offset, num_fregs, fregs_offset, arg_offset, lmf_offset;
+	guint64 gregs_regset;
+	GSList *unwind_ops = NULL;
+	MonoJumpInfo *ji = NULL;
+	char *tramp_name;
+
+	buf_len = 768;
+	buf = code = mono_global_codeman_reserve (buf_len);
+
+	/*
+	 * We are getting called by a specific trampoline, ip1 contains the trampoline argument.
+	 */
+
+	/* Compute stack frame size and offsets */
+	offset = 0;
+	/* frame block */
+	offset += 2 * 8;
+	/* gregs */
+	gregs_offset = offset;
+	offset += 32 * 8;
+	/* fregs */
+	// FIXME: Save 128 bits
+	/* Only have to save the argument regs */
+	num_fregs = 8;
+	fregs_offset = offset;
+	offset += num_fregs * 8;
+	/* arg */
+	arg_offset = offset;
+	offset += 8;
+	/* LMF */
+	lmf_offset = offset;
+	offset += sizeof (MonoLMF);
+	//offset += 22 * 8;
+	frame_size = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT);
+
+	/* Setup stack frame */
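+	/* Allocate the frame in 256-byte chunks so each subtraction fits in the immediate field */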
+	imm = frame_size;
+	while (imm > 256) {
+		arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256);
+		imm -= 256;
+	}
+	arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm);
+	arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0);
+	arm_movspx (code, ARMREG_FP, ARMREG_SP);
+
+	/* Save gregs */
+	// FIXME: Optimize this
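+	/* fp and sp are excluded; the caller's fp/sp values are stored into the LMF below */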
+	gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP));
+	code = mono_arm_emit_store_regarray (code, gregs_regset, ARMREG_FP, gregs_offset);
+	/* Save fregs */
+	for (i = 0; i < num_fregs; ++i)
+		arm_strfpx (code, i, ARMREG_FP, fregs_offset + (i * 8));
+	/* Save trampoline arg */
+	arm_strx (code, ARMREG_IP1, ARMREG_FP, arg_offset);
+
+	/* Setup LMF */
+	arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, lmf_offset);
+	code = mono_arm_emit_store_regset (code, MONO_ARCH_LMF_REGS, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs));
+	/* Save caller fp */
+	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, 0);
+	arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs) + (MONO_ARCH_LMF_REG_FP * 8));
+	/* Save caller sp */
+	arm_movx (code, ARMREG_IP1, ARMREG_FP);
+	imm = frame_size;
+	while (imm > 256) {
+		arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256);
+		imm -= 256;
+	}
+	arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm);
+	arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, gregs) + (MONO_ARCH_LMF_REG_SP * 8));
+	/* Save caller pc */
+	if (tramp_type == MONO_TRAMPOLINE_JUMP)
+		arm_movx (code, ARMREG_LR, ARMREG_RZR);
+	else
+		arm_ldrx (code, ARMREG_LR, ARMREG_FP, 8);
+	arm_strx (code, ARMREG_LR, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, pc));
+
+	/* Save LMF */
+	/* Similar to emit_save_lmf () */
+	if (aot) {
+		code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_get_lmf_addr");
+	} else {
+		tramp = (guint8*)mono_get_lmf_addr;
+		code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp);
+	}
+	arm_blrx (code, ARMREG_IP0);
+	/* r0 contains the address of the tls slot holding the current lmf */
+	/* ip0 = lmf */
+	arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, lmf_offset);
+	/* lmf->lmf_addr = lmf_addr */
+	arm_strx (code, ARMREG_R0, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, lmf_addr));
+	/* lmf->previous_lmf = *lmf_addr */
+	arm_ldrx (code, ARMREG_IP1, ARMREG_R0, 0);
+	arm_strx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, previous_lmf));
+	/* *lmf_addr = lmf */
+	arm_strx (code, ARMREG_IP0, ARMREG_R0, 0);
+
+	/* Call the C trampoline function */
+	/* Arg 1 = gregs */
+	arm_addx_imm (code, ARMREG_R0, ARMREG_FP, gregs_offset);
+	/* Arg 2 = caller */
+	if (tramp_type == MONO_TRAMPOLINE_JUMP)
+		arm_movx (code, ARMREG_R1, ARMREG_RZR);
+	else
+		arm_ldrx (code, ARMREG_R1, ARMREG_FP, gregs_offset + (ARMREG_LR * 8));
+	/* Arg 3 = arg */
+	if (MONO_TRAMPOLINE_TYPE_HAS_ARG (tramp_type))
+		/* Passed in r0 */
+		arm_ldrx (code, ARMREG_R2, ARMREG_FP, gregs_offset + (ARMREG_R0 * 8));
+	else
+		arm_ldrx (code, ARMREG_R2, ARMREG_FP, arg_offset);
+	/* Arg 4 = trampoline addr */
+	arm_movx (code, ARMREG_R3, ARMREG_RZR);
+
+	if (aot) {
+		char *icall_name = g_strdup_printf ("trampoline_func_%d", tramp_type);
+		code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, icall_name);
+	} else {
+		tramp = (guint8*)mono_get_trampoline_func (tramp_type);
+		code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp);
+	}
+	arm_blrx (code, ARMREG_IP0);
+
+	/* Restore LMF */
+	/* Similar to emit_restore_lmf () */
+	/* Clobbers ip0/ip1 */
+	/* ip0 = lmf */
+	arm_addx_imm (code, ARMREG_IP0, ARMREG_FP, lmf_offset);
+	/* ip1 = lmf->previous_lmf */
+	arm_ldrx (code, ARMREG_IP1, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, previous_lmf));
+	/* ip0 = lmf->lmf_addr */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_IP0, MONO_STRUCT_OFFSET (MonoLMF, lmf_addr));
+	/* *lmf_addr = previous_lmf */
+	arm_strx (code, ARMREG_IP1, ARMREG_IP0, 0);
+
+	/* Save the result to ip1 */
+	arm_movx (code, ARMREG_IP1, ARMREG_R0);
+
+	/* Restore gregs */
+	/* Only have to load the argument regs (r0..r8), lr and the rgctx reg */
+	code = mono_arm_emit_load_regarray (code, 0x1ff | (1 << ARMREG_LR) | (1 << MONO_ARCH_RGCTX_REG), ARMREG_FP, gregs_offset);
+	/* Restore fregs */
+	for (i = 0; i < num_fregs; ++i)
+		arm_ldrfpx (code, i, ARMREG_FP, fregs_offset + (i * 8));
+
+	/* These trampolines return a value */
+	if (tramp_type == MONO_TRAMPOLINE_RGCTX_LAZY_FETCH)
+		arm_movx (code, ARMREG_R0, ARMREG_IP1);
+
+	/* Cleanup frame */
+	code = mono_arm_emit_destroy_frame (code, frame_size, ((1 << ARMREG_IP0)));
+
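+	/* Class init and rgctx fetch trampolines return to the caller; the others tail call the code address returned by the trampoline function */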
+	if ((tramp_type == MONO_TRAMPOLINE_CLASS_INIT) || (tramp_type == MONO_TRAMPOLINE_GENERIC_CLASS_INIT) || (tramp_type == MONO_TRAMPOLINE_RGCTX_LAZY_FETCH))
+		arm_retx (code, ARMREG_LR);
+	else
+		arm_brx (code, ARMREG_IP1);
+
+	g_assert ((code - buf) < buf_len);
+	mono_arch_flush_icache (buf, code - buf);
+
+	if (info) {
+		tramp_name = mono_get_generic_trampoline_name (tramp_type);
+		*info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops);
+		g_free (tramp_name);
+	}
+
+	return buf;
+}
+
+gpointer
+mono_arch_get_nullified_class_init_trampoline (MonoTrampInfo **info)
+{
+	guint8 *buf, *code;
+
+	code = buf = mono_global_codeman_reserve (16);
+
+	arm_retx (code, ARMREG_LR);
+
+	mono_arch_flush_icache (buf, code - buf);
+
+	if (info)
+		*info = mono_tramp_info_create ("nullified_class_init_trampoline", buf, code - buf, NULL, NULL);
+
+	return buf;
+}
+
+gpointer
+mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_type, MonoDomain *domain, guint32 *code_len)
+{
+	guint8 *code, *buf, *tramp;
+	int buf_len = 64;
+
+	/*
+	 * Return a trampoline which calls generic trampoline TRAMP_TYPE passing in ARG1.
+	 * Pass the argument in ip1, clobbering ip0.
+	 */
+	tramp = mono_get_trampoline_code (tramp_type);
+
+	buf = code = mono_global_codeman_reserve (buf_len);
+
+	code = mono_arm_emit_imm64 (code, ARMREG_IP1, (guint64)arg1);
+	code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp);
+
+	arm_brx (code, ARMREG_IP0);
+
+	g_assert ((code - buf) < buf_len);
+	mono_arch_flush_icache (buf, code - buf);
+	if (code_len)
+		*code_len = code - buf;
+
+	return buf;
+}
+
+gpointer
+mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr)
+{
+	guint8 *code, *start;
+	guint32 size = 32;
+	MonoDomain *domain = mono_domain_get ();
+
+	start = code = mono_domain_code_reserve (domain, size);
+	code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr);
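+	/* Skip the MonoObject header so the receiver points at the unboxed value */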
+	arm_addx_imm (code, ARMREG_R0, ARMREG_R0, sizeof (MonoObject));
+	arm_brx (code, ARMREG_IP0);
+
+	g_assert ((code - start) <= size);
+	mono_arch_flush_icache (start, code - start);
+	return start;
+}
+
+gpointer
+mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericContext *mrgctx, gpointer addr)
+{
+	guint8 *code, *start;
+	guint32 buf_len = 32;
+	MonoDomain *domain = mono_domain_get ();
+
+	start = code = mono_domain_code_reserve (domain, buf_len);
+	code = mono_arm_emit_imm64 (code, MONO_ARCH_RGCTX_REG, (guint64)mrgctx);
+	code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr);
+	arm_brx (code, ARMREG_IP0);
+
+	g_assert ((code - start) <= buf_len);
+
+	mono_arch_flush_icache (start, code - start);
+
+	return start;
+}
+
+gpointer
+mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info, gboolean aot)
+{
+	guint8 *code, *buf;
+	int buf_size;
+	int i, depth, index, njumps;
+	gboolean is_mrgctx;
+	guint8 **rgctx_null_jumps;
+	MonoJumpInfo *ji = NULL;
+	GSList *unwind_ops = NULL;
+	guint8 *tramp;
+	guint32 code_len;
+
+	is_mrgctx = MONO_RGCTX_SLOT_IS_MRGCTX (slot);
+	index = MONO_RGCTX_SLOT_INDEX (slot);
+	if (is_mrgctx)
+		index += MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT / sizeof (gpointer);
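+	/*
+	 * The rgctx is a chain of arrays of increasing size; the first slot of
+	 * each array links to the next one, so payload slot I lives at offset
+	 * (I + 1) * sizeof (gpointer). Compute the array (depth) and the index
+	 * inside it which hold SLOT.
+	 */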
+	for (depth = 0; ; ++depth) {
+		int size = mono_class_rgctx_get_array_size (depth, is_mrgctx);
+
+		if (index < size - 1)
+			break;
+		index -= size - 1;
+	}
+
+	buf_size = 64 + 16 * depth;
+	code = buf = mono_global_codeman_reserve (buf_size);
+
+	rgctx_null_jumps = g_malloc0 (sizeof (guint8*) * (depth + 2));
+	njumps = 0;
+
+	/* The vtable/mrgtx is in R0 */
+	g_assert (MONO_ARCH_VTABLE_REG == ARMREG_R0);
+
+	if (is_mrgctx) {
+		/* get mrgctx ptr */
+		arm_movx (code, ARMREG_IP1, ARMREG_R0);
+	} else {
+		/* load rgctx ptr from vtable */
+		code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_R0, MONO_STRUCT_OFFSET (MonoVTable, runtime_generic_context));
+		/* is the rgctx ptr null? */
+		/* if yes, jump to actual trampoline */
+		rgctx_null_jumps [njumps ++] = code;
+		arm_cbzx (code, ARMREG_IP1, 0);
+	}
+
+	for (i = 0; i < depth; ++i) {
+		/* load ptr to next array */
+		if (is_mrgctx && i == 0) {
+			code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT);
+		} else {
+			code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, 0);
+		}
+		/* is the ptr null? */
+		/* if yes, jump to actual trampoline */
+		rgctx_null_jumps [njumps ++] = code;
+		arm_cbzx (code, ARMREG_IP1, 0);
+	}
+
+	/* fetch slot */
+	code = mono_arm_emit_ldrx (code, ARMREG_IP1, ARMREG_IP1, sizeof (gpointer) * (index + 1));
+	/* is the slot null? */
+	/* if yes, jump to actual trampoline */
+	rgctx_null_jumps [njumps ++] = code;
+	arm_cbzx (code, ARMREG_IP1, 0);
+	/* otherwise return, result is in IP1 */
+	arm_movx (code, ARMREG_R0, ARMREG_IP1);
+	arm_brx (code, ARMREG_LR);
+
+	g_assert (njumps <= depth + 2);
+	for (i = 0; i < njumps; ++i)
+		mono_arm_patch (rgctx_null_jumps [i], code, MONO_R_ARM64_CBZ);
+
+	g_free (rgctx_null_jumps);
+
+	/* Slowpath */
+
+	/* Call mono_rgctx_lazy_fetch_trampoline (), passing in the slot as argument */
+	/* The vtable/mrgctx is still in R0 */
+	if (aot) {
+		code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, g_strdup_printf ("specific_trampoline_lazy_fetch_%u", slot));
+	} else {
+		tramp = mono_arch_create_specific_trampoline (GUINT_TO_POINTER (slot), MONO_TRAMPOLINE_RGCTX_LAZY_FETCH, mono_get_root_domain (), &code_len);
+		code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)tramp);
+	}
+	arm_brx (code, ARMREG_IP0);
+
+	mono_arch_flush_icache (buf, code - buf);
+
+	g_assert (code - buf <= buf_size);
+
+	if (info) {
+		char *name = mono_get_rgctx_fetch_trampoline_name (slot);
+		*info = mono_tramp_info_create (name, buf, code - buf, ji, unwind_ops);
+		g_free (name);
+	}
+
+	return buf;
+}
+
+gpointer
+mono_arch_create_general_rgctx_lazy_fetch_trampoline (MonoTrampInfo **info, gboolean aot)
+{
+	guint8 *code, *buf;
+	int tramp_size;
+	MonoJumpInfo *ji = NULL;
+	GSList *unwind_ops = NULL;
+
+	g_assert (aot);
+
+	tramp_size = 32;
+
+	code = buf = mono_global_codeman_reserve (tramp_size);
+
+	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, 0);
+
+	// FIXME: Currently, we always go to the slow path.
+	/* Load trampoline addr */
+	arm_ldrx (code, ARMREG_IP0, MONO_ARCH_RGCTX_REG, 8);
+	/* The vtable/mrgctx is in R0 */
+	g_assert (MONO_ARCH_VTABLE_REG == ARMREG_R0);
+	arm_brx (code, ARMREG_IP0);
+
+	mono_arch_flush_icache (buf, code - buf);
+
+	g_assert (code - buf <= tramp_size);
+
+	if (info)
+		*info = mono_tramp_info_create ("rgctx_fetch_trampoline_general", buf, code - buf, ji, unwind_ops);
+
+	return buf;
+}
+
+/*
+ * mono_arch_create_sdb_trampoline:
+ *
+ *   Return a trampoline which captures the current context, passes it to
+ * debugger_agent_single_step_from_context ()/debugger_agent_breakpoint_from_context (),
+ * then restores the (potentially changed) context.
+ */
+guint8*
+mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gboolean aot)
+{
+	int tramp_size = 512;
+	int offset, imm, frame_size, ctx_offset;
+	guint64 gregs_regset;
+	guint8 *code, *buf;
+	GSList *unwind_ops = NULL;
+	MonoJumpInfo *ji = NULL;
+
+	code = buf = mono_global_codeman_reserve (tramp_size);
+
+	/* Compute stack frame size and offsets */
+	offset = 0;
+	/* frame block */
+	offset += 2 * 8;
+	/* MonoContext */
+	ctx_offset = offset;
+	offset += sizeof (MonoContext);
+	offset = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT);
+	frame_size = offset;
+
+	// FIXME: Unwind info
+
+	/* Setup stack frame */
+	imm = frame_size;
+	while (imm > 256) {
+		arm_subx_imm (code, ARMREG_SP, ARMREG_SP, 256);
+		imm -= 256;
+	}
+	arm_subx_imm (code, ARMREG_SP, ARMREG_SP, imm);
+	arm_stpx (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, 0);
+	arm_movspx (code, ARMREG_FP, ARMREG_SP);
+
+	/* Initialize a MonoContext structure on the stack */
+	/* No need to save fregs */
+	gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP));
+	code = mono_arm_emit_store_regarray (code, gregs_regset, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs));
+	/* Save caller fp */
+	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, 0);
+	arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8));
+	/* Save caller sp */
+	arm_movx (code, ARMREG_IP1, ARMREG_FP);
+	imm = frame_size;
+	while (imm > 256) {
+		arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, 256);
+		imm -= 256;
+	}
+	arm_addx_imm (code, ARMREG_IP1, ARMREG_IP1, imm);
+	arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_SP * 8));
+	/* Save caller ip */
+	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, 8);
+	arm_strx (code, ARMREG_IP1, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, pc));
+
+	/* Call the single step/breakpoint function in sdb */
+	/* Arg1 = ctx */
+	arm_addx_imm (code, ARMREG_R0, ARMREG_FP, ctx_offset);
+	if (aot) {
+		if (single_step)
+			code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "debugger_agent_single_step_from_context");
+		else
+			code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "debugger_agent_breakpoint_from_context");
+	} else {
+		gpointer addr = single_step ? debugger_agent_single_step_from_context : debugger_agent_breakpoint_from_context;
+
+		code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr);
+	}
+	arm_blrx (code, ARMREG_IP0);
+
+	/* Restore ctx */
+	/* Save fp/pc into the frame block */
+	arm_ldrx (code, ARMREG_IP0, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8));
+	arm_strx (code, ARMREG_IP0, ARMREG_FP, 0);
+	arm_ldrx (code, ARMREG_IP0, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, pc));
+	arm_strx (code, ARMREG_IP0, ARMREG_FP, 8);
+	gregs_regset = ~((1 << ARMREG_FP) | (1 << ARMREG_SP));
+	code = mono_arm_emit_load_regarray (code, gregs_regset, ARMREG_FP, ctx_offset + G_STRUCT_OFFSET (MonoContext, regs));
+
+	code = mono_arm_emit_destroy_frame (code, frame_size, ((1 << ARMREG_IP0) | (1 << ARMREG_IP1)));
+
+	arm_retx (code, ARMREG_LR);
+
+	mono_arch_flush_icache (buf, code - buf);
+	g_assert (code - buf <= tramp_size);
+
+	const char *tramp_name = single_step ? "sdb_single_step_trampoline" : "sdb_breakpoint_trampoline";
+	*info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops);
+
+	return buf;
+}
+
+#else /* DISABLE_JIT */
+
+guchar*
+mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer
+mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_type, MonoDomain *domain, guint32 *code_len)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer
+mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer
+mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericContext *mrgctx, gpointer addr)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer
+mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+gpointer
+mono_arch_get_nullified_class_init_trampoline (MonoTrampInfo **info)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+guint8*
+mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gboolean aot)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+#endif /* !DISABLE_JIT */
+
+#if defined(MONO_ARCH_GSHAREDVT_SUPPORTED)
+
+gpointer
+mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
+{
+	if (info)
+		*info = NULL;
+	return NULL;
+}
+
+gpointer
+mono_arch_get_gsharedvt_arg_trampoline (MonoDomain *domain, gpointer arg, gpointer addr)
+{
+	g_assert_not_reached ();
+	return NULL;
+}
+
+#endif /* MONO_ARCH_GSHAREDVT_SUPPORTED */

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-mono/packages/mono.git


