[gcc-7] 292/354: * Update the Linaro support to the 7-2017.09 snapshot.
Ximin Luo
infinity0 at debian.org
Thu Nov 23 15:51:21 UTC 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch master
in repository gcc-7.
commit c59d8f88bd5c92ad3e451dc781eca309992cd57f
Author: doko <doko at 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca>
Date: Thu Sep 14 19:25:08 2017 +0000
* Update the Linaro support to the 7-2017.09 snapshot.
git-svn-id: svn+ssh://svn.debian.org/svn/gcccvs/branches/sid/gcc-7@9683 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
debian/changelog | 1 +
debian/patches/gcc-linaro-doc.diff | 67 +-
debian/patches/gcc-linaro-no-macros.diff | 2 +-
debian/patches/gcc-linaro.diff | 2700 ++++++++++++++++++++++++++++--
4 files changed, 2637 insertions(+), 133 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index e70af52..4929e0f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,7 @@ gcc-7 (7.2.0-5) UNRELEASED; urgency=medium
* Enable libgo tests and rebuilds with make -C (Svante Signell).
Closes: #873929.
* Fix PR sanitizer/77631, support separate debug info in libbacktrace.
+ * Update the Linaro support to the 7-2017.09 snapshot.
-- Matthias Klose <doko at debian.org> Wed, 13 Sep 2017 22:52:19 +0200
diff --git a/debian/patches/gcc-linaro-doc.diff b/debian/patches/gcc-linaro-doc.diff
index ac879e0..37ac4c9 100644
--- a/debian/patches/gcc-linaro-doc.diff
+++ b/debian/patches/gcc-linaro-doc.diff
@@ -1,4 +1,4 @@
-# DP: Changes for the Linaro 7-2017.08 snapshot (documentation).
+# DP: Changes for the Linaro 7-2017.09 snapshot (documentation).
--- a/src/gcc/doc/install.texi
+++ b/src/gcc/doc/install.texi
@@ -28,8 +28,56 @@
@item Option @tab aprofile @tab rmprofile
--- a/src/gcc/doc/invoke.texi
+++ b/src/gcc/doc/invoke.texi
-@@ -14076,6 +14076,10 @@ Enable Large System Extension instructions. This is on by default for
+@@ -580,15 +580,14 @@ Objective-C and Objective-C++ Dialects}.
+ -mgeneral-regs-only @gol
+ -mcmodel=tiny -mcmodel=small -mcmodel=large @gol
+ -mstrict-align @gol
+--momit-leaf-frame-pointer -mno-omit-leaf-frame-pointer @gol
++-momit-leaf-frame-pointer @gol
+ -mtls-dialect=desc -mtls-dialect=traditional @gol
+ -mtls-size=@var{size} @gol
+--mfix-cortex-a53-835769 -mno-fix-cortex-a53-835769 @gol
+--mfix-cortex-a53-843419 -mno-fix-cortex-a53-843419 @gol
+--mlow-precision-recip-sqrt -mno-low-precision-recip-sqrt @gol
+--mlow-precision-sqrt -mno-low-precision-sqrt @gol
+--mlow-precision-div -mno-low-precision-div @gol
+--march=@var{name} -mcpu=@var{name} -mtune=@var{name}}
++-mfix-cortex-a53-835769 -mfix-cortex-a53-843419 @gol
++-mlow-precision-recip-sqrt -mlow-precision-sqrt -mlow-precision-div @gol
++-mpc-relative-literal-loads @gol
++-msign-return-address=@var{scope} @gol
++-march=@var{name} -mcpu=@var{name} -mtune=@var{name} -moverride=@var{string}}
+
+ @emph{Adapteva Epiphany Options}
+ @gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs @gol
+@@ -13961,7 +13960,7 @@ support for the ARMv8.2-A architecture extensions.
+
+ The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler
+ support for the ARMv8.1-A architecture extension. In particular, it
+-enables the @samp{+crc} and @samp{+lse} features.
++enables the @samp{+crc}, @samp{+lse}, and @samp{+rdma} features.
+
+ The value @samp{native} is available on native AArch64 GNU/Linux and
+ causes the compiler to pick the architecture of the host system. This
+@@ -14034,8 +14033,10 @@ across releases.
+ This option is only intended to be useful when developing GCC.
+
+ @item -mpc-relative-literal-loads
++@itemx -mno-pc-relative-literal-loads
+ @opindex mpc-relative-literal-loads
+-Enable PC-relative literal loads. With this option literal pools are
++@opindex mno-pc-relative-literal-loads
++Enable or disable PC-relative literal loads. With this option literal pools are
+ accessed using a single instruction and emitted after each function. This
+ limits the maximum size of functions to 1MB. This is enabled by default for
+ @option{-mcmodel=tiny}.
+@@ -14074,8 +14075,15 @@ instructions. This is on by default for all possible values for options
+ @item lse
+ Enable Large System Extension instructions. This is on by default for
@option{-march=armv8.1-a}.
++@item rdma
++Enable Round Double Multiply Accumulate instructions. This is on by default
++for @option{-march=armv8.1-a}.
@item fp16
Enable FP16 extension. This also enables floating-point instructions.
+@item rcpc
@@ -41,7 +89,20 @@
--- a/src/gcc/doc/sourcebuild.texi
+++ b/src/gcc/doc/sourcebuild.texi
-@@ -2274,6 +2274,11 @@ the codeset to convert to.
+@@ -1570,6 +1570,12 @@ Test system supports executing NEON v2 instructions.
+ ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible
+ options. Some multilibs may be incompatible with these options.
+
++@item arm_neon_ok_no_float_abi
++@anchor{arm_neon_ok_no_float_abi}
++ARM Target supports NEON with @code{-mfpu=neon}, but without any
++-mfloat-abi= option. Some multilibs may be incompatible with this
++option.
++
+ @item arm_neonv2_ok
+ @anchor{arm_neonv2_ok}
+ ARM Target supports @code{-mfpu=neon-vfpv4 -mfloat-abi=softfp} or compatible
+@@ -2274,6 +2280,11 @@ the codeset to convert to.
Skip the test if the target does not support profiling with option
@var{profopt}.
diff --git a/debian/patches/gcc-linaro-no-macros.diff b/debian/patches/gcc-linaro-no-macros.diff
index 737d486..c94dbe8 100644
--- a/debian/patches/gcc-linaro-no-macros.diff
+++ b/debian/patches/gcc-linaro-no-macros.diff
@@ -89,4 +89,4 @@ Index: b/src/gcc/LINARO-VERSION
--- a/src/gcc/LINARO-VERSION
+++ /dev/null
@@ -1,1 +0,0 @@
--Snapshot 7.2-2017.08
+-Snapshot 7.2-2017.09
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index 4df4ae0..9bbc4e7 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,8 +1,8 @@
-# DP: Changes for the Linaro 7-2017.08 snapshot.
+# DP: Changes for the Linaro 7-2017.09 snapshot.
MSG=$(git log origin/linaro/gcc-7-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-7-branch --format=format:"%H" -n 1 --grep "gcc-7-branch@${SVN%.}"
-LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefefa91b044ffa4a4b868ef7188e5255a \
+LANG=C git diff --no-renames bb85d61e6bfbadee4494e034a5d8187cf0626aed 1604249e382610b087a72d0d07103f815458cec0 \
| egrep -v '^(diff|index) ' \
| filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ \
| sed 's,a/src//dev/null,/dev/null,'
@@ -10,7 +10,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
--- /dev/null
+++ b/src/gcc/LINARO-VERSION
@@ -0,0 +1 @@
-+Snapshot 7.2-2017.08
++Snapshot 7.2-2017.09
--- a/src/gcc/Makefile.in
+++ b/src/gcc/Makefile.in
@@ -845,10 +845,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
@@ -46,6 +46,318 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
+--- a/src/gcc/ccmp.c
++++ b/src/gcc/ccmp.c
+@@ -38,6 +38,29 @@ along with GCC; see the file COPYING3. If not see
+ #include "ccmp.h"
+ #include "predict.h"
+
++/* Check whether T is a simple boolean variable or a SSA name
++ set by a comparison operator in the same basic block. */
++static bool
++ccmp_tree_comparison_p (tree t, basic_block bb)
++{
++ gimple *g = get_gimple_for_ssa_name (t);
++ tree_code tcode;
++
++ /* If we have a boolean variable allow it and generate a compare
++ to zero reg when expanding. */
++ if (!g)
++ return (TREE_CODE (TREE_TYPE (t)) == BOOLEAN_TYPE);
++
++ /* Check to see if SSA name is set by a comparison operator in
++ the same basic block. */
++ if (!is_gimple_assign (g))
++ return false;
++ if (bb != gimple_bb (g))
++ return false;
++ tcode = gimple_assign_rhs_code (g);
++ return TREE_CODE_CLASS (tcode) == tcc_comparison;
++}
++
+ /* The following functions expand conditional compare (CCMP) instructions.
+ Here is a short description about the over all algorithm:
+ * ccmp_candidate_p is used to identify the CCMP candidate
+@@ -71,49 +94,69 @@ along with GCC; see the file COPYING3. If not see
+ static bool
+ ccmp_candidate_p (gimple *g)
+ {
+- tree rhs = gimple_assign_rhs_to_tree (g);
++ tree rhs;
+ tree lhs, op0, op1;
+ gimple *gs0, *gs1;
+- tree_code tcode, tcode0, tcode1;
+- tcode = TREE_CODE (rhs);
++ tree_code tcode;
++ basic_block bb;
++
++ if (!g)
++ return false;
+
++ rhs = gimple_assign_rhs_to_tree (g);
++ tcode = TREE_CODE (rhs);
+ if (tcode != BIT_AND_EXPR && tcode != BIT_IOR_EXPR)
+ return false;
+
+ lhs = gimple_assign_lhs (g);
+ op0 = TREE_OPERAND (rhs, 0);
+ op1 = TREE_OPERAND (rhs, 1);
++ bb = gimple_bb (g);
+
+ if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
+ || !has_single_use (lhs))
+ return false;
+
+- gs0 = get_gimple_for_ssa_name (op0);
+- gs1 = get_gimple_for_ssa_name (op1);
+- if (!gs0 || !gs1 || !is_gimple_assign (gs0) || !is_gimple_assign (gs1)
+- /* g, gs0 and gs1 must be in the same basic block, since current stage
+- is out-of-ssa. We can not guarantee the correctness when forwording
+- the gs0 and gs1 into g whithout DATAFLOW analysis. */
+- || gimple_bb (gs0) != gimple_bb (gs1)
+- || gimple_bb (gs0) != gimple_bb (g))
+- return false;
++ gs0 = get_gimple_for_ssa_name (op0); /* gs0 may be NULL */
++ gs1 = get_gimple_for_ssa_name (op1); /* gs1 may be NULL */
+
+- tcode0 = gimple_assign_rhs_code (gs0);
+- tcode1 = gimple_assign_rhs_code (gs1);
+- if (TREE_CODE_CLASS (tcode0) == tcc_comparison
+- && TREE_CODE_CLASS (tcode1) == tcc_comparison)
++ if (ccmp_tree_comparison_p (op0, bb) && ccmp_tree_comparison_p (op1, bb))
+ return true;
+- if (TREE_CODE_CLASS (tcode0) == tcc_comparison
+- && ccmp_candidate_p (gs1))
++ if (ccmp_tree_comparison_p (op0, bb) && ccmp_candidate_p (gs1))
+ return true;
+- else if (TREE_CODE_CLASS (tcode1) == tcc_comparison
+- && ccmp_candidate_p (gs0))
++ if (ccmp_tree_comparison_p (op1, bb) && ccmp_candidate_p (gs0))
+ return true;
+ /* We skip ccmp_candidate_p (gs1) && ccmp_candidate_p (gs0) since
+- there is no way to set the CC flag. */
++ there is no way to set and maintain the CC flag on both sides of
++ the logical operator at the same time. */
+ return false;
+ }
+
++/* Extract the comparison we want to do from the tree. */
++void
++get_compare_parts (tree t, int *up, rtx_code *rcode,
++ tree *rhs1, tree *rhs2)
++{
++ tree_code code;
++ gimple *g = get_gimple_for_ssa_name (t);
++ if (g)
++ {
++ *up = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)));
++ code = gimple_assign_rhs_code (g);
++ *rcode = get_rtx_code (code, *up);
++ *rhs1 = gimple_assign_rhs1 (g);
++ *rhs2 = gimple_assign_rhs2 (g);
++ }
++ else
++ {
++ /* If g is not a comparison operator create a compare to zero. */
++ *up = 1;
++ *rcode = NE;
++ *rhs1 = t;
++ *rhs2 = build_zero_cst (TREE_TYPE (t));
++ }
++}
++
+ /* PREV is a comparison with the CC register which represents the
+ result of the previous CMP or CCMP. The function expands the
+ next compare based on G which is ANDed/ORed with the previous
+@@ -121,20 +164,16 @@ ccmp_candidate_p (gimple *g)
+ PREP_SEQ returns all insns to prepare opearands for compare.
+ GEN_SEQ returns all compare insns. */
+ static rtx
+-expand_ccmp_next (gimple *g, tree_code code, rtx prev,
++expand_ccmp_next (tree op, tree_code code, rtx prev,
+ rtx_insn **prep_seq, rtx_insn **gen_seq)
+ {
+ rtx_code rcode;
+- int unsignedp = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)));
+-
+- gcc_assert (code == BIT_AND_EXPR || code == BIT_IOR_EXPR);
+-
+- rcode = get_rtx_code (gimple_assign_rhs_code (g), unsignedp);
++ int unsignedp;
++ tree rhs1, rhs2;
+
++ get_compare_parts(op, &unsignedp, &rcode, &rhs1, &rhs2);
+ return targetm.gen_ccmp_next (prep_seq, gen_seq, prev, rcode,
+- gimple_assign_rhs1 (g),
+- gimple_assign_rhs2 (g),
+- get_rtx_code (code, 0));
++ rhs1, rhs2, get_rtx_code (code, 0));
+ }
+
+ /* Expand conditional compare gimple G. A typical CCMP sequence is like:
+@@ -153,39 +192,42 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+ {
+ tree exp = gimple_assign_rhs_to_tree (g);
+ tree_code code = TREE_CODE (exp);
+- gimple *gs0 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 0));
+- gimple *gs1 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 1));
++ basic_block bb = gimple_bb (g);
++
++ tree op0 = TREE_OPERAND (exp, 0);
++ tree op1 = TREE_OPERAND (exp, 1);
++ gimple *gs0 = get_gimple_for_ssa_name (op0);
++ gimple *gs1 = get_gimple_for_ssa_name (op1);
+ rtx tmp;
+- tree_code code0 = gimple_assign_rhs_code (gs0);
+- tree_code code1 = gimple_assign_rhs_code (gs1);
+
+ gcc_assert (code == BIT_AND_EXPR || code == BIT_IOR_EXPR);
+- gcc_assert (gs0 && gs1 && is_gimple_assign (gs0) && is_gimple_assign (gs1));
+
+- if (TREE_CODE_CLASS (code0) == tcc_comparison)
++ if (ccmp_tree_comparison_p (op0, bb))
+ {
+- if (TREE_CODE_CLASS (code1) == tcc_comparison)
++ if (ccmp_tree_comparison_p (op1, bb))
+ {
+ int unsignedp0, unsignedp1;
+ rtx_code rcode0, rcode1;
++ tree logical_op0_rhs1, logical_op0_rhs2;
++ tree logical_op1_rhs1, logical_op1_rhs2;
+ int speed_p = optimize_insn_for_speed_p ();
++
+ rtx tmp2 = NULL_RTX, ret = NULL_RTX, ret2 = NULL_RTX;
+ unsigned cost1 = MAX_COST;
+ unsigned cost2 = MAX_COST;
+
+- unsignedp0 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs0)));
+- unsignedp1 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs1)));
+- rcode0 = get_rtx_code (code0, unsignedp0);
+- rcode1 = get_rtx_code (code1, unsignedp1);
++ get_compare_parts (op0, &unsignedp0, &rcode0,
++ &logical_op0_rhs1, &logical_op0_rhs2);
++
++ get_compare_parts (op1, &unsignedp1, &rcode1,
++ &logical_op1_rhs1, &logical_op1_rhs2);
+
+ rtx_insn *prep_seq_1, *gen_seq_1;
+ tmp = targetm.gen_ccmp_first (&prep_seq_1, &gen_seq_1, rcode0,
+- gimple_assign_rhs1 (gs0),
+- gimple_assign_rhs2 (gs0));
+-
++ logical_op0_rhs1, logical_op0_rhs2);
+ if (tmp != NULL)
+ {
+- ret = expand_ccmp_next (gs1, code, tmp, &prep_seq_1, &gen_seq_1);
++ ret = expand_ccmp_next (op1, code, tmp, &prep_seq_1, &gen_seq_1);
+ cost1 = seq_cost (prep_seq_1, speed_p);
+ cost1 += seq_cost (gen_seq_1, speed_p);
+ }
+@@ -197,27 +239,22 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+ rtx_insn *prep_seq_2, *gen_seq_2;
+ if (tmp == NULL || cost1 < COSTS_N_INSNS (25))
+ tmp2 = targetm.gen_ccmp_first (&prep_seq_2, &gen_seq_2, rcode1,
+- gimple_assign_rhs1 (gs1),
+- gimple_assign_rhs2 (gs1));
+-
++ logical_op1_rhs1, logical_op1_rhs2);
+ if (!tmp && !tmp2)
+ return NULL_RTX;
+-
+ if (tmp2 != NULL)
+ {
+- ret2 = expand_ccmp_next (gs0, code, tmp2, &prep_seq_2,
++ ret2 = expand_ccmp_next (op0, code, tmp2, &prep_seq_2,
+ &gen_seq_2);
+ cost2 = seq_cost (prep_seq_2, speed_p);
+ cost2 += seq_cost (gen_seq_2, speed_p);
+ }
+-
+ if (cost2 < cost1)
+ {
+ *prep_seq = prep_seq_2;
+ *gen_seq = gen_seq_2;
+ return ret2;
+ }
+-
+ *prep_seq = prep_seq_1;
+ *gen_seq = gen_seq_1;
+ return ret;
+@@ -227,28 +264,18 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+ tmp = expand_ccmp_expr_1 (gs1, prep_seq, gen_seq);
+ if (!tmp)
+ return NULL_RTX;
+-
+- return expand_ccmp_next (gs0, code, tmp, prep_seq, gen_seq);
++ return expand_ccmp_next (op0, code, tmp, prep_seq, gen_seq);
+ }
+ }
+ else
+ {
+ gcc_assert (gimple_assign_rhs_code (gs0) == BIT_AND_EXPR
+ || gimple_assign_rhs_code (gs0) == BIT_IOR_EXPR);
+-
+- if (TREE_CODE_CLASS (gimple_assign_rhs_code (gs1)) == tcc_comparison)
+- {
+- tmp = expand_ccmp_expr_1 (gs0, prep_seq, gen_seq);
+- if (!tmp)
+- return NULL_RTX;
+-
+- return expand_ccmp_next (gs1, code, tmp, prep_seq, gen_seq);
+- }
+- else
+- {
+- gcc_assert (gimple_assign_rhs_code (gs1) == BIT_AND_EXPR
+- || gimple_assign_rhs_code (gs1) == BIT_IOR_EXPR);
+- }
++ gcc_assert (ccmp_tree_comparison_p (op1, bb));
++ tmp = expand_ccmp_expr_1 (gs0, prep_seq, gen_seq);
++ if (!tmp)
++ return NULL_RTX;
++ return expand_ccmp_next (op1, code, tmp, prep_seq, gen_seq);
+ }
+
+ return NULL_RTX;
+@@ -258,7 +285,7 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+ Return NULL_RTX if G is not a legal candidate or expand fail.
+ Otherwise return the target. */
+ rtx
+-expand_ccmp_expr (gimple *g)
++expand_ccmp_expr (gimple *g, machine_mode mode)
+ {
+ rtx_insn *last;
+ rtx tmp;
+@@ -275,7 +302,6 @@ expand_ccmp_expr (gimple *g)
+ {
+ insn_code icode;
+ machine_mode cc_mode = CCmode;
+- tree lhs = gimple_assign_lhs (g);
+ rtx_code cmp_code = GET_CODE (tmp);
+
+ #ifdef SELECT_CC_MODE
+@@ -284,7 +310,6 @@ expand_ccmp_expr (gimple *g)
+ icode = optab_handler (cstore_optab, cc_mode);
+ if (icode != CODE_FOR_nothing)
+ {
+- machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+ rtx target = gen_reg_rtx (mode);
+
+ emit_insn (prep_seq);
+@@ -300,4 +325,3 @@ expand_ccmp_expr (gimple *g)
+ delete_insns_since (last);
+ return NULL_RTX;
+ }
+-
+--- a/src/gcc/ccmp.h
++++ b/src/gcc/ccmp.h
+@@ -20,6 +20,6 @@ along with GCC; see the file COPYING3. If not see
+ #ifndef GCC_CCMP_H
+ #define GCC_CCMP_H
+
+-extern rtx expand_ccmp_expr (gimple *);
++extern rtx expand_ccmp_expr (gimple *, machine_mode);
+
+ #endif /* GCC_CCMP_H */
--- a/src/gcc/config.gcc
+++ b/src/gcc/config.gcc
@@ -3796,34 +3796,19 @@ case "${target}" in
@@ -143,8 +455,8 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
-/* V8.1 Architecture Processors. */
+/* Qualcomm ('Q') cores. */
-+AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
-+AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
++AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
++AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+
+/* Samsung ('S') cores. */
+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
@@ -176,15 +488,28 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
COSTS_N_INSNS (1), /* UNUSED: Log_shift. */
COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */
0, /* Extend. */
+--- a/src/gcc/config/aarch64/aarch64-fusion-pairs.def
++++ b/src/gcc/config/aarch64/aarch64-fusion-pairs.def
+@@ -34,5 +34,6 @@ AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
+ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
+ AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
+ AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
++AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
+
+ #undef AARCH64_FUSION_PAIR
--- a/src/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
-@@ -60,4 +60,7 @@ AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
+@@ -60,4 +60,11 @@ AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
Disabling "fp16" just disables "fp16". */
AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp")
+/* Enabling or disabling "rcpc" only changes "rcpc". */
+AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
+
++/* Enabling "rdma" also enables "fp", "simd".
++ Disabling "rdma" just disables "rdma". */
++AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "rdma")
++
#undef AARCH64_OPT_EXTENSION
--- a/src/gcc/config/aarch64/aarch64-protos.h
+++ b/src/gcc/config/aarch64/aarch64-protos.h
@@ -226,7 +551,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
#define AARCH64_FUSION_PAIR(x, name) \
-@@ -301,6 +312,7 @@ extern struct tune_params aarch64_tune_params;
+@@ -301,18 +312,22 @@ extern struct tune_params aarch64_tune_params;
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
@@ -234,14 +559,32 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
-@@ -311,6 +323,7 @@ bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
+ bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
+ int aarch64_branch_cost (bool, bool);
+ enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
++bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
+ bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_constant_address_p (rtx);
bool aarch64_emit_approx_div (rtx, rtx, rtx);
bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
+void aarch64_expand_call (rtx, rtx, bool);
bool aarch64_expand_movmem (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
++bool aarch64_float_const_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
+ bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs);
+ bool aarch64_gen_movmemqi (rtx *);
+@@ -338,9 +353,9 @@ bool aarch64_pad_arg_upward (machine_mode, const_tree);
+ bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
+ bool aarch64_regno_ok_for_base_p (int, bool);
+ bool aarch64_regno_ok_for_index_p (int, bool);
++bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *fail);
+ bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
+ bool high);
+-bool aarch64_simd_imm_scalar_p (rtx x, machine_mode mode);
+ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
+ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
+ bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
--- a/src/gcc/config/aarch64/aarch64-simd.md
+++ b/src/gcc/config/aarch64/aarch64-simd.md
@@ -44,12 +44,12 @@
@@ -324,7 +667,45 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
-@@ -2796,38 +2809,10 @@
+@@ -1020,6 +1033,18 @@
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+ )
+
++(define_insn "*aarch64_mla_elt_merge<mode>"
++ [(set (match_operand:VDQHS 0 "register_operand" "=w")
++ (plus:VDQHS
++ (mult:VDQHS (vec_duplicate:VDQHS
++ (match_operand:<VEL> 1 "register_operand" "w"))
++ (match_operand:VDQHS 2 "register_operand" "w"))
++ (match_operand:VDQHS 3 "register_operand" "0")))]
++ "TARGET_SIMD"
++ "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
++ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
++)
++
+ (define_insn "aarch64_mls<mode>"
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+ (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
+@@ -1067,6 +1092,18 @@
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+ )
+
++(define_insn "*aarch64_mls_elt_merge<mode>"
++ [(set (match_operand:VDQHS 0 "register_operand" "=w")
++ (minus:VDQHS
++ (match_operand:VDQHS 1 "register_operand" "0")
++ (mult:VDQHS (vec_duplicate:VDQHS
++ (match_operand:<VEL> 2 "register_operand" "w"))
++ (match_operand:VDQHS 3 "register_operand" "w"))))]
++ "TARGET_SIMD"
++ "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
++ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
++)
++
+ ;; Max/Min operations.
+ (define_insn "<su><maxmin><mode>3"
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+@@ -2796,38 +2833,10 @@
(match_operand:VDC 2 "register_operand")]
"TARGET_SIMD"
{
@@ -388,7 +769,16 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
#undef AARCH64_EXTRA_TUNING_OPTION
--- a/src/gcc/config/aarch64/aarch64.c
+++ b/src/gcc/config/aarch64/aarch64.c
-@@ -193,10 +193,10 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
+@@ -147,6 +147,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
+ const_tree type,
+ int misalignment,
+ bool is_packed);
++static machine_mode
++aarch64_simd_container_mode (machine_mode mode, unsigned width);
+
+ /* Major revision number of the ARM Architecture implemented by the target. */
+ unsigned aarch64_architecture_version;
+@@ -193,10 +195,10 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
static const struct cpu_addrcost_table generic_addrcost_table =
{
{
@@ -401,7 +791,24 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
},
0, /* pre_modify */
0, /* post_modify */
-@@ -526,6 +526,61 @@ static const cpu_approx_modes xgene1_approx_modes =
+@@ -390,13 +392,13 @@ static const struct cpu_vector_cost thunderx_vector_cost =
+ 3, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 4, /* vec_int_stmt_cost */
+- 4, /* vec_fp_stmt_cost */
++ 1, /* vec_fp_stmt_cost */
+ 4, /* vec_permute_cost */
+ 2, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 3, /* vec_align_load_cost */
+- 10, /* vec_unalign_load_cost */
+- 10, /* vec_unalign_store_cost */
++ 5, /* vec_unalign_load_cost */
++ 5, /* vec_unalign_store_cost */
+ 1, /* vec_store_cost */
+ 3, /* cond_taken_branch_cost */
+ 3 /* cond_not_taken_branch_cost */
+@@ -526,6 +528,61 @@ static const cpu_approx_modes xgene1_approx_modes =
AARCH64_APPROX_ALL /* recip_sqrt */
};
@@ -463,7 +870,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
static const struct tune_params generic_tunings =
{
&cortexa57_extra_costs,
-@@ -538,17 +593,17 @@ static const struct tune_params generic_tunings =
+@@ -538,17 +595,17 @@ static const struct tune_params generic_tunings =
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
8, /* function_align. */
@@ -486,7 +893,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params cortexa35_tunings =
-@@ -564,7 +619,7 @@ static const struct tune_params cortexa35_tunings =
+@@ -564,7 +621,7 @@ static const struct tune_params cortexa35_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
16, /* function_align. */
@@ -495,7 +902,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -572,9 +627,9 @@ static const struct tune_params cortexa35_tunings =
+@@ -572,9 +629,9 @@ static const struct tune_params cortexa35_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -507,7 +914,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params cortexa53_tunings =
-@@ -590,7 +645,7 @@ static const struct tune_params cortexa53_tunings =
+@@ -590,7 +647,7 @@ static const struct tune_params cortexa53_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
16, /* function_align. */
@@ -516,7 +923,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -598,9 +653,9 @@ static const struct tune_params cortexa53_tunings =
+@@ -598,9 +655,9 @@ static const struct tune_params cortexa53_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -528,7 +935,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params cortexa57_tunings =
-@@ -616,7 +671,7 @@ static const struct tune_params cortexa57_tunings =
+@@ -616,7 +673,7 @@ static const struct tune_params cortexa57_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
16, /* function_align. */
@@ -537,7 +944,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -624,9 +679,9 @@ static const struct tune_params cortexa57_tunings =
+@@ -624,9 +681,9 @@ static const struct tune_params cortexa57_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -549,7 +956,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params cortexa72_tunings =
-@@ -642,7 +697,7 @@ static const struct tune_params cortexa72_tunings =
+@@ -642,7 +699,7 @@ static const struct tune_params cortexa72_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
16, /* function_align. */
@@ -558,7 +965,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -650,9 +705,9 @@ static const struct tune_params cortexa72_tunings =
+@@ -650,9 +707,9 @@ static const struct tune_params cortexa72_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -570,7 +977,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params cortexa73_tunings =
-@@ -668,7 +723,7 @@ static const struct tune_params cortexa73_tunings =
+@@ -668,7 +725,7 @@ static const struct tune_params cortexa73_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
16, /* function_align. */
@@ -579,7 +986,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -676,11 +731,13 @@ static const struct tune_params cortexa73_tunings =
+@@ -676,11 +733,13 @@ static const struct tune_params cortexa73_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -595,7 +1002,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
static const struct tune_params exynosm1_tunings =
{
&exynosm1_extra_costs,
-@@ -701,9 +758,34 @@ static const struct tune_params exynosm1_tunings =
+@@ -701,9 +760,34 @@ static const struct tune_params exynosm1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
48, /* max_case_values. */
@@ -632,7 +1039,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params thunderx_tunings =
-@@ -726,9 +808,10 @@ static const struct tune_params thunderx_tunings =
+@@ -726,9 +810,10 @@ static const struct tune_params thunderx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -645,7 +1052,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params xgene1_tunings =
-@@ -751,9 +834,9 @@ static const struct tune_params xgene1_tunings =
+@@ -751,9 +836,9 @@ static const struct tune_params xgene1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -657,7 +1064,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params qdf24xx_tunings =
-@@ -777,9 +860,9 @@ static const struct tune_params qdf24xx_tunings =
+@@ -777,9 +862,9 @@ static const struct tune_params qdf24xx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -669,7 +1076,17 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
static const struct tune_params thunderx2t99_tunings =
-@@ -802,9 +885,9 @@ static const struct tune_params thunderx2t99_tunings =
+@@ -792,7 +877,8 @@ static const struct tune_params thunderx2t99_tunings =
+ &generic_approx_modes,
+ 4, /* memmov_cost. */
+ 4, /* issue_rate. */
+- (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops */
++ (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
++ | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
+ 16, /* function_align. */
+ 8, /* jump_align. */
+ 16, /* loop_align. */
+@@ -802,9 +888,9 @@ static const struct tune_params thunderx2t99_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -682,7 +1099,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
};
/* Support for fine-grained override of the tuning structures. */
-@@ -1649,41 +1732,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
+@@ -1649,41 +1735,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
machine_mode dst_mode = GET_MODE (dst);
gcc_assert (VECTOR_MODE_P (dst_mode));
@@ -756,7 +1173,39 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
}
/* Split a complex SIMD move. */
-@@ -1919,6 +2002,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
+@@ -1792,6 +1878,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+ return 1;
+ }
+
++ /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
++ (with XXXX non-zero). In that case check to see if the move can be done in
++ a smaller mode. */
++ val2 = val & 0xffffffff;
++ if (mode == DImode
++ && aarch64_move_imm (val2, SImode)
++ && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
++ {
++ if (generate)
++ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
++
++ /* Check if we have to emit a second instruction by checking to see
++ if any of the upper 32 bits of the original DI mode value is set. */
++ if (val == val2)
++ return 1;
++
++ i = (val >> 48) ? 48 : 32;
++
++ if (generate)
++ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
++ GEN_INT ((val >> i) & 0xffff)));
++
++ return 2;
++ }
++
+ if ((val >> 32) == 0 || mode == SImode)
+ {
+ if (generate)
+@@ -1919,6 +2030,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
gcc_assert (can_create_pseudo_p ());
base = gen_reg_rtx (ptr_mode);
aarch64_expand_mov_immediate (base, XEXP (mem, 0));
@@ -765,7 +1214,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
mem = gen_rtx_MEM (ptr_mode, base);
}
-@@ -2683,11 +2768,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+@@ -2683,11 +2796,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
plus_constant (Pmode, stack_pointer_rtx, -first));
/* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
@@ -790,7 +1239,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* Step 3: the loop
do
-@@ -4549,6 +4642,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
+@@ -4549,6 +4670,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
}
}
@@ -815,7 +1264,132 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
bool
aarch64_symbolic_address_p (rtx x)
{
-@@ -4633,6 +4744,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+@@ -4611,6 +4750,74 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
+ return true;
+ }
+
++/* Return the binary representation of floating point constant VALUE in INTVAL.
++ If the value cannot be converted, return false without setting INTVAL.
++ The conversion is done in the given MODE. */
++bool
++aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
++{
++
++ /* We make a general exception for 0. */
++ if (aarch64_float_const_zero_rtx_p (value))
++ {
++ *intval = 0;
++ return true;
++ }
++
++ machine_mode mode = GET_MODE (value);
++ if (GET_CODE (value) != CONST_DOUBLE
++ || !SCALAR_FLOAT_MODE_P (mode)
++ || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
++ /* Only support up to DF mode. */
++ || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
++ return false;
++
++ unsigned HOST_WIDE_INT ival = 0;
++
++ long res[2];
++ real_to_target (res,
++ CONST_DOUBLE_REAL_VALUE (value),
++ REAL_MODE_FORMAT (mode));
++
++ if (mode == DFmode)
++ {
++ int order = BYTES_BIG_ENDIAN ? 1 : 0;
++ ival = zext_hwi (res[order], 32);
++ ival |= (zext_hwi (res[1 - order], 32) << 32);
++ }
++ else
++ ival = zext_hwi (res[0], 32);
++
++ *intval = ival;
++ return true;
++}
++
++/* Return TRUE if rtx X is an immediate constant that can be moved using a
++ single MOV(+MOVK) followed by an FMOV. */
++bool
++aarch64_float_const_rtx_p (rtx x)
++{
++ machine_mode mode = GET_MODE (x);
++ if (mode == VOIDmode)
++ return false;
++
++ /* Determine whether it's cheaper to write float constants as
++ mov/movk pairs over ldr/adrp pairs. */
++ unsigned HOST_WIDE_INT ival;
++
++ if (GET_CODE (x) == CONST_DOUBLE
++ && SCALAR_FLOAT_MODE_P (mode)
++ && aarch64_reinterpret_float_as_int (x, &ival))
++ {
++ machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode);
++ int num_instr = aarch64_internal_mov_immediate
++ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
++ return num_instr < 3;
++ }
++
++ return false;
++}
++
+ /* Return TRUE if rtx X is immediate constant 0.0 */
+ bool
+ aarch64_float_const_zero_rtx_p (rtx x)
+@@ -4623,6 +4830,49 @@ aarch64_float_const_zero_rtx_p (rtx x)
+ return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
+ }
+
++/* Return TRUE if rtx X is immediate constant that fits in a single
++ MOVI immediate operation. */
++bool
++aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
++{
++ if (!TARGET_SIMD)
++ return false;
++
++ machine_mode vmode, imode;
++ unsigned HOST_WIDE_INT ival;
++
++ if (GET_CODE (x) == CONST_DOUBLE
++ && SCALAR_FLOAT_MODE_P (mode))
++ {
++ if (!aarch64_reinterpret_float_as_int (x, &ival))
++ return false;
++
++ /* We make a general exception for 0. */
++ if (aarch64_float_const_zero_rtx_p (x))
++ return true;
++
++ imode = int_mode_for_mode (mode);
++ }
++ else if (GET_CODE (x) == CONST_INT
++ && SCALAR_INT_MODE_P (mode))
++ {
++ imode = mode;
++ ival = INTVAL (x);
++ }
++ else
++ return false;
++
++ /* use a 64 bit mode for everything except for DI/DF mode, where we use
++ a 128 bit vector mode. */
++ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
++
++ vmode = aarch64_simd_container_mode (imode, width);
++ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
++
++ return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
++}
++
++
+ /* Return the fixed registers used for condition codes. */
+
+ static bool
+@@ -4633,6 +4883,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
return true;
}
@@ -866,7 +1440,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* Emit call insn with PAT and do aarch64-specific handling. */
void
-@@ -4705,7 +4860,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
+@@ -4705,7 +4999,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
the comparison will have to be swapped when we emit the assembly
code. */
if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
@@ -875,7 +1449,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
&& (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
|| GET_CODE (x) == LSHIFTRT
|| GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
-@@ -5112,6 +5267,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
+@@ -5112,6 +5406,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case MEM:
output_address (GET_MODE (x), XEXP (x, 0));
@@ -884,7 +1458,20 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
break;
case CONST:
-@@ -5976,9 +6133,10 @@ aarch64_strip_shift (rtx x)
+@@ -5756,12 +6052,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
+ return NO_REGS;
+ }
+
+- /* If it's an integer immediate that MOVI can't handle, then
+- FP_REGS is not an option, so we return NO_REGS instead. */
+- if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
+- && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
+- return NO_REGS;
+-
+ /* Register eliminiation can result in a request for
+ SP+constant->FP_REGS. We cannot support such operations which
+ use SP as source and an FP_REG as destination, so reject out
+@@ -5976,9 +6266,10 @@ aarch64_strip_shift (rtx x)
/* Helper function for rtx cost calculation. Strip an extend
expression from X. Returns the inner operand if successful, or the
original expression on failure. We deal with a number of possible
@@ -897,7 +1484,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
{
rtx op = x;
-@@ -6002,7 +6160,8 @@ aarch64_strip_extend (rtx x)
+@@ -6002,7 +6293,8 @@ aarch64_strip_extend (rtx x)
/* Now handle extended register, as this may also have an optional
left shift by 1..4. */
@@ -907,7 +1494,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
&& CONST_INT_P (XEXP (op, 1))
&& ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
op = XEXP (op, 0);
-@@ -6026,6 +6185,39 @@ aarch64_shift_p (enum rtx_code code)
+@@ -6026,6 +6318,39 @@ aarch64_shift_p (enum rtx_code code)
return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}
@@ -947,7 +1534,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* Helper function for rtx cost calculation. Calculate the cost of
a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
Return the calculated cost of the expression, recursing manually in to
-@@ -6063,7 +6255,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+@@ -6063,7 +6388,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
{
if (compound_p)
{
@@ -960,7 +1547,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* ARITH + shift-by-register. */
cost += extra_cost->alu.arith_shift_reg;
else if (is_extend)
-@@ -6081,7 +6277,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+@@ -6081,7 +6410,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
}
/* Strip extends as we will have costed them in the case above. */
if (is_extend)
@@ -969,7 +1556,33 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
cost += rtx_cost (op0, VOIDmode, code, 0, speed);
-@@ -6925,13 +7121,13 @@ cost_minus:
+@@ -6672,6 +7001,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
+ return true;
+
+ case CONST_DOUBLE:
++
++ /* First determine number of instructions to do the move
++ as an integer constant. */
++ if (!aarch64_float_const_representable_p (x)
++ && !aarch64_can_const_movi_rtx_p (x, mode)
++ && aarch64_float_const_rtx_p (x))
++ {
++ unsigned HOST_WIDE_INT ival;
++ bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
++ gcc_assert (succeed);
++
++ machine_mode imode = mode == HFmode ? SImode
++ : int_mode_for_mode (mode);
++ int ncost = aarch64_internal_mov_immediate
++ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
++ *cost += COSTS_N_INSNS (ncost);
++ return true;
++ }
++
+ if (speed)
+ {
+ /* mov[df,sf]_aarch64. */
+@@ -6925,13 +7273,13 @@ cost_minus:
if (speed)
*cost += extra_cost->alu.extend_arith;
@@ -985,7 +1598,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* Cost this as an FMA-alike operation. */
if ((GET_CODE (new_op1) == MULT
-@@ -7004,7 +7200,7 @@ cost_plus:
+@@ -7004,7 +7352,7 @@ cost_plus:
if (speed)
*cost += extra_cost->alu.extend_arith;
@@ -994,7 +1607,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
*cost += rtx_cost (op0, VOIDmode,
(enum rtx_code) GET_CODE (op0), 0, speed);
return true;
-@@ -7012,7 +7208,7 @@ cost_plus:
+@@ -7012,7 +7360,7 @@ cost_plus:
/* Strip any extend, leave shifts behind as we will
cost them through mult_cost. */
@@ -1003,7 +1616,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
if (GET_CODE (new_op0) == MULT
|| aarch64_shift_p (GET_CODE (new_op0)))
-@@ -7482,17 +7678,13 @@ cost_plus:
+@@ -7482,17 +7830,13 @@ cost_plus:
case UMOD:
if (speed)
{
@@ -1024,7 +1637,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
}
return false; /* All arguments need to be in registers. */
-@@ -7506,7 +7698,9 @@ cost_plus:
+@@ -7506,7 +7850,9 @@ cost_plus:
else if (GET_MODE_CLASS (mode) == MODE_INT)
/* There is no integer SQRT, so only DIV and UDIV can get
here. */
@@ -1035,7 +1648,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
else
*cost += extra_cost->fp[mode == DFmode].div;
}
-@@ -8687,13 +8881,39 @@ aarch64_override_options_internal (struct gcc_options *opts)
+@@ -8687,13 +9033,39 @@ aarch64_override_options_internal (struct gcc_options *opts)
opts->x_param_values,
global_options_set.x_param_values);
@@ -1078,7 +1691,73 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
aarch64_override_options_after_change_1 (opts);
}
-@@ -11647,6 +11867,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+@@ -9970,18 +10342,16 @@ aarch64_legitimate_pic_operand_p (rtx x)
+ /* Return true if X holds either a quarter-precision or
+ floating-point +0.0 constant. */
+ static bool
+-aarch64_valid_floating_const (machine_mode mode, rtx x)
++aarch64_valid_floating_const (rtx x)
+ {
+ if (!CONST_DOUBLE_P (x))
+ return false;
+
+- if (aarch64_float_const_zero_rtx_p (x))
++ /* This call determines which constants can be used in mov<mode>
++ as integer moves instead of constant loads. */
++ if (aarch64_float_const_rtx_p (x))
+ return true;
+
+- /* We only handle moving 0.0 to a TFmode register. */
+- if (!(mode == SFmode || mode == DFmode))
+- return false;
+-
+ return aarch64_float_const_representable_p (x);
+ }
+
+@@ -9993,11 +10363,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
+ if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
+ return false;
+
+- /* This could probably go away because
+- we now decompose CONST_INTs according to expand_mov_immediate. */
++ /* For these cases we never want to use a literal load.
++ As such we have to prevent the compiler from forcing these
++ to memory. */
+ if ((GET_CODE (x) == CONST_VECTOR
+ && aarch64_simd_valid_immediate (x, mode, false, NULL))
+- || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
++ || CONST_INT_P (x)
++ || aarch64_valid_floating_const (x)
++ || aarch64_can_const_movi_rtx_p (x, mode)
++ || aarch64_float_const_rtx_p (x))
+ return !targetm.cannot_force_const_mem (mode, x);
+
+ if (GET_CODE (x) == HIGH
+@@ -11275,23 +11649,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos)
+ }
+
+ bool
+-aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
+-{
+- HOST_WIDE_INT imm = INTVAL (x);
+- int i;
+-
+- for (i = 0; i < 8; i++)
+- {
+- unsigned int byte = imm & 0xff;
+- if (byte != 0xff && byte != 0)
+- return false;
+- imm >>= 8;
+- }
+-
+- return true;
+-}
+-
+-bool
+ aarch64_mov_operand_p (rtx x, machine_mode mode)
+ {
+ if (GET_CODE (x) == HIGH
+@@ -11647,6 +12004,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
return;
}
@@ -1136,7 +1815,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* Initialise a vector which is part-variable. We want to first try
to build those lanes which are constant in the most efficient way we
can. */
-@@ -11680,10 +11951,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+@@ -11680,10 +12088,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
}
/* Insert the variable lanes directly. */
@@ -1147,7 +1826,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
for (int i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
-@@ -12049,6 +12316,17 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12049,6 +12453,17 @@ aarch64_split_compare_and_swap (rtx operands[])
mode = GET_MODE (mem);
model = memmodel_from_int (INTVAL (model_rtx));
@@ -1165,7 +1844,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
label1 = NULL;
if (!is_weak)
{
-@@ -12065,11 +12343,21 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12065,11 +12480,21 @@ aarch64_split_compare_and_swap (rtx operands[])
else
aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
@@ -1192,7 +1871,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
-@@ -12088,7 +12376,15 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12088,7 +12513,15 @@ aarch64_split_compare_and_swap (rtx operands[])
}
emit_label (label2);
@@ -1209,6 +1888,113 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* Emit any final barrier needed for a __sync operation. */
if (is_mm_sync (model))
aarch64_emit_post_barrier (model);
+@@ -12608,15 +13041,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
+ }
+
+ char*
+-aarch64_output_scalar_simd_mov_immediate (rtx immediate,
+- machine_mode mode)
++aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode)
+ {
++
++ /* If a floating point number was passed and we desire to use it in an
++ integer mode do the conversion to integer. */
++ if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
++ {
++ unsigned HOST_WIDE_INT ival;
++ if (!aarch64_reinterpret_float_as_int (immediate, &ival))
++ gcc_unreachable ();
++ immediate = gen_int_mode (ival, mode);
++ }
++
+ machine_mode vmode;
++ /* use a 64 bit mode for everything except for DI/DF mode, where we use
++ a 128 bit vector mode. */
++ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
+
+ gcc_assert (!VECTOR_MODE_P (mode));
+- vmode = aarch64_simd_container_mode (mode, 64);
++ vmode = aarch64_simd_container_mode (mode, width);
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
+- return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
++ return aarch64_output_simd_mov_immediate (v_op, vmode, width);
+ }
+
+ /* Split operands into moves from op[1] + op[2] into op[0]. */
+@@ -13981,13 +14427,66 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+ {
+ enum attr_type prev_type = get_attr_type (prev);
+
+- /* FIXME: this misses some which is considered simple arthematic
+- instructions for ThunderX. Simple shifts are missed here. */
+- if (prev_type == TYPE_ALUS_SREG
+- || prev_type == TYPE_ALUS_IMM
+- || prev_type == TYPE_LOGICS_REG
+- || prev_type == TYPE_LOGICS_IMM)
+- return true;
++ unsigned int condreg1, condreg2;
++ rtx cc_reg_1;
++ aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
++ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
++
++ if (reg_referenced_p (cc_reg_1, PATTERN (curr))
++ && prev
++ && modified_in_p (cc_reg_1, prev))
++ {
++ /* FIXME: this misses some which is considered simple arthematic
++ instructions for ThunderX. Simple shifts are missed here. */
++ if (prev_type == TYPE_ALUS_SREG
++ || prev_type == TYPE_ALUS_IMM
++ || prev_type == TYPE_LOGICS_REG
++ || prev_type == TYPE_LOGICS_IMM)
++ return true;
++ }
++ }
++
++ if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
++ && any_condjump_p (curr))
++ {
++ /* We're trying to match:
++ prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
++ curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
++ (const_int 0))
++ (label_ref ("SYM"))
++ (pc)) */
++ if (SET_DEST (curr_set) == (pc_rtx)
++ && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
++ && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
++ && REG_P (SET_DEST (prev_set))
++ && REGNO (SET_DEST (prev_set))
++ == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
++ {
++ /* Fuse ALU operations followed by conditional branch instruction. */
++ switch (get_attr_type (prev))
++ {
++ case TYPE_ALU_IMM:
++ case TYPE_ALU_SREG:
++ case TYPE_ADC_REG:
++ case TYPE_ADC_IMM:
++ case TYPE_ADCS_REG:
++ case TYPE_ADCS_IMM:
++ case TYPE_LOGIC_REG:
++ case TYPE_LOGIC_IMM:
++ case TYPE_CSEL:
++ case TYPE_ADR:
++ case TYPE_MOV_IMM:
++ case TYPE_SHIFT_REG:
++ case TYPE_SHIFT_IMM:
++ case TYPE_BFM:
++ case TYPE_RBIT:
++ case TYPE_REV:
++ case TYPE_EXTEND:
++ return true;
++
++ default:;
++ }
++ }
+ }
+
+ return false;
--- a/src/gcc/config/aarch64/aarch64.h
+++ b/src/gcc/config/aarch64/aarch64.h
@@ -98,14 +98,24 @@
@@ -1244,7 +2030,15 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
#define STRUCTURE_SIZE_BOUNDARY 8
-@@ -140,6 +150,7 @@ extern unsigned aarch64_architecture_version;
+@@ -134,12 +144,14 @@ extern unsigned aarch64_architecture_version;
+ #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */
+ /* ARMv8.1-A architecture extensions. */
+ #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */
+-#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1-A extensions. */
++#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */
++#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */
+ /* ARMv8.2-A architecture extensions. */
+ #define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */
#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */
/* ARMv8.3-A architecture extensions. */
#define AARCH64_FL_V8_3 (1 << 10) /* Has ARMv8.3-A features. */
@@ -1252,9 +2046,60 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
+@@ -150,7 +162,8 @@ extern unsigned aarch64_architecture_version;
+ /* Architecture flags that effect instruction selection. */
+ #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
+ #define AARCH64_FL_FOR_ARCH8_1 \
+- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1)
++ (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
++ | AARCH64_FL_RDMA | AARCH64_FL_V8_1)
+ #define AARCH64_FL_FOR_ARCH8_2 \
+ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
+ #define AARCH64_FL_FOR_ARCH8_3 \
+@@ -163,7 +176,7 @@ extern unsigned aarch64_architecture_version;
+ #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP)
+ #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
+ #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
+-#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1)
++#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
+ #define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2)
+ #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
+ #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
--- a/src/gcc/config/aarch64/aarch64.md
+++ b/src/gcc/config/aarch64/aarch64.md
-@@ -519,27 +519,31 @@
+@@ -181,6 +181,11 @@
+ ;; will be disabled when !TARGET_FLOAT.
+ (define_attr "fp" "no,yes" (const_string "no"))
+
++;; Attribute that specifies whether or not the instruction touches half
++;; precision fp registers. When this is set to yes for an alternative,
++;; that alternative will be disabled when !TARGET_FP_F16INST.
++(define_attr "fp16" "no,yes" (const_string "no"))
++
+ ;; Attribute that specifies whether or not the instruction touches simd
+ ;; registers. When this is set to yes for an alternative, that alternative
+ ;; will be disabled when !TARGET_SIMD.
+@@ -194,11 +199,14 @@
+ ;; registers when -mgeneral-regs-only is specified.
+ (define_attr "enabled" "no,yes"
+ (cond [(ior
+- (and (eq_attr "fp" "yes")
+- (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
+- (and (eq_attr "simd" "yes")
+- (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
+- (const_string "no")
++ (ior
++ (and (eq_attr "fp" "yes")
++ (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
++ (and (eq_attr "simd" "yes")
++ (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
++ (and (eq_attr "fp16" "yes")
++ (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0))))
++ (const_string "no")
+ ] (const_string "yes")))
+
+ ;; Attribute that specifies whether we are dealing with a branch to a
+@@ -519,27 +527,31 @@
)
(define_insn "prefetch"
@@ -1296,7 +2141,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
return pftype[INTVAL(operands[1])][locality];
}
[(set_attr "type" "load1")]
-@@ -713,12 +717,6 @@
+@@ -713,12 +725,6 @@
;; Subroutine calls and sibcalls
;; -------------------------------------------------------------------
@@ -1309,7 +2154,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_expand "call"
[(parallel [(call (match_operand 0 "memory_operand" "")
(match_operand 1 "general_operand" ""))
-@@ -727,57 +725,22 @@
+@@ -727,57 +733,22 @@
""
"
{
@@ -1374,7 +2219,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_expand "call_value"
[(parallel [(set (match_operand 0 "" "")
(call (match_operand 1 "memory_operand" "")
-@@ -787,60 +750,23 @@
+@@ -787,60 +758,23 @@
""
"
{
@@ -1442,7 +2287,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_expand "sibcall"
[(parallel [(call (match_operand 0 "memory_operand" "")
(match_operand 1 "general_operand" ""))
-@@ -848,29 +774,11 @@
+@@ -848,29 +782,11 @@
(use (match_operand 2 "" ""))])]
""
{
@@ -1473,7 +2318,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_expand "sibcall_value"
[(parallel [(set (match_operand 0 "" "")
(call (match_operand 1 "memory_operand" "")
-@@ -879,19 +787,7 @@
+@@ -879,19 +795,7 @@
(use (match_operand 3 "" ""))])]
""
{
@@ -1494,7 +2339,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
DONE;
}
)
-@@ -899,8 +795,7 @@
+@@ -899,8 +803,7 @@
(define_insn "*sibcall_insn"
[(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf"))
(match_operand 1 "" ""))
@@ -1504,7 +2349,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
"SIBLING_CALL_P (insn)"
"@
br\\t%0
-@@ -913,8 +808,7 @@
+@@ -913,8 +816,7 @@
(call (mem:DI
(match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf"))
(match_operand 2 "" "")))
@@ -1514,29 +2359,81 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
"SIBLING_CALL_P (insn)"
"@
br\\t%1
-@@ -1026,8 +920,8 @@
+@@ -1026,8 +928,8 @@
)
(define_insn_and_split "*movsi_aarch64"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w")
- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
-+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w,r,*w")
-+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))]
++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w,w")
++ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Ds"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
"@
-@@ -1058,8 +952,8 @@
+@@ -1044,8 +946,9 @@
+ adrp\\t%x0, %A1
+ fmov\\t%s0, %w1
+ fmov\\t%w0, %s1
+- fmov\\t%s0, %s1"
+- "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
++ fmov\\t%s0, %s1
++ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
++ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
+ && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+ [(const_int 0)]
+ "{
+@@ -1053,13 +956,14 @@
+ DONE;
+ }"
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+- adr,adr,f_mcr,f_mrc,fmov")
+- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
++ adr,adr,f_mcr,f_mrc,fmov,neon_move")
++ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
++ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
-+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w,r,*w,w")
-+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
++ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w,m, m,r,r, *w,r,*w,w")
++ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
-@@ -1123,7 +1017,7 @@
+@@ -1067,6 +971,7 @@
+ mov\\t%0, %x1
+ mov\\t%x0, %1
+ mov\\t%x0, %1
++ mov\\t%w0, %1
+ #
+ ldr\\t%x0, %1
+ ldr\\t%d0, %1
+@@ -1077,7 +982,7 @@
+ fmov\\t%d0, %x1
+ fmov\\t%x0, %d1
+ fmov\\t%d0, %d1
+- movi\\t%d0, %1"
++ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
+ "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
+ && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+ [(const_int 0)]
+@@ -1085,10 +990,10 @@
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ }"
+- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+- adr,adr,f_mcr,f_mrc,fmov,neon_move")
+- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
++ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load1,\
++ load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move")
++ (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
++ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+ )
+
+ (define_insn "insv_imm<mode>"
+@@ -1123,7 +1028,7 @@
#
#
#
@@ -1545,7 +2442,118 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
ldp\\t%0, %H0, %1
stp\\t%1, %H1, %0
stp\\txzr, xzr, %0
-@@ -1237,7 +1131,7 @@
+@@ -1168,28 +1073,31 @@
+ )
+
+ (define_insn "*movhf_aarch64"
+- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
+- (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
++ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
++ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+ "TARGET_FLOAT && (register_operand (operands[0], HFmode)
+ || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+ "@
+ movi\\t%0.4h, #0
+- mov\\t%0.h[0], %w1
++ fmov\\t%h0, %w1
+ umov\\t%w0, %1.h[0]
+ mov\\t%0.h[0], %1.h[0]
++ fmov\\t%h0, %1
++ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
+ ldr\\t%h0, %1
+ str\\t%h1, %0
+ ldrh\\t%w0, %1
+ strh\\t%w1, %0
+ mov\\t%w0, %w1"
+- [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
+- f_loads,f_stores,load1,store1,mov_reg")
+- (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
++ [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
++ neon_move,f_loads,f_stores,load1,store1,mov_reg")
++ (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")
++ (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")]
+ )
+
+ (define_insn "*movsf_aarch64"
+- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
+- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
++ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
++ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
+ "TARGET_FLOAT && (register_operand (operands[0], SFmode)
+ || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+ "@
+@@ -1198,19 +1106,22 @@
+ fmov\\t%w0, %s1
+ fmov\\t%s0, %s1
+ fmov\\t%s0, %1
++ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
+ ldr\\t%s0, %1
+ str\\t%s1, %0
+ ldr\\t%w0, %1
+ str\\t%w1, %0
+- mov\\t%w0, %w1"
+- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
+- f_loads,f_stores,load1,store1,mov_reg")
+- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
++ mov\\t%w0, %w1
++ mov\\t%w0, %1"
++ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
++ f_loads,f_stores,load1,store1,mov_reg,\
++ fconsts")
++ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
+ )
+
+ (define_insn "*movdf_aarch64"
+- [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
+- (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
++ [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
++ (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+ "TARGET_FLOAT && (register_operand (operands[0], DFmode)
+ || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+ "@
+@@ -1219,14 +1130,37 @@
+ fmov\\t%x0, %d1
+ fmov\\t%d0, %d1
+ fmov\\t%d0, %1
++ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
+ ldr\\t%d0, %1
+ str\\t%d1, %0
+ ldr\\t%x0, %1
+ str\\t%x1, %0
+- mov\\t%x0, %x1"
+- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
+- f_loadd,f_stored,load1,store1,mov_reg")
+- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
++ mov\\t%x0, %x1
++ mov\\t%x0, %1"
++ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
++ f_loadd,f_stored,load1,store1,mov_reg,\
++ fconstd")
++ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
++)
++
++(define_split
++ [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
++ (match_operand:GPF_HF 1 "general_operand"))]
++ "can_create_pseudo_p ()
++ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
++ && !aarch64_float_const_representable_p (operands[1])
++ && aarch64_float_const_rtx_p (operands[1])"
++ [(const_int 0)]
++ {
++ unsigned HOST_WIDE_INT ival;
++ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
++ FAIL;
++
++ rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
++ emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
++ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
++ DONE;
++ }
+ )
+
+ (define_insn "*movtf_aarch64"
+@@ -1237,7 +1171,7 @@
"TARGET_FLOAT && (register_operand (operands[0], TFmode)
|| aarch64_reg_or_fp_zero (operands[1], TFmode))"
"@
@@ -1554,7 +2562,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
#
#
#
-@@ -2340,6 +2234,55 @@
+@@ -2340,6 +2274,55 @@
[(set_attr "type" "alus_sreg")]
)
@@ -1610,7 +2618,57 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_insn "*sub_<shift>_<mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(minus:GPI (match_operand:GPI 3 "register_operand" "r")
-@@ -4997,6 +4940,18 @@
+@@ -3881,6 +3864,22 @@
+ [(set_attr "type" "logics_reg,logics_imm")]
+ )
+
++(define_split
++ [(set (reg:CC_NZ CC_REGNUM)
++ (compare:CC_NZ
++ (and:GPI (match_operand:GPI 0 "register_operand")
++ (match_operand:GPI 1 "aarch64_mov_imm_operand"))
++ (const_int 0)))
++ (clobber (match_operand:SI 2 "register_operand"))]
++ ""
++ [(set (match_dup 2) (match_dup 1))
++ (set (reg:CC_NZ CC_REGNUM)
++ (compare:CC_NZ
++ (and:GPI (match_dup 0)
++ (match_dup 2))
++ (const_int 0)))]
++)
++
+ (define_insn "*and<mode>3nr_compare0_zextract"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+@@ -3916,6 +3915,26 @@
+ [(set_attr "type" "logics_shift_imm")]
+ )
+
++(define_split
++ [(set (reg:CC_NZ CC_REGNUM)
++ (compare:CC_NZ
++ (and:GPI (SHIFT:GPI
++ (match_operand:GPI 0 "register_operand")
++ (match_operand:QI 1 "aarch64_shift_imm_<mode>"))
++ (match_operand:GPI 2 "aarch64_mov_imm_operand"))
++ (const_int 0)))
++ (clobber (match_operand:SI 3 "register_operand"))]
++ ""
++ [(set (match_dup 3) (match_dup 2))
++ (set (reg:CC_NZ CC_REGNUM)
++ (compare:CC_NZ
++ (and:GPI (SHIFT:GPI
++ (match_dup 0)
++ (match_dup 1))
++ (match_dup 3))
++ (const_int 0)))]
++)
++
+ ;; -------------------------------------------------------------------
+ ;; Shifts
+ ;; -------------------------------------------------------------------
+@@ -4997,6 +5016,18 @@
[(set_attr "type" "f_minmax<stype>")]
)
@@ -1629,7 +2687,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
;; For copysign (x, y), we want to generate:
;;
;; LDR d2, #(1 << 63)
-@@ -5030,14 +4985,16 @@
+@@ -5030,14 +5061,16 @@
(match_operand:SF 2 "register_operand")]
"TARGET_FLOAT && TARGET_SIMD"
{
@@ -1650,6 +2708,17 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
DONE;
}
+--- a/src/gcc/config/aarch64/arm_neon.h
++++ b/src/gcc/config/aarch64/arm_neon.h
+@@ -12162,7 +12162,7 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+
+ /* ARMv8.1-A instrinsics. */
+ #pragma GCC push_options
+-#pragma GCC target ("arch=armv8.1-a")
++#pragma GCC target ("+nothing+rdma")
+
+ __extension__ extern __inline int16x4_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
--- a/src/gcc/config/aarch64/atomics.md
+++ b/src/gcc/config/aarch64/atomics.md
@@ -25,7 +25,7 @@
@@ -1751,15 +2820,53 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_constraint "UsM"
"@internal
-@@ -214,3 +223,8 @@
- A constraint that matches an immediate operand valid for AdvSIMD scalar."
+@@ -167,6 +176,12 @@
+ (and (match_code "const_double")
+ (match_test "aarch64_float_const_representable_p (op)")))
+
++(define_constraint "Uvi"
++ "A floating point constant which can be used with a\
++ MOVI immediate operation."
++ (and (match_code "const_double")
++ (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
++
+ (define_constraint "Dn"
+ "@internal
+ A constraint that matches vector of immediates."
+@@ -211,6 +226,19 @@
+
+ (define_constraint "Dd"
+ "@internal
+- A constraint that matches an immediate operand valid for AdvSIMD scalar."
++ A constraint that matches an integer immediate operand valid\
++ for AdvSIMD scalar operations in DImode."
(and (match_code "const_int")
- (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))")))
+- (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))")))
++ (match_test "aarch64_can_const_movi_rtx_p (op, DImode)")))
++
++(define_constraint "Ds"
++ "@internal
++ A constraint that matches an integer immediate operand valid\
++ for AdvSIMD scalar operations in SImode."
++ (and (match_code "const_int")
++ (match_test "aarch64_can_const_movi_rtx_p (op, SImode)")))
+
+(define_address_constraint "Dp"
+ "@internal
+ An address valid for a prefetch instruction."
+ (match_test "aarch64_address_valid_for_prefetch_p (op, true)"))
+--- a/src/gcc/config/aarch64/iterators.md
++++ b/src/gcc/config/aarch64/iterators.md
+@@ -44,6 +44,9 @@
+ ;; Iterator for all scalar floating point modes (HF, SF, DF)
+ (define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+
++;; Iterator for all scalar floating point modes (HF, SF, DF)
++(define_mode_iterator GPF_HF [HF SF DF])
++
+ ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
+ (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
+
--- a/src/gcc/config/aarch64/predicates.md
+++ b/src/gcc/config/aarch64/predicates.md
@@ -77,6 +77,10 @@
@@ -1773,7 +2880,18 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_predicate "aarch64_plus_immediate"
(and (match_code "const_int")
(ior (match_test "aarch64_uimm12_shift (INTVAL (op))")
-@@ -165,6 +169,9 @@
+@@ -106,6 +110,10 @@
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "aarch64_logical_immediate")))
+
++(define_predicate "aarch64_mov_imm_operand"
++ (and (match_code "const_int")
++ (match_test "aarch64_move_imm (INTVAL (op), mode)")))
++
+ (define_predicate "aarch64_logical_and_immediate"
+ (and (match_code "const_int")
+ (match_test "aarch64_and_bitmask_imm (INTVAL (op), mode)")))
+@@ -165,6 +173,9 @@
(match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
0)")))
@@ -1994,7 +3112,35 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
offsets = arm_get_frame_offsets ();
return offsets->outgoing_args != 0;
}
-@@ -9285,6 +9293,10 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
+@@ -7858,6 +7866,8 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
+ {
+ HOST_WIDE_INT val = INTVAL (index);
+
++ /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
++ If vldr is selected it uses arm_coproc_mem_operand. */
+ if (TARGET_LDRD)
+ return val > -256 && val < 256;
+ else
+@@ -7985,11 +7995,13 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
+ if (code == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (index);
+- /* ??? Can we assume ldrd for thumb2? */
+- /* Thumb-2 ldrd only has reg+const addressing modes. */
+- /* ldrd supports offsets of +-1020.
+- However the ldr fallback does not. */
+- return val > -256 && val < 256 && (val & 3) == 0;
++ /* Thumb-2 ldrd only has reg+const addressing modes.
++ Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
++ If vldr is selected it uses arm_coproc_mem_operand. */
++ if (TARGET_LDRD)
++ return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
++ else
++ return IN_RANGE (val, -255, 4095 - 4);
+ }
+ else
+ return 0;
+@@ -9285,6 +9297,10 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
else
*cost = LIBCALL_COST (2);
@@ -2005,7 +3151,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
return false; /* All arguments must be in registers. */
case MOD:
-@@ -9307,7 +9319,9 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
+@@ -9307,7 +9323,9 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
/* Fall-through. */
case UMOD:
@@ -2016,6 +3162,35 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
return false; /* All arguments must be in registers. */
case ROTATE:
+@@ -13548,10 +13566,7 @@ gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
+ emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ if (!TARGET_THUMB1)
+- {
+- base_reg = regs[0];
+- base_reg_rtx = newbase;
+- }
++ base_reg_rtx = newbase;
+ }
+
+ for (i = 0; i < nops; i++)
+@@ -14075,7 +14090,6 @@ arm_gen_movmemqi (rtx *operands)
+ {
+ HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
+ HOST_WIDE_INT srcoffset, dstoffset;
+- int i;
+ rtx src, dst, srcbase, dstbase;
+ rtx part_bytes_reg = NULL;
+ rtx mem;
+@@ -14105,7 +14119,7 @@ arm_gen_movmemqi (rtx *operands)
+ if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
+ part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
+
+- for (i = 0; in_words_to_go >= 2; i+=4)
++ while (in_words_to_go >= 2)
+ {
+ if (in_words_to_go > 4)
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
@@ -16857,9 +16871,10 @@ compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
return not_to_clear_mask;
}
@@ -2104,7 +3279,45 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
alter the frame layout, so is independent of the epilogue. */
int n;
int frame;
-@@ -28225,17 +28248,32 @@ arm_expand_compare_and_swap (rtx operands[])
+@@ -21650,8 +21673,8 @@ arm_expand_prologue (void)
+ will prevent the scheduler from moving stores to the frame
+ before the stack adjustment. */
+ if (frame_pointer_needed)
+- insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
+- hard_frame_pointer_rtx));
++ emit_insn (gen_stack_tie (stack_pointer_rtx,
++ hard_frame_pointer_rtx));
+ }
+
+
+@@ -23736,7 +23759,6 @@ thumb_pop (FILE *f, unsigned long mask)
+ {
+ int regno;
+ int lo_mask = mask & 0xFF;
+- int pushed_words = 0;
+
+ gcc_assert (mask);
+
+@@ -23759,8 +23781,6 @@ thumb_pop (FILE *f, unsigned long mask)
+
+ if ((lo_mask & ~1) != 0)
+ fprintf (f, ", ");
+-
+- pushed_words++;
+ }
+ }
+
+@@ -24030,9 +24050,6 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
+ move_to = number_of_first_bit_set (regs_to_pop);
+
+ asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
+-
+- regs_to_pop &= ~(1 << move_to);
+-
+ --pops_needed;
+ }
+
+@@ -28225,17 +28242,32 @@ arm_expand_compare_and_swap (rtx operands[])
gcc_unreachable ();
}
@@ -2156,6 +3369,153 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
r0 * argument word/integer result
r1-r3 argument word
+--- a/src/gcc/config/arm/arm.md
++++ b/src/gcc/config/arm/arm.md
+@@ -457,14 +457,13 @@
+ )
+
+ (define_insn_and_split "*arm_adddi3"
+- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,&r")
+- (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+- (match_operand:DI 2 "arm_adddi_operand" "r, 0, r, Dd, Dd")))
++ [(set (match_operand:DI 0 "arm_general_register_operand" "=&r,&r,&r,&r,&r")
++ (plus:DI (match_operand:DI 1 "arm_general_register_operand" "%0, 0, r, 0, r")
++ (match_operand:DI 2 "arm_general_adddi_operand" "r, 0, r, Dd, Dd")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && !TARGET_NEON"
+ "#"
+- "TARGET_32BIT && reload_completed
+- && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))"
++ "TARGET_32BIT && ((!TARGET_NEON && !TARGET_IWMMXT) || reload_completed)"
+ [(parallel [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))
+@@ -1263,13 +1262,13 @@
+ )
+
+ (define_insn_and_split "*arm_subdi3"
+- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r")
+- (minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
+- (match_operand:DI 2 "s_register_operand" "r,0,0")))
++ [(set (match_operand:DI 0 "arm_general_register_operand" "=&r,&r,&r")
++ (minus:DI (match_operand:DI 1 "arm_general_register_operand" "0,r,0")
++ (match_operand:DI 2 "arm_general_register_operand" "r,0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && !TARGET_NEON"
+ "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
+- "&& reload_completed"
++ "&& (!TARGET_IWMMXT || reload_completed)"
+ [(parallel [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+@@ -2255,7 +2254,24 @@
+ (and:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "neon_inv_logic_op2" "")))]
+ "TARGET_32BIT"
+- ""
++ "
++ if (!TARGET_NEON && !TARGET_IWMMXT)
++ {
++ rtx low = simplify_gen_binary (AND, SImode,
++ gen_lowpart (SImode, operands[1]),
++ gen_lowpart (SImode, operands[2]));
++ rtx high = simplify_gen_binary (AND, SImode,
++ gen_highpart (SImode, operands[1]),
++ gen_highpart_mode (SImode, DImode,
++ operands[2]));
++
++ emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++ emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++ DONE;
++ }
++ /* Otherwise expand pattern as above. */
++ "
+ )
+
+ (define_insn_and_split "*anddi3_insn"
+@@ -3128,7 +3144,24 @@
+ (ior:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "neon_logic_op2" "")))]
+ "TARGET_32BIT"
+- ""
++ "
++ if (!TARGET_NEON && !TARGET_IWMMXT)
++ {
++ rtx low = simplify_gen_binary (IOR, SImode,
++ gen_lowpart (SImode, operands[1]),
++ gen_lowpart (SImode, operands[2]));
++ rtx high = simplify_gen_binary (IOR, SImode,
++ gen_highpart (SImode, operands[1]),
++ gen_highpart_mode (SImode, DImode,
++ operands[2]));
++
++ emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++ emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++ DONE;
++ }
++ /* Otherwise expand pattern as above. */
++ "
+ )
+
+ (define_insn_and_split "*iordi3_insn"
+@@ -3316,6 +3349,22 @@
+ no NEON instructions that take an immediate. */
+ if (TARGET_IWMMXT && !REG_P (operands[2]))
+ operands[2] = force_reg (DImode, operands[2]);
++ if (!TARGET_NEON && !TARGET_IWMMXT)
++ {
++ rtx low = simplify_gen_binary (XOR, SImode,
++ gen_lowpart (SImode, operands[1]),
++ gen_lowpart (SImode, operands[2]));
++ rtx high = simplify_gen_binary (XOR, SImode,
++ gen_highpart (SImode, operands[1]),
++ gen_highpart_mode (SImode, DImode,
++ operands[2]));
++
++ emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++ emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++ DONE;
++ }
++ /* Otherwise expand pattern as above. */
+ }
+ )
+
+@@ -5027,7 +5076,31 @@
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "")
+
+-(define_insn_and_split "one_cmpldi2"
++(define_expand "one_cmpldi2"
++ [(set (match_operand:DI 0 "s_register_operand" "")
++ (not:DI (match_operand:DI 1 "s_register_operand" "")))]
++ "TARGET_32BIT"
++ "
++ if (!TARGET_NEON && !TARGET_IWMMXT)
++ {
++ rtx low = simplify_gen_unary (NOT, SImode,
++ gen_lowpart (SImode, operands[1]),
++ SImode);
++ rtx high = simplify_gen_unary (NOT, SImode,
++ gen_highpart_mode (SImode, DImode,
++ operands[1]),
++ SImode);
++
++ emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++ emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++ DONE;
++ }
++ /* Otherwise expand pattern as above. */
++ "
++)
++
++(define_insn_and_split "*one_cmpldi2_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,?w")
+ (not:DI (match_operand:DI 1 "s_register_operand" " w, 0, r, w")))]
+ "TARGET_32BIT"
--- a/src/gcc/config/arm/arm_neon.h
+++ b/src/gcc/config/arm/arm_neon.h
@@ -17069,14 +17069,22 @@ __extension__ extern __inline float16x4_t
@@ -2419,7 +3779,21 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_insn "sub<mode>3_fp16"
[(set
(match_operand:VH 0 "s_register_operand" "=w")
-@@ -664,8 +692,17 @@
+@@ -650,7 +678,7 @@
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+- "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_fp_mla_s<q>")]
+ )
+
+@@ -660,12 +688,21 @@
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA"
+- "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "type" "neon_fp_mla_s<q>")]
)
@@ -2439,6 +3813,33 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_insn "fma<VH:mode>4_intrinsic"
[(set (match_operand:VH 0 "register_operand" "=w")
(fma:VH
+@@ -683,7 +720,7 @@
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+- "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_fp_mla_s<q>")]
+ )
+
+@@ -694,7 +731,7 @@
+ (match_operand:VCVTF 2 "register_operand" "w")
+ (match_operand:VCVTF 3 "register_operand" "0")))]
+ "TARGET_NEON && TARGET_FMA"
+- "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "neon_fp_mla_s<q>")]
+ )
+
+@@ -715,7 +752,7 @@
+ "s_register_operand" "w")]
+ NEON_VRINT))]
+ "TARGET_NEON && TARGET_FPU_ARMV8"
+- "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
++ "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
+ )
+
@@ -2175,6 +2212,17 @@
(const_string "neon_mul_<V_elem_ch><q>")))]
)
@@ -2457,6 +3858,20 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
(define_insn "neon_vmulf<mode>"
[(set
(match_operand:VH 0 "s_register_operand" "=w")
+--- a/src/gcc/config/arm/predicates.md
++++ b/src/gcc/config/arm/predicates.md
+@@ -82,6 +82,11 @@
+ || REGNO (op) >= FIRST_PSEUDO_REGISTER));
+ })
+
++(define_predicate "arm_general_adddi_operand"
++ (ior (match_operand 0 "arm_general_register_operand")
++ (and (match_code "const_int")
++ (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)"))))
++
+ (define_predicate "vfp_register_operand"
+ (match_code "reg,subreg")
+ {
--- a/src/gcc/config/arm/sync.md
+++ b/src/gcc/config/arm/sync.md
@@ -191,9 +191,9 @@
@@ -2648,6 +4063,24 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
# Option combinations to build library with
+--- a/src/gcc/config/i386/i386.c
++++ b/src/gcc/config/i386/i386.c
+@@ -29499,6 +29499,15 @@ ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
+ if (!any_condjump_p (condjmp))
+ return false;
+
++ unsigned int condreg1, condreg2;
++ rtx cc_reg_1;
++ ix86_fixed_condition_code_regs (&condreg1, &condreg2);
++ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
++ if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
++ || !condgen
++ || !modified_in_p (cc_reg_1, condgen))
++ return false;
++
+ if (get_attr_type (condgen) != TYPE_TEST
+ && get_attr_type (condgen) != TYPE_ICMP
+ && get_attr_type (condgen) != TYPE_INCDEC
--- a/src/gcc/configure
+++ b/src/gcc/configure
@@ -1717,7 +1717,8 @@ Optional Packages:
@@ -2782,6 +4215,15 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
{
emit_insn (uns_insns);
+@@ -9757,7 +9766,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
+ if (targetm.gen_ccmp_first)
+ {
+ gcc_checking_assert (targetm.gen_ccmp_next != NULL);
+- r = expand_ccmp_expr (g);
++ r = expand_ccmp_expr (g, mode);
+ if (r)
+ break;
+ }
--- a/src/gcc/generic-match-head.c
+++ b/src/gcc/generic-match-head.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
@@ -3067,6 +4509,58 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
for (; ep < ®_eliminate[NUM_ELIMINABLE_REGS]; ep++)
{
INITIAL_ELIMINATION_OFFSET (ep->from, ep->to, ep->initial_offset);
+--- a/src/gcc/sched-deps.c
++++ b/src/gcc/sched-deps.c
+@@ -2834,34 +2834,30 @@ static void
+ sched_macro_fuse_insns (rtx_insn *insn)
+ {
+ rtx_insn *prev;
+-
++ prev = prev_nonnote_nondebug_insn (insn);
++ if (!prev)
++ return;
++
+ if (any_condjump_p (insn))
+ {
+ unsigned int condreg1, condreg2;
+ rtx cc_reg_1;
+ targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+- prev = prev_nonnote_nondebug_insn (insn);
+- if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
+- || !prev
+- || !modified_in_p (cc_reg_1, prev))
+- return;
++ if (reg_referenced_p (cc_reg_1, PATTERN (insn))
++ && modified_in_p (cc_reg_1, prev))
++ {
++ if (targetm.sched.macro_fusion_pair_p (prev, insn))
++ SCHED_GROUP_P (insn) = 1;
++ return;
++ }
+ }
+- else
+- {
+- rtx insn_set = single_set (insn);
+-
+- prev = prev_nonnote_nondebug_insn (insn);
+- if (!prev
+- || !insn_set
+- || !single_set (prev))
+- return;
+
++ if (single_set (insn) && single_set (prev))
++ {
++ if (targetm.sched.macro_fusion_pair_p (prev, insn))
++ SCHED_GROUP_P (insn) = 1;
+ }
+-
+- if (targetm.sched.macro_fusion_pair_p (prev, insn))
+- SCHED_GROUP_P (insn) = 1;
+-
+ }
+
+ /* Get the implicit reg pending clobbers for INSN and save them in TEMP. */
--- a/src/gcc/simplify-rtx.c
+++ b/src/gcc/simplify-rtx.c
@@ -3345,19 +3345,21 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
@@ -3295,82 +4789,308 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+ return a / x;
+}
+
-+unsigned long long
-+f3 (unsigned long long a, int b)
++unsigned long long
++f3 (unsigned long long a, int b)
++{
++ unsigned long long x = 1ULL << b;
++ return a / x;
++}
++
++/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr79697.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-gimple -fdump-tree-cddce-details -fdump-tree-optimized" } */
++
++void f(void)
++{
++ __builtin_strdup ("abc");
++}
++
++void g(void)
++{
++ __builtin_strndup ("abc", 3);
++}
++
++void h(void)
++{
++ __builtin_realloc (0, 10);
++}
++
++/* { dg-final { scan-tree-dump "Deleting : __builtin_strdup" "cddce1" } } */
++/* { dg-final { scan-tree-dump "Deleting : __builtin_strndup" "cddce1" } } */
++/* { dg-final { scan-tree-dump "__builtin_malloc" "gimple" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int *a)
++{
++ int x = 3;
++ return __atomic_compare_exchange_n (a, &x, 0, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++}
++
++/* { dg-final { scan-assembler "stxr\\tw\[0-9\]+, wzr,.*" } } */
++/* { dg-final { scan-assembler-not "mov\\tw\[0-9\]+, 0" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int *a)
++{
++ int x = 0;
++ return __atomic_compare_exchange_n (a, &x, 4, 0,
++ __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++}
++
++/* { dg-final { scan-assembler-times "cbnz\\tw\[0-9\]+" 2 } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/ccmp_2.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int g(void);
++int h(int a, _Bool c)
++{
++ if (a != 0 && c)
++ return g();
++ return 1;
++}
++
++/* { dg-final { scan-assembler "\tccmp\t" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 " } */
++
++int f3 (int x, int y)
++{
++ int res = x << 3;
++ return res != 0;
++}
++
++/* We should combine the shift and compare */
++/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 3" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mno-pc-relative-literal-loads" } */
++/* { dg-skip-if "Tiny model won't generate adrp" { *-*-* } { "-mcmodel=tiny" } { "" } } */
++
++double d0(void)
++{
++ double x = 0.0d;
++ return x;
++}
++
++double dn1(void)
++{
++ double x = -0.0d;
++ return x;
++}
++
++
++double d1(void)
++{
++ double x = 1.5d;
++ return x;
++}
++
++double d2(void)
++{
++ double x = 123256.0d;
++ return x;
++}
++
++double d3(void)
++{
++ double x = 123256123456.0d;
++ return x;
++}
++
++double d4(void)
++{
++ double x = 123456123456123456.0d;
++ return x;
++}
++
++/* { dg-final { scan-assembler-times "movi\td\[0-9\]+, #?0" 1 } } */
++
++/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, \.LC\[0-9\]" 2 } } */
++/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:\.LC\[0-9\]\\\]" 2 } } */
++
++/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, 1\\\.5e\\\+0" 1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 25838523252736" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x40fe, lsl 48" 1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -9223372036854775808" 1 } } */
++/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, x\[0-9\]+" 2 } } */
++
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_1.c
+@@ -0,0 +1,49 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
++/* { dg-add-options arm_v8_2a_fp16_scalar } */
++
++extern __fp16 foo ();
++extern void bar (__fp16* x);
++
++void f1 ()
++{
++ volatile __fp16 a = 17.0;
++}
++
++
++void f2 (__fp16 *a)
++{
++ *a = 17.0;
++}
++
++void f3 ()
++{
++ __fp16 b = foo ();
++ b = 17.0;
++ bar (&b);
++}
++
++__fp16 f4 ()
++{
++ __fp16 a = 0;
++ __fp16 b = 1;
++ __fp16 c = 2;
++ __fp16 d = 4;
++
++ __fp16 z = a + b;
++ z = z + c;
++ z = z - d;
++ return z;
++}
++
++__fp16 f5 ()
+{
-+ unsigned long long x = 1ULL << b;
-+ return a / x;
++ __fp16 a = 16;
++ bar (&a);
++ return a;
+}
+
-+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, #?19520" 3 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0xbc, lsl 8" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x4c, lsl 8" 1 } } */
--- /dev/null
-+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr79697.c
-@@ -0,0 +1,21 @@
++++ b/src/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_2.c
+@@ -0,0 +1,45 @@
+/* { dg-do compile } */
-+/* { dg-options "-O2 -fdump-tree-gimple -fdump-tree-cddce-details -fdump-tree-optimized" } */
++/* { dg-options "-O3" } */
++/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
++/* { dg-add-options arm_v8_2a_fp16_scalar } */
+
-+void f(void)
++#include <arm_fp16.h>
++
++float16_t f0(void)
+{
-+ __builtin_strdup ("abc");
++ float16_t x = 0.0f;
++ return x;
+}
+
-+void g(void)
++float16_t fn1(void)
+{
-+ __builtin_strndup ("abc", 3);
++ float16_t x = -0.0f;
++ return x;
+}
+
-+void h(void)
++float16_t f1(void)
+{
-+ __builtin_realloc (0, 10);
++ float16_t x = 256.0f;
++ return x;
+}
+
-+/* { dg-final { scan-tree-dump "Deleting : __builtin_strdup" "cddce1" } } */
-+/* { dg-final { scan-tree-dump "Deleting : __builtin_strndup" "cddce1" } } */
-+/* { dg-final { scan-tree-dump "__builtin_malloc" "gimple" } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
-@@ -0,0 +1,12 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
++float16_t f2(void)
++{
++ float16_t x = 123256.0f;
++ return x;
++}
+
-+int
-+foo (int *a)
++float16_t f3(void)
+{
-+ int x = 3;
-+ return __atomic_compare_exchange_n (a, &x, 0, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++ float16_t x = 17.0;
++ return x;
+}
+
-+/* { dg-final { scan-assembler "stxr\\tw\[0-9\]+, wzr,.*" } } */
-+/* { dg-final { scan-assembler-not "mov\\tw\[0-9\]+, 0" } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.4h, ?#0" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 8" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x5c, lsl 8" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x7c, lsl 8" 1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 19520" 1 } } */
++/* { dg-final { scan-assembler-times "fmov\th\[0-9\], w\[0-9\]+" 1 } } */
++
--- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
-@@ -0,0 +1,12 @@
++++ b/src/gcc/testsuite/gcc.target/aarch64/flt_mov_immediate_1.c
+@@ -0,0 +1,52 @@
+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
++/* { dg-options "-O3" } */
+
-+int
-+foo (int *a)
++float f0(void)
+{
-+ int x = 0;
-+ return __atomic_compare_exchange_n (a, &x, 4, 0,
-+ __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++ float x = 0.0f;
++ return x;
+}
+
-+/* { dg-final { scan-assembler-times "cbnz\\tw\[0-9\]+" 2 } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
-@@ -0,0 +1,11 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 " } */
++float fn1(void)
++{
++ float x = -0.0f;
++ return x;
++}
+
-+int f3 (int x, int y)
++float f1(void)
+{
-+ int res = x << 3;
-+ return res != 0;
++ float x = 256.0f;
++ return x;
+}
+
-+/* We should combine the shift and compare */
-+/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 3" } } */
++float f2(void)
++{
++ float x = 123256.0f;
++ return x;
++}
++
++float f3(void)
++{
++ float x = 2.0f;
++ return x;
++}
++
++float f4(void)
++{
++ float x = -20000.1;
++ return x;
++}
++
++
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, ?#0" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 24" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 24" 1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 48128" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tw\[0-9\]+, 0x47f0, lsl 16" 1 } } */
++
++/* { dg-final { scan-assembler-times "fmov\ts\[0-9\]+, 2\\\.0e\\\+0" 1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 16435" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tw\[0-9\]+, 0xc69c, lsl 16" 1 } } */
++
--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/aarch64/hfmode_ins_1.c
@@ -0,0 +1,21 @@
@@ -3438,6 +5158,68 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+/* { dg-final { scan-assembler-times "fcvtzs\t\[w,x\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
+/* { dg-final { scan-assembler-not "bl" } } */
--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c
+@@ -0,0 +1,59 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target lp64 } */
++/* { dg-options "-O3" } */
++
++long long f1(void)
++{
++ return 0xffff6666;
++}
++
++int f3(void)
++{
++ return 0xffff6666;
++}
++
++
++long f2(void)
++{
++ return 0x11110000ffff6666;
++}
++
++long f4(void)
++{
++ return 0x11110001ffff6666;
++}
++
++long f5(void)
++{
++ return 0x111100001ff6666;
++}
++
++long f6(void)
++{
++ return 0x00001111ffff6666;
++}
++
++long f7(void)
++{
++ return 0x000011116666ffff;
++}
++
++long f8(void)
++{
++ return 0x0f0011116666ffff;
++}
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, -39322" 1 } } */
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 4294927974" 3 } } */
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 1718026239" 1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -2576941057" 1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -39322" 1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 26214" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0xf00, lsl 48" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 48" 2 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1000, lsl 32" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 32" 3 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x111, lsl 48" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1ff, lsl 16" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1, lsl 32" 1 } } */
++
+--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/aarch64/lrint-matherr.h
@@ -0,0 +1,5 @@
+#define TEST(name, float_type, int_type, pref) void f_##name (float_type x) \
@@ -3489,6 +5271,52 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+/* { dg-final { scan-assembler-times "bl\tlrint" 4 } } */
+/* { dg-final { scan-assembler-times "bl\tllrint" 2 } } */
+/* { dg-final { scan-assembler-not "fcvtzs" } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
+@@ -4,10 +4,10 @@
+ #pragma GCC target ("+nothing+simd, cmodel=small")
+
+ int
+-cal (float a)
++cal (double a)
+ {
+- float b = 1.2;
+- float c = 2.2;
++ double b = 3.2;
++ double c = 2.2;
+ if ((a + b) != c)
+ return 0;
+ else
+@@ -19,11 +19,11 @@ cal (float a)
+ #pragma GCC target ("cmodel=large")
+
+ int
+-cal2 (float a)
++cal2 (double a)
+ {
+
+- float b = 1.2;
+- float c = 2.2;
++ double b = 3.2;
++ double c = 2.2;
+ if ((a + b) != c)
+ return 0;
+ else
+@@ -33,11 +33,11 @@ cal2 (float a)
+ #pragma GCC pop_options
+
+ int
+-cal3 (float a)
++cal3 (double a)
+ {
+
+- float b = 1.2;
+- float c = 2.2;
++ double b = 3.2;
++ double c = 2.2;
+ if ((a + b) != c)
+ return 0;
+ else
--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_1.c
@@ -0,0 +1,18 @@
@@ -3551,6 +5379,76 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+
+/* { dg-final { scan-assembler-times "udiv\tw\[0-9\]+, w\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler-times "sdiv\tw\[0-9\]+, w\[0-9\]+" 2 } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c
+@@ -0,0 +1,67 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++typedef short int __attribute__ ((vector_size (16))) v8hi;
++
++v8hi
++mla8hi (v8hi v0, v8hi v1, short int v2)
++{
++ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++ return v0 + v1 * v2;
++}
++
++
++v8hi
++mls8hi (v8hi v0, v8hi v1, short int v2)
++{
++ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++ return v0 - v1 * v2;
++}
++
++typedef short int __attribute__ ((vector_size (8))) v4hi;
++
++v4hi
++mla4hi (v4hi v0, v4hi v1, short int v2)
++{
++ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++ return v0 + v1 * v2;
++}
++
++v4hi
++mls4hi (v4hi v0, v4hi v1, short int v2)
++{
++ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++ return v0 - v1 * v2;
++}
++
++typedef int __attribute__ ((vector_size (16))) v4si;
++
++v4si
++mla4si (v4si v0, v4si v1, int v2)
++{
++ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++ return v0 + v1 * v2;
++}
++
++v4si
++mls4si (v4si v0, v4si v1, int v2)
++{
++ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++ return v0 - v1 * v2;
++}
++
++typedef int __attribute__((vector_size (8))) v2si;
++
++v2si
++mla2si (v2si v0, v2si v1, int v2)
++{
++ /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++ return v0 + v1 * v2;
++}
++
++v2si
++mls2si (v2si v0, v2si v1, int v2)
++{
++ /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++ return v0 - v1 * v2;
++}
--- a/src/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
@@ -3,7 +3,7 @@
@@ -3713,6 +5611,27 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, #4" 1 } } */
+/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */
--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/tst_imm_split_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++f (unsigned char *p)
++{
++ return p[0] == 50 || p[0] == 52;
++}
++
++int
++g (unsigned char *p)
++{
++ return (p[0] >> 4 & 0xfd) == 0;
++}
++
++/* { dg-final { scan-assembler-not "and\\t\[xw\]\[0-9\]+, \[xw\]\[0-9\]+.*" } } */
++/* { dg-final { scan-assembler "tst\\t\[xw\]\[0-9\]+, \[xw\]\[0-9\]+" } } */
++/* { dg-final { scan-assembler "tst\\t\[xw\]\[0-9\]+, \[xw\]\[0-9\]+, lsr 4" } } */
+--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-init-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
@@ -4566,6 +6485,28 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+/* Ensure there is no IT block with more than 2 instructions, ie. we only allow
+ IT, ITT and ITE. */
+/* { dg-final { scan-assembler-not "\\sit\[te\]{2}" } } */
+--- a/src/gcc/testsuite/gcc.target/arm/lto/pr65837-attr_0.c
++++ b/src/gcc/testsuite/gcc.target/arm/lto/pr65837-attr_0.c
+@@ -1,6 +1,7 @@
+ /* { dg-lto-do run } */
+ /* { dg-require-effective-target arm_neon_hw } */
+-/* { dg-lto-options {{-flto}} } */
++/* { dg-require-effective-target arm_neon_ok_no_float_abi } */
++/* { dg-lto-options {{-flto -mfpu=neon}} } */
+
+ #include "arm_neon.h"
+
+--- a/src/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c
++++ b/src/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c
+@@ -1,7 +1,7 @@
+ /* { dg-lto-do run } */
+ /* { dg-require-effective-target arm_neon_hw } */
++/* { dg-require-effective-target arm_neon_ok_no_float_abi } */
+ /* { dg-lto-options {{-flto -mfpu=neon}} } */
+-/* { dg-suppress-ld-options {-mfpu=neon} } */
+
+ #include "arm_neon.h"
+
--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/arm/movdi_movt.c
@@ -0,0 +1,18 @@
@@ -4618,6 +6559,178 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+#define __ARM_FEATURE_LDREX 0
+/* { dg-warning ".__ARM_FEATURE_LDREX. redefined" "" { target *-*-* } .-1 } */
--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/pr77308-1.c
+@@ -0,0 +1,169 @@
++/* { dg-do compile } */
++/* { dg-options "-Os -Wstack-usage=2500" } */
++
++/* This is a modified algorithm with bit-not "~" at the Sigma-blocks.
++ It improves the test coverage of one_cmpldi2 and subdi3 patterns.
++ Unlike the original test case these insns can reach the reload pass,
++ which may result in large stack usage. */
++
++#define SHA_LONG64 unsigned long long
++#define U64(C) C##ULL
++
++#define SHA_LBLOCK 16
++#define SHA512_CBLOCK (SHA_LBLOCK*8)
++
++typedef struct SHA512state_st {
++ SHA_LONG64 h[8];
++ SHA_LONG64 Nl, Nh;
++ union {
++ SHA_LONG64 d[SHA_LBLOCK];
++ unsigned char p[SHA512_CBLOCK];
++ } u;
++ unsigned int num, md_len;
++} SHA512_CTX;
++
++static const SHA_LONG64 K512[80] = {
++ U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
++ U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
++ U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
++ U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
++ U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
++ U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
++ U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
++ U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
++ U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
++ U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
++ U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
++ U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
++ U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
++ U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
++ U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
++ U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
++ U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
++ U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
++ U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
++ U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
++ U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
++ U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
++ U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
++ U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
++ U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
++ U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
++ U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
++ U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
++ U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
++ U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
++ U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
++ U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
++ U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
++ U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
++ U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
++ U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
++ U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
++ U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
++ U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
++ U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
++};
++
++#define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
++#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
++#define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
++#define Sigma0(x) ~(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
++#define Sigma1(x) ~(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
++#define sigma0(x) ~(ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
++#define sigma1(x) ~(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
++#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
++#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
++
++#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
++ T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
++ h = Sigma0(a) + Maj(a,b,c); \
++ d += T1; h += T1; } while (0)
++#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
++ s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
++ s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
++ T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
++ ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
++void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
++ unsigned int num)
++{
++ const SHA_LONG64 *W = in;
++ SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
++ SHA_LONG64 X[16];
++ int i;
++
++ while (num--) {
++
++ a = ctx->h[0];
++ b = ctx->h[1];
++ c = ctx->h[2];
++ d = ctx->h[3];
++ e = ctx->h[4];
++ f = ctx->h[5];
++ g = ctx->h[6];
++ h = ctx->h[7];
++
++ T1 = X[0] = PULL64(W[0]);
++ ROUND_00_15(0, a, b, c, d, e, f, g, h);
++ T1 = X[1] = PULL64(W[1]);
++ ROUND_00_15(1, h, a, b, c, d, e, f, g);
++ T1 = X[2] = PULL64(W[2]);
++ ROUND_00_15(2, g, h, a, b, c, d, e, f);
++ T1 = X[3] = PULL64(W[3]);
++ ROUND_00_15(3, f, g, h, a, b, c, d, e);
++ T1 = X[4] = PULL64(W[4]);
++ ROUND_00_15(4, e, f, g, h, a, b, c, d);
++ T1 = X[5] = PULL64(W[5]);
++ ROUND_00_15(5, d, e, f, g, h, a, b, c);
++ T1 = X[6] = PULL64(W[6]);
++ ROUND_00_15(6, c, d, e, f, g, h, a, b);
++ T1 = X[7] = PULL64(W[7]);
++ ROUND_00_15(7, b, c, d, e, f, g, h, a);
++ T1 = X[8] = PULL64(W[8]);
++ ROUND_00_15(8, a, b, c, d, e, f, g, h);
++ T1 = X[9] = PULL64(W[9]);
++ ROUND_00_15(9, h, a, b, c, d, e, f, g);
++ T1 = X[10] = PULL64(W[10]);
++ ROUND_00_15(10, g, h, a, b, c, d, e, f);
++ T1 = X[11] = PULL64(W[11]);
++ ROUND_00_15(11, f, g, h, a, b, c, d, e);
++ T1 = X[12] = PULL64(W[12]);
++ ROUND_00_15(12, e, f, g, h, a, b, c, d);
++ T1 = X[13] = PULL64(W[13]);
++ ROUND_00_15(13, d, e, f, g, h, a, b, c);
++ T1 = X[14] = PULL64(W[14]);
++ ROUND_00_15(14, c, d, e, f, g, h, a, b);
++ T1 = X[15] = PULL64(W[15]);
++ ROUND_00_15(15, b, c, d, e, f, g, h, a);
++
++ for (i = 16; i < 80; i += 16) {
++ ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
++ ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
++ ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
++ ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
++ ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
++ ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
++ ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
++ ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
++ ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
++ ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
++ ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
++ ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
++ ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
++ ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
++ ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
++ ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
++ }
++
++ ctx->h[0] += a;
++ ctx->h[1] += b;
++ ctx->h[2] += c;
++ ctx->h[3] += d;
++ ctx->h[4] += e;
++ ctx->h[5] += f;
++ ctx->h[6] += g;
++ ctx->h[7] += h;
++
++ W += SHA_LBLOCK;
++ }
++}
+--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/arm/sdiv_costs_1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
@@ -4924,7 +7037,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+ set stack_opt "-fstack-check"
+ } else { set stack_opt "-fstack-check=$stack_kind" }
+
-+ return [check_no_compiler_messages stack_check executable {
++ return [check_no_compiler_messages stack_check_$stack_kind executable {
+ int main (void) { return 0; }
+ } "$stack_opt"]
+}
@@ -4932,7 +7045,66 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
# Return 1 if compilation with -freorder-blocks-and-partition is error-free
# for trivial code, 0 otherwise. As some targets (ARM for example) only
# warn when -fprofile-use is also supplied we test that combination too.
-@@ -3768,12 +3779,13 @@ proc check_effective_target_arm_fp16_hw { } {
+@@ -3365,7 +3376,7 @@ proc add_options_for_arm_v8_1a_neon { flags } {
+ return "$flags"
+ }
+ global et_arm_v8_1a_neon_flags
+- return "$flags $et_arm_v8_1a_neon_flags -march=armv8.1-a"
++ return "$flags $et_arm_v8_1a_neon_flags"
+ }
+
+ # Add the options needed for ARMv8.2 with the scalar FP16 extension.
+@@ -3428,8 +3439,9 @@ proc check_effective_target_arm_neon_ok_nocache { } {
+ global et_arm_neon_flags
+ set et_arm_neon_flags ""
+ if { [check_effective_target_arm32] } {
+- foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon -mfloat-abi=softfp" "-mfpu=neon -mfloat-abi=softfp -march=armv7-a"} {
++ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon -mfloat-abi=softfp" "-mfpu=neon -mfloat-abi=softfp -march=armv7-a" "-mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard -march=armv7-a"} {
+ if { [check_no_compiler_messages_nocache arm_neon_ok object {
++ #include <arm_neon.h>
+ int dummy;
+ #ifndef __ARM_NEON__
+ #error not NEON
+@@ -3454,6 +3466,38 @@ proc check_effective_target_arm_neon_ok { } {
+ check_effective_target_arm_neon_ok_nocache]
+ }
+
++# Return 1 if this is an ARM target supporting -mfpu=neon without any
++# -mfloat-abi= option. Useful in tests where add_options is not
++# supported (such as lto tests).
++
++proc check_effective_target_arm_neon_ok_no_float_abi_nocache { } {
++ if { [check_effective_target_arm32] } {
++ foreach flags {"-mfpu=neon"} {
++ if { [check_no_compiler_messages_nocache arm_neon_ok_no_float_abi object {
++ #include <arm_neon.h>
++ int dummy;
++ #ifndef __ARM_NEON__
++ #error not NEON
++ #endif
++ /* Avoid the case where a test adds -mfpu=neon, but the toolchain is
++ configured for -mcpu=arm926ej-s, for example. */
++ #if __ARM_ARCH < 7 || __ARM_ARCH_PROFILE == 'M'
++ #error Architecture does not support NEON.
++ #endif
++ } "$flags"] } {
++ return 1
++ }
++ }
++ }
++
++ return 0
++}
++
++proc check_effective_target_arm_neon_ok_no_float_abi { } {
++ return [check_cached_effective_target arm_neon_ok_no_float_abi \
++ check_effective_target_arm_neon_ok_no_float_abi_nocache]
++}
++
+ proc check_effective_target_arm_crc_ok_nocache { } {
+ global et_arm_crc_flags
+ set et_arm_crc_flags "-march=armv8-a+crc"
+@@ -3769,12 +3813,13 @@ proc check_effective_target_arm_fp16_hw { } {
# can be selected and a routine to give the flags to select that architecture
# Note: Extra flags may be added to disable options from newer compilers
# (Thumb in particular - but others may be added in the future).
@@ -4949,7 +7121,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
v4 "-march=armv4 -marm" __ARM_ARCH_4__
v4t "-march=armv4t" __ARM_ARCH_4T__
v5 "-march=armv5 -marm" __ARM_ARCH_5__
-@@ -3788,20 +3800,23 @@ foreach { armfunc armflag armdef } {
+@@ -3789,20 +3834,23 @@ foreach { armfunc armflag armdef } {
v7r "-march=armv7-r" __ARM_ARCH_7R__
v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__
v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__
@@ -4977,7 +7149,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
#endif
} "FLAG" ]
}
-@@ -3822,26 +3837,6 @@ foreach { armfunc armflag armdef } {
+@@ -3823,26 +3871,6 @@ foreach { armfunc armflag armdef } {
}]
}
@@ -5004,6 +7176,29 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
# Return 1 if GCC was configured with --with-mode=
proc check_effective_target_default_mode { } {
+@@ -4038,13 +4066,15 @@ proc check_effective_target_arm_v8_1a_neon_ok_nocache { } {
+ # since AArch64 only needs the -march setting.
+ foreach flags {"" "-mfpu=neon-fp-armv8" "-mfloat-abi=softfp" \
+ "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
+- if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object {
+- #if !defined (__ARM_FEATURE_QRDMX)
+- #error "__ARM_FEATURE_QRDMX not defined"
+- #endif
+- } "$flags -march=armv8.1-a"] } {
+- set et_arm_v8_1a_neon_flags "$flags -march=armv8.1-a"
+- return 1
++ foreach arches { "-march=armv8-a+rdma" "-march=armv8.1-a" } {
++ if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object {
++ #if !defined (__ARM_FEATURE_QRDMX)
++ #error "__ARM_FEATURE_QRDMX not defined"
++ #endif
++ } "$flags $arches"] } {
++ set et_arm_v8_1a_neon_flags "$flags $arches"
++ return 1
++ }
+ }
+ }
+
--- a/src/gcc/tree-ssa-dce.c
+++ b/src/gcc/tree-ssa-dce.c
@@ -233,6 +233,8 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
@@ -5325,3 +7520,250 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
+
+SHLIB_LDFLAGS = -Wl,--soname=$(SHLIB_SONAME) \
+ $(LDFLAGS)
+--- /dev/null
++++ b/src/libstdc++-v3/config/cpu/aarch64/opt/bits/opt_random.h
+@@ -0,0 +1,47 @@
++// Optimizations for random number functions, aarch64 version -*- C++ -*-
++
++// Copyright (C) 2017 Free Software Foundation, Inc.
++//
++// This file is part of the GNU ISO C++ Library. This library is free
++// software; you can redistribute it and/or modify it under the
++// terms of the GNU General Public License as published by the
++// Free Software Foundation; either version 3, or (at your option)
++// any later version.
++
++// This library is distributed in the hope that it will be useful,
++// but WITHOUT ANY WARRANTY; without even the implied warranty of
++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++// GNU General Public License for more details.
++
++// Under Section 7 of GPL version 3, you are granted additional
++// permissions described in the GCC Runtime Library Exception, version
++// 3.1, as published by the Free Software Foundation.
++
++// You should have received a copy of the GNU General Public License and
++// a copy of the GCC Runtime Library Exception along with this program;
++// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++// <http://www.gnu.org/licenses/>.
++
++/** @file bits/opt_random.h
++ * This is an internal header file, included by other library headers.
++ * Do not attempt to use it directly. @headername{random}
++ */
++
++#ifndef _BITS_OPT_RANDOM_H
++#define _BITS_OPT_RANDOM_H 1
++
++#pragma GCC system_header
++
++
++namespace std _GLIBCXX_VISIBILITY (default)
++{
++_GLIBCXX_BEGIN_NAMESPACE_VERSION
++
++
++
++
++_GLIBCXX_END_NAMESPACE_VERSION
++} // namespace
++
++
++#endif // _BITS_OPT_RANDOM_H
+--- /dev/null
++++ b/src/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
+@@ -0,0 +1,180 @@
++// Optimizations for random number extensions, aarch64 version -*- C++ -*-
++
++// Copyright (C) 2017 Free Software Foundation, Inc.
++//
++// This file is part of the GNU ISO C++ Library. This library is free
++// software; you can redistribute it and/or modify it under the
++// terms of the GNU General Public License as published by the
++// Free Software Foundation; either version 3, or (at your option)
++// any later version.
++
++// This library is distributed in the hope that it will be useful,
++// but WITHOUT ANY WARRANTY; without even the implied warranty of
++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++// GNU General Public License for more details.
++
++// Under Section 7 of GPL version 3, you are granted additional
++// permissions described in the GCC Runtime Library Exception, version
++// 3.1, as published by the Free Software Foundation.
++
++// You should have received a copy of the GNU General Public License and
++// a copy of the GCC Runtime Library Exception along with this program;
++// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++// <http://www.gnu.org/licenses/>.
++
++/** @file ext/random.tcc
++ * This is an internal header file, included by other library headers.
++ * Do not attempt to use it directly. @headername{ext/random}
++ */
++
++#ifndef _EXT_OPT_RANDOM_H
++#define _EXT_OPT_RANDOM_H 1
++
++#pragma GCC system_header
++
++#ifdef __ARM_NEON
++
++#ifdef __AARCH64EB__
++# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \
++ {16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \
++ 24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C})
++#else
++# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \
++ {_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \
++ _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15})
++#endif
++
++namespace __gnu_cxx _GLIBCXX_VISIBILITY (default)
++{
++_GLIBCXX_BEGIN_NAMESPACE_VERSION
++
++ namespace {
++ // Logical Shift right 128-bits by c * 8 bits
++
++ __extension__ extern __inline __Uint32x4_t
++ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
++ __aarch64_lsr_128 (__Uint8x16_t __a, __const int __c)
++ {
++ const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
++ 0, 0, 0, 0, 0, 0, 0, 0};
++
++ return (__Uint32x4_t) __VEXT (__zero, __a, __c);
++ }
++
++ // Logical Shift left 128-bits by c * 8 bits
++
++ __extension__ extern __inline __Uint32x4_t
++ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
++ __aarch64_lsl_128 (__Uint8x16_t __a, __const int __c)
++ {
++ const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
++ 0, 0, 0, 0, 0, 0, 0, 0};
++
++ return (__Uint32x4_t) __VEXT (__a, __zero, 16 - __c);
++ }
++
++ template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2>
++ inline __Uint32x4_t __aarch64_recursion (__Uint32x4_t __a,
++ __Uint32x4_t __b,
++ __Uint32x4_t __c,
++ __Uint32x4_t __d,
++ __Uint32x4_t __e)
++ {
++ __Uint32x4_t __y = (__b >> __sr1);
++ __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2);
++
++ __Uint32x4_t __v = __d << __sl1;
++
++ __z = __z ^ __a;
++ __z = __z ^ __v;
++
++ __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2);
++
++ __y = __y & __e;
++ __z = __z ^ __x;
++ return __z ^ __y;
++ }
++}
++
++#define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ 1
++ template<typename _UIntType, size_t __m,
++ size_t __pos1, size_t __sl1, size_t __sl2,
++ size_t __sr1, size_t __sr2,
++ uint32_t __msk1, uint32_t __msk2,
++ uint32_t __msk3, uint32_t __msk4,
++ uint32_t __parity1, uint32_t __parity2,
++ uint32_t __parity3, uint32_t __parity4>
++ void simd_fast_mersenne_twister_engine<_UIntType, __m,
++ __pos1, __sl1, __sl2, __sr1, __sr2,
++ __msk1, __msk2, __msk3, __msk4,
++ __parity1, __parity2, __parity3,
++ __parity4>::
++ _M_gen_rand (void)
++ {
++ __Uint32x4_t __r1 = _M_state[_M_nstate - 2];
++ __Uint32x4_t __r2 = _M_state[_M_nstate - 1];
++
++ __Uint32x4_t __aData = {__msk1, __msk2, __msk3, __msk4};
++
++ size_t __i;
++ for (__i = 0; __i < _M_nstate - __pos1; ++__i)
++ {
++ __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
++ (_M_state[__i], _M_state[__i + __pos1], __r1, __r2, __aData);
++
++ _M_state[__i] = __r;
++
++ __r1 = __r2;
++ __r2 = __r;
++ }
++ for (; __i < _M_nstate; ++__i)
++ {
++ __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
++ (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2,
++ __aData);
++
++ _M_state[__i] = __r;
++
++ __r1 = __r2;
++ __r2 = __r;
++ }
++
++ _M_pos = 0;
++ }
++
++
++#define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL 1
++ template<typename _UIntType, size_t __m,
++ size_t __pos1, size_t __sl1, size_t __sl2,
++ size_t __sr1, size_t __sr2,
++ uint32_t __msk1, uint32_t __msk2,
++ uint32_t __msk3, uint32_t __msk4,
++ uint32_t __parity1, uint32_t __parity2,
++ uint32_t __parity3, uint32_t __parity4>
++ bool
++ operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
++ __m, __pos1, __sl1, __sl2, __sr1, __sr2,
++ __msk1, __msk2, __msk3, __msk4,
++ __parity1, __parity2, __parity3, __parity4>& __lhs,
++ const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
++ __m, __pos1, __sl1, __sl2, __sr1, __sr2,
++ __msk1, __msk2, __msk3, __msk4,
++ __parity1, __parity2, __parity3, __parity4>& __rhs)
++ {
++ if (__lhs._M_pos != __rhs._M_pos)
++ return false;
++
++ __Uint32x4_t __res = __lhs._M_state[0] ^ __rhs._M_state[0];
++
++ for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
++ __res |= __lhs._M_state[__i] ^ __rhs._M_state[__i];
++
++ return (__int128) __res == 0;
++ }
++
++_GLIBCXX_END_NAMESPACE_VERSION
++ } // namespace
++
++#endif // __ARM_NEON
++
++#endif // _EXT_OPT_RANDOM_H
+--- a/src/libstdc++-v3/include/ext/random
++++ b/src/libstdc++-v3/include/ext/random
+@@ -184,6 +184,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
+ #ifdef __SSE2__
+ __m128i _M_state[_M_nstate];
+ #endif
++#ifdef __ARM_NEON
++#ifdef __aarch64__
++ __Uint32x4_t _M_state[_M_nstate];
++#endif
++#endif
+ uint32_t _M_state32[_M_nstate32];
+ result_type _M_stateT[state_size];
+ } __attribute__ ((__aligned__ (16)));
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/gcc-7.git
More information about the Reproducible-commits
mailing list