[gcc-7] 292/354: * Update the Linaro support to the 7-2017.09 snapshot.

Ximin Luo infinity0 at debian.org
Thu Nov 23 15:51:21 UTC 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch master
in repository gcc-7.

commit c59d8f88bd5c92ad3e451dc781eca309992cd57f
Author: doko <doko at 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca>
Date:   Thu Sep 14 19:25:08 2017 +0000

      * Update the Linaro support to the 7-2017.09 snapshot.
    
    
    git-svn-id: svn+ssh://svn.debian.org/svn/gcccvs/branches/sid/gcc-7@9683 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
 debian/changelog                         |    1 +
 debian/patches/gcc-linaro-doc.diff       |   67 +-
 debian/patches/gcc-linaro-no-macros.diff |    2 +-
 debian/patches/gcc-linaro.diff           | 2700 ++++++++++++++++++++++++++++--
 4 files changed, 2637 insertions(+), 133 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index e70af52..4929e0f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,7 @@ gcc-7 (7.2.0-5) UNRELEASED; urgency=medium
   * Enable libgo tests and rebuilds with make -C (Svante Signell).
     Closes: #873929.
   * Fix PR sanitizer/77631, support separate debug info in libbacktrace.
+  * Update the Linaro support to the 7-2017.09 snapshot.
 
  -- Matthias Klose <doko at debian.org>  Wed, 13 Sep 2017 22:52:19 +0200
 
diff --git a/debian/patches/gcc-linaro-doc.diff b/debian/patches/gcc-linaro-doc.diff
index ac879e0..37ac4c9 100644
--- a/debian/patches/gcc-linaro-doc.diff
+++ b/debian/patches/gcc-linaro-doc.diff
@@ -1,4 +1,4 @@
-# DP: Changes for the Linaro 7-2017.08 snapshot (documentation).
+# DP: Changes for the Linaro 7-2017.09 snapshot (documentation).
 
 --- a/src/gcc/doc/install.texi
 +++ b/src/gcc/doc/install.texi
@@ -28,8 +28,56 @@
  @item Option @tab aprofile @tab rmprofile
 --- a/src/gcc/doc/invoke.texi
 +++ b/src/gcc/doc/invoke.texi
-@@ -14076,6 +14076,10 @@ Enable Large System Extension instructions.  This is on by default for
+@@ -580,15 +580,14 @@ Objective-C and Objective-C++ Dialects}.
+ -mgeneral-regs-only @gol
+ -mcmodel=tiny  -mcmodel=small  -mcmodel=large @gol
+ -mstrict-align @gol
+--momit-leaf-frame-pointer  -mno-omit-leaf-frame-pointer @gol
++-momit-leaf-frame-pointer @gol
+ -mtls-dialect=desc  -mtls-dialect=traditional @gol
+ -mtls-size=@var{size} @gol
+--mfix-cortex-a53-835769  -mno-fix-cortex-a53-835769 @gol
+--mfix-cortex-a53-843419  -mno-fix-cortex-a53-843419 @gol
+--mlow-precision-recip-sqrt  -mno-low-precision-recip-sqrt@gol
+--mlow-precision-sqrt  -mno-low-precision-sqrt@gol
+--mlow-precision-div  -mno-low-precision-div @gol
+--march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}}
++-mfix-cortex-a53-835769  -mfix-cortex-a53-843419 @gol
++-mlow-precision-recip-sqrt  -mlow-precision-sqrt  -mlow-precision-div @gol
++-mpc-relative-literal-loads @gol
++-msign-return-address=@var{scope} @gol
++-march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}  -moverride=@var{string}}
+ 
+ @emph{Adapteva Epiphany Options}
+ @gccoptlist{-mhalf-reg-file  -mprefer-short-insn-regs @gol
+@@ -13961,7 +13960,7 @@ support for the ARMv8.2-A architecture extensions.
+ 
+ The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler
+ support for the ARMv8.1-A architecture extension.  In particular, it
+-enables the @samp{+crc} and @samp{+lse} features.
++enables the @samp{+crc}, @samp{+lse}, and @samp{+rdma} features.
+ 
+ The value @samp{native} is available on native AArch64 GNU/Linux and
+ causes the compiler to pick the architecture of the host system.  This
+@@ -14034,8 +14033,10 @@ across releases.
+ This option is only intended to be useful when developing GCC.
+ 
+ @item -mpc-relative-literal-loads
++@itemx -mno-pc-relative-literal-loads
+ @opindex mpc-relative-literal-loads
+-Enable PC-relative literal loads.  With this option literal pools are
++@opindex mno-pc-relative-literal-loads
++Enable or disable PC-relative literal loads.  With this option literal pools are
+ accessed using a single instruction and emitted after each function.  This
+ limits the maximum size of functions to 1MB.  This is enabled by default for
+ @option{-mcmodel=tiny}.
+@@ -14074,8 +14075,15 @@ instructions.  This is on by default for all possible values for options
+ @item lse
+ Enable Large System Extension instructions.  This is on by default for
  @option{-march=armv8.1-a}.
++@item rdma
++Enable Round Double Multiply Accumulate instructions.  This is on by default
++for @option{-march=armv8.1-a}.
  @item fp16
  Enable FP16 extension.  This also enables floating-point instructions.
 +@item rcpc
@@ -41,7 +89,20 @@
  
 --- a/src/gcc/doc/sourcebuild.texi
 +++ b/src/gcc/doc/sourcebuild.texi
-@@ -2274,6 +2274,11 @@ the codeset to convert to.
+@@ -1570,6 +1570,12 @@ Test system supports executing NEON v2 instructions.
+ ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible
+ options.  Some multilibs may be incompatible with these options.
+ 
++@item arm_neon_ok_no_float_abi
++@anchor{arm_neon_ok_no_float_abi}
++ARM Target supports NEON with @code{-mfpu=neon}, but without any
++-mfloat-abi= option.  Some multilibs may be incompatible with this
++option.
++
+ @item arm_neonv2_ok
+ @anchor{arm_neonv2_ok}
+ ARM Target supports @code{-mfpu=neon-vfpv4 -mfloat-abi=softfp} or compatible
+@@ -2274,6 +2280,11 @@ the codeset to convert to.
  Skip the test if the target does not support profiling with option
  @var{profopt}.
  
diff --git a/debian/patches/gcc-linaro-no-macros.diff b/debian/patches/gcc-linaro-no-macros.diff
index 737d486..c94dbe8 100644
--- a/debian/patches/gcc-linaro-no-macros.diff
+++ b/debian/patches/gcc-linaro-no-macros.diff
@@ -89,4 +89,4 @@ Index: b/src/gcc/LINARO-VERSION
 --- a/src/gcc/LINARO-VERSION
 +++ /dev/null
 @@ -1,1 +0,0 @@
--Snapshot 7.2-2017.08
+-Snapshot 7.2-2017.09
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index 4df4ae0..9bbc4e7 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,8 +1,8 @@
-# DP: Changes for the Linaro 7-2017.08 snapshot.
+# DP: Changes for the Linaro 7-2017.09 snapshot.
 
 MSG=$(git log origin/linaro/gcc-7-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-7-branch --format=format:"%H" -n 1 --grep "gcc-7-branch@${SVN%.}"
 
-LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefefa91b044ffa4a4b868ef7188e5255a \
+LANG=C git diff --no-renames bb85d61e6bfbadee4494e034a5d8187cf0626aed 1604249e382610b087a72d0d07103f815458cec0 \
  | egrep -v '^(diff|index) ' \
  | filterdiff --strip=1 --addoldprefix=a/src/  --addnewprefix=b/src/ \
  | sed 's,a/src//dev/null,/dev/null,'
@@ -10,7 +10,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 --- /dev/null
 +++ b/src/gcc/LINARO-VERSION
 @@ -0,0 +1 @@
-+Snapshot 7.2-2017.08
++Snapshot 7.2-2017.09
 --- a/src/gcc/Makefile.in
 +++ b/src/gcc/Makefile.in
 @@ -845,10 +845,12 @@ BASEVER     := $(srcdir)/BASE-VER  # 4.x.y
@@ -46,6 +46,318 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
  
+--- a/src/gcc/ccmp.c
++++ b/src/gcc/ccmp.c
+@@ -38,6 +38,29 @@ along with GCC; see the file COPYING3.  If not see
+ #include "ccmp.h"
+ #include "predict.h"
+ 
++/* Check whether T is a simple boolean variable or a SSA name
++   set by a comparison operator in the same basic block.  */
++static bool
++ccmp_tree_comparison_p (tree t, basic_block bb)
++{
++  gimple *g = get_gimple_for_ssa_name (t);
++  tree_code tcode;
++
++  /* If we have a boolean variable allow it and generate a compare
++     to zero reg when expanding.  */
++  if (!g)
++    return (TREE_CODE (TREE_TYPE (t)) == BOOLEAN_TYPE);
++
++  /* Check to see if SSA name is set by a comparison operator in
++     the same basic block.  */ 
++  if (!is_gimple_assign (g))
++    return false;
++  if (bb != gimple_bb (g))
++    return false;
++  tcode = gimple_assign_rhs_code (g);
++  return TREE_CODE_CLASS (tcode) == tcc_comparison;
++}
++
+ /* The following functions expand conditional compare (CCMP) instructions.
+    Here is a short description about the over all algorithm:
+      * ccmp_candidate_p is used to identify the CCMP candidate
+@@ -71,49 +94,69 @@ along with GCC; see the file COPYING3.  If not see
+ static bool
+ ccmp_candidate_p (gimple *g)
+ {
+-  tree rhs = gimple_assign_rhs_to_tree (g);
++  tree rhs;
+   tree lhs, op0, op1;
+   gimple *gs0, *gs1;
+-  tree_code tcode, tcode0, tcode1;
+-  tcode = TREE_CODE (rhs);
++  tree_code tcode;
++  basic_block bb;
++
++  if (!g)
++    return false;
+ 
++  rhs = gimple_assign_rhs_to_tree (g);
++  tcode = TREE_CODE (rhs);
+   if (tcode != BIT_AND_EXPR && tcode != BIT_IOR_EXPR)
+     return false;
+ 
+   lhs = gimple_assign_lhs (g);
+   op0 = TREE_OPERAND (rhs, 0);
+   op1 = TREE_OPERAND (rhs, 1);
++  bb = gimple_bb (g);
+ 
+   if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
+       || !has_single_use (lhs))
+     return false;
+ 
+-  gs0 = get_gimple_for_ssa_name (op0);
+-  gs1 = get_gimple_for_ssa_name (op1);
+-  if (!gs0 || !gs1 || !is_gimple_assign (gs0) || !is_gimple_assign (gs1)
+-      /* g, gs0 and gs1 must be in the same basic block, since current stage
+-	 is out-of-ssa.  We can not guarantee the correctness when forwording
+-	 the gs0 and gs1 into g whithout DATAFLOW analysis.  */
+-      || gimple_bb (gs0) != gimple_bb (gs1)
+-      || gimple_bb (gs0) != gimple_bb (g))
+-    return false;
++  gs0 = get_gimple_for_ssa_name (op0); /* gs0 may be NULL */
++  gs1 = get_gimple_for_ssa_name (op1); /* gs1 may be NULL */
+ 
+-  tcode0 = gimple_assign_rhs_code (gs0);
+-  tcode1 = gimple_assign_rhs_code (gs1);
+-  if (TREE_CODE_CLASS (tcode0) == tcc_comparison
+-      && TREE_CODE_CLASS (tcode1) == tcc_comparison)
++  if (ccmp_tree_comparison_p (op0, bb) && ccmp_tree_comparison_p (op1, bb))
+     return true;
+-  if (TREE_CODE_CLASS (tcode0) == tcc_comparison
+-      && ccmp_candidate_p (gs1))
++  if (ccmp_tree_comparison_p (op0, bb) && ccmp_candidate_p (gs1))
+     return true;
+-  else if (TREE_CODE_CLASS (tcode1) == tcc_comparison
+-	   && ccmp_candidate_p (gs0))
++  if (ccmp_tree_comparison_p (op1, bb) && ccmp_candidate_p (gs0))
+     return true;
+   /* We skip ccmp_candidate_p (gs1) && ccmp_candidate_p (gs0) since
+-     there is no way to set the CC flag.  */
++     there is no way to set and maintain the CC flag on both sides of
++     the logical operator at the same time.  */
+   return false;
+ }
+ 
++/* Extract the comparison we want to do from the tree.  */
++void
++get_compare_parts (tree t, int *up, rtx_code *rcode,
++		   tree *rhs1, tree *rhs2)
++{
++  tree_code code;
++  gimple *g = get_gimple_for_ssa_name (t);
++  if (g)
++    {
++      *up = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)));
++      code = gimple_assign_rhs_code (g);
++      *rcode = get_rtx_code (code, *up);
++      *rhs1 = gimple_assign_rhs1 (g);
++      *rhs2 = gimple_assign_rhs2 (g);
++    }
++  else
++    {
++      /* If g is not a comparison operator create a compare to zero.  */
++      *up = 1;
++      *rcode = NE;
++      *rhs1 = t;
++      *rhs2 = build_zero_cst (TREE_TYPE (t));
++    }
++}
++
+ /* PREV is a comparison with the CC register which represents the
+    result of the previous CMP or CCMP.  The function expands the
+    next compare based on G which is ANDed/ORed with the previous
+@@ -121,20 +164,16 @@ ccmp_candidate_p (gimple *g)
+    PREP_SEQ returns all insns to prepare opearands for compare.
+    GEN_SEQ returns all compare insns.  */
+ static rtx
+-expand_ccmp_next (gimple *g, tree_code code, rtx prev,
++expand_ccmp_next (tree op, tree_code code, rtx prev,
+ 		  rtx_insn **prep_seq, rtx_insn **gen_seq)
+ {
+   rtx_code rcode;
+-  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)));
+-
+-  gcc_assert (code == BIT_AND_EXPR || code == BIT_IOR_EXPR);
+-
+-  rcode = get_rtx_code (gimple_assign_rhs_code (g), unsignedp);
++  int unsignedp;
++  tree rhs1, rhs2;
+ 
++  get_compare_parts(op, &unsignedp, &rcode, &rhs1, &rhs2);
+   return targetm.gen_ccmp_next (prep_seq, gen_seq, prev, rcode,
+-				gimple_assign_rhs1 (g),
+-				gimple_assign_rhs2 (g),
+-				get_rtx_code (code, 0));
++				rhs1, rhs2, get_rtx_code (code, 0));
+ }
+ 
+ /* Expand conditional compare gimple G.  A typical CCMP sequence is like:
+@@ -153,39 +192,42 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+ {
+   tree exp = gimple_assign_rhs_to_tree (g);
+   tree_code code = TREE_CODE (exp);
+-  gimple *gs0 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 0));
+-  gimple *gs1 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 1));
++  basic_block bb = gimple_bb (g);
++
++  tree op0 = TREE_OPERAND (exp, 0);
++  tree op1 = TREE_OPERAND (exp, 1);
++  gimple *gs0 = get_gimple_for_ssa_name (op0);
++  gimple *gs1 = get_gimple_for_ssa_name (op1);
+   rtx tmp;
+-  tree_code code0 = gimple_assign_rhs_code (gs0);
+-  tree_code code1 = gimple_assign_rhs_code (gs1);
+ 
+   gcc_assert (code == BIT_AND_EXPR || code == BIT_IOR_EXPR);
+-  gcc_assert (gs0 && gs1 && is_gimple_assign (gs0) && is_gimple_assign (gs1));
+ 
+-  if (TREE_CODE_CLASS (code0) == tcc_comparison)
++  if (ccmp_tree_comparison_p (op0, bb))
+     {
+-      if (TREE_CODE_CLASS (code1) == tcc_comparison)
++      if (ccmp_tree_comparison_p (op1, bb))
+ 	{
+ 	  int unsignedp0, unsignedp1;
+ 	  rtx_code rcode0, rcode1;
++	  tree logical_op0_rhs1, logical_op0_rhs2;
++	  tree logical_op1_rhs1, logical_op1_rhs2;
+ 	  int speed_p = optimize_insn_for_speed_p ();
++
+ 	  rtx tmp2 = NULL_RTX, ret = NULL_RTX, ret2 = NULL_RTX;
+ 	  unsigned cost1 = MAX_COST;
+ 	  unsigned cost2 = MAX_COST;
+ 
+-	  unsignedp0 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs0)));
+-	  unsignedp1 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs1)));
+-	  rcode0 = get_rtx_code (code0, unsignedp0);
+-	  rcode1 = get_rtx_code (code1, unsignedp1);
++	  get_compare_parts (op0, &unsignedp0, &rcode0,
++			     &logical_op0_rhs1, &logical_op0_rhs2);
++
++	  get_compare_parts (op1, &unsignedp1, &rcode1,
++			     &logical_op1_rhs1, &logical_op1_rhs2);
+ 
+ 	  rtx_insn *prep_seq_1, *gen_seq_1;
+ 	  tmp = targetm.gen_ccmp_first (&prep_seq_1, &gen_seq_1, rcode0,
+-					gimple_assign_rhs1 (gs0),
+-					gimple_assign_rhs2 (gs0));
+-
++					logical_op0_rhs1, logical_op0_rhs2);
+ 	  if (tmp != NULL)
+ 	    {
+-	      ret = expand_ccmp_next (gs1, code, tmp, &prep_seq_1, &gen_seq_1);
++	      ret = expand_ccmp_next (op1, code, tmp, &prep_seq_1, &gen_seq_1);
+ 	      cost1 = seq_cost (prep_seq_1, speed_p);
+ 	      cost1 += seq_cost (gen_seq_1, speed_p);
+ 	    }
+@@ -197,27 +239,22 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+ 	  rtx_insn *prep_seq_2, *gen_seq_2;
+ 	  if (tmp == NULL || cost1 < COSTS_N_INSNS (25))
+ 	    tmp2 = targetm.gen_ccmp_first (&prep_seq_2, &gen_seq_2, rcode1,
+-					   gimple_assign_rhs1 (gs1),
+-					   gimple_assign_rhs2 (gs1));
+-
++					   logical_op1_rhs1, logical_op1_rhs2);
+ 	  if (!tmp && !tmp2)
+ 	    return NULL_RTX;
+-
+ 	  if (tmp2 != NULL)
+ 	    {
+-	      ret2 = expand_ccmp_next (gs0, code, tmp2, &prep_seq_2,
++	      ret2 = expand_ccmp_next (op0, code, tmp2, &prep_seq_2,
+ 				       &gen_seq_2);
+ 	      cost2 = seq_cost (prep_seq_2, speed_p);
+ 	      cost2 += seq_cost (gen_seq_2, speed_p);
+ 	    }
+-
+ 	  if (cost2 < cost1)
+ 	    {
+ 	      *prep_seq = prep_seq_2;
+ 	      *gen_seq = gen_seq_2;
+ 	      return ret2;
+ 	    }
+-
+ 	  *prep_seq = prep_seq_1;
+ 	  *gen_seq = gen_seq_1;
+ 	  return ret;
+@@ -227,28 +264,18 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+ 	  tmp = expand_ccmp_expr_1 (gs1, prep_seq, gen_seq);
+ 	  if (!tmp)
+ 	    return NULL_RTX;
+-
+-	  return expand_ccmp_next (gs0, code, tmp, prep_seq, gen_seq);
++	  return expand_ccmp_next (op0, code, tmp, prep_seq, gen_seq);
+ 	}
+     }
+   else
+     {
+       gcc_assert (gimple_assign_rhs_code (gs0) == BIT_AND_EXPR
+                   || gimple_assign_rhs_code (gs0) == BIT_IOR_EXPR);
+-
+-      if (TREE_CODE_CLASS (gimple_assign_rhs_code (gs1)) == tcc_comparison)
+-	{
+-	  tmp = expand_ccmp_expr_1 (gs0, prep_seq, gen_seq);
+-	  if (!tmp)
+-	    return NULL_RTX;
+-
+-	  return expand_ccmp_next (gs1, code, tmp, prep_seq, gen_seq);
+-	}
+-      else
+-	{
+-	  gcc_assert (gimple_assign_rhs_code (gs1) == BIT_AND_EXPR
+-		      || gimple_assign_rhs_code (gs1) == BIT_IOR_EXPR);
+-	}
++      gcc_assert (ccmp_tree_comparison_p (op1, bb));
++      tmp = expand_ccmp_expr_1 (gs0, prep_seq, gen_seq);
++      if (!tmp)
++	return NULL_RTX;
++      return expand_ccmp_next (op1, code, tmp, prep_seq, gen_seq);
+     }
+ 
+   return NULL_RTX;
+@@ -258,7 +285,7 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, rtx_insn **gen_seq)
+    Return NULL_RTX if G is not a legal candidate or expand fail.
+    Otherwise return the target.  */
+ rtx
+-expand_ccmp_expr (gimple *g)
++expand_ccmp_expr (gimple *g, machine_mode mode)
+ {
+   rtx_insn *last;
+   rtx tmp;
+@@ -275,7 +302,6 @@ expand_ccmp_expr (gimple *g)
+     {
+       insn_code icode;
+       machine_mode cc_mode = CCmode;
+-      tree lhs = gimple_assign_lhs (g);
+       rtx_code cmp_code = GET_CODE (tmp);
+ 
+ #ifdef SELECT_CC_MODE
+@@ -284,7 +310,6 @@ expand_ccmp_expr (gimple *g)
+       icode = optab_handler (cstore_optab, cc_mode);
+       if (icode != CODE_FOR_nothing)
+ 	{
+-	  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+ 	  rtx target = gen_reg_rtx (mode);
+ 
+ 	  emit_insn (prep_seq);
+@@ -300,4 +325,3 @@ expand_ccmp_expr (gimple *g)
+   delete_insns_since (last);
+   return NULL_RTX;
+ }
+-
+--- a/src/gcc/ccmp.h
++++ b/src/gcc/ccmp.h
+@@ -20,6 +20,6 @@ along with GCC; see the file COPYING3.  If not see
+ #ifndef GCC_CCMP_H
+ #define GCC_CCMP_H
+ 
+-extern rtx expand_ccmp_expr (gimple *);
++extern rtx expand_ccmp_expr (gimple *, machine_mode);
+ 
+ #endif  /* GCC_CCMP_H  */
 --- a/src/gcc/config.gcc
 +++ b/src/gcc/config.gcc
 @@ -3796,34 +3796,19 @@ case "${target}" in
@@ -143,8 +455,8 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
 -/* V8.1 Architecture Processors.  */
 +/* Qualcomm ('Q') cores. */
-+AARCH64_CORE("falkor",      falkor,    cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0xC00, -1)
-+AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0xC00, -1)
++AARCH64_CORE("falkor",      falkor,    cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx,   0x51, 0xC00, -1)
++AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx,   0x51, 0xC00, -1)
 +
 +/* Samsung ('S') cores. */
 +AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
@@ -176,15 +488,28 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
      COSTS_N_INSNS (1),	/* UNUSED: Log_shift.  */
      COSTS_N_INSNS (1),	/* UNUSED: Log_shift_reg.  */
      0,			/* Extend.  */
+--- a/src/gcc/config/aarch64/aarch64-fusion-pairs.def
++++ b/src/gcc/config/aarch64/aarch64-fusion-pairs.def
+@@ -34,5 +34,6 @@ AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
+ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
+ AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
+ AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
++AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
+ 
+ #undef AARCH64_FUSION_PAIR
 --- a/src/gcc/config/aarch64/aarch64-option-extensions.def
 +++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
-@@ -60,4 +60,7 @@ AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
+@@ -60,4 +60,11 @@ AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
     Disabling "fp16" just disables "fp16".  */
  AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp")
  
 +/* Enabling or disabling "rcpc" only changes "rcpc".  */
 +AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
 +
++/* Enabling "rdma" also enables "fp", "simd".
++   Disabling "rdma" just disables "rdma".  */
++AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "rdma")
++
  #undef AARCH64_OPT_EXTENSION
 --- a/src/gcc/config/aarch64/aarch64-protos.h
 +++ b/src/gcc/config/aarch64/aarch64-protos.h
@@ -226,7 +551,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  #define AARCH64_FUSION_PAIR(x, name) \
-@@ -301,6 +312,7 @@ extern struct tune_params aarch64_tune_params;
+@@ -301,18 +312,22 @@ extern struct tune_params aarch64_tune_params;
  
  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
  int aarch64_get_condition_code (rtx);
@@ -234,14 +559,32 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
  unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
  unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
-@@ -311,6 +323,7 @@ bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
+ bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
+ int aarch64_branch_cost (bool, bool);
+ enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
++bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
+ bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
  bool aarch64_constant_address_p (rtx);
  bool aarch64_emit_approx_div (rtx, rtx, rtx);
  bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
 +void aarch64_expand_call (rtx, rtx, bool);
  bool aarch64_expand_movmem (rtx *);
  bool aarch64_float_const_zero_rtx_p (rtx);
++bool aarch64_float_const_rtx_p (rtx);
  bool aarch64_function_arg_regno_p (unsigned);
+ bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs);
+ bool aarch64_gen_movmemqi (rtx *);
+@@ -338,9 +353,9 @@ bool aarch64_pad_arg_upward (machine_mode, const_tree);
+ bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
+ bool aarch64_regno_ok_for_base_p (int, bool);
+ bool aarch64_regno_ok_for_index_p (int, bool);
++bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *fail);
+ bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
+ 					    bool high);
+-bool aarch64_simd_imm_scalar_p (rtx x, machine_mode mode);
+ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
+ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
+ bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 --- a/src/gcc/config/aarch64/aarch64-simd.md
 +++ b/src/gcc/config/aarch64/aarch64-simd.md
 @@ -44,12 +44,12 @@
@@ -324,7 +667,45 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	    (match_operand:SI 2 "immediate_operand" "i")))]
    "TARGET_SIMD"
    {
-@@ -2796,38 +2809,10 @@
+@@ -1020,6 +1033,18 @@
+   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+ )
+ 
++(define_insn "*aarch64_mla_elt_merge<mode>"
++  [(set (match_operand:VDQHS 0 "register_operand" "=w")
++	(plus:VDQHS
++	  (mult:VDQHS (vec_duplicate:VDQHS
++		  (match_operand:<VEL> 1 "register_operand" "w"))
++		(match_operand:VDQHS 2 "register_operand" "w"))
++	  (match_operand:VDQHS 3 "register_operand" "0")))]
++ "TARGET_SIMD"
++ "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
++  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
++)
++
+ (define_insn "aarch64_mls<mode>"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
+@@ -1067,6 +1092,18 @@
+   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+ )
+ 
++(define_insn "*aarch64_mls_elt_merge<mode>"
++  [(set (match_operand:VDQHS 0 "register_operand" "=w")
++	(minus:VDQHS
++	  (match_operand:VDQHS 1 "register_operand" "0")
++	  (mult:VDQHS (vec_duplicate:VDQHS
++		  (match_operand:<VEL> 2 "register_operand" "w"))
++		(match_operand:VDQHS 3 "register_operand" "w"))))]
++  "TARGET_SIMD"
++  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
++  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
++)
++
+ ;; Max/Min operations.
+ (define_insn "<su><maxmin><mode>3"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+@@ -2796,38 +2833,10 @@
     (match_operand:VDC 2 "register_operand")]
    "TARGET_SIMD"
  {
@@ -388,7 +769,16 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  #undef AARCH64_EXTRA_TUNING_OPTION
 --- a/src/gcc/config/aarch64/aarch64.c
 +++ b/src/gcc/config/aarch64/aarch64.c
-@@ -193,10 +193,10 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
+@@ -147,6 +147,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
+ 							 const_tree type,
+ 							 int misalignment,
+ 							 bool is_packed);
++static machine_mode
++aarch64_simd_container_mode (machine_mode mode, unsigned width);
+ 
+ /* Major revision number of the ARM Architecture implemented by the target.  */
+ unsigned aarch64_architecture_version;
+@@ -193,10 +195,10 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
  static const struct cpu_addrcost_table generic_addrcost_table =
  {
      {
@@ -401,7 +791,24 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
      },
    0, /* pre_modify  */
    0, /* post_modify  */
-@@ -526,6 +526,61 @@ static const cpu_approx_modes xgene1_approx_modes =
+@@ -390,13 +392,13 @@ static const struct cpu_vector_cost thunderx_vector_cost =
+   3, /* scalar_load_cost  */
+   1, /* scalar_store_cost  */
+   4, /* vec_int_stmt_cost  */
+-  4, /* vec_fp_stmt_cost  */
++  1, /* vec_fp_stmt_cost  */
+   4, /* vec_permute_cost  */
+   2, /* vec_to_scalar_cost  */
+   2, /* scalar_to_vec_cost  */
+   3, /* vec_align_load_cost  */
+-  10, /* vec_unalign_load_cost  */
+-  10, /* vec_unalign_store_cost  */
++  5, /* vec_unalign_load_cost  */
++  5, /* vec_unalign_store_cost  */
+   1, /* vec_store_cost  */
+   3, /* cond_taken_branch_cost  */
+   3 /* cond_not_taken_branch_cost  */
+@@ -526,6 +528,61 @@ static const cpu_approx_modes xgene1_approx_modes =
    AARCH64_APPROX_ALL	/* recip_sqrt  */
  };
  
@@ -463,7 +870,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  static const struct tune_params generic_tunings =
  {
    &cortexa57_extra_costs,
-@@ -538,17 +593,17 @@ static const struct tune_params generic_tunings =
+@@ -538,17 +595,17 @@ static const struct tune_params generic_tunings =
    2, /* issue_rate  */
    (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
    8,	/* function_align.  */
@@ -486,7 +893,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params cortexa35_tunings =
-@@ -564,7 +619,7 @@ static const struct tune_params cortexa35_tunings =
+@@ -564,7 +621,7 @@ static const struct tune_params cortexa35_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
    16,	/* function_align.  */
@@ -495,7 +902,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -572,9 +627,9 @@ static const struct tune_params cortexa35_tunings =
+@@ -572,9 +629,9 @@ static const struct tune_params cortexa35_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -507,7 +914,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params cortexa53_tunings =
-@@ -590,7 +645,7 @@ static const struct tune_params cortexa53_tunings =
+@@ -590,7 +647,7 @@ static const struct tune_params cortexa53_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
    16,	/* function_align.  */
@@ -516,7 +923,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -598,9 +653,9 @@ static const struct tune_params cortexa53_tunings =
+@@ -598,9 +655,9 @@ static const struct tune_params cortexa53_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -528,7 +935,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params cortexa57_tunings =
-@@ -616,7 +671,7 @@ static const struct tune_params cortexa57_tunings =
+@@ -616,7 +673,7 @@ static const struct tune_params cortexa57_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
    16,	/* function_align.  */
@@ -537,7 +944,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -624,9 +679,9 @@ static const struct tune_params cortexa57_tunings =
+@@ -624,9 +681,9 @@ static const struct tune_params cortexa57_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -549,7 +956,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params cortexa72_tunings =
-@@ -642,7 +697,7 @@ static const struct tune_params cortexa72_tunings =
+@@ -642,7 +699,7 @@ static const struct tune_params cortexa72_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
    16,	/* function_align.  */
@@ -558,7 +965,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -650,9 +705,9 @@ static const struct tune_params cortexa72_tunings =
+@@ -650,9 +707,9 @@ static const struct tune_params cortexa72_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -570,7 +977,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params cortexa73_tunings =
-@@ -668,7 +723,7 @@ static const struct tune_params cortexa73_tunings =
+@@ -668,7 +725,7 @@ static const struct tune_params cortexa73_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
    16,	/* function_align.  */
@@ -579,7 +986,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -676,11 +731,13 @@ static const struct tune_params cortexa73_tunings =
+@@ -676,11 +733,13 @@ static const struct tune_params cortexa73_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -595,7 +1002,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  static const struct tune_params exynosm1_tunings =
  {
    &exynosm1_extra_costs,
-@@ -701,9 +758,34 @@ static const struct tune_params exynosm1_tunings =
+@@ -701,9 +760,34 @@ static const struct tune_params exynosm1_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    48,	/* max_case_values.  */
@@ -632,7 +1039,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params thunderx_tunings =
-@@ -726,9 +808,10 @@ static const struct tune_params thunderx_tunings =
+@@ -726,9 +810,10 @@ static const struct tune_params thunderx_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -645,7 +1052,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params xgene1_tunings =
-@@ -751,9 +834,9 @@ static const struct tune_params xgene1_tunings =
+@@ -751,9 +836,9 @@ static const struct tune_params xgene1_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -657,7 +1064,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params qdf24xx_tunings =
-@@ -777,9 +860,9 @@ static const struct tune_params qdf24xx_tunings =
+@@ -777,9 +862,9 @@ static const struct tune_params qdf24xx_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -669,7 +1076,17 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  static const struct tune_params thunderx2t99_tunings =
-@@ -802,9 +885,9 @@ static const struct tune_params thunderx2t99_tunings =
+@@ -792,7 +877,8 @@ static const struct tune_params thunderx2t99_tunings =
+   &generic_approx_modes,
+   4, /* memmov_cost.  */
+   4, /* issue_rate.  */
+-  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
++  (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
++   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+   16,	/* function_align.  */
+   8,	/* jump_align.  */
+   16,	/* loop_align.  */
+@@ -802,9 +888,9 @@ static const struct tune_params thunderx2t99_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -682,7 +1099,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  };
  
  /* Support for fine-grained override of the tuning structures.  */
-@@ -1649,41 +1732,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
+@@ -1649,41 +1735,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
    machine_mode dst_mode = GET_MODE (dst);
  
    gcc_assert (VECTOR_MODE_P (dst_mode));
@@ -756,7 +1173,39 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  }
  
  /* Split a complex SIMD move.  */
-@@ -1919,6 +2002,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
+@@ -1792,6 +1878,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+       return 1;
+     }
+ 
++  /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
++     (with XXXX non-zero). In that case check to see if the move can be done in
++     a smaller mode.  */
++  val2 = val & 0xffffffff;
++  if (mode == DImode
++      && aarch64_move_imm (val2, SImode)
++      && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
++    {
++      if (generate)
++	emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
++
++      /* Check if we have to emit a second instruction by checking to see
++         if any of the upper 32 bits of the original DI mode value is set.  */
++      if (val == val2)
++	return 1;
++
++      i = (val >> 48) ? 48 : 32;
++
++      if (generate)
++	 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
++				    GEN_INT ((val >> i) & 0xffff)));
++
++      return 2;
++    }
++
+   if ((val >> 32) == 0 || mode == SImode)
+     {
+       if (generate)
+@@ -1919,6 +2030,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
  	      gcc_assert (can_create_pseudo_p ());
  	      base = gen_reg_rtx (ptr_mode);
  	      aarch64_expand_mov_immediate (base, XEXP (mem, 0));
@@ -765,7 +1214,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	      mem = gen_rtx_MEM (ptr_mode, base);
  	    }
  
-@@ -2683,11 +2768,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+@@ -2683,11 +2796,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
  		     plus_constant (Pmode, stack_pointer_rtx, -first));
  
        /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
@@ -790,7 +1239,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
        /* Step 3: the loop
  
  	 do
-@@ -4549,6 +4642,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
+@@ -4549,6 +4670,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
      }
  }
  
@@ -815,7 +1264,132 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  bool
  aarch64_symbolic_address_p (rtx x)
  {
-@@ -4633,6 +4744,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+@@ -4611,6 +4750,74 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
+   return true;
+ }
+ 
++/* Return the binary representation of floating point constant VALUE in INTVAL.
++   If the value cannot be converted, return false without setting INTVAL.
++   The conversion is done in the given MODE.  */
++bool
++aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
++{
++
++  /* We make a general exception for 0.  */
++  if (aarch64_float_const_zero_rtx_p (value))
++    {
++      *intval = 0;
++      return true;
++    }
++
++  machine_mode mode = GET_MODE (value);
++  if (GET_CODE (value) != CONST_DOUBLE
++      || !SCALAR_FLOAT_MODE_P (mode)
++      || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
++      /* Only support up to DF mode.  */
++      || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
++    return false;
++
++  unsigned HOST_WIDE_INT ival = 0;
++
++  long res[2];
++  real_to_target (res,
++		  CONST_DOUBLE_REAL_VALUE (value),
++		  REAL_MODE_FORMAT (mode));
++
++  if (mode == DFmode)
++    {
++      int order = BYTES_BIG_ENDIAN ? 1 : 0;
++      ival = zext_hwi (res[order], 32);
++      ival |= (zext_hwi (res[1 - order], 32) << 32);
++    }
++  else
++      ival = zext_hwi (res[0], 32);
++
++  *intval = ival;
++  return true;
++}
++
++/* Return TRUE if rtx X is an immediate constant that can be moved using a
++   single MOV(+MOVK) followed by an FMOV.  */
++bool
++aarch64_float_const_rtx_p (rtx x)
++{
++  machine_mode mode = GET_MODE (x);
++  if (mode == VOIDmode)
++    return false;
++
++  /* Determine whether it's cheaper to write float constants as
++     mov/movk pairs over ldr/adrp pairs.  */
++  unsigned HOST_WIDE_INT ival;
++
++  if (GET_CODE (x) == CONST_DOUBLE
++      && SCALAR_FLOAT_MODE_P (mode)
++      && aarch64_reinterpret_float_as_int (x, &ival))
++    {
++      machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode);
++      int num_instr = aarch64_internal_mov_immediate
++			(NULL_RTX, gen_int_mode (ival, imode), false, imode);
++      return num_instr < 3;
++    }
++
++  return false;
++}
++
+ /* Return TRUE if rtx X is immediate constant 0.0 */
+ bool
+ aarch64_float_const_zero_rtx_p (rtx x)
+@@ -4623,6 +4830,49 @@ aarch64_float_const_zero_rtx_p (rtx x)
+   return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
+ }
+ 
++/* Return TRUE if rtx X is immediate constant that fits in a single
++   MOVI immediate operation.  */
++bool
++aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
++{
++  if (!TARGET_SIMD)
++     return false;
++
++  machine_mode vmode, imode;
++  unsigned HOST_WIDE_INT ival;
++
++  if (GET_CODE (x) == CONST_DOUBLE
++      && SCALAR_FLOAT_MODE_P (mode))
++    {
++      if (!aarch64_reinterpret_float_as_int (x, &ival))
++	return false;
++
++      /* We make a general exception for 0.  */
++      if (aarch64_float_const_zero_rtx_p (x))
++	return true;
++
++      imode = int_mode_for_mode (mode);
++    }
++  else if (GET_CODE (x) == CONST_INT
++	   && SCALAR_INT_MODE_P (mode))
++    {
++       imode = mode;
++       ival = INTVAL (x);
++    }
++  else
++    return false;
++
++   /* use a 64 bit mode for everything except for DI/DF mode, where we use
++     a 128 bit vector mode.  */
++  int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
++
++  vmode = aarch64_simd_container_mode (imode, width);
++  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
++
++  return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
++}
++
++
+ /* Return the fixed registers used for condition codes.  */
+ 
+ static bool
+@@ -4633,6 +4883,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
    return true;
  }
  
@@ -866,7 +1440,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  /* Emit call insn with PAT and do aarch64-specific handling.  */
  
  void
-@@ -4705,7 +4860,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
+@@ -4705,7 +4999,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       the comparison will have to be swapped when we emit the assembly
       code.  */
    if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
@@ -875,7 +1449,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
        && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
  	  || GET_CODE (x) == LSHIFTRT
  	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
-@@ -5112,6 +5267,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
+@@ -5112,6 +5406,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
  
  	case MEM:
  	  output_address (GET_MODE (x), XEXP (x, 0));
@@ -884,7 +1458,20 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	  break;
  
  	case CONST:
-@@ -5976,9 +6133,10 @@ aarch64_strip_shift (rtx x)
+@@ -5756,12 +6052,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
+       return NO_REGS;
+     }
+ 
+-  /* If it's an integer immediate that MOVI can't handle, then
+-     FP_REGS is not an option, so we return NO_REGS instead.  */
+-  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
+-      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
+-    return NO_REGS;
+-
+   /* Register eliminiation can result in a request for
+      SP+constant->FP_REGS.  We cannot support such operations which
+      use SP as source and an FP_REG as destination, so reject out
+@@ -5976,9 +6266,10 @@ aarch64_strip_shift (rtx x)
  /* Helper function for rtx cost calculation.  Strip an extend
     expression from X.  Returns the inner operand if successful, or the
     original expression on failure.  We deal with a number of possible
@@ -897,7 +1484,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  {
    rtx op = x;
  
-@@ -6002,7 +6160,8 @@ aarch64_strip_extend (rtx x)
+@@ -6002,7 +6293,8 @@ aarch64_strip_extend (rtx x)
  
    /* Now handle extended register, as this may also have an optional
       left shift by 1..4.  */
@@ -907,7 +1494,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
        && CONST_INT_P (XEXP (op, 1))
        && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
      op = XEXP (op, 0);
-@@ -6026,6 +6185,39 @@ aarch64_shift_p (enum rtx_code code)
+@@ -6026,6 +6318,39 @@ aarch64_shift_p (enum rtx_code code)
    return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
  }
  
@@ -947,7 +1534,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  /* Helper function for rtx cost calculation.  Calculate the cost of
     a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
     Return the calculated cost of the expression, recursing manually in to
-@@ -6063,7 +6255,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+@@ -6063,7 +6388,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
  	    {
  	      if (compound_p)
  	        {
@@ -960,7 +1547,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  		    /* ARITH + shift-by-register.  */
  		    cost += extra_cost->alu.arith_shift_reg;
  		  else if (is_extend)
-@@ -6081,7 +6277,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+@@ -6081,7 +6410,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
  	    }
  	  /* Strip extends as we will have costed them in the case above.  */
  	  if (is_extend)
@@ -969,7 +1556,33 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  	  cost += rtx_cost (op0, VOIDmode, code, 0, speed);
  
-@@ -6925,13 +7121,13 @@ cost_minus:
+@@ -6672,6 +7001,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
+       return true;
+ 
+     case CONST_DOUBLE:
++
++      /* First determine number of instructions to do the move
++	  as an integer constant.  */
++      if (!aarch64_float_const_representable_p (x)
++	   && !aarch64_can_const_movi_rtx_p (x, mode)
++	   && aarch64_float_const_rtx_p (x))
++	{
++	  unsigned HOST_WIDE_INT ival;
++	  bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
++	  gcc_assert (succeed);
++
++	  machine_mode imode = mode == HFmode ? SImode
++					      : int_mode_for_mode (mode);
++	  int ncost = aarch64_internal_mov_immediate
++		(NULL_RTX, gen_int_mode (ival, imode), false, imode);
++	  *cost += COSTS_N_INSNS (ncost);
++	  return true;
++	}
++
+       if (speed)
+ 	{
+ 	  /* mov[df,sf]_aarch64.  */
+@@ -6925,13 +7273,13 @@ cost_minus:
  	    if (speed)
  	      *cost += extra_cost->alu.extend_arith;
  
@@ -985,7 +1598,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  	/* Cost this as an FMA-alike operation.  */
  	if ((GET_CODE (new_op1) == MULT
-@@ -7004,7 +7200,7 @@ cost_plus:
+@@ -7004,7 +7352,7 @@ cost_plus:
  	    if (speed)
  	      *cost += extra_cost->alu.extend_arith;
  
@@ -994,7 +1607,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	    *cost += rtx_cost (op0, VOIDmode,
  			       (enum rtx_code) GET_CODE (op0), 0, speed);
  	    return true;
-@@ -7012,7 +7208,7 @@ cost_plus:
+@@ -7012,7 +7360,7 @@ cost_plus:
  
  	/* Strip any extend, leave shifts behind as we will
  	   cost them through mult_cost.  */
@@ -1003,7 +1616,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  	if (GET_CODE (new_op0) == MULT
  	    || aarch64_shift_p (GET_CODE (new_op0)))
-@@ -7482,17 +7678,13 @@ cost_plus:
+@@ -7482,17 +7830,13 @@ cost_plus:
      case UMOD:
        if (speed)
  	{
@@ -1024,7 +1637,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	}
        return false;  /* All arguments need to be in registers.  */
  
-@@ -7506,7 +7698,9 @@ cost_plus:
+@@ -7506,7 +7850,9 @@ cost_plus:
  	  else if (GET_MODE_CLASS (mode) == MODE_INT)
  	    /* There is no integer SQRT, so only DIV and UDIV can get
  	       here.  */
@@ -1035,7 +1648,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	  else
  	    *cost += extra_cost->fp[mode == DFmode].div;
  	}
-@@ -8687,13 +8881,39 @@ aarch64_override_options_internal (struct gcc_options *opts)
+@@ -8687,13 +9033,39 @@ aarch64_override_options_internal (struct gcc_options *opts)
  			 opts->x_param_values,
  			 global_options_set.x_param_values);
  
@@ -1078,7 +1691,73 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    aarch64_override_options_after_change_1 (opts);
  }
  
-@@ -11647,6 +11867,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+@@ -9970,18 +10342,16 @@ aarch64_legitimate_pic_operand_p (rtx x)
+ /* Return true if X holds either a quarter-precision or
+      floating-point +0.0 constant.  */
+ static bool
+-aarch64_valid_floating_const (machine_mode mode, rtx x)
++aarch64_valid_floating_const (rtx x)
+ {
+   if (!CONST_DOUBLE_P (x))
+     return false;
+ 
+-  if (aarch64_float_const_zero_rtx_p (x))
++  /* This call determines which constants can be used in mov<mode>
++     as integer moves instead of constant loads.  */
++  if (aarch64_float_const_rtx_p (x))
+     return true;
+ 
+-  /* We only handle moving 0.0 to a TFmode register.  */
+-  if (!(mode == SFmode || mode == DFmode))
+-    return false;
+-
+   return aarch64_float_const_representable_p (x);
+ }
+ 
+@@ -9993,11 +10363,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
+   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
+     return false;
+ 
+-  /* This could probably go away because
+-     we now decompose CONST_INTs according to expand_mov_immediate.  */
++  /* For these cases we never want to use a literal load.
++     As such we have to prevent the compiler from forcing these
++     to memory.  */
+   if ((GET_CODE (x) == CONST_VECTOR
+        && aarch64_simd_valid_immediate (x, mode, false, NULL))
+-      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
++      || CONST_INT_P (x)
++      || aarch64_valid_floating_const (x)
++      || aarch64_can_const_movi_rtx_p (x, mode)
++      || aarch64_float_const_rtx_p (x))
+ 	return !targetm.cannot_force_const_mem (mode, x);
+ 
+   if (GET_CODE (x) == HIGH
+@@ -11275,23 +11649,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos)
+ }
+ 
+ bool
+-aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
+-{
+-  HOST_WIDE_INT imm = INTVAL (x);
+-  int i;
+-
+-  for (i = 0; i < 8; i++)
+-    {
+-      unsigned int byte = imm & 0xff;
+-      if (byte != 0xff && byte != 0)
+-       return false;
+-      imm >>= 8;
+-    }
+-
+-  return true;
+-}
+-
+-bool
+ aarch64_mov_operand_p (rtx x, machine_mode mode)
+ {
+   if (GET_CODE (x) == HIGH
+@@ -11647,6 +12004,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
        return;
      }
  
@@ -1136,7 +1815,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    /* Initialise a vector which is part-variable.  We want to first try
       to build those lanes which are constant in the most efficient way we
       can.  */
-@@ -11680,10 +11951,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+@@ -11680,10 +12088,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
      }
  
    /* Insert the variable lanes directly.  */
@@ -1147,7 +1826,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    for (int i = 0; i < n_elts; i++)
      {
        rtx x = XVECEXP (vals, 0, i);
-@@ -12049,6 +12316,17 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12049,6 +12453,17 @@ aarch64_split_compare_and_swap (rtx operands[])
    mode = GET_MODE (mem);
    model = memmodel_from_int (INTVAL (model_rtx));
  
@@ -1165,7 +1844,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    label1 = NULL;
    if (!is_weak)
      {
-@@ -12065,11 +12343,21 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12065,11 +12480,21 @@ aarch64_split_compare_and_swap (rtx operands[])
    else
      aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
  
@@ -1192,7 +1871,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
    aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
  
-@@ -12088,7 +12376,15 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12088,7 +12513,15 @@ aarch64_split_compare_and_swap (rtx operands[])
      }
  
    emit_label (label2);
@@ -1209,6 +1888,113 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    /* Emit any final barrier needed for a __sync operation.  */
    if (is_mm_sync (model))
      aarch64_emit_post_barrier (model);
+@@ -12608,15 +13041,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
+ }
+ 
+ char*
+-aarch64_output_scalar_simd_mov_immediate (rtx immediate,
+-					  machine_mode mode)
++aarch64_output_scalar_simd_mov_immediate (rtx immediate,  machine_mode mode)
+ {
++
++  /* If a floating point number was passed and we desire to use it in an
++     integer mode do the conversion to integer.  */
++  if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
++    {
++      unsigned HOST_WIDE_INT ival;
++      if (!aarch64_reinterpret_float_as_int (immediate, &ival))
++	  gcc_unreachable ();
++      immediate = gen_int_mode (ival, mode);
++    }
++
+   machine_mode vmode;
++  /* use a 64 bit mode for everything except for DI/DF mode, where we use
++     a 128 bit vector mode.  */
++  int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
+ 
+   gcc_assert (!VECTOR_MODE_P (mode));
+-  vmode = aarch64_simd_container_mode (mode, 64);
++  vmode = aarch64_simd_container_mode (mode, width);
+   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
+-  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
++  return aarch64_output_simd_mov_immediate (v_op, vmode, width);
+ }
+ 
+ /* Split operands into moves from op[1] + op[2] into op[0].  */
+@@ -13981,13 +14427,66 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+     {
+       enum attr_type prev_type = get_attr_type (prev);
+ 
+-      /* FIXME: this misses some which is considered simple arthematic
+-         instructions for ThunderX.  Simple shifts are missed here.  */
+-      if (prev_type == TYPE_ALUS_SREG
+-          || prev_type == TYPE_ALUS_IMM
+-          || prev_type == TYPE_LOGICS_REG
+-          || prev_type == TYPE_LOGICS_IMM)
+-        return true;
++      unsigned int condreg1, condreg2;
++      rtx cc_reg_1;
++      aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
++      cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
++
++      if (reg_referenced_p (cc_reg_1, PATTERN (curr))
++	  && prev
++	  && modified_in_p (cc_reg_1, prev))
++	{
++	  /* FIXME: this misses some which is considered simple arthematic
++	     instructions for ThunderX.  Simple shifts are missed here.  */
++	  if (prev_type == TYPE_ALUS_SREG
++	      || prev_type == TYPE_ALUS_IMM
++	      || prev_type == TYPE_LOGICS_REG
++	      || prev_type == TYPE_LOGICS_IMM)
++	    return true;
++	}
++    }
++
++  if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
++      && any_condjump_p (curr))
++    {
++      /* We're trying to match:
++	  prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
++	  curr (cbz) ==  (set (pc) (if_then_else (eq/ne) (r0)
++							 (const_int 0))
++						 (label_ref ("SYM"))
++						 (pc))  */
++      if (SET_DEST (curr_set) == (pc_rtx)
++	  && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
++	  && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
++	  && REG_P (SET_DEST (prev_set))
++	  && REGNO (SET_DEST (prev_set))
++	     == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
++	{
++	  /* Fuse ALU operations followed by conditional branch instruction.  */
++	  switch (get_attr_type (prev))
++	    {
++	    case TYPE_ALU_IMM:
++	    case TYPE_ALU_SREG:
++	    case TYPE_ADC_REG:
++	    case TYPE_ADC_IMM:
++	    case TYPE_ADCS_REG:
++	    case TYPE_ADCS_IMM:
++	    case TYPE_LOGIC_REG:
++	    case TYPE_LOGIC_IMM:
++	    case TYPE_CSEL:
++	    case TYPE_ADR:
++	    case TYPE_MOV_IMM:
++	    case TYPE_SHIFT_REG:
++	    case TYPE_SHIFT_IMM:
++	    case TYPE_BFM:
++	    case TYPE_RBIT:
++	    case TYPE_REV:
++	    case TYPE_EXTEND:
++	      return true;
++
++	    default:;
++	    }
++	}
+     }
+ 
+   return false;
 --- a/src/gcc/config/aarch64/aarch64.h
 +++ b/src/gcc/config/aarch64/aarch64.h
 @@ -98,14 +98,24 @@
@@ -1244,7 +2030,15 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  #define STRUCTURE_SIZE_BOUNDARY		8
  
-@@ -140,6 +150,7 @@ extern unsigned aarch64_architecture_version;
+@@ -134,12 +144,14 @@ extern unsigned aarch64_architecture_version;
+ #define AARCH64_FL_CRC        (1 << 3)	/* Has CRC.  */
+ /* ARMv8.1-A architecture extensions.  */
+ #define AARCH64_FL_LSE	      (1 << 4)  /* Has Large System Extensions.  */
+-#define AARCH64_FL_V8_1	      (1 << 5)  /* Has ARMv8.1-A extensions.  */
++#define AARCH64_FL_RDMA	      (1 << 5)  /* Has Round Double Multiply Add.  */
++#define AARCH64_FL_V8_1	      (1 << 6)  /* Has ARMv8.1-A extensions.  */
+ /* ARMv8.2-A architecture extensions.  */
+ #define AARCH64_FL_V8_2	      (1 << 8)  /* Has ARMv8.2-A features.  */
  #define AARCH64_FL_F16	      (1 << 9)  /* Has ARMv8.2-A FP16 extensions.  */
  /* ARMv8.3-A architecture extensions.  */
  #define AARCH64_FL_V8_3	      (1 << 10)  /* Has ARMv8.3-A features.  */
@@ -1252,9 +2046,60 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  /* Has FP and SIMD.  */
  #define AARCH64_FL_FPSIMD     (AARCH64_FL_FP | AARCH64_FL_SIMD)
+@@ -150,7 +162,8 @@ extern unsigned aarch64_architecture_version;
+ /* Architecture flags that effect instruction selection.  */
+ #define AARCH64_FL_FOR_ARCH8       (AARCH64_FL_FPSIMD)
+ #define AARCH64_FL_FOR_ARCH8_1			       \
+-  (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1)
++  (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
++   | AARCH64_FL_RDMA | AARCH64_FL_V8_1)
+ #define AARCH64_FL_FOR_ARCH8_2			\
+   (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
+ #define AARCH64_FL_FOR_ARCH8_3			\
+@@ -163,7 +176,7 @@ extern unsigned aarch64_architecture_version;
+ #define AARCH64_ISA_FP             (aarch64_isa_flags & AARCH64_FL_FP)
+ #define AARCH64_ISA_SIMD           (aarch64_isa_flags & AARCH64_FL_SIMD)
+ #define AARCH64_ISA_LSE		   (aarch64_isa_flags & AARCH64_FL_LSE)
+-#define AARCH64_ISA_RDMA	   (aarch64_isa_flags & AARCH64_FL_V8_1)
++#define AARCH64_ISA_RDMA	   (aarch64_isa_flags & AARCH64_FL_RDMA)
+ #define AARCH64_ISA_V8_2	   (aarch64_isa_flags & AARCH64_FL_V8_2)
+ #define AARCH64_ISA_F16		   (aarch64_isa_flags & AARCH64_FL_F16)
+ #define AARCH64_ISA_V8_3	   (aarch64_isa_flags & AARCH64_FL_V8_3)
 --- a/src/gcc/config/aarch64/aarch64.md
 +++ b/src/gcc/config/aarch64/aarch64.md
-@@ -519,27 +519,31 @@
+@@ -181,6 +181,11 @@
+ ;; will be disabled when !TARGET_FLOAT.
+ (define_attr "fp" "no,yes" (const_string "no"))
+ 
++;; Attribute that specifies whether or not the instruction touches half
++;; precision fp registers.  When this is set to yes for an alternative,
++;; that alternative will be disabled when !TARGET_FP_F16INST.
++(define_attr "fp16" "no,yes" (const_string "no"))
++
+ ;; Attribute that specifies whether or not the instruction touches simd
+ ;; registers.  When this is set to yes for an alternative, that alternative
+ ;; will be disabled when !TARGET_SIMD.
+@@ -194,11 +199,14 @@
+ ;; registers when -mgeneral-regs-only is specified.
+ (define_attr "enabled" "no,yes"
+   (cond [(ior
+-	(and (eq_attr "fp" "yes")
+-	     (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
+-	(and (eq_attr "simd" "yes")
+-	     (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
+-	     (const_string "no")
++	    (ior
++		(and (eq_attr "fp" "yes")
++		     (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
++		(and (eq_attr "simd" "yes")
++		     (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
++	    (and (eq_attr "fp16" "yes")
++		 (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0))))
++	    (const_string "no")
+ 	] (const_string "yes")))
+ 
+ ;; Attribute that specifies whether we are dealing with a branch to a
+@@ -519,27 +527,31 @@
  )
  
  (define_insn "prefetch"
@@ -1296,7 +2141,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
      return pftype[INTVAL(operands[1])][locality];
    }
    [(set_attr "type" "load1")]
-@@ -713,12 +717,6 @@
+@@ -713,12 +725,6 @@
  ;; Subroutine calls and sibcalls
  ;; -------------------------------------------------------------------
  
@@ -1309,7 +2154,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_expand "call"
    [(parallel [(call (match_operand 0 "memory_operand" "")
  		    (match_operand 1 "general_operand" ""))
-@@ -727,57 +725,22 @@
+@@ -727,57 +733,22 @@
    ""
    "
    {
@@ -1374,7 +2219,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_expand "call_value"
    [(parallel [(set (match_operand 0 "" "")
  		   (call (match_operand 1 "memory_operand" "")
-@@ -787,60 +750,23 @@
+@@ -787,60 +758,23 @@
    ""
    "
    {
@@ -1442,7 +2287,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_expand "sibcall"
    [(parallel [(call (match_operand 0 "memory_operand" "")
  		    (match_operand 1 "general_operand" ""))
-@@ -848,29 +774,11 @@
+@@ -848,29 +782,11 @@
  	      (use (match_operand 2 "" ""))])]
    ""
    {
@@ -1473,7 +2318,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_expand "sibcall_value"
    [(parallel [(set (match_operand 0 "" "")
  		   (call (match_operand 1 "memory_operand" "")
-@@ -879,19 +787,7 @@
+@@ -879,19 +795,7 @@
  	      (use (match_operand 3 "" ""))])]
    ""
    {
@@ -1494,7 +2339,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
      DONE;
    }
  )
-@@ -899,8 +795,7 @@
+@@ -899,8 +803,7 @@
  (define_insn "*sibcall_insn"
    [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf"))
  	 (match_operand 1 "" ""))
@@ -1504,7 +2349,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    "SIBLING_CALL_P (insn)"
    "@
     br\\t%0
-@@ -913,8 +808,7 @@
+@@ -913,8 +816,7 @@
  	(call (mem:DI
  		(match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf"))
  	      (match_operand 2 "" "")))
@@ -1514,29 +2359,81 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    "SIBLING_CALL_P (insn)"
    "@
     br\\t%1
-@@ -1026,8 +920,8 @@
+@@ -1026,8 +928,8 @@
  )
  
  (define_insn_and_split "*movsi_aarch64"
 -  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w, r,*w")
 -	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
-+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w,r,*w")
-+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))]
++  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w, r,*w,w")
++	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Ds"))]
    "(register_operand (operands[0], SImode)
      || aarch64_reg_or_zero (operands[1], SImode))"
    "@
-@@ -1058,8 +952,8 @@
+@@ -1044,8 +946,9 @@
+    adrp\\t%x0, %A1
+    fmov\\t%s0, %w1
+    fmov\\t%w0, %s1
+-   fmov\\t%s0, %s1"
+-   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
++   fmov\\t%s0, %s1
++   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
++  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
+     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+    [(const_int 0)]
+    "{
+@@ -1053,13 +956,14 @@
+        DONE;
+     }"
+   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+-                     adr,adr,f_mcr,f_mrc,fmov")
+-   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
++		    adr,adr,f_mcr,f_mrc,fmov,neon_move")
++   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
++   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
  )
  
  (define_insn_and_split "*movdi_aarch64"
 -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w, r,*w,w")
 -	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
-+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w,r,*w,w")
-+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
++  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w,m,  m,r,r,  *w,r,*w,w")
++	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
    "(register_operand (operands[0], DImode)
      || aarch64_reg_or_zero (operands[1], DImode))"
    "@
-@@ -1123,7 +1017,7 @@
+@@ -1067,6 +971,7 @@
+    mov\\t%0, %x1
+    mov\\t%x0, %1
+    mov\\t%x0, %1
++   mov\\t%w0, %1
+    #
+    ldr\\t%x0, %1
+    ldr\\t%d0, %1
+@@ -1077,7 +982,7 @@
+    fmov\\t%d0, %x1
+    fmov\\t%x0, %d1
+    fmov\\t%d0, %d1
+-   movi\\t%d0, %1"
++   * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
+    "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
+     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+    [(const_int 0)]
+@@ -1085,10 +990,10 @@
+        aarch64_expand_mov_immediate (operands[0], operands[1]);
+        DONE;
+     }"
+-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+-                     adr,adr,f_mcr,f_mrc,fmov,neon_move")
+-   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
++  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load1,\
++                     load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move")
++   (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
++   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+ )
+ 
+ (define_insn "insv_imm<mode>"
+@@ -1123,7 +1028,7 @@
     #
     #
     #
@@ -1545,7 +2442,118 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
     ldp\\t%0, %H0, %1
     stp\\t%1, %H1, %0
     stp\\txzr, xzr, %0
-@@ -1237,7 +1131,7 @@
+@@ -1168,28 +1073,31 @@
+ )
+ 
+ (define_insn "*movhf_aarch64"
+-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w,m,r,m ,r")
+-	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
++  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r")
++	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
+     || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+   "@
+    movi\\t%0.4h, #0
+-   mov\\t%0.h[0], %w1
++   fmov\\t%h0, %w1
+    umov\\t%w0, %1.h[0]
+    mov\\t%0.h[0], %1.h[0]
++   fmov\\t%h0, %1
++   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
+    ldr\\t%h0, %1
+    str\\t%h1, %0
+    ldrh\\t%w0, %1
+    strh\\t%w1, %0
+    mov\\t%w0, %w1"
+-  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
+-                     f_loads,f_stores,load1,store1,mov_reg")
+-   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
++  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
++		     neon_move,f_loads,f_stores,load1,store1,mov_reg")
++   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")
++   (set_attr "fp16"   "*,yes,*,*,yes,*,*,*,*,*,*")]
+ )
+ 
+ (define_insn "*movsf_aarch64"
+-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
+-	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
++  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
++	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
+   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
+     || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+   "@
+@@ -1198,19 +1106,22 @@
+    fmov\\t%w0, %s1
+    fmov\\t%s0, %s1
+    fmov\\t%s0, %1
++   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
+    ldr\\t%s0, %1
+    str\\t%s1, %0
+    ldr\\t%w0, %1
+    str\\t%w1, %0
+-   mov\\t%w0, %w1"
+-  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
+-                     f_loads,f_stores,load1,store1,mov_reg")
+-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
++   mov\\t%w0, %w1
++   mov\\t%w0, %1"
++  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
++		     f_loads,f_stores,load1,store1,mov_reg,\
++		     fconsts")
++   (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
+ )
+ 
+ (define_insn "*movdf_aarch64"
+-  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
+-	(match_operand:DF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
++  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
++	(match_operand:DF 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
+     || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+   "@
+@@ -1219,14 +1130,37 @@
+    fmov\\t%x0, %d1
+    fmov\\t%d0, %d1
+    fmov\\t%d0, %1
++   * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
+    ldr\\t%d0, %1
+    str\\t%d1, %0
+    ldr\\t%x0, %1
+    str\\t%x1, %0
+-   mov\\t%x0, %x1"
+-  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
+-                     f_loadd,f_stored,load1,store1,mov_reg")
+-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
++   mov\\t%x0, %x1
++   mov\\t%x0, %1"
++  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
++		     f_loadd,f_stored,load1,store1,mov_reg,\
++		     fconstd")
++   (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
++)
++
++(define_split
++  [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
++	(match_operand:GPF_HF 1 "general_operand"))]
++  "can_create_pseudo_p ()
++   && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
++   && !aarch64_float_const_representable_p (operands[1])
++   &&  aarch64_float_const_rtx_p (operands[1])"
++  [(const_int 0)]
++  {
++    unsigned HOST_WIDE_INT ival;
++    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
++      FAIL;
++
++    rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
++    emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
++    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
++    DONE;
++  }
+ )
+ 
+ (define_insn "*movtf_aarch64"
+@@ -1237,7 +1171,7 @@
    "TARGET_FLOAT && (register_operand (operands[0], TFmode)
      || aarch64_reg_or_fp_zero (operands[1], TFmode))"
    "@
@@ -1554,7 +2562,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
     #
     #
     #
-@@ -2340,6 +2234,55 @@
+@@ -2340,6 +2274,55 @@
    [(set_attr "type" "alus_sreg")]
  )
  
@@ -1610,7 +2618,57 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_insn "*sub_<shift>_<mode>"
    [(set (match_operand:GPI 0 "register_operand" "=r")
  	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
-@@ -4997,6 +4940,18 @@
+@@ -3881,6 +3864,22 @@
+   [(set_attr "type" "logics_reg,logics_imm")]
+ )
+ 
++(define_split
++  [(set (reg:CC_NZ CC_REGNUM)
++	(compare:CC_NZ
++	 (and:GPI (match_operand:GPI 0 "register_operand")
++		  (match_operand:GPI 1 "aarch64_mov_imm_operand"))
++	 (const_int 0)))
++   (clobber (match_operand:SI 2 "register_operand"))]
++  ""
++  [(set (match_dup 2) (match_dup 1))
++   (set (reg:CC_NZ CC_REGNUM)
++	(compare:CC_NZ
++	 (and:GPI (match_dup 0)
++		  (match_dup 2))
++	 (const_int 0)))]
++)
++
+ (define_insn "*and<mode>3nr_compare0_zextract"
+   [(set (reg:CC_NZ CC_REGNUM)
+ 	(compare:CC_NZ
+@@ -3916,6 +3915,26 @@
+   [(set_attr "type" "logics_shift_imm")]
+ )
+ 
++(define_split
++  [(set (reg:CC_NZ CC_REGNUM)
++	(compare:CC_NZ
++	 (and:GPI (SHIFT:GPI
++		   (match_operand:GPI 0 "register_operand")
++		   (match_operand:QI 1 "aarch64_shift_imm_<mode>"))
++		  (match_operand:GPI 2 "aarch64_mov_imm_operand"))
++	(const_int 0)))
++    (clobber (match_operand:SI 3 "register_operand"))]
++  ""
++  [(set (match_dup 3) (match_dup 2))
++   (set (reg:CC_NZ CC_REGNUM)
++	(compare:CC_NZ
++	 (and:GPI (SHIFT:GPI
++		   (match_dup 0)
++		   (match_dup 1))
++		  (match_dup 3))
++	 (const_int 0)))]
++)
++
+ ;; -------------------------------------------------------------------
+ ;; Shifts
+ ;; -------------------------------------------------------------------
+@@ -4997,6 +5016,18 @@
    [(set_attr "type" "f_minmax<stype>")]
  )
  
@@ -1629,7 +2687,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  ;; For copysign (x, y), we want to generate:
  ;;
  ;;   LDR d2, #(1 << 63)
-@@ -5030,14 +4985,16 @@
+@@ -5030,14 +5061,16 @@
     (match_operand:SF 2 "register_operand")]
    "TARGET_FLOAT && TARGET_SIMD"
  {
@@ -1650,6 +2708,17 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
    DONE;
  }
+--- a/src/gcc/config/aarch64/arm_neon.h
++++ b/src/gcc/config/aarch64/arm_neon.h
+@@ -12162,7 +12162,7 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+ 
+ /* ARMv8.1-A instrinsics.  */
+ #pragma GCC push_options
+-#pragma GCC target ("arch=armv8.1-a")
++#pragma GCC target ("+nothing+rdma")
+ 
+ __extension__ extern __inline int16x4_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 --- a/src/gcc/config/aarch64/atomics.md
 +++ b/src/gcc/config/aarch64/atomics.md
 @@ -25,7 +25,7 @@
@@ -1751,15 +2820,53 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  (define_constraint "UsM"
    "@internal
-@@ -214,3 +223,8 @@
-  A constraint that matches an immediate operand valid for AdvSIMD scalar."
+@@ -167,6 +176,12 @@
+   (and (match_code "const_double")
+        (match_test "aarch64_float_const_representable_p (op)")))
+ 
++(define_constraint "Uvi"
++  "A floating point constant which can be used with a\
++   MOVI immediate operation."
++  (and (match_code "const_double")
++       (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
++
+ (define_constraint "Dn"
+   "@internal
+  A constraint that matches vector of immediates."
+@@ -211,6 +226,19 @@
+ 
+ (define_constraint "Dd"
+   "@internal
+- A constraint that matches an immediate operand valid for AdvSIMD scalar."
++ A constraint that matches an integer immediate operand valid\
++ for AdvSIMD scalar operations in DImode."
   (and (match_code "const_int")
-       (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))")))
+-      (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))")))
++      (match_test "aarch64_can_const_movi_rtx_p (op, DImode)")))
++
++(define_constraint "Ds"
++  "@internal
++ A constraint that matches an integer immediate operand valid\
++ for AdvSIMD scalar operations in SImode."
++ (and (match_code "const_int")
++      (match_test "aarch64_can_const_movi_rtx_p (op, SImode)")))
 +
 +(define_address_constraint "Dp"
 +  "@internal
 + An address valid for a prefetch instruction."
 + (match_test "aarch64_address_valid_for_prefetch_p (op, true)"))
+--- a/src/gcc/config/aarch64/iterators.md
++++ b/src/gcc/config/aarch64/iterators.md
+@@ -44,6 +44,9 @@
+ ;; Iterator for all scalar floating point modes (HF, SF, DF)
+ (define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+ 
++;; Iterator for all scalar floating point modes (HF, SF, DF)
++(define_mode_iterator GPF_HF [HF SF DF])
++
+ ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
+ (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
+ 
 --- a/src/gcc/config/aarch64/predicates.md
 +++ b/src/gcc/config/aarch64/predicates.md
 @@ -77,6 +77,10 @@
@@ -1773,7 +2880,18 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_predicate "aarch64_plus_immediate"
    (and (match_code "const_int")
         (ior (match_test "aarch64_uimm12_shift (INTVAL (op))")
-@@ -165,6 +169,9 @@
+@@ -106,6 +110,10 @@
+   (ior (match_operand 0 "register_operand")
+        (match_operand 0 "aarch64_logical_immediate")))
+ 
++(define_predicate "aarch64_mov_imm_operand"
++  (and (match_code "const_int")
++       (match_test "aarch64_move_imm (INTVAL (op), mode)")))
++
+ (define_predicate "aarch64_logical_and_immediate"
+   (and (match_code "const_int")
+        (match_test "aarch64_and_bitmask_imm (INTVAL (op), mode)")))
+@@ -165,6 +173,9 @@
         (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
  					       0)")))
  
@@ -1994,7 +3112,35 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    offsets = arm_get_frame_offsets ();
    return offsets->outgoing_args != 0;
  }
-@@ -9285,6 +9293,10 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
+@@ -7858,6 +7866,8 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
+ 	{
+ 	  HOST_WIDE_INT val = INTVAL (index);
+ 
++	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
++	     If vldr is selected it uses arm_coproc_mem_operand.  */
+ 	  if (TARGET_LDRD)
+ 	    return val > -256 && val < 256;
+ 	  else
+@@ -7985,11 +7995,13 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
+       if (code == CONST_INT)
+ 	{
+ 	  HOST_WIDE_INT val = INTVAL (index);
+-	  /* ??? Can we assume ldrd for thumb2?  */
+-	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
+-	  /* ldrd supports offsets of +-1020.
+-	     However the ldr fallback does not.  */
+-	  return val > -256 && val < 256 && (val & 3) == 0;
++	  /* Thumb-2 ldrd only has reg+const addressing modes.
++	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
++	     If vldr is selected it uses arm_coproc_mem_operand.  */
++	  if (TARGET_LDRD)
++	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
++	  else
++	    return IN_RANGE (val, -255, 4095 - 4);
+ 	}
+       else
+ 	return 0;
+@@ -9285,6 +9297,10 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
  	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
        else
  	*cost = LIBCALL_COST (2);
@@ -2005,7 +3151,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
        return false;	/* All arguments must be in registers.  */
  
      case MOD:
-@@ -9307,7 +9319,9 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
+@@ -9307,7 +9323,9 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
  
      /* Fall-through.  */
      case UMOD:
@@ -2016,6 +3162,35 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
        return false;	/* All arguments must be in registers.  */
  
      case ROTATE:
+@@ -13548,10 +13566,7 @@ gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
+       emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
+       offset = 0;
+       if (!TARGET_THUMB1)
+-	{
+-	  base_reg = regs[0];
+-	  base_reg_rtx = newbase;
+-	}
++	base_reg_rtx = newbase;
+     }
+ 
+   for (i = 0; i < nops; i++)
+@@ -14075,7 +14090,6 @@ arm_gen_movmemqi (rtx *operands)
+ {
+   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
+   HOST_WIDE_INT srcoffset, dstoffset;
+-  int i;
+   rtx src, dst, srcbase, dstbase;
+   rtx part_bytes_reg = NULL;
+   rtx mem;
+@@ -14105,7 +14119,7 @@ arm_gen_movmemqi (rtx *operands)
+   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
+     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
+ 
+-  for (i = 0; in_words_to_go >= 2; i+=4)
++  while (in_words_to_go >= 2)
+     {
+       if (in_words_to_go > 4)
+ 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
 @@ -16857,9 +16871,10 @@ compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
    return not_to_clear_mask;
  }
@@ -2104,7 +3279,45 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	     alter the frame layout, so is independent of the epilogue.  */
  	  int n;
  	  int frame;
-@@ -28225,17 +28248,32 @@ arm_expand_compare_and_swap (rtx operands[])
+@@ -21650,8 +21673,8 @@ arm_expand_prologue (void)
+ 	 will prevent the scheduler from moving stores to the frame
+ 	 before the stack adjustment.  */
+       if (frame_pointer_needed)
+-	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
+-					 hard_frame_pointer_rtx));
++	emit_insn (gen_stack_tie (stack_pointer_rtx,
++				  hard_frame_pointer_rtx));
+     }
+ 
+ 
+@@ -23736,7 +23759,6 @@ thumb_pop (FILE *f, unsigned long mask)
+ {
+   int regno;
+   int lo_mask = mask & 0xFF;
+-  int pushed_words = 0;
+ 
+   gcc_assert (mask);
+ 
+@@ -23759,8 +23781,6 @@ thumb_pop (FILE *f, unsigned long mask)
+ 
+ 	  if ((lo_mask & ~1) != 0)
+ 	    fprintf (f, ", ");
+-
+-	  pushed_words++;
+ 	}
+     }
+ 
+@@ -24030,9 +24050,6 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
+       move_to     = number_of_first_bit_set (regs_to_pop);
+ 
+       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
+-
+-      regs_to_pop &= ~(1 << move_to);
+-
+       --pops_needed;
+     }
+ 
+@@ -28225,17 +28242,32 @@ arm_expand_compare_and_swap (rtx operands[])
        gcc_unreachable ();
      }
  
@@ -2156,6 +3369,153 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  	r0	   *	argument word/integer result
  	r1-r3		argument word
+--- a/src/gcc/config/arm/arm.md
++++ b/src/gcc/config/arm/arm.md
+@@ -457,14 +457,13 @@
+ )
+ 
+ (define_insn_and_split "*arm_adddi3"
+-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+-		 (match_operand:DI 2 "arm_adddi_operand"  "r,  0, r, Dd, Dd")))
++  [(set (match_operand:DI          0 "arm_general_register_operand" "=&r,&r,&r,&r,&r")
++	(plus:DI (match_operand:DI 1 "arm_general_register_operand" "%0, 0, r, 0, r")
++		 (match_operand:DI 2 "arm_general_adddi_operand"    "r,  0, r, Dd, Dd")))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_32BIT && !TARGET_NEON"
+   "#"
+-  "TARGET_32BIT && reload_completed
+-   && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))"
++  "TARGET_32BIT && ((!TARGET_NEON && !TARGET_IWMMXT) || reload_completed)"
+   [(parallel [(set (reg:CC_C CC_REGNUM)
+ 		   (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+ 				 (match_dup 1)))
+@@ -1263,13 +1262,13 @@
+ )
+ 
+ (define_insn_and_split "*arm_subdi3"
+-  [(set (match_operand:DI           0 "s_register_operand" "=&r,&r,&r")
+-	(minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
+-		  (match_operand:DI 2 "s_register_operand" "r,0,0")))
++  [(set (match_operand:DI           0 "arm_general_register_operand" "=&r,&r,&r")
++	(minus:DI (match_operand:DI 1 "arm_general_register_operand" "0,r,0")
++		  (match_operand:DI 2 "arm_general_register_operand" "r,0,0")))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_32BIT && !TARGET_NEON"
+   "#"  ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
+-  "&& reload_completed"
++  "&& (!TARGET_IWMMXT || reload_completed)"
+   [(parallel [(set (reg:CC CC_REGNUM)
+ 		   (compare:CC (match_dup 1) (match_dup 2)))
+ 	      (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+@@ -2255,7 +2254,24 @@
+ 	(and:DI (match_operand:DI 1 "s_register_operand" "")
+ 		(match_operand:DI 2 "neon_inv_logic_op2" "")))]
+   "TARGET_32BIT"
+-  ""
++  "
++  if (!TARGET_NEON && !TARGET_IWMMXT)
++    {
++      rtx low  = simplify_gen_binary (AND, SImode,
++				      gen_lowpart (SImode, operands[1]),
++				      gen_lowpart (SImode, operands[2]));
++      rtx high = simplify_gen_binary (AND, SImode,
++				      gen_highpart (SImode, operands[1]),
++				      gen_highpart_mode (SImode, DImode,
++							 operands[2]));
++
++      emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++      emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++      DONE;
++    }
++  /* Otherwise expand pattern as above.  */
++  "
+ )
+ 
+ (define_insn_and_split "*anddi3_insn"
+@@ -3128,7 +3144,24 @@
+ 	(ior:DI (match_operand:DI 1 "s_register_operand" "")
+ 		(match_operand:DI 2 "neon_logic_op2" "")))]
+   "TARGET_32BIT"
+-  ""
++  "
++  if (!TARGET_NEON && !TARGET_IWMMXT)
++    {
++      rtx low  = simplify_gen_binary (IOR, SImode,
++				      gen_lowpart (SImode, operands[1]),
++				      gen_lowpart (SImode, operands[2]));
++      rtx high = simplify_gen_binary (IOR, SImode,
++				      gen_highpart (SImode, operands[1]),
++				      gen_highpart_mode (SImode, DImode,
++							 operands[2]));
++
++      emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++      emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++      DONE;
++    }
++  /* Otherwise expand pattern as above.  */
++  "
+ )
+ 
+ (define_insn_and_split "*iordi3_insn"
+@@ -3316,6 +3349,22 @@
+        no NEON instructions that take an immediate.  */
+     if (TARGET_IWMMXT && !REG_P (operands[2]))
+       operands[2] = force_reg (DImode, operands[2]);
++    if (!TARGET_NEON && !TARGET_IWMMXT)
++      {
++	rtx low  = simplify_gen_binary (XOR, SImode,
++					gen_lowpart (SImode, operands[1]),
++					gen_lowpart (SImode, operands[2]));
++	rtx high = simplify_gen_binary (XOR, SImode,
++					gen_highpart (SImode, operands[1]),
++					gen_highpart_mode (SImode, DImode,
++							   operands[2]));
++
++	emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++	emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++	DONE;
++      }
++    /* Otherwise expand pattern as above.  */
+   }
+ )
+ 
+@@ -5027,7 +5076,31 @@
+   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+   "")
+ 
+-(define_insn_and_split "one_cmpldi2"
++(define_expand "one_cmpldi2"
++  [(set (match_operand:DI 0 "s_register_operand" "")
++	(not:DI (match_operand:DI 1 "s_register_operand" "")))]
++  "TARGET_32BIT"
++  "
++  if (!TARGET_NEON && !TARGET_IWMMXT)
++    {
++      rtx low  = simplify_gen_unary (NOT, SImode,
++				     gen_lowpart (SImode, operands[1]),
++				     SImode);
++      rtx high = simplify_gen_unary (NOT, SImode,
++				     gen_highpart_mode (SImode, DImode,
++							operands[1]),
++				     SImode);
++
++      emit_insn (gen_rtx_SET (gen_lowpart (SImode, operands[0]), low));
++      emit_insn (gen_rtx_SET (gen_highpart (SImode, operands[0]), high));
++
++      DONE;
++    }
++  /* Otherwise expand pattern as above.  */
++  "
++)
++
++(define_insn_and_split "*one_cmpldi2_insn"
+   [(set (match_operand:DI 0 "s_register_operand"	 "=w,&r,&r,?w")
+ 	(not:DI (match_operand:DI 1 "s_register_operand" " w, 0, r, w")))]
+   "TARGET_32BIT"
 --- a/src/gcc/config/arm/arm_neon.h
 +++ b/src/gcc/config/arm/arm_neon.h
 @@ -17069,14 +17069,22 @@ __extension__ extern __inline float16x4_t
@@ -2419,7 +3779,21 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_insn "sub<mode>3_fp16"
   [(set
     (match_operand:VH 0 "s_register_operand" "=w")
-@@ -664,8 +692,17 @@
+@@ -650,7 +678,7 @@
+ 		 (match_operand:VCVTF 2 "register_operand" "w")
+ 		 (match_operand:VCVTF 3 "register_operand" "0")))]
+   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+-  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+   [(set_attr "type" "neon_fp_mla_s<q>")]
+ )
+ 
+@@ -660,12 +688,21 @@
+ 		 (match_operand:VCVTF 2 "register_operand" "w")
+ 		 (match_operand:VCVTF 3 "register_operand" "0")))]
+   "TARGET_NEON && TARGET_FMA"
+-  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
    [(set_attr "type" "neon_fp_mla_s<q>")]
  )
  
@@ -2439,6 +3813,33 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_insn "fma<VH:mode>4_intrinsic"
   [(set (match_operand:VH 0 "register_operand" "=w")
     (fma:VH
+@@ -683,7 +720,7 @@
+ 		   (match_operand:VCVTF 2 "register_operand" "w")
+ 		   (match_operand:VCVTF 3 "register_operand" "0")))]
+   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+-  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+   [(set_attr "type" "neon_fp_mla_s<q>")]
+ )
+ 
+@@ -694,7 +731,7 @@
+     (match_operand:VCVTF 2 "register_operand" "w")
+     (match_operand:VCVTF 3 "register_operand" "0")))]
+  "TARGET_NEON && TARGET_FMA"
+- "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "type" "neon_fp_mla_s<q>")]
+ )
+ 
+@@ -715,7 +752,7 @@
+ 		         "s_register_operand" "w")]
+ 		NEON_VRINT))]
+   "TARGET_NEON && TARGET_FPU_ARMV8"
+-  "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
++  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
+   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
+ )
+ 
 @@ -2175,6 +2212,17 @@
                      (const_string "neon_mul_<V_elem_ch><q>")))]
  )
@@ -2457,6 +3858,20 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  (define_insn "neon_vmulf<mode>"
   [(set
     (match_operand:VH 0 "s_register_operand" "=w")
+--- a/src/gcc/config/arm/predicates.md
++++ b/src/gcc/config/arm/predicates.md
+@@ -82,6 +82,11 @@
+ 	      || REGNO (op) >= FIRST_PSEUDO_REGISTER));
+ })
+ 
++(define_predicate "arm_general_adddi_operand"
++  (ior (match_operand 0 "arm_general_register_operand")
++       (and (match_code "const_int")
++	    (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)"))))
++
+ (define_predicate "vfp_register_operand"
+   (match_code "reg,subreg")
+ {
 --- a/src/gcc/config/arm/sync.md
 +++ b/src/gcc/config/arm/sync.md
 @@ -191,9 +191,9 @@
@@ -2648,6 +4063,24 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  
  
  # Option combinations to build library with
+--- a/src/gcc/config/i386/i386.c
++++ b/src/gcc/config/i386/i386.c
+@@ -29499,6 +29499,15 @@ ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
+   if (!any_condjump_p (condjmp))
+     return false;
+ 
++  unsigned int condreg1, condreg2;
++  rtx cc_reg_1;
++  ix86_fixed_condition_code_regs (&condreg1, &condreg2);
++  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
++  if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
++      || !condgen
++      || !modified_in_p (cc_reg_1, condgen))
++    return false;
++
+   if (get_attr_type (condgen) != TYPE_TEST
+       && get_attr_type (condgen) != TYPE_ICMP
+       && get_attr_type (condgen) != TYPE_INCDEC
 --- a/src/gcc/configure
 +++ b/src/gcc/configure
 @@ -1717,7 +1717,8 @@ Optional Packages:
@@ -2782,6 +4215,15 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	   if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
  	     {
  	       emit_insn (uns_insns);
+@@ -9757,7 +9766,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
+ 	      if (targetm.gen_ccmp_first)
+ 		{
+ 		  gcc_checking_assert (targetm.gen_ccmp_next != NULL);
+-		  r = expand_ccmp_expr (g);
++		  r = expand_ccmp_expr (g, mode);
+ 		  if (r)
+ 		    break;
+ 		}
 --- a/src/gcc/generic-match-head.c
 +++ b/src/gcc/generic-match-head.c
 @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
@@ -3067,6 +4509,58 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
    for (; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
      {
        INITIAL_ELIMINATION_OFFSET (ep->from, ep->to, ep->initial_offset);
+--- a/src/gcc/sched-deps.c
++++ b/src/gcc/sched-deps.c
+@@ -2834,34 +2834,30 @@ static void
+ sched_macro_fuse_insns (rtx_insn *insn)
+ {
+   rtx_insn *prev;
+-
++  prev = prev_nonnote_nondebug_insn (insn);
++  if (!prev)
++    return;
++ 
+   if (any_condjump_p (insn))
+     {
+       unsigned int condreg1, condreg2;
+       rtx cc_reg_1;
+       targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+       cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+-      prev = prev_nonnote_nondebug_insn (insn);
+-      if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
+-          || !prev
+-          || !modified_in_p (cc_reg_1, prev))
+-        return;
++      if (reg_referenced_p (cc_reg_1, PATTERN (insn))
++	  && modified_in_p (cc_reg_1, prev))
++	{
++	  if (targetm.sched.macro_fusion_pair_p (prev, insn))
++	    SCHED_GROUP_P (insn) = 1;
++	  return;
++	}
+     }
+-  else
+-    {
+-      rtx insn_set = single_set (insn);
+-
+-      prev = prev_nonnote_nondebug_insn (insn);
+-      if (!prev
+-          || !insn_set
+-          || !single_set (prev))
+-        return;
+ 
++  if (single_set (insn) && single_set (prev))
++    {
++      if (targetm.sched.macro_fusion_pair_p (prev, insn))
++	SCHED_GROUP_P (insn) = 1;
+     }
+-
+-  if (targetm.sched.macro_fusion_pair_p (prev, insn))
+-    SCHED_GROUP_P (insn) = 1;
+-
+ }
+ 
+ /* Get the implicit reg pending clobbers for INSN and save them in TEMP.  */
 --- a/src/gcc/simplify-rtx.c
 +++ b/src/gcc/simplify-rtx.c
 @@ -3345,19 +3345,21 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
@@ -3295,82 +4789,308 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +  return a / x;
 +}
 +
-+unsigned long long
-+f3 (unsigned long long a, int b)
++unsigned long long
++f3 (unsigned long long a, int b)
++{
++  unsigned long long x = 1ULL << b;
++  return a / x;
++}
++
++/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr79697.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-gimple -fdump-tree-cddce-details -fdump-tree-optimized" } */
++
++void f(void)
++{
++  __builtin_strdup ("abc");
++}
++
++void g(void)
++{
++  __builtin_strndup ("abc", 3);
++}
++
++void h(void)
++{
++  __builtin_realloc (0, 10);
++}
++
++/* { dg-final { scan-tree-dump "Deleting : __builtin_strdup" "cddce1" } } */
++/* { dg-final { scan-tree-dump "Deleting : __builtin_strndup" "cddce1" } } */
++/* { dg-final { scan-tree-dump "__builtin_malloc" "gimple" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int *a)
++{
++  int x = 3;
++  return __atomic_compare_exchange_n (a, &x, 0, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++}
++
++/* { dg-final { scan-assembler "stxr\\tw\[0-9\]+, wzr,.*" } } */
++/* { dg-final { scan-assembler-not "mov\\tw\[0-9\]+, 0" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int *a)
++{
++  int x = 0;
++  return __atomic_compare_exchange_n (a, &x, 4, 0,
++				      __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++}
++
++/* { dg-final { scan-assembler-times "cbnz\\tw\[0-9\]+" 2 } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/ccmp_2.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int g(void);
++int h(int a, _Bool c)
++{
++  if (a != 0 && c)
++    return g();
++  return 1;
++}
++
++/* { dg-final { scan-assembler "\tccmp\t" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 " } */
++
++int f3 (int x, int y)
++{
++  int res = x << 3;
++  return res != 0;
++}
++
++/* We should combine the shift and compare */
++/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 3" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mno-pc-relative-literal-loads" } */
++/* { dg-skip-if "Tiny model won't generate adrp" { *-*-* } { "-mcmodel=tiny" } { "" } } */
++
++double d0(void)
++{
++  double x = 0.0d;
++  return x;
++}
++
++double dn1(void)
++{
++  double x = -0.0d;
++  return x;
++}
++
++
++double d1(void)
++{
++  double x = 1.5d;
++  return x;
++}
++
++double d2(void)
++{
++  double x = 123256.0d;
++  return x;
++}
++
++double d3(void)
++{
++  double x = 123256123456.0d;
++  return x;
++}
++
++double d4(void)
++{
++  double x = 123456123456123456.0d;
++  return x;
++}
++
++/* { dg-final { scan-assembler-times "movi\td\[0-9\]+, #?0"                 1 } } */
++
++/* { dg-final { scan-assembler-times "adrp\tx\[0-9\]+, \.LC\[0-9\]"         2 } } */
++/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+, \\\[x\[0-9\], #:lo12:\.LC\[0-9\]\\\]" 2 } } */
++
++/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, 1\\\.5e\\\+0"        1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 25838523252736"       1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x40fe, lsl 48"      1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -9223372036854775808" 1 } } */
++/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, x\[0-9\]+"           2 } } */
++
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_1.c
+@@ -0,0 +1,49 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
++/* { dg-add-options arm_v8_2a_fp16_scalar } */
++
++extern __fp16 foo ();
++extern void bar (__fp16* x);
++
++void f1 ()
++{
++  volatile __fp16 a = 17.0;
++}
++
++
++void f2 (__fp16 *a)
++{
++  *a = 17.0;
++}
++
++void f3 ()
++{
++  __fp16 b = foo ();
++  b = 17.0;
++  bar (&b);
++}
++
++__fp16 f4 ()
++{
++  __fp16 a = 0;
++  __fp16 b = 1;
++  __fp16 c = 2;
++  __fp16 d = 4;
++
++  __fp16 z = a + b;
++  z = z + c;
++  z = z - d;
++  return z;
++}
++
++__fp16 f5 ()
 +{
-+  unsigned long long x = 1ULL << b;
-+  return a / x;
++  __fp16 a = 16;
++  bar (&a);
++  return a;
 +}
 +
-+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, #?19520"           3 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0xbc, lsl 8"  1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x4c, lsl 8"  1 } } */
 --- /dev/null
-+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr79697.c
-@@ -0,0 +1,21 @@
++++ b/src/gcc/testsuite/gcc.target/aarch64/f16_mov_immediate_2.c
+@@ -0,0 +1,45 @@
 +/* { dg-do compile } */
-+/* { dg-options "-O2 -fdump-tree-gimple -fdump-tree-cddce-details -fdump-tree-optimized" } */
++/* { dg-options "-O3" } */
++/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
++/* { dg-add-options arm_v8_2a_fp16_scalar } */
 +
-+void f(void)
++#include <arm_fp16.h>
++
++float16_t f0(void)
 +{
-+  __builtin_strdup ("abc");
++  float16_t x = 0.0f;
++  return x;
 +}
 +
-+void g(void)
++float16_t fn1(void)
 +{
-+  __builtin_strndup ("abc", 3);
++  float16_t x = -0.0f;
++  return x;
 +}
 +
-+void h(void)
++float16_t f1(void)
 +{
-+  __builtin_realloc (0, 10);
++  float16_t x = 256.0f;
++  return x;
 +}
 +
-+/* { dg-final { scan-tree-dump "Deleting : __builtin_strdup" "cddce1" } } */
-+/* { dg-final { scan-tree-dump "Deleting : __builtin_strndup" "cddce1" } } */
-+/* { dg-final { scan-tree-dump "__builtin_malloc" "gimple" } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
-@@ -0,0 +1,12 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
++float16_t f2(void)
++{
++  float16_t x = 123256.0f;
++  return x;
++}
 +
-+int
-+foo (int *a)
++float16_t f3(void)
 +{
-+  int x = 3;
-+  return __atomic_compare_exchange_n (a, &x, 0, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++  float16_t x = 17.0;
++  return x;
 +}
 +
-+/* { dg-final { scan-assembler "stxr\\tw\[0-9\]+, wzr,.*" } } */
-+/* { dg-final { scan-assembler-not "mov\\tw\[0-9\]+, 0" } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.4h, ?#0"         1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 8" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x5c, lsl 8" 1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x7c, lsl 8" 1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 19520"              1 } } */
++/* { dg-final { scan-assembler-times "fmov\th\[0-9\], w\[0-9\]+"          1 } } */
++
 --- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
-@@ -0,0 +1,12 @@
++++ b/src/gcc/testsuite/gcc.target/aarch64/flt_mov_immediate_1.c
+@@ -0,0 +1,52 @@
 +/* { dg-do compile } */
-+/* { dg-options "-O2" } */
++/* { dg-options "-O3" } */
 +
-+int
-+foo (int *a)
++float f0(void)
 +{
-+  int x = 0;
-+  return __atomic_compare_exchange_n (a, &x, 4, 0,
-+				      __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++  float x = 0.0f;
++  return x;
 +}
 +
-+/* { dg-final { scan-assembler-times "cbnz\\tw\[0-9\]+" 2 } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
-@@ -0,0 +1,11 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 " } */
++float fn1(void)
++{
++  float x = -0.0f;
++  return x;
++}
 +
-+int f3 (int x, int y)
++float f1(void)
 +{
-+  int res = x << 3;
-+  return res != 0;
++  float x = 256.0f;
++  return x;
 +}
 +
-+/* We should combine the shift and compare */
-+/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 3" } } */
++float f2(void)
++{
++  float x = 123256.0f;
++  return x;
++}
++
++float f3(void)
++{
++  float x = 2.0f;
++  return x;
++}
++
++float f4(void)
++{
++  float x = -20000.1;
++  return x;
++}
++
++
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, ?#0"           1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 24"  1 } } */
++/* { dg-final { scan-assembler-times "movi\tv\[0-9\]+\\\.2s, 0x80, lsl 24"  1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 48128"                1 } } */
++/* { dg-final { scan-assembler-times "movk\tw\[0-9\]+, 0x47f0, lsl 16"      1 } } */
++
++/* { dg-final { scan-assembler-times "fmov\ts\[0-9\]+, 2\\\.0e\\\+0"  1 } } */
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 16435"                1 } } */
++/* { dg-final { scan-assembler-times "movk\tw\[0-9\]+, 0xc69c, lsl 16"      1 } } */
++
 --- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/aarch64/hfmode_ins_1.c
 @@ -0,0 +1,21 @@
@@ -3438,6 +5158,68 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +/* { dg-final { scan-assembler-times "fcvtzs\t\[w,x\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
 +/* { dg-final { scan-assembler-not "bl"    } } */
 --- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c
+@@ -0,0 +1,59 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target lp64 } */
++/* { dg-options "-O3" } */
++
++long long f1(void)
++{
++  return 0xffff6666;
++}
++
++int f3(void)
++{
++  return 0xffff6666;
++}
++
++
++long f2(void)
++{
++  return 0x11110000ffff6666;
++}
++
++long f4(void)
++{
++  return 0x11110001ffff6666;
++}
++
++long f5(void)
++{
++  return 0x111100001ff6666;
++}
++
++long f6(void)
++{
++  return 0x00001111ffff6666;
++}
++
++long f7(void)
++{
++  return 0x000011116666ffff;
++}
++
++long f8(void)
++{
++  return 0x0f0011116666ffff;
++}
++
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, -39322"      1 } } */
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 4294927974"  3 } } */
++/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 1718026239"  1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -2576941057" 1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -39322"      1 } } */
++/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 26214"       1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0xf00, lsl 48" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 48" 2 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1000, lsl 32" 1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 32" 3 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x111, lsl 48"  1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1ff, lsl 16"  1 } } */
++/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1, lsl 32"    1 } } */
++
+--- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/aarch64/lrint-matherr.h
 @@ -0,0 +1,5 @@
 +#define TEST(name, float_type, int_type, pref) void f_##name (float_type x) \
@@ -3489,6 +5271,52 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +/* { dg-final { scan-assembler-times "bl\tlrint"  4 } } */
 +/* { dg-final { scan-assembler-times "bl\tllrint" 2 } } */
 +/* { dg-final { scan-assembler-not "fcvtzs" } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
+@@ -4,10 +4,10 @@
+ #pragma GCC target ("+nothing+simd, cmodel=small")
+ 
+ int
+-cal (float a)
++cal (double a)
+ {
+-  float b = 1.2;
+-  float c = 2.2;
++  double b = 3.2;
++  double c = 2.2;
+   if ((a + b) != c)
+     return 0;
+   else
+@@ -19,11 +19,11 @@ cal (float a)
+ #pragma GCC target ("cmodel=large")
+ 
+ int
+-cal2 (float a)
++cal2 (double a)
+ {
+ 
+-  float b = 1.2;
+-  float c = 2.2;
++  double b = 3.2;
++  double c = 2.2;
+   if ((a + b) != c)
+     return 0;
+   else
+@@ -33,11 +33,11 @@ cal2 (float a)
+ #pragma GCC pop_options
+ 
+ int
+-cal3 (float a)
++cal3 (double a)
+ {
+ 
+-  float b = 1.2;
+-  float c = 2.2;
++  double b = 3.2;
++  double c = 2.2;
+   if ((a + b) != c)
+     return 0;
+   else
 --- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_1.c
 @@ -0,0 +1,18 @@
@@ -3551,6 +5379,76 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +
 +/* { dg-final { scan-assembler-times "udiv\tw\[0-9\]+, w\[0-9\]+" 4 } } */
 +/* { dg-final { scan-assembler-times "sdiv\tw\[0-9\]+, w\[0-9\]+" 2 } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c
+@@ -0,0 +1,67 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++typedef short int __attribute__ ((vector_size (16))) v8hi;
++
++v8hi
++mla8hi (v8hi v0, v8hi v1, short int v2)
++{
++  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++  return v0 + v1 * v2;
++}
++
++
++v8hi
++mls8hi (v8hi v0, v8hi v1, short int v2)
++{
++  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++  return v0 - v1 * v2;
++}
++
++typedef short int __attribute__ ((vector_size (8))) v4hi;
++
++v4hi
++mla4hi (v4hi v0, v4hi v1, short int v2)
++{
++  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++  return v0 + v1 * v2;
++}
++
++v4hi
++mls4hi (v4hi v0, v4hi v1, short int v2)
++{
++  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */
++  return v0 - v1 * v2;
++}
++
++typedef int __attribute__ ((vector_size (16))) v4si;
++
++v4si
++mla4si (v4si v0, v4si v1, int v2)
++{
++  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++  return v0 + v1 * v2;
++}
++
++v4si
++mls4si (v4si v0, v4si v1, int v2)
++{
++  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++  return v0 - v1 * v2;
++}
++
++typedef int __attribute__((vector_size (8))) v2si;
++
++v2si
++mla2si (v2si v0, v2si v1, int v2)
++{
++  /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++  return v0 + v1 * v2;
++}
++
++v2si
++mls2si (v2si v0, v2si v1, int v2)
++{
++  /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */
++  return v0 - v1 * v2;
++}
 --- a/src/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
 +++ b/src/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
 @@ -3,7 +3,7 @@
@@ -3713,6 +5611,27 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, #4" 1 } } */
 +/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */
 --- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/tst_imm_split_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++f (unsigned char *p)
++{
++  return p[0] == 50 || p[0] == 52;
++}
++
++int
++g (unsigned char *p)
++{
++  return (p[0] >> 4 & 0xfd) == 0;
++}
++
++/* { dg-final { scan-assembler-not "and\\t\[xw\]\[0-9\]+, \[xw\]\[0-9\]+.*" } } */
++/* { dg-final { scan-assembler "tst\\t\[xw\]\[0-9\]+, \[xw\]\[0-9\]+" } } */
++/* { dg-final { scan-assembler "tst\\t\[xw\]\[0-9\]+, \[xw\]\[0-9\]+, lsr 4" } } */
+--- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-init-1.c
 @@ -0,0 +1,12 @@
 +/* { dg-do compile } */
@@ -4566,6 +6485,28 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +/* Ensure there is no IT block with more than 2 instructions, ie. we only allow
 +   IT, ITT and ITE.  */
 +/* { dg-final { scan-assembler-not "\\sit\[te\]{2}" } } */
+--- a/src/gcc/testsuite/gcc.target/arm/lto/pr65837-attr_0.c
++++ b/src/gcc/testsuite/gcc.target/arm/lto/pr65837-attr_0.c
+@@ -1,6 +1,7 @@
+ /* { dg-lto-do run } */
+ /* { dg-require-effective-target arm_neon_hw } */
+-/* { dg-lto-options {{-flto}} } */
++/* { dg-require-effective-target arm_neon_ok_no_float_abi } */
++/* { dg-lto-options {{-flto -mfpu=neon}} } */
+ 
+ #include "arm_neon.h"
+ 
+--- a/src/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c
++++ b/src/gcc/testsuite/gcc.target/arm/lto/pr65837_0.c
+@@ -1,7 +1,7 @@
+ /* { dg-lto-do run } */
+ /* { dg-require-effective-target arm_neon_hw } */
++/* { dg-require-effective-target arm_neon_ok_no_float_abi } */
+ /* { dg-lto-options {{-flto -mfpu=neon}} } */
+-/* { dg-suppress-ld-options {-mfpu=neon} } */
+ 
+ #include "arm_neon.h"
+ 
 --- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/arm/movdi_movt.c
 @@ -0,0 +1,18 @@
@@ -4618,6 +6559,178 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +#define __ARM_FEATURE_LDREX 0
 +/* { dg-warning ".__ARM_FEATURE_LDREX. redefined" "" { target *-*-* } .-1 } */
 --- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/pr77308-1.c
+@@ -0,0 +1,169 @@
++/* { dg-do compile } */
++/* { dg-options "-Os -Wstack-usage=2500" } */
++
++/* This is a modified algorithm with bit-not "~" at the Sigma-blocks.
++   It improves the test coverage of one_cmpldi2 and subdi3 patterns.
++   Unlike the original test case these insns can reach the reload pass,
++   which may result in large stack usage.  */
++
++#define SHA_LONG64 unsigned long long
++#define U64(C)     C##ULL
++
++#define SHA_LBLOCK      16
++#define SHA512_CBLOCK   (SHA_LBLOCK*8)
++
++typedef struct SHA512state_st {
++    SHA_LONG64 h[8];
++    SHA_LONG64 Nl, Nh;
++    union {
++        SHA_LONG64 d[SHA_LBLOCK];
++        unsigned char p[SHA512_CBLOCK];
++    } u;
++    unsigned int num, md_len;
++} SHA512_CTX;
++
++static const SHA_LONG64 K512[80] = {
++    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
++    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
++    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
++    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
++    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
++    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
++    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
++    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
++    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
++    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
++    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
++    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
++    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
++    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
++    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
++    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
++    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
++    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
++    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
++    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
++    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
++    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
++    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
++    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
++    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
++    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
++    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
++    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
++    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
++    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
++    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
++    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
++    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
++    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
++    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
++    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
++    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
++    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
++    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
++    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
++};
++
++#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
++#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
++#define ROTR(x,s)       (((x)>>s) | (x)<<(64-s))
++#define Sigma0(x)       ~(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
++#define Sigma1(x)       ~(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
++#define sigma0(x)       ~(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
++#define sigma1(x)       ~(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
++#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
++#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
++
++#define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
++        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
++        h = Sigma0(a) + Maj(a,b,c);                     \
++        d += T1;        h += T1;                } while (0)
++#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
++        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
++        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
++        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
++        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
++void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
++                                    unsigned int num)
++{
++    const SHA_LONG64 *W = in;
++    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
++    SHA_LONG64 X[16];
++    int i;
++
++    while (num--) {
++
++        a = ctx->h[0];
++        b = ctx->h[1];
++        c = ctx->h[2];
++        d = ctx->h[3];
++        e = ctx->h[4];
++        f = ctx->h[5];
++        g = ctx->h[6];
++        h = ctx->h[7];
++
++        T1 = X[0] = PULL64(W[0]);
++        ROUND_00_15(0, a, b, c, d, e, f, g, h);
++        T1 = X[1] = PULL64(W[1]);
++        ROUND_00_15(1, h, a, b, c, d, e, f, g);
++        T1 = X[2] = PULL64(W[2]);
++        ROUND_00_15(2, g, h, a, b, c, d, e, f);
++        T1 = X[3] = PULL64(W[3]);
++        ROUND_00_15(3, f, g, h, a, b, c, d, e);
++        T1 = X[4] = PULL64(W[4]);
++        ROUND_00_15(4, e, f, g, h, a, b, c, d);
++        T1 = X[5] = PULL64(W[5]);
++        ROUND_00_15(5, d, e, f, g, h, a, b, c);
++        T1 = X[6] = PULL64(W[6]);
++        ROUND_00_15(6, c, d, e, f, g, h, a, b);
++        T1 = X[7] = PULL64(W[7]);
++        ROUND_00_15(7, b, c, d, e, f, g, h, a);
++        T1 = X[8] = PULL64(W[8]);
++        ROUND_00_15(8, a, b, c, d, e, f, g, h);
++        T1 = X[9] = PULL64(W[9]);
++        ROUND_00_15(9, h, a, b, c, d, e, f, g);
++        T1 = X[10] = PULL64(W[10]);
++        ROUND_00_15(10, g, h, a, b, c, d, e, f);
++        T1 = X[11] = PULL64(W[11]);
++        ROUND_00_15(11, f, g, h, a, b, c, d, e);
++        T1 = X[12] = PULL64(W[12]);
++        ROUND_00_15(12, e, f, g, h, a, b, c, d);
++        T1 = X[13] = PULL64(W[13]);
++        ROUND_00_15(13, d, e, f, g, h, a, b, c);
++        T1 = X[14] = PULL64(W[14]);
++        ROUND_00_15(14, c, d, e, f, g, h, a, b);
++        T1 = X[15] = PULL64(W[15]);
++        ROUND_00_15(15, b, c, d, e, f, g, h, a);
++
++        for (i = 16; i < 80; i += 16) {
++            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
++            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
++            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
++            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
++            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
++            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
++            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
++            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
++            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
++            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
++            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
++            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
++            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
++            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
++            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
++            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
++        }
++
++        ctx->h[0] += a;
++        ctx->h[1] += b;
++        ctx->h[2] += c;
++        ctx->h[3] += d;
++        ctx->h[4] += e;
++        ctx->h[5] += f;
++        ctx->h[6] += g;
++        ctx->h[7] += h;
++
++        W += SHA_LBLOCK;
++    }
++}
+--- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/arm/sdiv_costs_1.c
 @@ -0,0 +1,40 @@
 +/* { dg-do compile } */
@@ -4924,7 +7037,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +	set stack_opt "-fstack-check"
 +    } else { set stack_opt "-fstack-check=$stack_kind" }
 +
-+    return [check_no_compiler_messages stack_check executable {
++    return [check_no_compiler_messages stack_check_$stack_kind executable {
 +	int main (void) { return 0; }
 +    } "$stack_opt"]
 +}
@@ -4932,7 +7045,66 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  # Return 1 if compilation with -freorder-blocks-and-partition is error-free
  # for trivial code, 0 otherwise.  As some targets (ARM for example) only
  # warn when -fprofile-use is also supplied we test that combination too.
-@@ -3768,12 +3779,13 @@ proc check_effective_target_arm_fp16_hw { } {
+@@ -3365,7 +3376,7 @@ proc add_options_for_arm_v8_1a_neon { flags } {
+ 	return "$flags"
+     }
+     global et_arm_v8_1a_neon_flags
+-    return "$flags $et_arm_v8_1a_neon_flags -march=armv8.1-a"
++    return "$flags $et_arm_v8_1a_neon_flags"
+ }
+ 
+ # Add the options needed for ARMv8.2 with the scalar FP16 extension.
+@@ -3428,8 +3439,9 @@ proc check_effective_target_arm_neon_ok_nocache { } {
+     global et_arm_neon_flags
+     set et_arm_neon_flags ""
+     if { [check_effective_target_arm32] } {
+-	foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon -mfloat-abi=softfp" "-mfpu=neon -mfloat-abi=softfp -march=armv7-a"} {
++	foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon -mfloat-abi=softfp" "-mfpu=neon -mfloat-abi=softfp -march=armv7-a" "-mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard -march=armv7-a"} {
+ 	    if { [check_no_compiler_messages_nocache arm_neon_ok object {
++		#include <arm_neon.h>
+ 		int dummy;
+ 		#ifndef __ARM_NEON__
+ 		#error not NEON
+@@ -3454,6 +3466,38 @@ proc check_effective_target_arm_neon_ok { } {
+ 		check_effective_target_arm_neon_ok_nocache]
+ }
+ 
++# Return 1 if this is an ARM target supporting -mfpu=neon without any
++# -mfloat-abi= option.  Useful in tests where add_options is not
++# supported (such as lto tests).
++
++proc check_effective_target_arm_neon_ok_no_float_abi_nocache { } {
++    if { [check_effective_target_arm32] } {
++	foreach flags {"-mfpu=neon"} {
++	    if { [check_no_compiler_messages_nocache arm_neon_ok_no_float_abi object {
++		#include <arm_neon.h>
++		int dummy;
++		#ifndef __ARM_NEON__
++		#error not NEON
++		#endif
++		/* Avoid the case where a test adds -mfpu=neon, but the toolchain is
++		   configured for -mcpu=arm926ej-s, for example.  */
++		#if __ARM_ARCH < 7 || __ARM_ARCH_PROFILE == 'M'
++		#error Architecture does not support NEON.
++		#endif
++	    } "$flags"] } {
++		return 1
++	    }
++	}
++    }
++
++    return 0
++}
++
++proc check_effective_target_arm_neon_ok_no_float_abi { } {
++    return [check_cached_effective_target arm_neon_ok_no_float_abi \
++		check_effective_target_arm_neon_ok_no_float_abi_nocache]
++}
++
+ proc check_effective_target_arm_crc_ok_nocache { } {
+     global et_arm_crc_flags
+     set et_arm_crc_flags "-march=armv8-a+crc"
+@@ -3769,12 +3813,13 @@ proc check_effective_target_arm_fp16_hw { } {
  # can be selected and a routine to give the flags to select that architecture
  # Note: Extra flags may be added to disable options from newer compilers
  # (Thumb in particular - but others may be added in the future).
@@ -4949,7 +7121,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  	v4 "-march=armv4 -marm" __ARM_ARCH_4__
  	v4t "-march=armv4t" __ARM_ARCH_4T__
  	v5 "-march=armv5 -marm" __ARM_ARCH_5__
-@@ -3788,20 +3800,23 @@ foreach { armfunc armflag armdef } {
+@@ -3789,20 +3834,23 @@ foreach { armfunc armflag armdef } {
  	v7r "-march=armv7-r" __ARM_ARCH_7R__
  	v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__
  	v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__
@@ -4977,7 +7149,7 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  		#endif
  	    } "FLAG" ]
  	}
-@@ -3822,26 +3837,6 @@ foreach { armfunc armflag armdef } {
+@@ -3823,26 +3871,6 @@ foreach { armfunc armflag armdef } {
      }]
  }
  
@@ -5004,6 +7176,29 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
  # Return 1 if GCC was configured with --with-mode=
  proc check_effective_target_default_mode { } {
  
+@@ -4038,13 +4066,15 @@ proc check_effective_target_arm_v8_1a_neon_ok_nocache { } {
+     # since AArch64 only needs the -march setting.
+     foreach flags {"" "-mfpu=neon-fp-armv8" "-mfloat-abi=softfp" \
+ 		       "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
+-	if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object {
+-	    #if !defined (__ARM_FEATURE_QRDMX)
+-	    #error "__ARM_FEATURE_QRDMX not defined"
+-	    #endif
+-	} "$flags -march=armv8.1-a"] } {
+-	    set et_arm_v8_1a_neon_flags "$flags -march=armv8.1-a"
+-	    return 1
++	foreach arches { "-march=armv8-a+rdma" "-march=armv8.1-a" } {
++	    if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object {
++		#if !defined (__ARM_FEATURE_QRDMX)
++		#error "__ARM_FEATURE_QRDMX not defined"
++		#endif
++	    } "$flags $arches"] } {
++		set et_arm_v8_1a_neon_flags "$flags $arches"
++		return 1
++	    }
+ 	}
+     }
+ 
 --- a/src/gcc/tree-ssa-dce.c
 +++ b/src/gcc/tree-ssa-dce.c
 @@ -233,6 +233,8 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
@@ -5325,3 +7520,250 @@ LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefe
 +
 +SHLIB_LDFLAGS = -Wl,--soname=$(SHLIB_SONAME) \
 +                $(LDFLAGS)
+--- /dev/null
++++ b/src/libstdc++-v3/config/cpu/aarch64/opt/bits/opt_random.h
+@@ -0,0 +1,47 @@
++// Optimizations for random number functions, aarch64 version -*- C++ -*-
++
++// Copyright (C) 2017 Free Software Foundation, Inc.
++//
++// This file is part of the GNU ISO C++ Library.  This library is free
++// software; you can redistribute it and/or modify it under the
++// terms of the GNU General Public License as published by the
++// Free Software Foundation; either version 3, or (at your option)
++// any later version.
++
++// This library is distributed in the hope that it will be useful,
++// but WITHOUT ANY WARRANTY; without even the implied warranty of
++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++// GNU General Public License for more details.
++
++// Under Section 7 of GPL version 3, you are granted additional
++// permissions described in the GCC Runtime Library Exception, version
++// 3.1, as published by the Free Software Foundation.
++
++// You should have received a copy of the GNU General Public License and
++// a copy of the GCC Runtime Library Exception along with this program;
++// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++// <http://www.gnu.org/licenses/>.
++
++/** @file bits/opt_random.h
++ *  This is an internal header file, included by other library headers.
++ *  Do not attempt to use it directly. @headername{random}
++ */
++
++#ifndef _BITS_OPT_RANDOM_H
++#define _BITS_OPT_RANDOM_H 1
++
++#pragma GCC system_header
++
++
++namespace std _GLIBCXX_VISIBILITY (default)
++{
++_GLIBCXX_BEGIN_NAMESPACE_VERSION
++
++
++
++
++_GLIBCXX_END_NAMESPACE_VERSION
++} // namespace
++
++
++#endif // _BITS_OPT_RANDOM_H
+--- /dev/null
++++ b/src/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
+@@ -0,0 +1,180 @@
++// Optimizations for random number extensions, aarch64 version -*- C++ -*-
++
++// Copyright (C) 2017 Free Software Foundation, Inc.
++//
++// This file is part of the GNU ISO C++ Library.  This library is free
++// software; you can redistribute it and/or modify it under the
++// terms of the GNU General Public License as published by the
++// Free Software Foundation; either version 3, or (at your option)
++// any later version.
++
++// This library is distributed in the hope that it will be useful,
++// but WITHOUT ANY WARRANTY; without even the implied warranty of
++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++// GNU General Public License for more details.
++
++// Under Section 7 of GPL version 3, you are granted additional
++// permissions described in the GCC Runtime Library Exception, version
++// 3.1, as published by the Free Software Foundation.
++
++// You should have received a copy of the GNU General Public License and
++// a copy of the GCC Runtime Library Exception along with this program;
++// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++// <http://www.gnu.org/licenses/>.
++
++/** @file ext/random.tcc
++ *  This is an internal header file, included by other library headers.
++ *  Do not attempt to use it directly. @headername{ext/random}
++ */
++
++#ifndef _EXT_OPT_RANDOM_H
++#define _EXT_OPT_RANDOM_H 1
++
++#pragma GCC system_header
++
++#ifdef __ARM_NEON
++
++#ifdef __AARCH64EB__
++# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \
++    {16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \
++     24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C})
++#else
++# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \
++    {_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \
++     _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15})
++#endif
++
++namespace __gnu_cxx _GLIBCXX_VISIBILITY (default)
++{
++_GLIBCXX_BEGIN_NAMESPACE_VERSION
++
++  namespace {
++    // Logical Shift right 128-bits by c * 8 bits
++
++    __extension__ extern __inline __Uint32x4_t
++    __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
++    __aarch64_lsr_128 (__Uint8x16_t __a, __const int __c)
++    {
++      const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
++				   0, 0, 0, 0, 0, 0, 0, 0};
++
++      return (__Uint32x4_t) __VEXT (__zero, __a, __c);
++    }
++
++    // Logical Shift left 128-bits by c * 8 bits
++
++    __extension__ extern __inline __Uint32x4_t
++    __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
++    __aarch64_lsl_128 (__Uint8x16_t __a, __const int __c)
++    {
++      const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0,
++				   0, 0, 0, 0, 0, 0, 0, 0};
++
++      return (__Uint32x4_t) __VEXT (__a, __zero, 16 - __c);
++    }
++
++    template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2>
++      inline __Uint32x4_t __aarch64_recursion (__Uint32x4_t __a,
++					       __Uint32x4_t __b,
++					       __Uint32x4_t __c,
++					       __Uint32x4_t __d,
++					       __Uint32x4_t __e)
++    {
++      __Uint32x4_t __y = (__b >> __sr1);
++      __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2);
++
++      __Uint32x4_t __v = __d << __sl1;
++
++      __z = __z ^ __a;
++      __z = __z ^ __v;
++
++      __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2);
++
++      __y = __y & __e;
++      __z = __z ^ __x;
++      return __z ^ __y;
++    }
++}
++
++#define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ	1
++  template<typename _UIntType, size_t __m,
++	   size_t __pos1, size_t __sl1, size_t __sl2,
++	   size_t __sr1, size_t __sr2,
++	   uint32_t __msk1, uint32_t __msk2,
++	   uint32_t __msk3, uint32_t __msk4,
++	   uint32_t __parity1, uint32_t __parity2,
++	   uint32_t __parity3, uint32_t __parity4>
++    void simd_fast_mersenne_twister_engine<_UIntType, __m,
++					   __pos1, __sl1, __sl2, __sr1, __sr2,
++					   __msk1, __msk2, __msk3, __msk4,
++					   __parity1, __parity2, __parity3,
++					   __parity4>::
++    _M_gen_rand (void)
++    {
++      __Uint32x4_t __r1 = _M_state[_M_nstate - 2];
++      __Uint32x4_t __r2 = _M_state[_M_nstate - 1];
++
++      __Uint32x4_t __aData = {__msk1, __msk2, __msk3, __msk4};
++
++      size_t __i;
++      for (__i = 0; __i < _M_nstate - __pos1; ++__i)
++	{
++	  __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
++	    (_M_state[__i], _M_state[__i + __pos1], __r1, __r2, __aData);
++
++	  _M_state[__i] = __r;
++
++	  __r1 = __r2;
++	  __r2 = __r;
++	}
++      for (; __i < _M_nstate; ++__i)
++	{
++	  __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2>
++	    (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2,
++	     __aData);
++
++	  _M_state[__i] = __r;
++
++	  __r1 = __r2;
++	  __r2 = __r;
++	}
++
++      _M_pos = 0;
++    }
++
++
++#define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL	1
++  template<typename _UIntType, size_t __m,
++	   size_t __pos1, size_t __sl1, size_t __sl2,
++	   size_t __sr1, size_t __sr2,
++	   uint32_t __msk1, uint32_t __msk2,
++	   uint32_t __msk3, uint32_t __msk4,
++	   uint32_t __parity1, uint32_t __parity2,
++	   uint32_t __parity3, uint32_t __parity4>
++    bool
++    operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
++	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
++	       __msk1, __msk2, __msk3, __msk4,
++	       __parity1, __parity2, __parity3, __parity4>& __lhs,
++	       const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
++	       __m, __pos1, __sl1, __sl2, __sr1, __sr2,
++	       __msk1, __msk2, __msk3, __msk4,
++	       __parity1, __parity2, __parity3, __parity4>& __rhs)
++    {
++      if (__lhs._M_pos != __rhs._M_pos)
++	return false;
++
++      __Uint32x4_t __res = __lhs._M_state[0] ^ __rhs._M_state[0];
++
++      for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
++	__res |= __lhs._M_state[__i] ^ __rhs._M_state[__i];
++
++      return (__int128) __res == 0;
++    }
++
++_GLIBCXX_END_NAMESPACE_VERSION
++  } // namespace
++
++#endif // __ARM_NEON
++
++#endif // _EXT_OPT_RANDOM_H
+--- a/src/libstdc++-v3/include/ext/random
++++ b/src/libstdc++-v3/include/ext/random
+@@ -184,6 +184,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
+ #ifdef __SSE2__
+ 	__m128i _M_state[_M_nstate];
+ #endif
++#ifdef __ARM_NEON
++#ifdef __aarch64__
++	__Uint32x4_t _M_state[_M_nstate];
++#endif
++#endif
+ 	uint32_t _M_state32[_M_nstate32];
+ 	result_type _M_stateT[state_size];
+       } __attribute__ ((__aligned__ (16)));

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/gcc-7.git



More information about the Reproducible-commits mailing list