[gcc-7] 275/354: * Update the Linaro support to the 7-2017.08 snapshot.

Ximin Luo <infinity0@debian.org>
Thu Nov 23 15:51:12 UTC 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch master
in repository gcc-7.

commit 5f54be74ea3d822a040c106e2bba4a23cbfb2637
Author: doko <doko@6ca36cf4-e1d1-0310-8c6f-e303bb2178ca>
Date:   Tue Aug 22 11:56:22 2017 +0000

      * Update the Linaro support to the 7-2017.08 snapshot.
    
    
    git-svn-id: svn+ssh://svn.debian.org/svn/gcccvs/branches/sid/gcc-7@9641 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
 debian/changelog                         |    6 +
 debian/patches/gcc-linaro-doc.diff       |   15 +-
 debian/patches/gcc-linaro-no-macros.diff |    2 +-
 debian/patches/gcc-linaro.diff           | 1194 ++++++++++++++++++++++++++++--
 4 files changed, 1164 insertions(+), 53 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index db3709e..761e31a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+gcc-7 (7.2.0-2) UNRELEASED; urgency=medium
+
+  * Update the Linaro support to the 7-2017.08 snapshot.
+
+ -- Matthias Klose <doko@debian.org>  Tue, 22 Aug 2017 13:46:26 +0200
+
 gcc-7 (7.2.0-1) unstable; urgency=medium
 
   * GCC 7.2.0 release.
diff --git a/debian/patches/gcc-linaro-doc.diff b/debian/patches/gcc-linaro-doc.diff
index 4810486..ac879e0 100644
--- a/debian/patches/gcc-linaro-doc.diff
+++ b/debian/patches/gcc-linaro-doc.diff
@@ -1,4 +1,4 @@
-# DP: Changes for the Linaro 7-2017.07 snapshot (documentation).
+# DP: Changes for the Linaro 7-2017.08 snapshot (documentation).
 
 --- a/src/gcc/doc/install.texi
 +++ b/src/gcc/doc/install.texi
@@ -26,6 +26,19 @@
  
  @multitable @columnfractions .15 .28 .30
  @item Option @tab aprofile @tab rmprofile
+--- a/src/gcc/doc/invoke.texi
++++ b/src/gcc/doc/invoke.texi
+@@ -14076,6 +14076,10 @@ Enable Large System Extension instructions.  This is on by default for
+ @option{-march=armv8.1-a}.
+ @item fp16
+ Enable FP16 extension.  This also enables floating-point instructions.
++@item rcpc
++Enable the RcPc extension.  This does not change code generation from GCC,
++but is passed on to the assembler, enabling inline asm statements to use
++instructions from the RcPc extension.
+ 
+ @end table
+ 
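
A minimal sketch (not part of the patch) of what the new +rcpc extension
permits, assuming the usual Linux-style inline-asm idiom: GCC emits no RCpc
instructions itself, it only lets the assembler accept them in inline asm,
e.g. when compiling with something like -march=armv8.2-a+rcpc:

    /* Acquire load via the RCpc LDAPR instruction; the mnemonic is
       passed straight through to the assembler.  */
    static inline int load_acquire_rcpc (int *p)
    {
      int val;
      __asm__ volatile ("ldapr %w0, %1" : "=r" (val) : "Q" (*p) : "memory");
      return val;
    }
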
 --- a/src/gcc/doc/sourcebuild.texi
 +++ b/src/gcc/doc/sourcebuild.texi
 @@ -2274,6 +2274,11 @@ the codeset to convert to.
diff --git a/debian/patches/gcc-linaro-no-macros.diff b/debian/patches/gcc-linaro-no-macros.diff
index f09ecac..737d486 100644
--- a/debian/patches/gcc-linaro-no-macros.diff
+++ b/debian/patches/gcc-linaro-no-macros.diff
@@ -89,4 +89,4 @@ Index: b/src/gcc/LINARO-VERSION
 --- a/src/gcc/LINARO-VERSION
 +++ /dev/null
 @@ -1,1 +0,0 @@
--Snapshot 7.1-2017.07
+-Snapshot 7.2-2017.08
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index 60979e9..4df4ae0 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,8 +1,8 @@
-# DP: Changes for the Linaro 7-2017.07 snapshot.
+# DP: Changes for the Linaro 7-2017.08 snapshot.
 
 MSG=$(git log origin/linaro/gcc-7-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-7-branch --format=format:"%H" -n 1 --grep "gcc-7-branch@${SVN%.}"
 
-LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba57b1bcc5093f3b62f853ff83e976c2e \
+LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefefa91b044ffa4a4b868ef7188e5255a \
  | egrep -v '^(diff|index) ' \
  | filterdiff --strip=1 --addoldprefix=a/src/  --addnewprefix=b/src/ \
  | sed 's,a/src//dev/null,/dev/null,'
@@ -10,7 +10,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 --- /dev/null
 +++ b/src/gcc/LINARO-VERSION
 @@ -0,0 +1 @@
-+Snapshot 7.1-2017.07
++Snapshot 7.2-2017.08
 --- a/src/gcc/Makefile.in
 +++ b/src/gcc/Makefile.in
 @@ -845,10 +845,12 @@ BASEVER     := $(srcdir)/BASE-VER  # 4.x.y
@@ -48,7 +48,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  
 --- a/src/gcc/config.gcc
 +++ b/src/gcc/config.gcc
-@@ -3791,34 +3791,19 @@ case "${target}" in
+@@ -3796,34 +3796,19 @@ case "${target}" in
  		# Add extra multilibs
  		if test "x$with_multilib_list" != x; then
  			arm_multilibs=`echo $with_multilib_list | sed -e 's/,/ /g'`
@@ -96,7 +96,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  
  			if test "x${tmake_profile_file}" != x ; then
  				# arm/t-aprofile and arm/t-rmprofile are only
-@@ -3835,6 +3820,7 @@ case "${target}" in
+@@ -3840,6 +3825,7 @@ case "${target}" in
  				fi
  
  				tmake_file="${tmake_file} ${tmake_profile_file}"
@@ -104,6 +104,88 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  			fi
  		fi
  		;;
+--- a/src/gcc/config/aarch64/aarch64-cores.def
++++ b/src/gcc/config/aarch64/aarch64-cores.def
+@@ -43,7 +43,7 @@
+    VARIANT is the variant of the CPU.  In a GNU/Linux system it can be found
+    in /proc/cpuinfo.  If this is -1, this means it can match any variant.  */
+ 
+-/* V8 Architecture Processors.  */
++/* ARMv8-A Architecture Processors.  */
+ 
+ /* ARM ('A') cores. */
+ AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
+@@ -52,33 +52,35 @@ AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AA
+ AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
+ AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
+ 
+-/* Samsung ('S') cores. */
+-AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
+-
+-/* Qualcomm ('Q') cores. */
+-AARCH64_CORE("falkor",      falkor,    cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0xC00, -1)
+-AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0xC00, -1)
+-
+ /* Cavium ('C') cores. */
+ AARCH64_CORE("thunderx",      thunderx,      thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a0, -1)
+ /* Do not swap around "thunderxt88p1" and "thunderxt88",
+    this order is required to handle variant correctly. */
+-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO,	thunderx,  0x43, 0x0a1, 0)
+-AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1, -1)
++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO,	thunderxt88,  0x43, 0x0a1, 0)
++AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88,  0x43, 0x0a1, -1)
+ AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
+ AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
+-AARCH64_CORE("thunderx2t99",  thunderx2t99,  thunderx2t99, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
+ 
+ /* APM ('P') cores. */
+ AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
+ 
+-/* V8.1 Architecture Processors.  */
++/* Qualcomm ('Q') cores. */
++AARCH64_CORE("falkor",      falkor,    cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0xC00, -1)
++AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0xC00, -1)
++
++/* Samsung ('S') cores. */
++AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
++
++/* ARMv8.1-A Architecture Processors.  */
+ 
+ /* Broadcom ('B') cores. */
+ AARCH64_CORE("thunderx2t99p1",  thunderx2t99p1, thunderx2t99, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+ AARCH64_CORE("vulcan",  vulcan, thunderx2t99, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+ 
+-/* V8 big.LITTLE implementations.  */
++/* Cavium ('C') cores. */
++AARCH64_CORE("thunderx2t99",  thunderx2t99,  thunderx2t99, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
++
++/* ARMv8-A big.LITTLE implementations.  */
+ 
+ AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
+ AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
+--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
++++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
+@@ -136,8 +136,8 @@ const struct cpu_cost_table thunderx_extra_costs =
+     0,			/* Logical.  */
+     0,			/* Shift.  */
+     0,			/* Shift_reg.  */
+-    COSTS_N_INSNS (1),	/* Arith_shift.  */
+-    COSTS_N_INSNS (1),	/* Arith_shift_reg.  */
++    COSTS_N_INSNS (1)+1,	/* Arith_shift.  */
++    COSTS_N_INSNS (1)+1,	/* Arith_shift_reg.  */
+     COSTS_N_INSNS (1),	/* UNUSED: Log_shift.  */
+     COSTS_N_INSNS (1),	/* UNUSED: Log_shift_reg.  */
+     0,			/* Extend.  */
+--- a/src/gcc/config/aarch64/aarch64-option-extensions.def
++++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
+@@ -60,4 +60,7 @@ AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
+    Disabling "fp16" just disables "fp16".  */
+ AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp")
+ 
++/* Enabling or disabling "rcpc" only changes "rcpc".  */
++AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
++
+ #undef AARCH64_OPT_EXTENSION
 --- a/src/gcc/config/aarch64/aarch64-protos.h
 +++ b/src/gcc/config/aarch64/aarch64-protos.h
 @@ -203,6 +203,16 @@ struct cpu_approx_modes
@@ -162,6 +244,41 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  bool aarch64_function_arg_regno_p (unsigned);
 --- a/src/gcc/config/aarch64/aarch64-simd.md
 +++ b/src/gcc/config/aarch64/aarch64-simd.md
+@@ -44,12 +44,12 @@
+ (define_insn "aarch64_simd_dup<mode>"
+   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
+ 	(vec_duplicate:VDQ_I
+-	  (match_operand:<VEL> 1 "register_operand" "r, w")))]
++	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
+   "TARGET_SIMD"
+   "@
+-   dup\\t%0.<Vtype>, %<vw>1
+-   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
+-  [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
++   dup\\t%0.<Vtype>, %1.<Vetype>[0]
++   dup\\t%0.<Vtype>, %<vw>1"
++  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
+ )
+ 
+ (define_insn "aarch64_simd_dup<mode>"
+@@ -105,7 +105,7 @@
+      {
+      case 0: return "ldr\\t%d0, %1";
+      case 1: return "str\\t%d1, %0";
+-     case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
++     case 2: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+      case 3: return "umov\t%0, %1.d[0]";
+      case 4: return "fmov\t%d0, %1";
+      case 5: return "mov\t%0, %1";
+@@ -136,7 +136,7 @@
+     case 1:
+ 	return "str\\t%q1, %0";
+     case 2:
+-	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
++	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+     case 3:
+     case 4:
+     case 5:
 @@ -153,6 +153,19 @@
     (set_attr "length" "4,4,4,8,8,8,4")]
  )
@@ -207,6 +324,68 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  	    (match_operand:SI 2 "immediate_operand" "i")))]
    "TARGET_SIMD"
    {
+@@ -2796,38 +2809,10 @@
+    (match_operand:VDC 2 "register_operand")]
+   "TARGET_SIMD"
+ {
+-  rtx op1, op2;
+-  if (BYTES_BIG_ENDIAN)
+-    {
+-      op1 = operands[2];
+-      op2 = operands[1];
+-    }
+-  else
+-    {
+-      op1 = operands[1];
+-      op2 = operands[2];
+-    }
+-  emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
+-  DONE;
+-}
+-)
++  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+ 
+-(define_insn_and_split "aarch64_combine_internal<mode>"
+-  [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+-        (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
+-			   (match_operand:VDC 2 "register_operand" "w")))]
+-  "TARGET_SIMD"
+-  "#"
+-  "&& reload_completed"
+-  [(const_int 0)]
+-{
+-  if (BYTES_BIG_ENDIAN)
+-    aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
+-  else
+-    aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+   DONE;
+ }
+-[(set_attr "type" "multiple")]
+ )
+ 
+ (define_expand "aarch64_simd_combine<mode>"
+--- a/src/gcc/config/aarch64/aarch64-tune.md
++++ b/src/gcc/config/aarch64/aarch64-tune.md
+@@ -1,5 +1,5 @@
+ ;; -*- buffer-read-only: t -*-
+ ;; Generated automatically by gentune.sh from aarch64-cores.def
+ (define_attr "tune"
+-	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
++	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+ 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
+--- a/src/gcc/config/aarch64/aarch64-tuning-flags.def
++++ b/src/gcc/config/aarch64/aarch64-tuning-flags.def
+@@ -35,4 +35,10 @@ two load/stores are not at least 8 byte aligned don't create load/store
+ pairs.   */
+ AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
+ 
++/* Some of the optional shifts applied to arithmetic instructions are
++   considered cheap: a logical shift left of <= 4, with or without a
++   zero extend, is cheap.  A sign extend, or any shift other than a
++   logical shift left, is not considered cheap.  */
++AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
++
+ #undef AARCH64_EXTRA_TUNING_OPTION
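
For illustration (not part of the patch), the kind of expression this new
flag is about -- an arithmetic instruction with a small optional shift, as
produced for scaled array indexing:

    /* a + (b << 3) maps to a single "add x0, x0, x1, lsl 3" on AArch64.
       With cheap_shift_extend set (as the ThunderX tunings below do),
       the shifted form is costed like a plain add, so the combiner
       keeps it as one instruction.  */
    long scaled_add (long a, long b)
    {
      return a + (b << 3);
    }
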
 --- a/src/gcc/config/aarch64/aarch64.c
 +++ b/src/gcc/config/aarch64/aarch64.c
 @@ -193,10 +193,10 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
@@ -222,7 +401,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
      },
    0, /* pre_modify  */
    0, /* post_modify  */
-@@ -526,6 +526,43 @@ static const cpu_approx_modes xgene1_approx_modes =
+@@ -526,6 +526,61 @@ static const cpu_approx_modes xgene1_approx_modes =
    AARCH64_APPROX_ALL	/* recip_sqrt  */
  };
  
@@ -254,19 +433,37 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 +  3			/* default_opt_level  */
 +};
 +
++static const cpu_prefetch_tune thunderxt88_prefetch_tune =
++{
++  8,			/* num_slots  */
++  32,			/* l1_cache_size  */
++  128,			/* l1_cache_line_size  */
++  16*1024,		/* l2_cache_size  */
++  3			/* default_opt_level  */
++};
++
++static const cpu_prefetch_tune thunderx_prefetch_tune =
++{
++  8,			/* num_slots  */
++  32,			/* l1_cache_size  */
++  128,			/* l1_cache_line_size  */
++  -1,			/* l2_cache_size  */
++  -1			/* default_opt_level  */
++};
++
 +static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
 +{
-+  0,			/* num_slots  */
-+  -1,			/* l1_cache_size  */
++  8,			/* num_slots  */
++  32,			/* l1_cache_size  */
 +  64,			/* l1_cache_line_size  */
-+  -1,			/* l2_cache_size  */
++  256,			/* l2_cache_size  */
 +  -1			/* default_opt_level  */
 +};
 +
  static const struct tune_params generic_tunings =
  {
    &cortexa57_extra_costs,
-@@ -538,17 +575,17 @@ static const struct tune_params generic_tunings =
+@@ -538,17 +593,17 @@ static const struct tune_params generic_tunings =
    2, /* issue_rate  */
    (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
    8,	/* function_align.  */
@@ -289,7 +486,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  };
  
  static const struct tune_params cortexa35_tunings =
-@@ -564,7 +601,7 @@ static const struct tune_params cortexa35_tunings =
+@@ -564,7 +619,7 @@ static const struct tune_params cortexa35_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
    16,	/* function_align.  */
@@ -298,7 +495,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -572,9 +609,9 @@ static const struct tune_params cortexa35_tunings =
+@@ -572,9 +627,9 @@ static const struct tune_params cortexa35_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -310,7 +507,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  };
  
  static const struct tune_params cortexa53_tunings =
-@@ -590,7 +627,7 @@ static const struct tune_params cortexa53_tunings =
+@@ -590,7 +645,7 @@ static const struct tune_params cortexa53_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
    16,	/* function_align.  */
@@ -319,7 +516,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -598,9 +635,9 @@ static const struct tune_params cortexa53_tunings =
+@@ -598,9 +653,9 @@ static const struct tune_params cortexa53_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -331,7 +528,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  };
  
  static const struct tune_params cortexa57_tunings =
-@@ -616,7 +653,7 @@ static const struct tune_params cortexa57_tunings =
+@@ -616,7 +671,7 @@ static const struct tune_params cortexa57_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
    16,	/* function_align.  */
@@ -340,7 +537,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -624,9 +661,9 @@ static const struct tune_params cortexa57_tunings =
+@@ -624,9 +679,9 @@ static const struct tune_params cortexa57_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -352,7 +549,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  };
  
  static const struct tune_params cortexa72_tunings =
-@@ -642,7 +679,7 @@ static const struct tune_params cortexa72_tunings =
+@@ -642,7 +697,7 @@ static const struct tune_params cortexa72_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
    16,	/* function_align.  */
@@ -361,7 +558,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -650,9 +687,9 @@ static const struct tune_params cortexa72_tunings =
+@@ -650,9 +705,9 @@ static const struct tune_params cortexa72_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -373,7 +570,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  };
  
  static const struct tune_params cortexa73_tunings =
-@@ -668,7 +705,7 @@ static const struct tune_params cortexa73_tunings =
+@@ -668,7 +723,7 @@ static const struct tune_params cortexa73_tunings =
    (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
     | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
    16,	/* function_align.  */
@@ -382,7 +579,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    8,	/* loop_align.  */
    2,	/* int_reassoc_width.  */
    4,	/* fp_reassoc_width.  */
-@@ -676,11 +713,13 @@ static const struct tune_params cortexa73_tunings =
+@@ -676,11 +731,13 @@ static const struct tune_params cortexa73_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -398,7 +595,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  static const struct tune_params exynosm1_tunings =
  {
    &exynosm1_extra_costs,
-@@ -701,9 +740,9 @@ static const struct tune_params exynosm1_tunings =
+@@ -701,9 +758,34 @@ static const struct tune_params exynosm1_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    48,	/* max_case_values.  */
@@ -407,22 +604,48 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 -  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
 +  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
 +  &exynosm1_prefetch_tune
++};
++
++static const struct tune_params thunderxt88_tunings =
++{
++  &thunderx_extra_costs,
++  &generic_addrcost_table,
++  &thunderx_regmove_cost,
++  &thunderx_vector_cost,
++  &generic_branch_cost,
++  &generic_approx_modes,
++  6, /* memmov_cost  */
++  2, /* issue_rate  */
++  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
++  8,	/* function_align.  */
++  8,	/* jump_align.  */
++  8,	/* loop_align.  */
++  2,	/* int_reassoc_width.  */
++  4,	/* fp_reassoc_width.  */
++  1,	/* vec_reassoc_width.  */
++  2,	/* min_div_recip_mul_sf.  */
++  2,	/* min_div_recip_mul_df.  */
++  0,	/* max_case_values.  */
++  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
++  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW),	/* tune_flags.  */
++  &thunderxt88_prefetch_tune
  };
  
  static const struct tune_params thunderx_tunings =
-@@ -726,9 +765,9 @@ static const struct tune_params thunderx_tunings =
+@@ -726,9 +808,10 @@ static const struct tune_params thunderx_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
 -  0,	/* cache_line_size.  */
    tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
 -  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)	/* tune_flags.  */
-+  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW),	/* tune_flags.  */
-+  &generic_prefetch_tune
++  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
++   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
++  &thunderx_prefetch_tune
  };
  
  static const struct tune_params xgene1_tunings =
-@@ -751,9 +790,9 @@ static const struct tune_params xgene1_tunings =
+@@ -751,9 +834,9 @@ static const struct tune_params xgene1_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -434,7 +657,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  };
  
  static const struct tune_params qdf24xx_tunings =
-@@ -777,9 +816,9 @@ static const struct tune_params qdf24xx_tunings =
+@@ -777,9 +860,9 @@ static const struct tune_params qdf24xx_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
@@ -446,19 +669,103 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  };
  
  static const struct tune_params thunderx2t99_tunings =
-@@ -802,9 +841,9 @@ static const struct tune_params thunderx2t99_tunings =
+@@ -802,9 +885,9 @@ static const struct tune_params thunderx2t99_tunings =
    2,	/* min_div_recip_mul_sf.  */
    2,	/* min_div_recip_mul_df.  */
    0,	/* max_case_values.  */
 -  64,	/* cache_line_size.  */
-   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
+-  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
 -  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
++  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
 +  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
 +  &thunderx2t99_prefetch_tune
  };
  
  /* Support for fine-grained override of the tuning structures.  */
-@@ -2683,11 +2722,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+@@ -1649,41 +1732,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
+   machine_mode dst_mode = GET_MODE (dst);
+ 
+   gcc_assert (VECTOR_MODE_P (dst_mode));
++  gcc_assert (register_operand (dst, dst_mode)
++	      && register_operand (src1, src_mode)
++	      && register_operand (src2, src_mode));
+ 
+-  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
+-    {
+-      rtx (*gen) (rtx, rtx, rtx);
+-
+-      switch (src_mode)
+-	{
+-	case V8QImode:
+-	  gen = gen_aarch64_simd_combinev8qi;
+-	  break;
+-	case V4HImode:
+-	  gen = gen_aarch64_simd_combinev4hi;
+-	  break;
+-	case V2SImode:
+-	  gen = gen_aarch64_simd_combinev2si;
+-	  break;
+-	case V4HFmode:
+-	  gen = gen_aarch64_simd_combinev4hf;
+-	  break;
+-	case V2SFmode:
+-	  gen = gen_aarch64_simd_combinev2sf;
+-	  break;
+-	case DImode:
+-	  gen = gen_aarch64_simd_combinedi;
+-	  break;
+-	case DFmode:
+-	  gen = gen_aarch64_simd_combinedf;
+-	  break;
+-	default:
+-	  gcc_unreachable ();
+-	}
++  rtx (*gen) (rtx, rtx, rtx);
+ 
+-      emit_insn (gen (dst, src1, src2));
+-      return;
++  switch (src_mode)
++    {
++    case V8QImode:
++      gen = gen_aarch64_simd_combinev8qi;
++      break;
++    case V4HImode:
++      gen = gen_aarch64_simd_combinev4hi;
++      break;
++    case V2SImode:
++      gen = gen_aarch64_simd_combinev2si;
++      break;
++    case V4HFmode:
++      gen = gen_aarch64_simd_combinev4hf;
++      break;
++    case V2SFmode:
++      gen = gen_aarch64_simd_combinev2sf;
++      break;
++    case DImode:
++      gen = gen_aarch64_simd_combinedi;
++      break;
++    case DFmode:
++      gen = gen_aarch64_simd_combinedf;
++      break;
++    default:
++      gcc_unreachable ();
+     }
++
++  emit_insn (gen (dst, src1, src2));
++  return;
+ }
+ 
+ /* Split a complex SIMD move.  */
+@@ -1919,6 +2002,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
+ 	      gcc_assert (can_create_pseudo_p ());
+ 	      base = gen_reg_rtx (ptr_mode);
+ 	      aarch64_expand_mov_immediate (base, XEXP (mem, 0));
++	      if (ptr_mode != Pmode)
++		base = convert_memory_address (Pmode, base);
+ 	      mem = gen_rtx_MEM (ptr_mode, base);
+ 	    }
+ 
+@@ -2683,11 +2768,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
  		     plus_constant (Pmode, stack_pointer_rtx, -first));
  
        /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
@@ -483,7 +790,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
        /* Step 3: the loop
  
  	 do
-@@ -4549,6 +4596,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
+@@ -4549,6 +4642,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
      }
  }
  
@@ -508,7 +815,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  bool
  aarch64_symbolic_address_p (rtx x)
  {
-@@ -4633,6 +4698,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+@@ -4633,6 +4744,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
    return true;
  }
  
@@ -559,7 +866,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  /* Emit call insn with PAT and do aarch64-specific handling.  */
  
  void
-@@ -4705,7 +4814,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
+@@ -4705,7 +4860,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       the comparison will have to be swapped when we emit the assembly
       code.  */
    if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
@@ -568,7 +875,135 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
        && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
  	  || GET_CODE (x) == LSHIFTRT
  	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
-@@ -7482,17 +7591,13 @@ cost_plus:
+@@ -5112,6 +5267,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
+ 
+ 	case MEM:
+ 	  output_address (GET_MODE (x), XEXP (x, 0));
++	  /* Check all memory references are Pmode - even with ILP32.  */
++	  gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode);
+ 	  break;
+ 
+ 	case CONST:
+@@ -5976,9 +6133,10 @@ aarch64_strip_shift (rtx x)
+ /* Helper function for rtx cost calculation.  Strip an extend
+    expression from X.  Returns the inner operand if successful, or the
+    original expression on failure.  We deal with a number of possible
+-   canonicalization variations here.  */
++   canonicalization variations here. If STRIP_SHIFT is true, then
++   we can strip off a shift also.  */
+ static rtx
+-aarch64_strip_extend (rtx x)
++aarch64_strip_extend (rtx x, bool strip_shift)
+ {
+   rtx op = x;
+ 
+@@ -6002,7 +6160,8 @@ aarch64_strip_extend (rtx x)
+ 
+   /* Now handle extended register, as this may also have an optional
+      left shift by 1..4.  */
+-  if (GET_CODE (op) == ASHIFT
++  if (strip_shift
++      && GET_CODE (op) == ASHIFT
+       && CONST_INT_P (XEXP (op, 1))
+       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
+     op = XEXP (op, 0);
+@@ -6026,6 +6185,39 @@ aarch64_shift_p (enum rtx_code code)
+   return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
+ }
+ 
++
++/* Return true iff X is a cheap shift without a sign extend. */
++
++static bool
++aarch64_cheap_mult_shift_p (rtx x)
++{
++  rtx op0, op1;
++
++  op0 = XEXP (x, 0);
++  op1 = XEXP (x, 1);
++
++  if (!(aarch64_tune_params.extra_tuning_flags
++                      & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
++    return false;
++
++  if (GET_CODE (op0) == SIGN_EXTEND)
++    return false;
++
++  if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
++      && UINTVAL (op1) <= 4)
++    return true;
++
++  if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
++    return false;
++
++  HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
++
++  if (l2 > 0 && l2 <= 4)
++    return true;
++
++  return false;
++}
++
+ /* Helper function for rtx cost calculation.  Calculate the cost of
+    a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
+    Return the calculated cost of the expression, recursing manually in to
+@@ -6063,7 +6255,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+ 	    {
+ 	      if (compound_p)
+ 	        {
+-	          if (REG_P (op1))
++		  /* If the shift is considered cheap,
++		     then don't add any cost. */
++		  if (aarch64_cheap_mult_shift_p (x))
++		    ;
++	          else if (REG_P (op1))
+ 		    /* ARITH + shift-by-register.  */
+ 		    cost += extra_cost->alu.arith_shift_reg;
+ 		  else if (is_extend)
+@@ -6081,7 +6277,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+ 	    }
+ 	  /* Strip extends as we will have costed them in the case above.  */
+ 	  if (is_extend)
+-	    op0 = aarch64_strip_extend (op0);
++	    op0 = aarch64_strip_extend (op0, true);
+ 
+ 	  cost += rtx_cost (op0, VOIDmode, code, 0, speed);
+ 
+@@ -6925,13 +7121,13 @@ cost_minus:
+ 	    if (speed)
+ 	      *cost += extra_cost->alu.extend_arith;
+ 
+-	    op1 = aarch64_strip_extend (op1);
++	    op1 = aarch64_strip_extend (op1, true);
+ 	    *cost += rtx_cost (op1, VOIDmode,
+ 			       (enum rtx_code) GET_CODE (op1), 0, speed);
+ 	    return true;
+ 	  }
+ 
+-	rtx new_op1 = aarch64_strip_extend (op1);
++	rtx new_op1 = aarch64_strip_extend (op1, false);
+ 
+ 	/* Cost this as an FMA-alike operation.  */
+ 	if ((GET_CODE (new_op1) == MULT
+@@ -7004,7 +7200,7 @@ cost_plus:
+ 	    if (speed)
+ 	      *cost += extra_cost->alu.extend_arith;
+ 
+-	    op0 = aarch64_strip_extend (op0);
++	    op0 = aarch64_strip_extend (op0, true);
+ 	    *cost += rtx_cost (op0, VOIDmode,
+ 			       (enum rtx_code) GET_CODE (op0), 0, speed);
+ 	    return true;
+@@ -7012,7 +7208,7 @@ cost_plus:
+ 
+ 	/* Strip any extend, leave shifts behind as we will
+ 	   cost them through mult_cost.  */
+-	new_op0 = aarch64_strip_extend (op0);
++	new_op0 = aarch64_strip_extend (op0, false);
+ 
+ 	if (GET_CODE (new_op0) == MULT
+ 	    || aarch64_shift_p (GET_CODE (new_op0)))
+@@ -7482,17 +7678,13 @@ cost_plus:
      case UMOD:
        if (speed)
  	{
@@ -589,7 +1024,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  	}
        return false;  /* All arguments need to be in registers.  */
  
-@@ -7506,7 +7611,9 @@ cost_plus:
+@@ -7506,7 +7698,9 @@ cost_plus:
  	  else if (GET_MODE_CLASS (mode) == MODE_INT)
  	    /* There is no integer SQRT, so only DIV and UDIV can get
  	       here.  */
@@ -600,7 +1035,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  	  else
  	    *cost += extra_cost->fp[mode == DFmode].div;
  	}
-@@ -8687,12 +8794,38 @@ aarch64_override_options_internal (struct gcc_options *opts)
+@@ -8687,13 +8881,39 @@ aarch64_override_options_internal (struct gcc_options *opts)
  			 opts->x_param_values,
  			 global_options_set.x_param_values);
  
@@ -623,14 +1058,14 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
      maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
 -			   selected_cpu->tune->cache_line_size,
 +			   aarch64_tune_params.prefetch->l1_cache_line_size,
- 			   opts->x_param_values,
- 			   global_options_set.x_param_values);
++			   opts->x_param_values,
++			   global_options_set.x_param_values);
 +  if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
 +    maybe_set_param_value (PARAM_L2_CACHE_SIZE,
 +			   aarch64_tune_params.prefetch->l2_cache_size,
-+			   opts->x_param_values,
-+			   global_options_set.x_param_values);
-+
+ 			   opts->x_param_values,
+ 			   global_options_set.x_param_values);
+ 
 +  /* Enable sw prefetching at specified optimization level for
 +     CPUS that have prefetch.  Lower optimization level threshold by 1
 +     when profiling is enabled.  */
@@ -639,10 +1074,11 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 +      && aarch64_tune_params.prefetch->default_opt_level >= 0
 +      && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
 +    opts->x_flag_prefetch_loop_arrays = 1;
- 
++
    aarch64_override_options_after_change_1 (opts);
  }
-@@ -11647,6 +11780,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+ 
+@@ -11647,6 +11867,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
        return;
      }
  
@@ -700,7 +1136,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    /* Initialise a vector which is part-variable.  We want to first try
       to build those lanes which are constant in the most efficient way we
       can.  */
-@@ -11680,10 +11864,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+@@ -11680,10 +11951,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
      }
  
    /* Insert the variable lanes directly.  */
@@ -711,7 +1147,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    for (int i = 0; i < n_elts; i++)
      {
        rtx x = XVECEXP (vals, 0, i);
-@@ -12049,6 +12229,17 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12049,6 +12316,17 @@ aarch64_split_compare_and_swap (rtx operands[])
    mode = GET_MODE (mem);
    model = memmodel_from_int (INTVAL (model_rtx));
  
@@ -729,7 +1165,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    label1 = NULL;
    if (!is_weak)
      {
-@@ -12065,11 +12256,21 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12065,11 +12343,21 @@ aarch64_split_compare_and_swap (rtx operands[])
    else
      aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
  
@@ -756,7 +1192,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  
    aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
  
-@@ -12088,7 +12289,15 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12088,7 +12376,15 @@ aarch64_split_compare_and_swap (rtx operands[])
      }
  
    emit_label (label2);
@@ -773,6 +1209,49 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    /* Emit any final barrier needed for a __sync operation.  */
    if (is_mm_sync (model))
      aarch64_emit_post_barrier (model);
+--- a/src/gcc/config/aarch64/aarch64.h
++++ b/src/gcc/config/aarch64/aarch64.h
+@@ -98,14 +98,24 @@
+     && (ALIGN) < BITS_PER_WORD)			\
+    ? BITS_PER_WORD : ALIGN)
+ 
+-#define DATA_ALIGNMENT(EXP, ALIGN)		\
+-  ((((ALIGN) < BITS_PER_WORD)			\
+-    && (TREE_CODE (EXP) == ARRAY_TYPE		\
+-	|| TREE_CODE (EXP) == UNION_TYPE	\
+-	|| TREE_CODE (EXP) == RECORD_TYPE))	\
+-   ? BITS_PER_WORD : (ALIGN))
+-
+-#define LOCAL_ALIGNMENT(EXP, ALIGN) DATA_ALIGNMENT(EXP, ALIGN)
++/* Align definitions of arrays, unions and structures so that
++   initializations and copies can be made more efficient.  This is not
++   ABI-changing, so it only affects places where we can see the
++   definition.  Increasing the alignment tends to introduce padding,
++   so don't do this when optimizing for size/conserving stack space.  */
++#define AARCH64_EXPAND_ALIGNMENT(COND, EXP, ALIGN)			\
++  (((COND) && ((ALIGN) < BITS_PER_WORD)					\
++    && (TREE_CODE (EXP) == ARRAY_TYPE					\
++	|| TREE_CODE (EXP) == UNION_TYPE				\
++	|| TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
++
++/* Align global data.  */
++#define DATA_ALIGNMENT(EXP, ALIGN)			\
++  AARCH64_EXPAND_ALIGNMENT (!optimize_size, EXP, ALIGN)
++
++/* Similarly, make sure that objects on the stack are sensibly aligned.  */
++#define LOCAL_ALIGNMENT(EXP, ALIGN)				\
++  AARCH64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN)
+ 
+ #define STRUCTURE_SIZE_BOUNDARY		8
+ 
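
A hypothetical example (not from the patch) of the user-visible effect of
the reworked macros:

    /* Under the new DATA_ALIGNMENT, this file-scope array may be given
       word (8-byte) alignment at -O2 so that its initialization and
       copies can use wider accesses; at -Os the COND operand
       (!optimize_size) fails and the plain byte alignment is kept.  */
    char greeting[13] = "hello, world";

LOCAL_ALIGNMENT treats stack objects the same way, keyed off
-fconserve-stack instead.
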
+@@ -140,6 +150,7 @@ extern unsigned aarch64_architecture_version;
+ #define AARCH64_FL_F16	      (1 << 9)  /* Has ARMv8.2-A FP16 extensions.  */
+ /* ARMv8.3-A architecture extensions.  */
+ #define AARCH64_FL_V8_3	      (1 << 10)  /* Has ARMv8.3-A features.  */
++#define AARCH64_FL_RCPC	      (1 << 11)  /* Has support for RCpc model.  */
+ 
+ /* Has FP and SIMD.  */
+ #define AARCH64_FL_FPSIMD     (AARCH64_FL_FP | AARCH64_FL_SIMD)
 --- a/src/gcc/config/aarch64/aarch64.md
 +++ b/src/gcc/config/aarch64/aarch64.md
 @@ -519,27 +519,31 @@
@@ -1057,6 +1536,24 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    "(register_operand (operands[0], DImode)
      || aarch64_reg_or_zero (operands[1], DImode))"
    "@
+@@ -1123,7 +1017,7 @@
+    #
+    #
+    #
+-   orr\\t%0.16b, %1.16b, %1.16b
++   mov\\t%0.16b, %1.16b
+    ldp\\t%0, %H0, %1
+    stp\\t%1, %H1, %0
+    stp\\txzr, xzr, %0
+@@ -1237,7 +1131,7 @@
+   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
+     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
+   "@
+-   orr\\t%0.16b, %1.16b, %1.16b
++   mov\\t%0.16b, %1.16b
+    #
+    #
+    #
 @@ -2340,6 +2234,55 @@
    [(set_attr "type" "alus_sreg")]
  )
@@ -1113,7 +1610,26 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  (define_insn "*sub_<shift>_<mode>"
    [(set (match_operand:GPI 0 "register_operand" "=r")
  	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
-@@ -5030,14 +4973,16 @@
+@@ -4997,6 +4940,18 @@
+   [(set_attr "type" "f_minmax<stype>")]
+ )
+ 
++(define_expand "lrint<GPF:mode><GPI:mode>2"
++  [(match_operand:GPI 0 "register_operand")
++   (match_operand:GPF 1 "register_operand")]
++  "TARGET_FLOAT"
++{
++  rtx cvt = gen_reg_rtx (<GPF:MODE>mode);
++  emit_insn (gen_rint<GPF:mode>2 (cvt, operands[1]));
++  emit_insn (gen_lbtrunc<GPF:mode><GPI:mode>2 (operands[0], cvt));
++  DONE;
++}
++)
++
+ ;; For copysign (x, y), we want to generate:
+ ;;
+ ;;   LDR d2, #(1 << 63)
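
Not part of the patch, but a sketch of what the new lrint expander buys
(mirroring the inline-lrint tests added further down): with
-O2 -fno-math-errno, lrint () is expanded inline as frintx + fcvtzs instead
of a library call:

    #include <math.h>

    /* Compiles to "frintx d0, d0; fcvtzs x0, d0" rather than "bl lrint":
       round to integral in the current rounding mode, then convert.  */
    long round_nearest (double x)
    {
      return lrint (x);
    }
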
+@@ -5030,14 +4985,16 @@
     (match_operand:SF 2 "register_operand")]
    "TARGET_FLOAT && TARGET_SIMD"
  {
@@ -1163,6 +1679,24 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
         (match_operand:SI 4 "const_int_operand")			;; is_weak
         (match_operand:SI 5 "const_int_operand")			;; mod_s
         (match_operand:SI 6 "const_int_operand")]		;; mod_f
+@@ -94,7 +94,7 @@
+    (set (match_dup 1)
+     (unspec_volatile:SHORT
+       [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
+-       (match_operand:SHORT 3 "register_operand" "r")	;; desired
++       (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ")	;; desired
+        (match_operand:SI 4 "const_int_operand")		;; is_weak
+        (match_operand:SI 5 "const_int_operand")		;; mod_s
+        (match_operand:SI 6 "const_int_operand")]	;; mod_f
+@@ -119,7 +119,7 @@
+    (set (match_dup 1)
+     (unspec_volatile:GPI
+       [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
+-       (match_operand:GPI 3 "register_operand" "r")		;; desired
++       (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")		;; desired
+        (match_operand:SI 4 "const_int_operand")			;; is_weak
+        (match_operand:SI 5 "const_int_operand")			;; mod_s
+        (match_operand:SI 6 "const_int_operand")]		;; mod_f
 @@ -534,7 +534,7 @@
      (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
     (set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
@@ -1172,6 +1706,24 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
         (match_operand:SI 3 "const_int_operand")]
        UNSPECV_SX))]
    ""
+@@ -616,7 +616,7 @@
+   (set (match_dup 1)
+    (unspec_volatile:SHORT
+     [(match_dup 0)
+-     (match_operand:SHORT 2 "register_operand" "r")	;; value.
++     (match_operand:SHORT 2 "aarch64_reg_or_zero" "rZ")	;; value.
+      (match_operand:SI 3 "const_int_operand" "")]	;; model.
+     UNSPECV_ATOMIC_CAS))]
+  "TARGET_LSE && reload_completed"
+@@ -640,7 +640,7 @@
+   (set (match_dup 1)
+    (unspec_volatile:GPI
+     [(match_dup 0)
+-     (match_operand:GPI 2 "register_operand" "r")	;; value.
++     (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")	;; value.
+      (match_operand:SI 3 "const_int_operand" "")]	;; model.
+     UNSPECV_ATOMIC_CAS))]
+   "TARGET_LSE && reload_completed"
 --- a/src/gcc/config/aarch64/constraints.md
 +++ b/src/gcc/config/aarch64/constraints.md
 @@ -98,6 +98,14 @@
@@ -1259,7 +1811,12 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 +  "thunderx2t99_i1")
 --- a/src/gcc/config/arm/aarch-common-protos.h
 +++ b/src/gcc/config/arm/aarch-common-protos.h
-@@ -30,7 +30,9 @@ extern bool aarch_rev16_p (rtx);
+@@ -25,12 +25,13 @@
+ 
+ extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
+ extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
+-extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn *);
+ extern bool aarch_rev16_p (rtx);
  extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
  extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
  extern int arm_early_load_addr_dep (rtx, rtx);
@@ -1296,7 +1853,21 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  /* Return nonzero if the CONSUMER instruction (an ALU op) does not
     have an early register shift value or amount dependency on the
     result of PRODUCER.  */
-@@ -336,6 +354,24 @@ arm_early_store_addr_dep (rtx producer, rtx consumer)
+@@ -254,12 +272,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
+     return 0;
+ 
+   if ((early_op = arm_find_shift_sub_rtx (op)))
+-    {
+-      if (REG_P (early_op))
+-	early_op = op;
+-
+-      return !reg_overlap_mentioned_p (value, early_op);
+-    }
++    return !reg_overlap_mentioned_p (value, early_op);
+ 
+   return 0;
+ }
+@@ -336,6 +349,24 @@ arm_early_store_addr_dep (rtx producer, rtx consumer)
    return !arm_no_early_store_addr_dep (producer, consumer);
  }
  
@@ -1321,6 +1892,45 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  /* Return non-zero iff the consumer (a multiply-accumulate or a
     multiple-subtract instruction) has an accumulator dependency on the
     result of the producer and no other dependency on that result.  It
+@@ -472,38 +503,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
+   return (REGNO (dest) == REGNO (accumulator));
+ }
+ 
+-/* Return nonzero if the CONSUMER instruction is some sort of
+-   arithmetic or logic + shift operation, and the register we are
+-   writing in PRODUCER is not used in a register shift by register
+-   operation.  */
+-
+-int
+-aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
+-					   rtx_insn *consumer)
+-{
+-  rtx value, op;
+-  rtx early_op;
+-
+-  if (!arm_get_set_operands (producer, consumer, &value, &op))
+-    return 0;
+-
+-  if ((early_op = arm_find_shift_sub_rtx (op)))
+-    {
+-      if (REG_P (early_op))
+-	early_op = op;
+-
+-      /* Any other canonicalisation of a shift is a shift-by-constant
+-	 so we don't care.  */
+-      if (GET_CODE (early_op) == ASHIFT)
+-	return (!REG_P (XEXP (early_op, 0))
+-		|| !REG_P (XEXP (early_op, 1)));
+-      else
+-	return 1;
+-    }
+-
+-  return 0;
+-}
+-
+ /* Return non-zero if the consumer (a multiply-accumulate instruction)
+    has an accumulator dependency on the result of the producer (a
+    multiplication instruction) and no other dependency on that result.  */
 --- a/src/gcc/config/arm/aarch-cost-tables.h
 +++ b/src/gcc/config/arm/aarch-cost-tables.h
 @@ -154,7 +154,7 @@ const struct cpu_cost_table cortexa53_extra_costs =
@@ -1620,6 +2230,15 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  #endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC.  */
 --- a/src/gcc/config/arm/cortex-a53.md
 +++ b/src/gcc/config/arm/cortex-a53.md
+@@ -211,7 +211,7 @@
+ 
+ (define_bypass 1 "cortex_a53_alu*"
+ 		 "cortex_a53_alu_shift*"
+-		 "aarch_forward_to_shift_is_not_shifted_reg")
++		 "arm_no_early_alu_shift_dep")
+ 
+ (define_bypass 2 "cortex_a53_alu*"
+ 		 "cortex_a53_alu_*,cortex_a53_shift*")
 @@ -254,6 +254,16 @@
  		 "cortex_a53_store*"
  		 "arm_no_early_store_addr_dep")
@@ -1637,6 +2256,102 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  ;; Model a GP->FP register move as similar to stores.
  
  (define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*"
+@@ -501,19 +511,19 @@
+ ;; Floating-point arithmetic.
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+-(define_insn_reservation "cortex_a53_fpalu" 5
++(define_insn_reservation "cortex_a53_fpalu" 4
+   (and (eq_attr "tune" "cortexa53")
+ 	(eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,
+ 			f_cvt, fcmps, fcmpd, fccmps, fccmpd, fcsel,
+ 			f_rints, f_rintd, f_minmaxs, f_minmaxd"))
+   "cortex_a53_slot_any,cortex_a53_fp_alu")
+ 
+-(define_insn_reservation "cortex_a53_fconst" 3
++(define_insn_reservation "cortex_a53_fconst" 2
+   (and (eq_attr "tune" "cortexa53")
+        (eq_attr "type" "fconsts,fconstd"))
+   "cortex_a53_slot_any,cortex_a53_fp_alu")
+ 
+-(define_insn_reservation "cortex_a53_fpmul" 5
++(define_insn_reservation "cortex_a53_fpmul" 4
+   (and (eq_attr "tune" "cortexa53")
+        (eq_attr "type" "fmuls,fmuld"))
+   "cortex_a53_slot_any,cortex_a53_fp_mul")
+@@ -564,7 +574,7 @@
+ ;; Floating-point load/store.
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+-(define_insn_reservation "cortex_a53_f_load_64" 4
++(define_insn_reservation "cortex_a53_f_load_64" 3
+   (and (eq_attr "tune" "cortexa53")
+        (ior (eq_attr "type" "f_loads,f_loadd")
+ 	    (eq_attr "cortex_a53_advsimd_type"
+@@ -572,7 +582,7 @@
+   "cortex_a53_slot_any+cortex_a53_ls_agen,
+    cortex_a53_load")
+ 
+-(define_insn_reservation "cortex_a53_f_load_many" 5
++(define_insn_reservation "cortex_a53_f_load_many" 4
+   (and (eq_attr "tune" "cortexa53")
+        (eq_attr "cortex_a53_advsimd_type"
+ 		"advsimd_load_128,advsimd_load_lots"))
+@@ -606,22 +616,22 @@
+ ;; or a 128-bit operation in which case we require in our model that we
+ ;; issue from slot 0.
+ 
+-(define_insn_reservation "cortex_a53_advsimd_alu" 5
++(define_insn_reservation "cortex_a53_advsimd_alu" 4
+   (and (eq_attr "tune" "cortexa53")
+        (eq_attr "cortex_a53_advsimd_type" "advsimd_alu"))
+   "cortex_a53_slot_any,cortex_a53_fp_alu")
+ 
+-(define_insn_reservation "cortex_a53_advsimd_alu_q" 5
++(define_insn_reservation "cortex_a53_advsimd_alu_q" 4
+   (and (eq_attr "tune" "cortexa53")
+        (eq_attr "cortex_a53_advsimd_type" "advsimd_alu_q"))
+   "cortex_a53_slot0,cortex_a53_fp_alu_q")
+ 
+-(define_insn_reservation "cortex_a53_advsimd_mul" 5
++(define_insn_reservation "cortex_a53_advsimd_mul" 4
+   (and (eq_attr "tune" "cortexa53")
+        (eq_attr "cortex_a53_advsimd_type" "advsimd_mul"))
+   "cortex_a53_slot_any,cortex_a53_fp_mul")
+ 
+-(define_insn_reservation "cortex_a53_advsimd_mul_q" 5
++(define_insn_reservation "cortex_a53_advsimd_mul_q" 4
+   (and (eq_attr "tune" "cortexa53")
+        (eq_attr "cortex_a53_advsimd_type" "advsimd_mul_q"))
+   "cortex_a53_slot0,cortex_a53_fp_mul_q")
+@@ -700,20 +710,18 @@
+ ;; multiply-accumulate operations as a bypass reducing the latency
+ ;; of producing instructions to near zero.
+ 
+-(define_bypass 1 "cortex_a53_fp*,
++(define_bypass 1 "cortex_a53_fpalu,
++		  cortex_a53_fpmul,
+ 		  cortex_a53_r2f,
++		  cortex_a53_r2f_cvt,
++		  cortex_a53_fconst,
+ 		  cortex_a53_f_load*"
+ 		 "cortex_a53_fpmac"
+ 		 "aarch_accumulator_forwarding")
+ 
+-;; Model a bypass from the result of an FP operation to a use.
+-
+-(define_bypass 4 "cortex_a53_fpalu,
+-		  cortex_a53_fpmul"
+-		 "cortex_a53_fpalu,
+-		  cortex_a53_fpmul,
+-		  cortex_a53_fpmac,
+-		  cortex_a53_advsimd_div*")
++(define_bypass 4 "cortex_a53_fpmac"
++		 "cortex_a53_fpmac"
++		 "aarch_accumulator_forwarding")
+ 
+ ;; We want AESE and AESMC to end up consecutive to one another.
+ 
 --- a/src/gcc/config/arm/iterators.md
 +++ b/src/gcc/config/arm/iterators.md
 @@ -45,6 +45,9 @@
@@ -2038,6 +2753,17 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  DEBUG_COUNTER (registered_jump_thread)
  DEBUG_COUNTER (sched2_func)
  DEBUG_COUNTER (sched_block)
+--- a/src/gcc/emit-rtl.h
++++ b/src/gcc/emit-rtl.h
+@@ -267,7 +267,7 @@ struct GTY(()) rtl_data {
+ 
+   /* Nonzero if function being compiled doesn't contain any calls
+      (ignoring the prologue and epilogue).  This is set prior to
+-     local register allocation and is valid for the remaining
++     register allocation in IRA and is valid for the remaining
+      compiler passes.  */
+   bool is_leaf;
+ 
 --- a/src/gcc/expr.c
 +++ b/src/gcc/expr.c
 @@ -8838,6 +8838,15 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
@@ -2056,6 +2782,16 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  	   if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
  	     {
  	       emit_insn (uns_insns);
+--- a/src/gcc/generic-match-head.c
++++ b/src/gcc/generic-match-head.c
+@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "dumpfile.h"
+ #include "case-cfn-macros.h"
+ #include "gimplify.h"
++#include "optabs-tree.h"
+ 
+ 
+ /* Routine to determine if the types T1 and T2 are effectively
 --- a/src/gcc/gimple-fold.c
 +++ b/src/gcc/gimple-fold.c
 @@ -3252,6 +3252,28 @@ gimple_fold_builtin_acc_on_device (gimple_stmt_iterator *gsi, tree arg0)
@@ -2097,6 +2833,16 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
      default:;
      }
  
+--- a/src/gcc/gimple-match-head.c
++++ b/src/gcc/gimple-match-head.c
+@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "internal-fn.h"
+ #include "case-cfn-macros.h"
+ #include "gimplify.h"
++#include "optabs-tree.h"
+ 
+ 
+ /* Forward declarations of the private auto-generated matchers.
 --- a/src/gcc/lra-constraints.c
 +++ b/src/gcc/lra-constraints.c
 @@ -5394,6 +5394,29 @@ choose_split_class (enum reg_class allocno_class,
@@ -2195,6 +2941,114 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
    /* Clear self elimination offsets.  */
    for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
      self_elim_offsets[ep->from] = 0;
+--- a/src/gcc/lto/lto-partition.c
++++ b/src/gcc/lto/lto-partition.c
+@@ -132,7 +132,7 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
+ 
+   /* Be sure that we never try to duplicate partitioned symbol
+      or add external symbol.  */
+-  gcc_assert (c != SYMBOL_EXTERNAL
++  gcc_assert ((c != SYMBOL_EXTERNAL || node->alias)
+ 	      && (c == SYMBOL_DUPLICATE || !symbol_partitioned_p (node)));
+ 
+   part->symbols++;
+--- a/src/gcc/lto/lto-symtab.c
++++ b/src/gcc/lto/lto-symtab.c
+@@ -953,6 +953,42 @@ lto_symtab_merge_symbols (void)
+ 	      if (tgt)
+ 		node->resolve_alias (tgt, true);
+ 	    }
++	  /* If the symbol was preempted outside IR, see if we want to get rid
++	     of the definition.  */
++	  if (node->analyzed
++	      && !DECL_EXTERNAL (node->decl)
++	      && (node->resolution == LDPR_PREEMPTED_REG
++		  || node->resolution == LDPR_RESOLVED_IR
++		  || node->resolution == LDPR_RESOLVED_EXEC
++		  || node->resolution == LDPR_RESOLVED_DYN))
++	    {
++	      DECL_EXTERNAL (node->decl) = 1;
++	      /* If alias to local symbol was preempted by external definition,
++		 we know it is not pointing to the local symbol.  Remove it.  */
++	      if (node->alias
++		  && !node->weakref
++		  && !node->transparent_alias
++		  && node->get_alias_target ()->binds_to_current_def_p ())
++		{
++		  node->alias = false;
++		  node->remove_all_references ();
++		  node->definition = false;
++		  node->analyzed = false;
++		  node->cpp_implicit_alias = false;
++		}
++	      else if (!node->alias
++		       && node->definition
++		       && node->get_availability () <= AVAIL_INTERPOSABLE)
++		{
++		  if ((cnode = dyn_cast <cgraph_node *> (node)) != NULL)
++		    cnode->reset ();
++		  else
++		    {
++		      node->analyzed = node->definition = false;
++		      node->remove_all_references ();
++		    }
++		}
++	    }
+ 
+ 	  if (!(cnode = dyn_cast <cgraph_node *> (node))
+ 	      || !cnode->clone_of
+--- a/src/gcc/match.pd
++++ b/src/gcc/match.pd
+@@ -147,6 +147,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+     (op @0 integer_onep)
+     (non_lvalue @0)))
+ 
++/* (A / (1 << B)) -> (A >> B).
++   Only for unsigned A.  For signed A, this would not preserve rounding
++   toward zero.
++   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
++(simplify
++ (trunc_div @0 (lshift integer_onep@1 @2))
++ (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
++      && (!VECTOR_TYPE_P (type)
++	  || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
++	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
++  (rshift @0 @2)))
++
+ /* Preserve explicit divisions by 0: the C++ front-end wants to detect
+    undefined behavior in constexpr evaluation, and assuming that the division
+    traps enables better optimizations than these anyway.  */
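
A worked illustration (not part of the patch) of the new fold and its
signedness restriction:

    /* Folded by the new rule: for unsigned x, x / (1u << b) is exactly
       x >> b.  */
    unsigned int div_pow2 (unsigned int x, unsigned int b)
    {
      return x / (1u << b);
    }

    /* Must not be folded for signed x: C division rounds toward zero,
       so -1 / (1 << 1) == 0, while the arithmetic right shift gives
       -1 >> 1 == -1.  */
    int div_pow2_signed (int x, int b)
    {
      return x / (1 << b);
    }
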
+--- a/src/gcc/optabs-tree.c
++++ b/src/gcc/optabs-tree.c
+@@ -376,3 +376,18 @@ init_tree_optimization_optabs (tree optnode)
+       ggc_free (tmp_optabs);
+     }
+ }
++
++/* Return TRUE if the target has support for vector right shift of an
++   operand of type TYPE.  If OT_TYPE is OPTAB_DEFAULT, check for existence
++   of a shift by either a scalar or a vector.  Otherwise, check only
++   for a shift that matches OT_TYPE.  */
++
++bool
++target_supports_op_p (tree type, enum tree_code code,
++		      enum optab_subtype ot_subtype)
++{
++  optab ot = optab_for_tree_code (code, type, ot_subtype);
++  return (ot != unknown_optab
++	  && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
++}
++
+--- a/src/gcc/optabs-tree.h
++++ b/src/gcc/optabs-tree.h
+@@ -41,5 +41,7 @@ bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
+ bool expand_vec_cmp_expr_p (tree, tree, enum tree_code);
+ bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
+ void init_tree_optimization_optabs (tree);
++bool target_supports_op_p (tree, enum tree_code,
++			   enum optab_subtype = optab_default);
+ 
+ #endif
 --- a/src/gcc/reload1.c
 +++ b/src/gcc/reload1.c
 @@ -3821,6 +3821,7 @@ verify_initial_elim_offsets (void)
@@ -2371,6 +3225,31 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 +
 +/* { dg-final { scan-rtl-dump "\\(const_int 34 " "combine" { target aarch64*-*-* } } } */
 +
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/lto/pr69866_0.c
+@@ -0,0 +1,13 @@
++/* { dg-lto-do link } */
++
++int _umh(int i)
++{
++  return i+1;
++}
++
++int weaks(int i) __attribute__((weak, alias("_umh")));
++
++int main()
++{
++  return weaks(10);
++}
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/lto/pr69866_1.c
+@@ -0,0 +1,6 @@
++/* { dg-options { -fno-lto } } */
++
++int weaks(int i)
++{
++  return i+1;
++}
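
The two files above exercise the lto-symtab.c change: pr69866_0.c is
compiled with LTO and defines weaks only as a weak alias of _umh, while
pr69866_1.c is compiled with -fno-lto and provides a strong definition of
weaks that preempts the alias at link time.  A rough manual equivalent of
what the dg-lto harness does (hypothetical command lines, not part of the
test):

    gcc -flto    -c pr69866_0.c         # weak alias, carries LTO IR
    gcc -fno-lto -c pr69866_1.c         # strong definition, no IR
    gcc -flto pr69866_0.o pr69866_1.o   # must link ({ dg-lto-do link })
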
 --- a/src/gcc/testsuite/gcc.dg/pr47443.c
 +++ b/src/gcc/testsuite/gcc.dg/pr47443.c
 @@ -1,5 +1,6 @@
@@ -2397,6 +3276,34 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  
  /* Check that the expected warning is issued for large frames.  */
 --- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
+@@ -0,0 +1,25 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
++
++unsigned int
++f1 (unsigned int a, unsigned int b)
++{
++  unsigned int x = 1U << b;
++  return a / x;
++}
++
++unsigned long
++f2 (unsigned long a, int b)
++{
++  unsigned long x = 1UL << b;
++  return a / x;
++}
++
++unsigned long long
++f3 (unsigned long long a, int b)
++{
++  unsigned long long x = 1ULL << b;
++  return a / x;
++}
++
++/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */
+--- /dev/null
 +++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr79697.c
 @@ -0,0 +1,21 @@
 +/* { dg-do compile } */
@@ -2489,6 +3396,100 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 +/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
 +/* { dg-final { scan-assembler-not "dup\\t" } } */
 --- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/inline-lrint_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target lp64 } */
++/* { dg-options "-O3 -fno-math-errno" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-not "bl"    } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target ilp32 } */
++/* { dg-options "-O3 -fno-math-errno" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "fcvtzs\t\[w,x\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-not "bl"    } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/lrint-matherr.h
+@@ -0,0 +1,5 @@
++#define TEST(name, float_type, int_type, pref) void f_##name (float_type x) \
++{									    \
++  volatile float_type a = __builtin_rint (x);				    \
++  volatile int_type   b = __builtin_l##pref##rint (x);			    \
++}
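
With an empty PREF argument, TEST (dld, double, long, ) above expands to
roughly:

    void f_dld (double x)
    {
      volatile double a = __builtin_rint (x);
      volatile long   b = __builtin_lrint (x);
    }

so each TEST line pairs a rint call with the matching integer conversion,
which is what the frintx/fcvtzs scan-assembler counts in the four tests
below rely on.
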
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/no-inline-lrint_1.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target lp64 } */
++/* { dg-options "-O3" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "bl\tlrint"  4 } } */
++/* { dg-final { scan-assembler-times "bl\tllrint" 2 } } */
++/* { dg-final { scan-assembler-not "fcvtzs" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/no-inline-lrint_2.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target ilp32 } */
++/* { dg-options "-O3" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "bl\tlrint"  4 } } */
++/* { dg-final { scan-assembler-times "bl\tllrint" 2 } } */
++/* { dg-final { scan-assembler-not "fcvtzs" } } */
+--- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_1.c
 @@ -0,0 +1,18 @@
 +/* { dg-do compile } */
@@ -3548,6 +4549,23 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 +  return vaddq_f16 (vmulq_f16 (a, vnegq_f16 (b)), c);
 +}
 +/* { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
+--- a/src/gcc/testsuite/gcc.target/arm/its.c
++++ b/src/gcc/testsuite/gcc.target/arm/its.c
+@@ -1,4 +1,6 @@
+ /* { dg-do compile } */
++/* { dg-require-effective-target arm_cortex_m } */
++/* { dg-require-effective-target arm_thumb2 } */
+ /* { dg-options "-O2" }  */
+ int test (int a, int b)
+ {
+@@ -17,4 +19,6 @@ int test (int a, int b)
+     r -= 3;
+   return r;
+ }
+-/* { dg-final { scan-assembler-times "\tit" 2 { target arm_thumb2 } } } */
++/* Ensure there is no IT block with more than 2 instructions, i.e. we only
++   allow IT, ITT and ITE.  */
++/* { dg-final { scan-assembler-not "\\sit\[te\]{2}" } } */
 --- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/arm/movdi_movt.c
 @@ -0,0 +1,18 @@
@@ -3601,9 +4619,11 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
 +/* { dg-warning ".__ARM_FEATURE_LDREX. redefined" "" { target *-*-* } .-1 } */
 --- /dev/null
 +++ b/src/gcc/testsuite/gcc.target/arm/sdiv_costs_1.c
-@@ -0,0 +1,38 @@
+@@ -0,0 +1,40 @@
 +/* { dg-do compile } */
-+/* { dg-options "-O3 -march=armv8-a" } */
++/* { dg-options "-O3" } */
++/* { dg-require-effective-target arm_arch_v8a_ok } */
++/* { dg-add-options arm_arch_v8a } */
 +
 +/* Both sdiv and udiv can be used here, so prefer udiv.  */
 +int f1 (unsigned char *p)
@@ -3912,6 +4932,78 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
  # Return 1 if compilation with -freorder-blocks-and-partition is error-free
  # for trivial code, 0 otherwise.  As some targets (ARM for example) only
  # warn when -fprofile-use is also supplied we test that combination too.
+@@ -3768,12 +3779,13 @@ proc check_effective_target_arm_fp16_hw { } {
+ # can be selected and a routine to give the flags to select that architecture
+ # Note: Extra flags may be added to disable options from newer compilers
+ # (Thumb in particular - but others may be added in the future).
+-# -march=armv7ve is special and is handled explicitly after this loop because
+-# it needs more than one predefine check to identify.
+-# Warning: Do not use check_effective_target_arm_arch_*_ok for architecture
+-# extensions (e.g. ARMv8.1-A) since there is no macro defined for them.  See
+-# how only __ARM_ARCH_8A__ is checked for ARMv8.1-A.
+ # Usage: /* { dg-require-effective-target arm_arch_v5_ok } */
+ #        /* { dg-add-options arm_arch_v5 } */
+ #	 /* { dg-require-effective-target arm_arch_v5_multilib } */
+-foreach { armfunc armflag armdef } {
++foreach { armfunc armflag armdefs } {
+ 	v4 "-march=armv4 -marm" __ARM_ARCH_4__
+ 	v4t "-march=armv4t" __ARM_ARCH_4T__
+ 	v5 "-march=armv5 -marm" __ARM_ARCH_5__
+@@ -3788,20 +3800,23 @@ foreach { armfunc armflag armdef } {
+ 	v7r "-march=armv7-r" __ARM_ARCH_7R__
+ 	v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__
+ 	v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__
++	v7ve "-march=armv7ve -marm"
++		"__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
+ 	v8a "-march=armv8-a" __ARM_ARCH_8A__
+ 	v8_1a "-march=armv8.1a" __ARM_ARCH_8A__
+ 	v8_2a "-march=armv8.2a" __ARM_ARCH_8A__
+-	v8m_base "-march=armv8-m.base -mthumb -mfloat-abi=soft" __ARM_ARCH_8M_BASE__
++	v8m_base "-march=armv8-m.base -mthumb -mfloat-abi=soft"
++		__ARM_ARCH_8M_BASE__
+ 	v8m_main "-march=armv8-m.main -mthumb" __ARM_ARCH_8M_MAIN__ } {
+-    eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
++    eval [string map [list FUNC $armfunc FLAG $armflag DEFS $armdefs ] {
+ 	proc check_effective_target_arm_arch_FUNC_ok { } {
+ 	    if { [ string match "*-marm*" "FLAG" ] &&
+ 		![check_effective_target_arm_arm_ok] } {
+ 		return 0
+ 	    }
+ 	    return [check_no_compiler_messages arm_arch_FUNC_ok assembly {
+-		#if !defined (DEF)
+-		#error !DEF
++		#if !(DEFS)
++		#error !(DEFS)
+ 		#endif
+ 	    } "FLAG" ]
+ 	}
+@@ -3822,26 +3837,6 @@ foreach { armfunc armflag armdef } {
+     }]
+ }
+ 
+-# Same functions as above but for -march=armv7ve.  To uniquely identify
+-# -march=armv7ve we need to check for __ARM_ARCH_7A__ as well as
+-# __ARM_FEATURE_IDIV otherwise it aliases with armv7-a.
+-
+-proc check_effective_target_arm_arch_v7ve_ok { } {
+-  if { [ string match "*-marm*" "-march=armv7ve" ] &&
+-	![check_effective_target_arm_arm_ok] } {
+-		return 0
+-    }
+-  return [check_no_compiler_messages arm_arch_v7ve_ok assembly {
+-  #if !defined (__ARM_ARCH_7A__) || !defined (__ARM_FEATURE_IDIV)
+-  #error !armv7ve
+-  #endif
+-  } "-march=armv7ve" ]
+-}
+-
+-proc add_options_for_arm_arch_v7ve { flags } {
+-    return "$flags -march=armv7ve"
+-}
+-
+ # Return 1 if GCC was configured with --with-mode=
+ proc check_effective_target_default_mode { } {
+ 
 --- a/src/gcc/tree-ssa-dce.c
 +++ b/src/gcc/tree-ssa-dce.c
 @@ -233,6 +233,8 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/gcc-7.git


