[gcc-7] 275/354: * Update the Linaro support to the 7-2017.08 snapshot.
Ximin Luo
infinity0 at debian.org
Thu Nov 23 15:51:12 UTC 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch master
in repository gcc-7.
commit 5f54be74ea3d822a040c106e2bba4a23cbfb2637
Author: doko <doko at 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca>
Date: Tue Aug 22 11:56:22 2017 +0000
* Update the Linaro support to the 7-2017.08 snapshot.
git-svn-id: svn+ssh://svn.debian.org/svn/gcccvs/branches/sid/gcc-7@9641 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
debian/changelog | 6 +
debian/patches/gcc-linaro-doc.diff | 15 +-
debian/patches/gcc-linaro-no-macros.diff | 2 +-
debian/patches/gcc-linaro.diff | 1194 ++++++++++++++++++++++++++++--
4 files changed, 1164 insertions(+), 53 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index db3709e..761e31a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+gcc-7 (7.2.0-2) UNRELEASED; urgency=medium
+
+ * Update the Linaro support to the 7-2017.08 snapshot.
+
+ -- Matthias Klose <doko at debian.org> Tue, 22 Aug 2017 13:46:26 +0200
+
gcc-7 (7.2.0-1) unstable; urgency=medium
* GCC 7.2.0 release.
diff --git a/debian/patches/gcc-linaro-doc.diff b/debian/patches/gcc-linaro-doc.diff
index 4810486..ac879e0 100644
--- a/debian/patches/gcc-linaro-doc.diff
+++ b/debian/patches/gcc-linaro-doc.diff
@@ -1,4 +1,4 @@
-# DP: Changes for the Linaro 7-2017.07 snapshot (documentation).
+# DP: Changes for the Linaro 7-2017.08 snapshot (documentation).
--- a/src/gcc/doc/install.texi
+++ b/src/gcc/doc/install.texi
@@ -26,6 +26,19 @@
@multitable @columnfractions .15 .28 .30
@item Option @tab aprofile @tab rmprofile
+--- a/src/gcc/doc/invoke.texi
++++ b/src/gcc/doc/invoke.texi
+@@ -14076,6 +14076,10 @@ Enable Large System Extension instructions. This is on by default for
+ @option{-march=armv8.1-a}.
+ @item fp16
+ Enable FP16 extension. This also enables floating-point instructions.
++@item rcpc
++Enable the RcPc extension. This does not change code generation from GCC,
++but is passed on to the assembler, enabling inline asm statements to use
++instructions from the RcPc extension.
+
+ @end table
+
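For illustration (not part of the patch): GCC itself never emits RCpc
instructions, so the new +rcpc flag only matters for hand-written assembly.
A minimal sketch, with an assumed function name and assuming the patched
compiler accepts -march=armv8-a+rcpc:

    /* The +rcpc extension lets the assembler accept LDAPR here.  */
    static inline int load_acquire_rcpc (int *p)
    {
      int v;
      __asm__ volatile ("ldapr %w0, %1" : "=r" (v) : "Q" (*p) : "memory");
      return v;
    }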
--- a/src/gcc/doc/sourcebuild.texi
+++ b/src/gcc/doc/sourcebuild.texi
@@ -2274,6 +2274,11 @@ the codeset to convert to.
diff --git a/debian/patches/gcc-linaro-no-macros.diff b/debian/patches/gcc-linaro-no-macros.diff
index f09ecac..737d486 100644
--- a/debian/patches/gcc-linaro-no-macros.diff
+++ b/debian/patches/gcc-linaro-no-macros.diff
@@ -89,4 +89,4 @@ Index: b/src/gcc/LINARO-VERSION
--- a/src/gcc/LINARO-VERSION
+++ /dev/null
@@ -1,1 +0,0 @@
--Snapshot 7.1-2017.07
+-Snapshot 7.2-2017.08
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index 60979e9..4df4ae0 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,8 +1,8 @@
-# DP: Changes for the Linaro 7-2017.07 snapshot.
+# DP: Changes for the Linaro 7-2017.08 snapshot.
MSG=$(git log origin/linaro/gcc-7-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-7-branch --format=format:"%H" -n 1 --grep "gcc-7-branch@${SVN%.}"
-LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba57b1bcc5093f3b62f853ff83e976c2e \
+LANG=C git diff --no-renames 91215b2674c0c51f649dbe1dea7bc27d14d33a6f 966360eefefa91b044ffa4a4b868ef7188e5255a \
| egrep -v '^(diff|index) ' \
| filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ \
| sed 's,a/src//dev/null,/dev/null,'
@@ -10,7 +10,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
--- /dev/null
+++ b/src/gcc/LINARO-VERSION
@@ -0,0 +1 @@
-+Snapshot 7.1-2017.07
++Snapshot 7.2-2017.08
--- a/src/gcc/Makefile.in
+++ b/src/gcc/Makefile.in
@@ -845,10 +845,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
@@ -48,7 +48,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
--- a/src/gcc/config.gcc
+++ b/src/gcc/config.gcc
-@@ -3791,34 +3791,19 @@ case "${target}" in
+@@ -3796,34 +3796,19 @@ case "${target}" in
# Add extra multilibs
if test "x$with_multilib_list" != x; then
arm_multilibs=`echo $with_multilib_list | sed -e 's/,/ /g'`
@@ -96,7 +96,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
if test "x${tmake_profile_file}" != x ; then
# arm/t-aprofile and arm/t-rmprofile are only
-@@ -3835,6 +3820,7 @@ case "${target}" in
+@@ -3840,6 +3825,7 @@ case "${target}" in
fi
tmake_file="${tmake_file} ${tmake_profile_file}"
@@ -104,6 +104,88 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
fi
fi
;;
+--- a/src/gcc/config/aarch64/aarch64-cores.def
++++ b/src/gcc/config/aarch64/aarch64-cores.def
+@@ -43,7 +43,7 @@
+ VARIANT is the variant of the CPU. In a GNU/Linux system it can found
+ in /proc/cpuinfo. If this is -1, this means it can match any variant. */
+
+-/* V8 Architecture Processors. */
++/* ARMv8-A Architecture Processors. */
+
+ /* ARM ('A') cores. */
+ AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
+@@ -52,33 +52,35 @@ AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AA
+ AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
+ AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
+
+-/* Samsung ('S') cores. */
+-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
+-
+-/* Qualcomm ('Q') cores. */
+-AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
+-AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
+-
+ /* Cavium ('C') cores. */
+ AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
+ /* Do not swap around "thunderxt88p1" and "thunderxt88",
+ this order is required to handle variant correctly. */
+-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a1, 0)
+-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a1, -1)
++AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
++AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
+ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
+ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
+-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
+
+ /* APM ('P') cores. */
+ AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
+
+-/* V8.1 Architecture Processors. */
++/* Qualcomm ('Q') cores. */
++AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
++AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
++
++/* Samsung ('S') cores. */
++AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
++
++/* ARMv8.1-A Architecture Processors. */
+
+ /* Broadcom ('B') cores. */
+ AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+ AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+
+-/* V8 big.LITTLE implementations. */
++/* Cavium ('C') cores. */
++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
++
++/* ARMv8-A big.LITTLE implementations. */
+
+ AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
+ AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
+--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
++++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
+@@ -136,8 +136,8 @@ const struct cpu_cost_table thunderx_extra_costs =
+ 0, /* Logical. */
+ 0, /* Shift. */
+ 0, /* Shift_reg. */
+- COSTS_N_INSNS (1), /* Arith_shift. */
+- COSTS_N_INSNS (1), /* Arith_shift_reg. */
++ COSTS_N_INSNS (1)+1, /* Arith_shift. */
++ COSTS_N_INSNS (1)+1, /* Arith_shift_reg. */
+ COSTS_N_INSNS (1), /* UNUSED: Log_shift. */
+ COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */
+ 0, /* Extend. */
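For context: COSTS_N_INSNS (N) expands to (N) * 4 in gcc/rtl.h, so this
hunk raises the ThunderX cost of an arithmetic op with a folded shift from
4 (exactly one instruction) to 5 - dearer than a plain instruction but
cheaper than two - nudging the cost model away from such folds:

    /* #define COSTS_N_INSNS(N) ((N) * 4)       -- from gcc/rtl.h
       COSTS_N_INSNS (1)     == 4
       COSTS_N_INSNS (1) + 1 == 5   (between one insn and two)  */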
+--- a/src/gcc/config/aarch64/aarch64-option-extensions.def
++++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
+@@ -60,4 +60,7 @@ AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
+ Disabling "fp16" just disables "fp16". */
+ AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp")
+
++/* Enabling or disabling "rcpc" only changes "rcpc". */
++AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
++
+ #undef AARCH64_OPT_EXTENSION
--- a/src/gcc/config/aarch64/aarch64-protos.h
+++ b/src/gcc/config/aarch64/aarch64-protos.h
@@ -203,6 +203,16 @@ struct cpu_approx_modes
@@ -162,6 +244,41 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
bool aarch64_function_arg_regno_p (unsigned);
--- a/src/gcc/config/aarch64/aarch64-simd.md
+++ b/src/gcc/config/aarch64/aarch64-simd.md
+@@ -44,12 +44,12 @@
+ (define_insn "aarch64_simd_dup<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
+ (vec_duplicate:VDQ_I
+- (match_operand:<VEL> 1 "register_operand" "r, w")))]
++ (match_operand:<VEL> 1 "register_operand" "w,?r")))]
+ "TARGET_SIMD"
+ "@
+- dup\\t%0.<Vtype>, %<vw>1
+- dup\\t%0.<Vtype>, %1.<Vetype>[0]"
+- [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
++ dup\\t%0.<Vtype>, %1.<Vetype>[0]
++ dup\\t%0.<Vtype>, %<vw>1"
++ [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
+ )
+
+ (define_insn "aarch64_simd_dup<mode>"
+@@ -105,7 +105,7 @@
+ {
+ case 0: return "ldr\\t%d0, %1";
+ case 1: return "str\\t%d1, %0";
+- case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
++ case 2: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+ case 3: return "umov\t%0, %1.d[0]";
+ case 4: return "fmov\t%d0, %1";
+ case 5: return "mov\t%0, %1";
+@@ -136,7 +136,7 @@
+ case 1:
+ return "str\\t%q1, %0";
+ case 2:
+- return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
++ return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+ case 3:
+ case 4:
+ case 5:
@@ -153,6 +153,19 @@
(set_attr "length" "4,4,4,8,8,8,4")]
)
@@ -207,6 +324,68 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
+@@ -2796,38 +2809,10 @@
+ (match_operand:VDC 2 "register_operand")]
+ "TARGET_SIMD"
+ {
+- rtx op1, op2;
+- if (BYTES_BIG_ENDIAN)
+- {
+- op1 = operands[2];
+- op2 = operands[1];
+- }
+- else
+- {
+- op1 = operands[1];
+- op2 = operands[2];
+- }
+- emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
+- DONE;
+-}
+-)
++ aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+
+-(define_insn_and_split "aarch64_combine_internal<mode>"
+- [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+- (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
+- (match_operand:VDC 2 "register_operand" "w")))]
+- "TARGET_SIMD"
+- "#"
+- "&& reload_completed"
+- [(const_int 0)]
+-{
+- if (BYTES_BIG_ENDIAN)
+- aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
+- else
+- aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+-[(set_attr "type" "multiple")]
+ )
+
+ (define_expand "aarch64_simd_combine<mode>"
+--- a/src/gcc/config/aarch64/aarch64-tune.md
++++ b/src/gcc/config/aarch64/aarch64-tune.md
+@@ -1,5 +1,5 @@
+ ;; -*- buffer-read-only: t -*-
+ ;; Generated automatically by gentune.sh from aarch64-cores.def
+ (define_attr "tune"
+- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
++ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+ (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
+--- a/src/gcc/config/aarch64/aarch64-tuning-flags.def
++++ b/src/gcc/config/aarch64/aarch64-tuning-flags.def
+@@ -35,4 +35,10 @@ two load/stores are not at least 8 byte aligned don't create load/store
+ pairs. */
+ AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
+
++/* Some of the optional shift to some arthematic instructions are
++ considered cheap. Logical shift left <=4 with or without a
++ zero extend are considered cheap. Sign extend; non logical shift left
++ are not considered cheap. */
++AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
++
+ #undef AARCH64_EXTRA_TUNING_OPTION
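A sketch of the distinction this flag draws (illustrative C, not part of
the patch):

    /* Cheap when cheap_shift_extend is set: a logical shift left by <= 4
       folded into the add.  */
    long cheap (long a, long b) { return a + (b << 3); }  /* add x0, x0, x1, lsl 3 */

    /* Still costed as extra work: a folded sign extend.  */
    long costly (long a, int b) { return a + b; }         /* add x0, x0, w1, sxtw */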
--- a/src/gcc/config/aarch64/aarch64.c
+++ b/src/gcc/config/aarch64/aarch64.c
@@ -193,10 +193,10 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
@@ -222,7 +401,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
},
0, /* pre_modify */
0, /* post_modify */
-@@ -526,6 +526,43 @@ static const cpu_approx_modes xgene1_approx_modes =
+@@ -526,6 +526,61 @@ static const cpu_approx_modes xgene1_approx_modes =
AARCH64_APPROX_ALL /* recip_sqrt */
};
@@ -254,19 +433,37 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
+ 3 /* default_opt_level */
+};
+
++static const cpu_prefetch_tune thunderxt88_prefetch_tune =
++{
++ 8, /* num_slots */
++ 32, /* l1_cache_size */
++ 128, /* l1_cache_line_size */
++ 16*1024, /* l2_cache_size */
++ 3 /* default_opt_level */
++};
++
++static const cpu_prefetch_tune thunderx_prefetch_tune =
++{
++ 8, /* num_slots */
++ 32, /* l1_cache_size */
++ 128, /* l1_cache_line_size */
++ -1, /* l2_cache_size */
++ -1 /* default_opt_level */
++};
++
+static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
+{
-+ 0, /* num_slots */
-+ -1, /* l1_cache_size */
++ 8, /* num_slots */
++ 32, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
-+ -1, /* l2_cache_size */
++ 256, /* l2_cache_size */
+ -1 /* default_opt_level */
+};
+
static const struct tune_params generic_tunings =
{
&cortexa57_extra_costs,
-@@ -538,17 +575,17 @@ static const struct tune_params generic_tunings =
+@@ -538,17 +593,17 @@ static const struct tune_params generic_tunings =
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
8, /* function_align. */
@@ -289,7 +486,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
};
static const struct tune_params cortexa35_tunings =
-@@ -564,7 +601,7 @@ static const struct tune_params cortexa35_tunings =
+@@ -564,7 +619,7 @@ static const struct tune_params cortexa35_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
16, /* function_align. */
@@ -298,7 +495,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -572,9 +609,9 @@ static const struct tune_params cortexa35_tunings =
+@@ -572,9 +627,9 @@ static const struct tune_params cortexa35_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -310,7 +507,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
};
static const struct tune_params cortexa53_tunings =
-@@ -590,7 +627,7 @@ static const struct tune_params cortexa53_tunings =
+@@ -590,7 +645,7 @@ static const struct tune_params cortexa53_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
16, /* function_align. */
@@ -319,7 +516,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -598,9 +635,9 @@ static const struct tune_params cortexa53_tunings =
+@@ -598,9 +653,9 @@ static const struct tune_params cortexa53_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -331,7 +528,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
};
static const struct tune_params cortexa57_tunings =
-@@ -616,7 +653,7 @@ static const struct tune_params cortexa57_tunings =
+@@ -616,7 +671,7 @@ static const struct tune_params cortexa57_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
16, /* function_align. */
@@ -340,7 +537,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -624,9 +661,9 @@ static const struct tune_params cortexa57_tunings =
+@@ -624,9 +679,9 @@ static const struct tune_params cortexa57_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -352,7 +549,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
};
static const struct tune_params cortexa72_tunings =
-@@ -642,7 +679,7 @@ static const struct tune_params cortexa72_tunings =
+@@ -642,7 +697,7 @@ static const struct tune_params cortexa72_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
16, /* function_align. */
@@ -361,7 +558,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -650,9 +687,9 @@ static const struct tune_params cortexa72_tunings =
+@@ -650,9 +705,9 @@ static const struct tune_params cortexa72_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -373,7 +570,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
};
static const struct tune_params cortexa73_tunings =
-@@ -668,7 +705,7 @@ static const struct tune_params cortexa73_tunings =
+@@ -668,7 +723,7 @@ static const struct tune_params cortexa73_tunings =
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
16, /* function_align. */
@@ -382,7 +579,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
-@@ -676,11 +713,13 @@ static const struct tune_params cortexa73_tunings =
+@@ -676,11 +731,13 @@ static const struct tune_params cortexa73_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -398,7 +595,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
static const struct tune_params exynosm1_tunings =
{
&exynosm1_extra_costs,
-@@ -701,9 +740,9 @@ static const struct tune_params exynosm1_tunings =
+@@ -701,9 +758,34 @@ static const struct tune_params exynosm1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
48, /* max_case_values. */
@@ -407,22 +604,48 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &exynosm1_prefetch_tune
++};
++
++static const struct tune_params thunderxt88_tunings =
++{
++ &thunderx_extra_costs,
++ &generic_addrcost_table,
++ &thunderx_regmove_cost,
++ &thunderx_vector_cost,
++ &generic_branch_cost,
++ &generic_approx_modes,
++ 6, /* memmov_cost */
++ 2, /* issue_rate */
++ AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
++ 8, /* function_align. */
++ 8, /* jump_align. */
++ 8, /* loop_align. */
++ 2, /* int_reassoc_width. */
++ 4, /* fp_reassoc_width. */
++ 1, /* vec_reassoc_width. */
++ 2, /* min_div_recip_mul_sf. */
++ 2, /* min_div_recip_mul_df. */
++ 0, /* max_case_values. */
++ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
++ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
++ &thunderxt88_prefetch_tune
};
static const struct tune_params thunderx_tunings =
-@@ -726,9 +765,9 @@ static const struct tune_params thunderx_tunings =
+@@ -726,9 +808,10 @@ static const struct tune_params thunderx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
-+ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
-+ &generic_prefetch_tune
++ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
++ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
++ &thunderx_prefetch_tune
};
static const struct tune_params xgene1_tunings =
-@@ -751,9 +790,9 @@ static const struct tune_params xgene1_tunings =
+@@ -751,9 +834,9 @@ static const struct tune_params xgene1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -434,7 +657,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
};
static const struct tune_params qdf24xx_tunings =
-@@ -777,9 +816,9 @@ static const struct tune_params qdf24xx_tunings =
+@@ -777,9 +860,9 @@ static const struct tune_params qdf24xx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
@@ -446,19 +669,103 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
};
static const struct tune_params thunderx2t99_tunings =
-@@ -802,9 +841,9 @@ static const struct tune_params thunderx2t99_tunings =
+@@ -802,9 +885,9 @@ static const struct tune_params thunderx2t99_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 64, /* cache_line_size. */
- tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+- tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &thunderx2t99_prefetch_tune
};
/* Support for fine-grained override of the tuning structures. */
-@@ -2683,11 +2722,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+@@ -1649,41 +1732,41 @@ aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
+ machine_mode dst_mode = GET_MODE (dst);
+
+ gcc_assert (VECTOR_MODE_P (dst_mode));
++ gcc_assert (register_operand (dst, dst_mode)
++ && register_operand (src1, src_mode)
++ && register_operand (src2, src_mode));
+
+- if (REG_P (dst) && REG_P (src1) && REG_P (src2))
+- {
+- rtx (*gen) (rtx, rtx, rtx);
+-
+- switch (src_mode)
+- {
+- case V8QImode:
+- gen = gen_aarch64_simd_combinev8qi;
+- break;
+- case V4HImode:
+- gen = gen_aarch64_simd_combinev4hi;
+- break;
+- case V2SImode:
+- gen = gen_aarch64_simd_combinev2si;
+- break;
+- case V4HFmode:
+- gen = gen_aarch64_simd_combinev4hf;
+- break;
+- case V2SFmode:
+- gen = gen_aarch64_simd_combinev2sf;
+- break;
+- case DImode:
+- gen = gen_aarch64_simd_combinedi;
+- break;
+- case DFmode:
+- gen = gen_aarch64_simd_combinedf;
+- break;
+- default:
+- gcc_unreachable ();
+- }
++ rtx (*gen) (rtx, rtx, rtx);
+
+- emit_insn (gen (dst, src1, src2));
+- return;
++ switch (src_mode)
++ {
++ case V8QImode:
++ gen = gen_aarch64_simd_combinev8qi;
++ break;
++ case V4HImode:
++ gen = gen_aarch64_simd_combinev4hi;
++ break;
++ case V2SImode:
++ gen = gen_aarch64_simd_combinev2si;
++ break;
++ case V4HFmode:
++ gen = gen_aarch64_simd_combinev4hf;
++ break;
++ case V2SFmode:
++ gen = gen_aarch64_simd_combinev2sf;
++ break;
++ case DImode:
++ gen = gen_aarch64_simd_combinedi;
++ break;
++ case DFmode:
++ gen = gen_aarch64_simd_combinedf;
++ break;
++ default:
++ gcc_unreachable ();
+ }
++
++ emit_insn (gen (dst, src1, src2));
++ return;
+ }
+
+ /* Split a complex SIMD move. */
+@@ -1919,6 +2002,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
+ gcc_assert (can_create_pseudo_p ());
+ base = gen_reg_rtx (ptr_mode);
+ aarch64_expand_mov_immediate (base, XEXP (mem, 0));
++ if (ptr_mode != Pmode)
++ base = convert_memory_address (Pmode, base);
+ mem = gen_rtx_MEM (ptr_mode, base);
+ }
+
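Background on the two added lines: under -mabi=ilp32, pointers are SImode
(ptr_mode) while addresses are DImode (Pmode), so the literal-pool base
computed in ptr_mode must be widened with convert_memory_address before it
can be used to form a memory address.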
+@@ -2683,11 +2768,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
plus_constant (Pmode, stack_pointer_rtx, -first));
/* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
@@ -483,7 +790,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Step 3: the loop
do
-@@ -4549,6 +4596,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
+@@ -4549,6 +4642,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
}
}
@@ -508,7 +815,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
bool
aarch64_symbolic_address_p (rtx x)
{
-@@ -4633,6 +4698,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+@@ -4633,6 +4744,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
return true;
}
@@ -559,7 +866,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Emit call insn with PAT and do aarch64-specific handling. */
void
-@@ -4705,7 +4814,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
+@@ -4705,7 +4860,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
the comparison will have to be swapped when we emit the assembly
code. */
if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
@@ -568,7 +875,135 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
&& (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
|| GET_CODE (x) == LSHIFTRT
|| GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
-@@ -7482,17 +7591,13 @@ cost_plus:
+@@ -5112,6 +5267,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
+
+ case MEM:
+ output_address (GET_MODE (x), XEXP (x, 0));
++ /* Check all memory references are Pmode - even with ILP32. */
++ gcc_assert (GET_MODE (XEXP (x, 0)) == Pmode);
+ break;
+
+ case CONST:
+@@ -5976,9 +6133,10 @@ aarch64_strip_shift (rtx x)
+ /* Helper function for rtx cost calculation. Strip an extend
+ expression from X. Returns the inner operand if successful, or the
+ original expression on failure. We deal with a number of possible
+- canonicalization variations here. */
++ canonicalization variations here. If STRIP_SHIFT is true, then
++ we can strip off a shift also. */
+ static rtx
+-aarch64_strip_extend (rtx x)
++aarch64_strip_extend (rtx x, bool strip_shift)
+ {
+ rtx op = x;
+
+@@ -6002,7 +6160,8 @@ aarch64_strip_extend (rtx x)
+
+ /* Now handle extended register, as this may also have an optional
+ left shift by 1..4. */
+- if (GET_CODE (op) == ASHIFT
++ if (strip_shift
++ && GET_CODE (op) == ASHIFT
+ && CONST_INT_P (XEXP (op, 1))
+ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
+ op = XEXP (op, 0);
+@@ -6026,6 +6185,39 @@ aarch64_shift_p (enum rtx_code code)
+ return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
+ }
+
++
++/* Return true iff X is a cheap shift without a sign extend. */
++
++static bool
++aarch64_cheap_mult_shift_p (rtx x)
++{
++ rtx op0, op1;
++
++ op0 = XEXP (x, 0);
++ op1 = XEXP (x, 1);
++
++ if (!(aarch64_tune_params.extra_tuning_flags
++ & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
++ return false;
++
++ if (GET_CODE (op0) == SIGN_EXTEND)
++ return false;
++
++ if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
++ && UINTVAL (op1) <= 4)
++ return true;
++
++ if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
++ return false;
++
++ HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
++
++ if (l2 > 0 && l2 <= 4)
++ return true;
++
++ return false;
++}
++
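The MULT case above covers the canonical RTL form of the same shifts: a
multiply by a power of two in [2, 16] is really a shift left by 1..4
(exact_log2 (8) == 3), so for example:

    /* 'a + b * 8' and 'a + (b << 3)' are the same add-with-lsl on
       AArch64, and both are classified as cheap by this predicate.  */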
+ /* Helper function for rtx cost calculation. Calculate the cost of
+ a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
+ Return the calculated cost of the expression, recursing manually in to
+@@ -6063,7 +6255,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+ {
+ if (compound_p)
+ {
+- if (REG_P (op1))
++ /* If the shift is considered cheap,
++ then don't add any cost. */
++ if (aarch64_cheap_mult_shift_p (x))
++ ;
++ else if (REG_P (op1))
+ /* ARITH + shift-by-register. */
+ cost += extra_cost->alu.arith_shift_reg;
+ else if (is_extend)
+@@ -6081,7 +6277,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
+ }
+ /* Strip extends as we will have costed them in the case above. */
+ if (is_extend)
+- op0 = aarch64_strip_extend (op0);
++ op0 = aarch64_strip_extend (op0, true);
+
+ cost += rtx_cost (op0, VOIDmode, code, 0, speed);
+
+@@ -6925,13 +7121,13 @@ cost_minus:
+ if (speed)
+ *cost += extra_cost->alu.extend_arith;
+
+- op1 = aarch64_strip_extend (op1);
++ op1 = aarch64_strip_extend (op1, true);
+ *cost += rtx_cost (op1, VOIDmode,
+ (enum rtx_code) GET_CODE (op1), 0, speed);
+ return true;
+ }
+
+- rtx new_op1 = aarch64_strip_extend (op1);
++ rtx new_op1 = aarch64_strip_extend (op1, false);
+
+ /* Cost this as an FMA-alike operation. */
+ if ((GET_CODE (new_op1) == MULT
+@@ -7004,7 +7200,7 @@ cost_plus:
+ if (speed)
+ *cost += extra_cost->alu.extend_arith;
+
+- op0 = aarch64_strip_extend (op0);
++ op0 = aarch64_strip_extend (op0, true);
+ *cost += rtx_cost (op0, VOIDmode,
+ (enum rtx_code) GET_CODE (op0), 0, speed);
+ return true;
+@@ -7012,7 +7208,7 @@ cost_plus:
+
+ /* Strip any extend, leave shifts behind as we will
+ cost them through mult_cost. */
+- new_op0 = aarch64_strip_extend (op0);
++ new_op0 = aarch64_strip_extend (op0, false);
+
+ if (GET_CODE (new_op0) == MULT
+ || aarch64_shift_p (GET_CODE (new_op0)))
+@@ -7482,17 +7678,13 @@ cost_plus:
case UMOD:
if (speed)
{
@@ -589,7 +1024,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
}
return false; /* All arguments need to be in registers. */
-@@ -7506,7 +7611,9 @@ cost_plus:
+@@ -7506,7 +7698,9 @@ cost_plus:
else if (GET_MODE_CLASS (mode) == MODE_INT)
/* There is no integer SQRT, so only DIV and UDIV can get
here. */
@@ -600,7 +1035,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
else
*cost += extra_cost->fp[mode == DFmode].div;
}
-@@ -8687,12 +8794,38 @@ aarch64_override_options_internal (struct gcc_options *opts)
+@@ -8687,13 +8881,39 @@ aarch64_override_options_internal (struct gcc_options *opts)
opts->x_param_values,
global_options_set.x_param_values);
@@ -623,14 +1058,14 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
- selected_cpu->tune->cache_line_size,
+ aarch64_tune_params.prefetch->l1_cache_line_size,
- opts->x_param_values,
- global_options_set.x_param_values);
++ opts->x_param_values,
++ global_options_set.x_param_values);
+ if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
+ maybe_set_param_value (PARAM_L2_CACHE_SIZE,
+ aarch64_tune_params.prefetch->l2_cache_size,
-+ opts->x_param_values,
-+ global_options_set.x_param_values);
-+
+ opts->x_param_values,
+ global_options_set.x_param_values);
+
+ /* Enable sw prefetching at specified optimization level for
+ CPUS that have prefetch. Lower optimization level threshold by 1
+ when profiling is enabled. */
@@ -639,10 +1074,11 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
+ && aarch64_tune_params.prefetch->default_opt_level >= 0
+ && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
+ opts->x_flag_prefetch_loop_arrays = 1;
-
++
aarch64_override_options_after_change_1 (opts);
}
-@@ -11647,6 +11780,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+
+@@ -11647,6 +11867,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
return;
}
@@ -700,7 +1136,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Initialise a vector which is part-variable. We want to first try
to build those lanes which are constant in the most efficient way we
can. */
-@@ -11680,10 +11864,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+@@ -11680,10 +11951,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
}
/* Insert the variable lanes directly. */
@@ -711,7 +1147,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
for (int i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
-@@ -12049,6 +12229,17 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12049,6 +12316,17 @@ aarch64_split_compare_and_swap (rtx operands[])
mode = GET_MODE (mem);
model = memmodel_from_int (INTVAL (model_rtx));
@@ -729,7 +1165,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
label1 = NULL;
if (!is_weak)
{
-@@ -12065,11 +12256,21 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12065,11 +12343,21 @@ aarch64_split_compare_and_swap (rtx operands[])
else
aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
@@ -756,7 +1192,7 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
-@@ -12088,7 +12289,15 @@ aarch64_split_compare_and_swap (rtx operands[])
+@@ -12088,7 +12376,15 @@ aarch64_split_compare_and_swap (rtx operands[])
}
emit_label (label2);
@@ -773,6 +1209,49 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Emit any final barrier needed for a __sync operation. */
if (is_mm_sync (model))
aarch64_emit_post_barrier (model);
+--- a/src/gcc/config/aarch64/aarch64.h
++++ b/src/gcc/config/aarch64/aarch64.h
+@@ -98,14 +98,24 @@
+ && (ALIGN) < BITS_PER_WORD) \
+ ? BITS_PER_WORD : ALIGN)
+
+-#define DATA_ALIGNMENT(EXP, ALIGN) \
+- ((((ALIGN) < BITS_PER_WORD) \
+- && (TREE_CODE (EXP) == ARRAY_TYPE \
+- || TREE_CODE (EXP) == UNION_TYPE \
+- || TREE_CODE (EXP) == RECORD_TYPE)) \
+- ? BITS_PER_WORD : (ALIGN))
+-
+-#define LOCAL_ALIGNMENT(EXP, ALIGN) DATA_ALIGNMENT(EXP, ALIGN)
++/* Align definitions of arrays, unions and structures so that
++ initializations and copies can be made more efficient. This is not
++ ABI-changing, so it only affects places where we can see the
++ definition. Increasing the alignment tends to introduce padding,
++ so don't do this when optimizing for size/conserving stack space. */
++#define AARCH64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \
++ (((COND) && ((ALIGN) < BITS_PER_WORD) \
++ && (TREE_CODE (EXP) == ARRAY_TYPE \
++ || TREE_CODE (EXP) == UNION_TYPE \
++ || TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
++
++/* Align global data. */
++#define DATA_ALIGNMENT(EXP, ALIGN) \
++ AARCH64_EXPAND_ALIGNMENT (!optimize_size, EXP, ALIGN)
++
++/* Similarly, make sure that objects on the stack are sensibly aligned. */
++#define LOCAL_ALIGNMENT(EXP, ALIGN) \
++ AARCH64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN)
+
+ #define STRUCTURE_SIZE_BOUNDARY 8
+
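For illustration (not from the patch): with the rewritten macros, a
definition such as

    char buf[13] = "hello, world";

still gets word (64-bit) alignment at -O2, making initialisation and
copying cheaper, but keeps its natural alignment under -Os (globals, via
!optimize_size) or -fconserve-stack (locals), where the padding would be
wasted space.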
+@@ -140,6 +150,7 @@ extern unsigned aarch64_architecture_version;
+ #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */
+ /* ARMv8.3-A architecture extensions. */
+ #define AARCH64_FL_V8_3 (1 << 10) /* Has ARMv8.3-A features. */
++#define AARCH64_FL_RCPC (1 << 11) /* Has support for RCpc model. */
+
+ /* Has FP and SIMD. */
+ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
--- a/src/gcc/config/aarch64/aarch64.md
+++ b/src/gcc/config/aarch64/aarch64.md
@@ -519,27 +519,31 @@
@@ -1057,6 +1536,24 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
+@@ -1123,7 +1017,7 @@
+ #
+ #
+ #
+- orr\\t%0.16b, %1.16b, %1.16b
++ mov\\t%0.16b, %1.16b
+ ldp\\t%0, %H0, %1
+ stp\\t%1, %H1, %0
+ stp\\txzr, xzr, %0
+@@ -1237,7 +1131,7 @@
+ "TARGET_FLOAT && (register_operand (operands[0], TFmode)
+ || aarch64_reg_or_fp_zero (operands[1], TFmode))"
+ "@
+- orr\\t%0.16b, %1.16b, %1.16b
++ mov\\t%0.16b, %1.16b
+ #
+ #
+ #
@@ -2340,6 +2234,55 @@
[(set_attr "type" "alus_sreg")]
)
@@ -1113,7 +1610,26 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
(define_insn "*sub_<shift>_<mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(minus:GPI (match_operand:GPI 3 "register_operand" "r")
-@@ -5030,14 +4973,16 @@
+@@ -4997,6 +4940,18 @@
+ [(set_attr "type" "f_minmax<stype>")]
+ )
+
++(define_expand "lrint<GPF:mode><GPI:mode>2"
++ [(match_operand:GPI 0 "register_operand")
++ (match_operand:GPF 1 "register_operand")]
++ "TARGET_FLOAT"
++{
++ rtx cvt = gen_reg_rtx (<GPF:MODE>mode);
++ emit_insn (gen_rint<GPF:mode>2 (cvt, operands[1]));
++ emit_insn (gen_lbtrunc<GPF:mode><GPI:mode>2 (operands[0], cvt));
++ DONE;
++}
++)
++
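In C terms (illustrative; the testcases added later in this patch check
exactly this):

    /* With -O3 -fno-math-errno on LP64, this is now expanded inline as
           frintx  d0, d0
           fcvtzs  x0, d0
       instead of a call to lrint.  */
    long f (double x) { return __builtin_lrint (x); }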
+ ;; For copysign (x, y), we want to generate:
+ ;;
+ ;; LDR d2, #(1 << 63)
+@@ -5030,14 +4985,16 @@
(match_operand:SF 2 "register_operand")]
"TARGET_FLOAT && TARGET_SIMD"
{
@@ -1163,6 +1679,24 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
(match_operand:SI 4 "const_int_operand") ;; is_weak
(match_operand:SI 5 "const_int_operand") ;; mod_s
(match_operand:SI 6 "const_int_operand")] ;; mod_f
+@@ -94,7 +94,7 @@
+ (set (match_dup 1)
+ (unspec_volatile:SHORT
+ [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected
+- (match_operand:SHORT 3 "register_operand" "r") ;; desired
++ (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+@@ -119,7 +119,7 @@
+ (set (match_dup 1)
+ (unspec_volatile:GPI
+ [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect
+- (match_operand:GPI 3 "register_operand" "r") ;; desired
++ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
@@ -534,7 +534,7 @@
(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
(set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
@@ -1172,6 +1706,24 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
(match_operand:SI 3 "const_int_operand")]
UNSPECV_SX))]
""
+@@ -616,7 +616,7 @@
+ (set (match_dup 1)
+ (unspec_volatile:SHORT
+ [(match_dup 0)
+- (match_operand:SHORT 2 "register_operand" "r") ;; value.
++ (match_operand:SHORT 2 "aarch64_reg_or_zero" "rZ") ;; value.
+ (match_operand:SI 3 "const_int_operand" "")] ;; model.
+ UNSPECV_ATOMIC_CAS))]
+ "TARGET_LSE && reload_completed"
+@@ -640,7 +640,7 @@
+ (set (match_dup 1)
+ (unspec_volatile:GPI
+ [(match_dup 0)
+- (match_operand:GPI 2 "register_operand" "r") ;; value.
++ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") ;; value.
+ (match_operand:SI 3 "const_int_operand" "")] ;; model.
+ UNSPECV_ATOMIC_CAS))]
+ "TARGET_LSE && reload_completed"
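What the relaxed rZ constraints buy (illustrative C, not part of the
patch): a compare-and-swap whose desired value is zero can now encode the
wzr/xzr zero register directly instead of first materialising 0 in a
scratch register:

    int cas_clear (int *p, int expected)
    {
      return __atomic_compare_exchange_n (p, &expected, 0, /* weak */ 0,
                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }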
--- a/src/gcc/config/aarch64/constraints.md
+++ b/src/gcc/config/aarch64/constraints.md
@@ -98,6 +98,14 @@
@@ -1259,7 +1811,12 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
+ "thunderx2t99_i1")
--- a/src/gcc/config/arm/aarch-common-protos.h
+++ b/src/gcc/config/arm/aarch-common-protos.h
-@@ -30,7 +30,9 @@ extern bool aarch_rev16_p (rtx);
+@@ -25,12 +25,13 @@
+
+ extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
+ extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
+-extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn *);
+ extern bool aarch_rev16_p (rtx);
extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
extern int arm_early_load_addr_dep (rtx, rtx);
@@ -1296,7 +1853,21 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
have an early register shift value or amount dependency on the
result of PRODUCER. */
-@@ -336,6 +354,24 @@ arm_early_store_addr_dep (rtx producer, rtx consumer)
+@@ -254,12 +272,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
+ return 0;
+
+ if ((early_op = arm_find_shift_sub_rtx (op)))
+- {
+- if (REG_P (early_op))
+- early_op = op;
+-
+- return !reg_overlap_mentioned_p (value, early_op);
+- }
++ return !reg_overlap_mentioned_p (value, early_op);
+
+ return 0;
+ }
+@@ -336,6 +349,24 @@ arm_early_store_addr_dep (rtx producer, rtx consumer)
return !arm_no_early_store_addr_dep (producer, consumer);
}
@@ -1321,6 +1892,45 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Return non-zero iff the consumer (a multiply-accumulate or a
multiple-subtract instruction) has an accumulator dependency on the
result of the producer and no other dependency on that result. It
+@@ -472,38 +503,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
+ return (REGNO (dest) == REGNO (accumulator));
+ }
+
+-/* Return nonzero if the CONSUMER instruction is some sort of
+- arithmetic or logic + shift operation, and the register we are
+- writing in PRODUCER is not used in a register shift by register
+- operation. */
+-
+-int
+-aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
+- rtx_insn *consumer)
+-{
+- rtx value, op;
+- rtx early_op;
+-
+- if (!arm_get_set_operands (producer, consumer, &value, &op))
+- return 0;
+-
+- if ((early_op = arm_find_shift_sub_rtx (op)))
+- {
+- if (REG_P (early_op))
+- early_op = op;
+-
+- /* Any other canonicalisation of a shift is a shift-by-constant
+- so we don't care. */
+- if (GET_CODE (early_op) == ASHIFT)
+- return (!REG_P (XEXP (early_op, 0))
+- || !REG_P (XEXP (early_op, 1)));
+- else
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+ /* Return non-zero if the consumer (a multiply-accumulate instruction)
+ has an accumulator dependency on the result of the producer (a
+ multiplication instruction) and no other dependency on that result. */
--- a/src/gcc/config/arm/aarch-cost-tables.h
+++ b/src/gcc/config/arm/aarch-cost-tables.h
@@ -154,7 +154,7 @@ const struct cpu_cost_table cortexa53_extra_costs =
@@ -1620,6 +2230,15 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
#endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC. */
--- a/src/gcc/config/arm/cortex-a53.md
+++ b/src/gcc/config/arm/cortex-a53.md
+@@ -211,7 +211,7 @@
+
+ (define_bypass 1 "cortex_a53_alu*"
+ "cortex_a53_alu_shift*"
+- "aarch_forward_to_shift_is_not_shifted_reg")
++ "arm_no_early_alu_shift_dep")
+
+ (define_bypass 2 "cortex_a53_alu*"
+ "cortex_a53_alu_*,cortex_a53_shift*")
@@ -254,6 +254,16 @@
"cortex_a53_store*"
"arm_no_early_store_addr_dep")
@@ -1637,6 +2256,102 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
;; Model a GP->FP register move as similar to stores.
(define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*"
+@@ -501,19 +511,19 @@
+ ;; Floating-point arithmetic.
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+-(define_insn_reservation "cortex_a53_fpalu" 5
++(define_insn_reservation "cortex_a53_fpalu" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,
+ f_cvt, fcmps, fcmpd, fccmps, fccmpd, fcsel,
+ f_rints, f_rintd, f_minmaxs, f_minmaxd"))
+ "cortex_a53_slot_any,cortex_a53_fp_alu")
+
+-(define_insn_reservation "cortex_a53_fconst" 3
++(define_insn_reservation "cortex_a53_fconst" 2
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "fconsts,fconstd"))
+ "cortex_a53_slot_any,cortex_a53_fp_alu")
+
+-(define_insn_reservation "cortex_a53_fpmul" 5
++(define_insn_reservation "cortex_a53_fpmul" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "fmuls,fmuld"))
+ "cortex_a53_slot_any,cortex_a53_fp_mul")
+@@ -564,7 +574,7 @@
+ ;; Floating-point load/store.
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+-(define_insn_reservation "cortex_a53_f_load_64" 4
++(define_insn_reservation "cortex_a53_f_load_64" 3
+ (and (eq_attr "tune" "cortexa53")
+ (ior (eq_attr "type" "f_loads,f_loadd")
+ (eq_attr "cortex_a53_advsimd_type"
+@@ -572,7 +582,7 @@
+ "cortex_a53_slot_any+cortex_a53_ls_agen,
+ cortex_a53_load")
+
+-(define_insn_reservation "cortex_a53_f_load_many" 5
++(define_insn_reservation "cortex_a53_f_load_many" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "cortex_a53_advsimd_type"
+ "advsimd_load_128,advsimd_load_lots"))
+@@ -606,22 +616,22 @@
+ ;; or a 128-bit operation in which case we require in our model that we
+ ;; issue from slot 0.
+
+-(define_insn_reservation "cortex_a53_advsimd_alu" 5
++(define_insn_reservation "cortex_a53_advsimd_alu" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "cortex_a53_advsimd_type" "advsimd_alu"))
+ "cortex_a53_slot_any,cortex_a53_fp_alu")
+
+-(define_insn_reservation "cortex_a53_advsimd_alu_q" 5
++(define_insn_reservation "cortex_a53_advsimd_alu_q" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "cortex_a53_advsimd_type" "advsimd_alu_q"))
+ "cortex_a53_slot0,cortex_a53_fp_alu_q")
+
+-(define_insn_reservation "cortex_a53_advsimd_mul" 5
++(define_insn_reservation "cortex_a53_advsimd_mul" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "cortex_a53_advsimd_type" "advsimd_mul"))
+ "cortex_a53_slot_any,cortex_a53_fp_mul")
+
+-(define_insn_reservation "cortex_a53_advsimd_mul_q" 5
++(define_insn_reservation "cortex_a53_advsimd_mul_q" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "cortex_a53_advsimd_type" "advsimd_mul_q"))
+ "cortex_a53_slot0,cortex_a53_fp_mul_q")
+@@ -700,20 +710,18 @@
+ ;; multiply-accumulate operations as a bypass reducing the latency
+ ;; of producing instructions to near zero.
+
+-(define_bypass 1 "cortex_a53_fp*,
++(define_bypass 1 "cortex_a53_fpalu,
++ cortex_a53_fpmul,
+ cortex_a53_r2f,
++ cortex_a53_r2f_cvt,
++ cortex_a53_fconst,
+ cortex_a53_f_load*"
+ "cortex_a53_fpmac"
+ "aarch_accumulator_forwarding")
+
+-;; Model a bypass from the result of an FP operation to a use.
+-
+-(define_bypass 4 "cortex_a53_fpalu,
+- cortex_a53_fpmul"
+- "cortex_a53_fpalu,
+- cortex_a53_fpmul,
+- cortex_a53_fpmac,
+- cortex_a53_advsimd_div*")
++(define_bypass 4 "cortex_a53_fpmac"
++ "cortex_a53_fpmac"
++ "aarch_accumulator_forwarding")
+
+ ;; We want AESE and AESMC to end up consecutive to one another.
+
--- a/src/gcc/config/arm/iterators.md
+++ b/src/gcc/config/arm/iterators.md
@@ -45,6 +45,9 @@
@@ -2038,6 +2753,17 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
DEBUG_COUNTER (registered_jump_thread)
DEBUG_COUNTER (sched2_func)
DEBUG_COUNTER (sched_block)
+--- a/src/gcc/emit-rtl.h
++++ b/src/gcc/emit-rtl.h
+@@ -267,7 +267,7 @@ struct GTY(()) rtl_data {
+
+ /* Nonzero if function being compiled doesn't contain any calls
+ (ignoring the prologue and epilogue). This is set prior to
+- local register allocation and is valid for the remaining
++ register allocation in IRA and is valid for the remaining
+ compiler passes. */
+ bool is_leaf;
+
--- a/src/gcc/expr.c
+++ b/src/gcc/expr.c
@@ -8838,6 +8838,15 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
@@ -2056,6 +2782,16 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
{
emit_insn (uns_insns);
+--- a/src/gcc/generic-match-head.c
++++ b/src/gcc/generic-match-head.c
+@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "dumpfile.h"
+ #include "case-cfn-macros.h"
+ #include "gimplify.h"
++#include "optabs-tree.h"
+
+
+ /* Routine to determine if the types T1 and T2 are effectively
--- a/src/gcc/gimple-fold.c
+++ b/src/gcc/gimple-fold.c
@@ -3252,6 +3252,28 @@ gimple_fold_builtin_acc_on_device (gimple_stmt_iterator *gsi, tree arg0)
@@ -2097,6 +2833,16 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
default:;
}
+--- a/src/gcc/gimple-match-head.c
++++ b/src/gcc/gimple-match-head.c
+@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "internal-fn.h"
+ #include "case-cfn-macros.h"
+ #include "gimplify.h"
++#include "optabs-tree.h"
+
+
+ /* Forward declarations of the private auto-generated matchers.
--- a/src/gcc/lra-constraints.c
+++ b/src/gcc/lra-constraints.c
@@ -5394,6 +5394,29 @@ choose_split_class (enum reg_class allocno_class,
@@ -2195,6 +2941,114 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Clear self elimination offsets. */
for (ep = reg_eliminate; ep < ®_eliminate[NUM_ELIMINABLE_REGS]; ep++)
self_elim_offsets[ep->from] = 0;
+--- a/src/gcc/lto/lto-partition.c
++++ b/src/gcc/lto/lto-partition.c
+@@ -132,7 +132,7 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
+
+ /* Be sure that we never try to duplicate partitioned symbol
+ or add external symbol. */
+- gcc_assert (c != SYMBOL_EXTERNAL
++ gcc_assert ((c != SYMBOL_EXTERNAL || node->alias)
+ && (c == SYMBOL_DUPLICATE || !symbol_partitioned_p (node)));
+
+ part->symbols++;
+--- a/src/gcc/lto/lto-symtab.c
++++ b/src/gcc/lto/lto-symtab.c
+@@ -953,6 +953,42 @@ lto_symtab_merge_symbols (void)
+ if (tgt)
+ node->resolve_alias (tgt, true);
+ }
++ /* If the symbol was preempted outside IR, see if we want to get rid
++ of the definition. */
++ if (node->analyzed
++ && !DECL_EXTERNAL (node->decl)
++ && (node->resolution == LDPR_PREEMPTED_REG
++ || node->resolution == LDPR_RESOLVED_IR
++ || node->resolution == LDPR_RESOLVED_EXEC
++ || node->resolution == LDPR_RESOLVED_DYN))
++ {
++ DECL_EXTERNAL (node->decl) = 1;
++ /* If alias to local symbol was preempted by external definition,
++ we know it is not pointing to the local symbol. Remove it. */
++ if (node->alias
++ && !node->weakref
++ && !node->transparent_alias
++ && node->get_alias_target ()->binds_to_current_def_p ())
++ {
++ node->alias = false;
++ node->remove_all_references ();
++ node->definition = false;
++ node->analyzed = false;
++ node->cpp_implicit_alias = false;
++ }
++ else if (!node->alias
++ && node->definition
++ && node->get_availability () <= AVAIL_INTERPOSABLE)
++ {
++ if ((cnode = dyn_cast <cgraph_node *> (node)) != NULL)
++ cnode->reset ();
++ else
++ {
++ node->analyzed = node->definition = false;
++ node->remove_all_references ();
++ }
++ }
++ }
+
+ if (!(cnode = dyn_cast <cgraph_node *> (node))
+ || !cnode->clone_of
+--- a/src/gcc/match.pd
++++ b/src/gcc/match.pd
+@@ -147,6 +147,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ (op @0 integer_onep)
+ (non_lvalue @0)))
+
++/* (A / (1 << B)) -> (A >> B).
++ Only for unsigned A. For signed A, this would not preserve rounding
++ toward zero.
++ For example: (-1 / ( 1 << B)) != -1 >> B. */
++(simplify
++ (trunc_div @0 (lshift integer_onep@1 @2))
++ (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
++ && (!VECTOR_TYPE_P (type)
++ || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
++ || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
++ (rshift @0 @2)))
++
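A worked example of the guard (illustrative): the fold is exact for
unsigned values but not for negative signed ones, because truncating
division rounds toward zero while an arithmetic right shift rounds toward
minus infinity:

    unsigned f (unsigned a, int b) { return a / (1u << b); }  /* becomes a >> b */
    /* Signed counterexample: -1 / (1 << 1) == 0, but -1 >> 1 == -1,
       so signed dividends are excluded unless known non-negative.  */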
+ /* Preserve explicit divisions by 0: the C++ front-end wants to detect
+ undefined behavior in constexpr evaluation, and assuming that the division
+ traps enables better optimizations than these anyway. */
+--- a/src/gcc/optabs-tree.c
++++ b/src/gcc/optabs-tree.c
+@@ -376,3 +376,18 @@ init_tree_optimization_optabs (tree optnode)
+ ggc_free (tmp_optabs);
+ }
+ }
++
++/* Return TRUE if the target has support for vector right shift of an
++ operand of type TYPE. If OT_TYPE is OPTAB_DEFAULT, check for existence
++ of a shift by either a scalar or a vector. Otherwise, check only
++ for a shift that matches OT_TYPE. */
++
++bool
++target_supports_op_p (tree type, enum tree_code code,
++ enum optab_subtype ot_subtype)
++{
++ optab ot = optab_for_tree_code (code, type, ot_subtype);
++ return (ot != unknown_optab
++ && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
++}
++
+--- a/src/gcc/optabs-tree.h
++++ b/src/gcc/optabs-tree.h
+@@ -41,5 +41,7 @@ bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
+ bool expand_vec_cmp_expr_p (tree, tree, enum tree_code);
+ bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
+ void init_tree_optimization_optabs (tree);
++bool target_supports_op_p (tree, enum tree_code,
++ enum optab_subtype = optab_default);
+
+ #endif
--- a/src/gcc/reload1.c
+++ b/src/gcc/reload1.c
@@ -3821,6 +3821,7 @@ verify_initial_elim_offsets (void)
@@ -2371,6 +3225,31 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
+
+/* { dg-final { scan-rtl-dump "\\(const_int 34 " "combine" { target aarch64*-*-* } } } */
+
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/lto/pr69866_0.c
+@@ -0,0 +1,13 @@
++/* { dg-lto-do link } */
++
++int _umh(int i)
++{
++ return i+1;
++}
++
++int weaks(int i) __attribute__((weak, alias("_umh")));
++
++int main()
++{
++ return weaks(10);
++}
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/lto/pr69866_1.c
+@@ -0,0 +1,6 @@
++/* { dg-options { -fno-lto } } */
++
++int weaks(int i)
++{
++ return i+1;
++}
--- a/src/gcc/testsuite/gcc.dg/pr47443.c
+++ b/src/gcc/testsuite/gcc.dg/pr47443.c
@@ -1,5 +1,6 @@
@@ -2397,6 +3276,34 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
/* Check that the expected warning is issued for large frames. */
--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
+@@ -0,0 +1,25 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
++
++unsigned int
++f1 (unsigned int a, unsigned int b)
++{
++ unsigned int x = 1U << b;
++ return a / x;
++}
++
++unsigned long
++f2 (unsigned long a, int b)
++{
++ unsigned long x = 1UL << b;
++ return a / x;
++}
++
++unsigned long long
++f3 (unsigned long long a, int b)
++{
++ unsigned long long x = 1ULL << b;
++ return a / x;
++}
++
++/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */
+--- /dev/null
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr79697.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
@@ -2489,6 +3396,100 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
+/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
+/* { dg-final { scan-assembler-not "dup\\t" } } */
--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/inline-lrint_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target lp64 } */
++/* { dg-options "-O3 -fno-math-errno" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "fcvtzs\tx\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-not "bl" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target ilp32 } */
++/* { dg-options "-O3 -fno-math-errno" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "fcvtzs\t\[w,x\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-not "bl" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/lrint-matherr.h
+@@ -0,0 +1,5 @@
++#define TEST(name, float_type, int_type, pref) void f_##name (float_type x) \
++{ \
++ volatile float_type a = __builtin_rint (x); \
++ volatile int_type b = __builtin_l##pref##rint (x); \
++}
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/no-inline-lrint_1.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target lp64 } */
++/* { dg-options "-O3" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "bl\tlrint" 4 } } */
++/* { dg-final { scan-assembler-times "bl\tllrint" 2 } } */
++/* { dg-final { scan-assembler-not "fcvtzs" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/no-inline-lrint_2.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target ilp32 } */
++/* { dg-options "-O3" } */
++
++#include "lrint-matherr.h"
++
++TEST (dld, double, long, )
++TEST (flf, float , long, )
++
++TEST (did, double, int, )
++TEST (fif, float , int, )
++
++TEST (dlld, double, long long, l)
++TEST (fllf, float , long long, l)
++
++/* { dg-final { scan-assembler-times "frintx\t\[d,s\]\[0-9\]+, \[d,s\]\[0-9\]+" 6 } } */
++/* { dg-final { scan-assembler-times "bl\tlrint" 4 } } */
++/* { dg-final { scan-assembler-times "bl\tllrint" 2 } } */
++/* { dg-final { scan-assembler-not "fcvtzs" } } */
+--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
@@ -3548,6 +4549,23 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
+ return vaddq_f16 (vmulq_f16 (a, vnegq_f16 (b)), c);
+}
+/* { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
+--- a/src/gcc/testsuite/gcc.target/arm/its.c
++++ b/src/gcc/testsuite/gcc.target/arm/its.c
+@@ -1,4 +1,6 @@
+ /* { dg-do compile } */
++/* { dg-require-effective-target arm_cortex_m } */
++/* { dg-require-effective-target arm_thumb2 } */
+ /* { dg-options "-O2" } */
+ int test (int a, int b)
+ {
+@@ -17,4 +19,6 @@ int test (int a, int b)
+ r -= 3;
+ return r;
+ }
+-/* { dg-final { scan-assembler-times "\tit" 2 { target arm_thumb2 } } } */
++/* Ensure there is no IT block with more than 2 instructions, ie. we only allow
++ IT, ITT and ITE. */
++/* { dg-final { scan-assembler-not "\\sit\[te\]{2}" } } */
--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/arm/movdi_movt.c
@@ -0,0 +1,18 @@
@@ -3601,9 +4619,11 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
+/* { dg-warning ".__ARM_FEATURE_LDREX. redefined" "" { target *-*-* } .-1 } */
--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/arm/sdiv_costs_1.c
-@@ -0,0 +1,38 @@
+@@ -0,0 +1,40 @@
+/* { dg-do compile } */
-+/* { dg-options "-O3 -march=armv8-a" } */
++/* { dg-options "-O3" } */
++/* { dg-require-effective-target arm_arch_v8a_ok } */
++/* { dg-add-options arm_arch_v8a } */
+
+/* Both sdiv and udiv can be used here, so prefer udiv. */
+int f1 (unsigned char *p)
@@ -3912,6 +4932,78 @@ LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba5
# Return 1 if compilation with -freorder-blocks-and-partition is error-free
# for trivial code, 0 otherwise. As some targets (ARM for example) only
# warn when -fprofile-use is also supplied we test that combination too.
+@@ -3768,12 +3779,13 @@ proc check_effective_target_arm_fp16_hw { } {
+ # can be selected and a routine to give the flags to select that architecture
+ # Note: Extra flags may be added to disable options from newer compilers
+ # (Thumb in particular - but others may be added in the future).
+-# -march=armv7ve is special and is handled explicitly after this loop because
+-# it needs more than one predefine check to identify.
++# Warning: Do not use check_effective_target_arm_arch_*_ok for architecture
++# extension (eg. ARMv8.1-A) since there is no macro defined for them. See
++# how only __ARM_ARCH_8A__ is checked for ARMv8.1-A.
+ # Usage: /* { dg-require-effective-target arm_arch_v5_ok } */
+ # /* { dg-add-options arm_arch_v5 } */
+ # /* { dg-require-effective-target arm_arch_v5_multilib } */
+-foreach { armfunc armflag armdef } {
++foreach { armfunc armflag armdefs } {
+ v4 "-march=armv4 -marm" __ARM_ARCH_4__
+ v4t "-march=armv4t" __ARM_ARCH_4T__
+ v5 "-march=armv5 -marm" __ARM_ARCH_5__
+@@ -3788,20 +3800,23 @@ foreach { armfunc armflag armdef } {
+ v7r "-march=armv7-r" __ARM_ARCH_7R__
+ v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__
+ v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__
++ v7ve "-march=armv7ve -marm"
++ "__ARM_ARCH_7A__ && __ARM_FEATURE_IDIV"
+ v8a "-march=armv8-a" __ARM_ARCH_8A__
+ v8_1a "-march=armv8.1a" __ARM_ARCH_8A__
+ v8_2a "-march=armv8.2a" __ARM_ARCH_8A__
+- v8m_base "-march=armv8-m.base -mthumb -mfloat-abi=soft" __ARM_ARCH_8M_BASE__
++ v8m_base "-march=armv8-m.base -mthumb -mfloat-abi=soft"
++ __ARM_ARCH_8M_BASE__
+ v8m_main "-march=armv8-m.main -mthumb" __ARM_ARCH_8M_MAIN__ } {
+- eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
++ eval [string map [list FUNC $armfunc FLAG $armflag DEFS $armdefs ] {
+ proc check_effective_target_arm_arch_FUNC_ok { } {
+ if { [ string match "*-marm*" "FLAG" ] &&
+ ![check_effective_target_arm_arm_ok] } {
+ return 0
+ }
+ return [check_no_compiler_messages arm_arch_FUNC_ok assembly {
+- #if !defined (DEF)
+- #error !DEF
++ #if !(DEFS)
++ #error !(DEFS)
+ #endif
+ } "FLAG" ]
+ }
+@@ -3822,26 +3837,6 @@ foreach { armfunc armflag armdef } {
+ }]
+ }
+
+-# Same functions as above but for -march=armv7ve. To uniquely identify
+-# -march=armv7ve we need to check for __ARM_ARCH_7A__ as well as
+-# __ARM_FEATURE_IDIV otherwise it aliases with armv7-a.
+-
+-proc check_effective_target_arm_arch_v7ve_ok { } {
+- if { [ string match "*-marm*" "-march=armv7ve" ] &&
+- ![check_effective_target_arm_arm_ok] } {
+- return 0
+- }
+- return [check_no_compiler_messages arm_arch_v7ve_ok assembly {
+- #if !defined (__ARM_ARCH_7A__) || !defined (__ARM_FEATURE_IDIV)
+- #error !armv7ve
+- #endif
+- } "-march=armv7ve" ]
+-}
+-
+-proc add_options_for_arm_arch_v7ve { flags } {
+- return "$flags -march=armv7ve"
+-}
+-
+ # Return 1 if GCC was configured with --with-mode=
+ proc check_effective_target_default_mode { } {
+
--- a/src/gcc/tree-ssa-dce.c
+++ b/src/gcc/tree-ssa-dce.c
@@ -233,6 +233,8 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/gcc-7.git