[gcc-7] 253/354: * Update the Linaro support to the 7-2017.07 snapshot.
Ximin Luo
infinity0 at debian.org
Thu Nov 23 15:51:02 UTC 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch master
in repository gcc-7.
commit b70fe6649a3c661bf2075273f16c0c3f12897fbd
Author: doko <doko at 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca>
Date: Tue Jul 18 10:57:21 2017 +0000
* Update the Linaro support to the 7-2017.07 snapshot.
git-svn-id: svn+ssh://svn.debian.org/svn/gcccvs/branches/sid/gcc-7@9587 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
 debian/changelog                         |    3 +-
 debian/patches/gcc-linaro-doc.diff       |   47 +-
 debian/patches/gcc-linaro-no-macros.diff |    4 +-
 debian/patches/gcc-linaro.diff           | 4474 +++++++++++++++++++++++++++---
 4 files changed, 4067 insertions(+), 461 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index 29a818a..723e340 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -8,12 +8,13 @@ gcc-7 (7.1.0-10) UNRELEASED; urgency=medium
[ Matthias Klose ]
* Fix gnat cross build on m68k (Adrian Glaubitz). Closes: #862927.
* Enable gnat cross build on m68k. Closes: #868365.
+ * Update the Linaro support to the 7-2017.07 snapshot.
[ Aurelien Jarno ]
* libgo-s390x-default-isa.diff: do not build libgo with -march=z196,
use the default ISA instead.
- -- Matthias Klose <doko at debian.org> Tue, 11 Jul 2017 18:21:21 +0200
+ -- Matthias Klose <doko at debian.org> Tue, 18 Jul 2017 12:55:40 +0200
gcc-7 (7.1.0-9) unstable; urgency=medium
diff --git a/debian/patches/gcc-linaro-doc.diff b/debian/patches/gcc-linaro-doc.diff
index d0ab0ea..4810486 100644
--- a/debian/patches/gcc-linaro-doc.diff
+++ b/debian/patches/gcc-linaro-doc.diff
@@ -1,8 +1,8 @@
-# DP: Changes for the Linaro 7-2017.05 snapshot (documentation).
+# DP: Changes for the Linaro 7-2017.07 snapshot (documentation).
--- a/src/gcc/doc/install.texi
+++ b/src/gcc/doc/install.texi
-@@ -1092,14 +1092,18 @@ for each target is given below.
+@@ -1097,14 +1097,18 @@ for each target is given below.
@table @code
@item arm*-*-*
@@ -26,3 +26,46 @@
@multitable @columnfractions .15 .28 .30
@item Option @tab aprofile @tab rmprofile
+--- a/src/gcc/doc/sourcebuild.texi
++++ b/src/gcc/doc/sourcebuild.texi
+@@ -2274,6 +2274,11 @@ the codeset to convert to.
+ Skip the test if the target does not support profiling with option
+ @var{profopt}.
+
++ at item dg-require-stack-check @var{check}
++Skip the test if the target does not support the @code{-fstack-check}
++option. If @var{check} is @code{""}, support for @code{-fstack-check}
++is checked, for @code{-fstack-check=("@var{check}")} otherwise.
++
+ @item dg-require-visibility @var{vis}
+ Skip the test if the target does not support the @code{visibility} attribute.
+ If @var{vis} is @code{""}, support for @code{visibility("hidden")} is
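
For illustration, a minimal testcase using the new directive could look like
this (a sketch based on the description above, following the same shape as
dg-require-visibility; the exact options are up to the test author):

    /* { dg-do run } */
    /* { dg-require-stack-check "" } */
    /* { dg-options "-fstack-check" } */

    int main (void) { return 0; }
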
+--- a/src/gcc/doc/tm.texi
++++ b/src/gcc/doc/tm.texi
+@@ -3684,6 +3684,15 @@ such as the result of @code{get_frame_size ()} and the tables of
+ registers @code{df_regs_ever_live_p} and @code{call_used_regs}.
+ @end defmac
+
++ at deftypefn {Target Hook} void TARGET_COMPUTE_FRAME_LAYOUT (void)
++This target hook is called once each time the frame layout needs to be
++recalculated. The calculations can be cached by the target and can then
++be used by @code{INITIAL_ELIMINATION_OFFSET} instead of re-computing the
++layout on every invocation of that hook. This is particularly useful
++for targets that have an expensive frame layout function. Implementing
++this callback is optional.
++ at end deftypefn
++
+ @node Stack Arguments
+ @subsection Passing Function Arguments on the Stack
+ @cindex arguments on stack
+--- a/src/gcc/doc/tm.texi.in
++++ b/src/gcc/doc/tm.texi.in
+@@ -3213,6 +3213,8 @@ such as the result of @code{get_frame_size ()} and the tables of
+ registers @code{df_regs_ever_live_p} and @code{call_used_regs}.
+ @end defmac
+
++ at hook TARGET_COMPUTE_FRAME_LAYOUT
++
+ @node Stack Arguments
+ @subsection Passing Function Arguments on the Stack
+ @cindex arguments on stack
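
The new TARGET_COMPUTE_FRAME_LAYOUT hook exists so an expensive frame layout
computation can run once and be cached. A standalone C sketch of the caching
pattern it enables (all names and values here are hypothetical, not GCC's
own implementation):

    #include <stdio.h>

    /* Hypothetical cached frame layout, recomputed only when the hook
       fires.  */
    struct frame_info { long locals_offset; long hard_fp_offset; };
    static struct frame_info cached_frame;

    /* The hook's job: do the expensive computation once per layout
       change.  */
    static void
    compute_frame_layout (long frame_size)
    {
      cached_frame.locals_offset = frame_size;
      cached_frame.hard_fp_offset = frame_size + 16;
    }

    /* An INITIAL_ELIMINATION_OFFSET-style query becomes a cheap lookup
       instead of a recomputation.  */
    static long
    initial_elimination_offset (int to_hard_fp)
    {
      return to_hard_fp ? cached_frame.hard_fp_offset
                        : cached_frame.locals_offset;
    }

    int main (void)
    {
      compute_frame_layout (64);
      printf ("%ld %ld\n", initial_elimination_offset (0),
              initial_elimination_offset (1));
      return 0;
    }
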
diff --git a/debian/patches/gcc-linaro-no-macros.diff b/debian/patches/gcc-linaro-no-macros.diff
index f7c635f..f09ecac 100644
--- a/debian/patches/gcc-linaro-no-macros.diff
+++ b/debian/patches/gcc-linaro-no-macros.diff
@@ -88,5 +88,5 @@ Index: b/src/gcc/LINARO-VERSION
===================================================================
--- a/src/gcc/LINARO-VERSION
+++ /dev/null
-@@ -1 +0,0 @@
--7.1-2017.05~dev
+@@ -1,1 +0,0 @@
+-Snapshot 7.1-2017.07
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index b569af0..60979e9 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,33 +1,19 @@
-# DP: Changes for the Linaro 7-2017.05 snapshot.
+# DP: Changes for the Linaro 7-2017.07 snapshot.
MSG=$(git log origin/linaro/gcc-7-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-7-branch --format=format:"%H" -n 1 --grep "gcc-7-branch@${SVN%.}"
-LANG=C git diff --no-renames 4f4f68662706100e1fb1bb4e73ee50061d626f81 ffc354ab2f2465daf14068b1ad2c7afec87a1c9e \
+LANG=C git diff --no-renames d4064d4a3d1f9160d187e105d218c105b541f3c7 d19e70aba57b1bcc5093f3b62f853ff83e976c2e \
| egrep -v '^(diff|index) ' \
| filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ \
| sed 's,a/src//dev/null,/dev/null,'
-Index: b/src/.gitreview
-===================================================================
---- /dev/null
-+++ b/src/.gitreview
-@@ -0,0 +1,5 @@
-+[gerrit]
-+host=review.linaro.org
-+port=29418
-+project=toolchain/gcc
-+defaultbranch=linaro-local/gcc-7-integration-branch
-Index: b/src/gcc/LINARO-VERSION
-===================================================================
--- /dev/null
+++ b/src/gcc/LINARO-VERSION
@@ -0,0 +1 @@
-+7.1-2017.05~dev
-Index: b/src/gcc/Makefile.in
-===================================================================
++Snapshot 7.1-2017.07
--- a/src/gcc/Makefile.in
+++ b/src/gcc/Makefile.in
-@@ -845,10 +845,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x
+@@ -845,10 +845,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, ""
DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty
REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX]
@@ -60,8 +46,6 @@ Index: b/src/gcc/Makefile.in
CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
-Index: b/src/gcc/config.gcc
-===================================================================
--- a/src/gcc/config.gcc
+++ b/src/gcc/config.gcc
@@ -3791,34 +3791,19 @@ case "${target}" in
@@ -120,11 +104,112 @@ Index: b/src/gcc/config.gcc
fi
fi
;;
-Index: b/src/gcc/config/aarch64/aarch64.c
-===================================================================
+--- a/src/gcc/config/aarch64/aarch64-protos.h
++++ b/src/gcc/config/aarch64/aarch64-protos.h
+@@ -203,6 +203,16 @@ struct cpu_approx_modes
+ const unsigned int recip_sqrt; /* Reciprocal square root. */
+ };
+
++/* Cache prefetch settings for prefetch-loop-arrays. */
++struct cpu_prefetch_tune
++{
++ const int num_slots;
++ const int l1_cache_size;
++ const int l1_cache_line_size;
++ const int l2_cache_size;
++ const int default_opt_level;
++};
++
+ struct tune_params
+ {
+ const struct cpu_cost_table *insn_extra_cost;
+@@ -224,9 +234,6 @@ struct tune_params
+ int min_div_recip_mul_df;
+ /* Value for aarch64_case_values_threshold; or 0 for the default. */
+ unsigned int max_case_values;
+- /* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default. */
+- unsigned int cache_line_size;
+-
+ /* An enum specifying how to take into account CPU autoprefetch capabilities
+ during instruction scheduling:
+ - AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
+@@ -244,6 +251,10 @@ struct tune_params
+ } autoprefetcher_model;
+
+ unsigned int extra_tuning_flags;
++
++ /* Place prefetch struct pointer at the end to enable type checking
++ errors when tune_params misses elements (e.g., from erroneous merges). */
++ const struct cpu_prefetch_tune *prefetch;
+ };
+
+ #define AARCH64_FUSION_PAIR(x, name) \
+@@ -301,6 +312,7 @@ extern struct tune_params aarch64_tune_params;
+
+ HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
+ int aarch64_get_condition_code (rtx);
++bool aarch64_address_valid_for_prefetch_p (rtx, bool);
+ bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+ unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
+ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
+@@ -311,6 +323,7 @@ bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
+ bool aarch64_constant_address_p (rtx);
+ bool aarch64_emit_approx_div (rtx, rtx, rtx);
+ bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
++void aarch64_expand_call (rtx, rtx, bool);
+ bool aarch64_expand_movmem (rtx *);
+ bool aarch64_float_const_zero_rtx_p (rtx);
+ bool aarch64_function_arg_regno_p (unsigned);
+--- a/src/gcc/config/aarch64/aarch64-simd.md
++++ b/src/gcc/config/aarch64/aarch64-simd.md
+@@ -153,6 +153,19 @@
+ (set_attr "length" "4,4,4,8,8,8,4")]
+ )
+
++;; When storing lane zero we can use the normal STR and its more permissive
++;; addressing modes.
++
++(define_insn "aarch64_store_lane0<mode>"
++ [(set (match_operand:<VEL> 0 "memory_operand" "=m")
++ (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
++ (parallel [(match_operand 2 "const_int_operand" "n")])))]
++ "TARGET_SIMD
++ && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
++ "str\\t%<Vetype>1, %0"
++ [(set_attr "type" "neon_store1_1reg<q>")]
++)
++
+ (define_insn "load_pair<mode>"
+ [(set (match_operand:VD 0 "register_operand" "=w")
+ (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
+@@ -561,18 +574,18 @@
+ gcc_unreachable ();
+ }
+ }
+- [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
++ [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
+ )
+
+ (define_insn "*aarch64_simd_vec_copy_lane<mode>"
+- [(set (match_operand:VALL 0 "register_operand" "=w")
+- (vec_merge:VALL
+- (vec_duplicate:VALL
++ [(set (match_operand:VALL_F16 0 "register_operand" "=w")
++ (vec_merge:VALL_F16
++ (vec_duplicate:VALL_F16
+ (vec_select:<VEL>
+- (match_operand:VALL 3 "register_operand" "w")
++ (match_operand:VALL_F16 3 "register_operand" "w")
+ (parallel
+ [(match_operand:SI 4 "immediate_operand" "i")])))
+- (match_operand:VALL 1 "register_operand" "0")
++ (match_operand:VALL_F16 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_SIMD"
+ {
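
The aarch64_store_lane0<mode> pattern above lets a lane-zero store use a
plain STR and its more permissive addressing modes. A source sketch that can
take this path (assumes an AArch64 target with SIMD; the assembly in the
comment is illustrative, not guaranteed):

    #include <arm_neon.h>

    void
    store_first_lane (float *p, float32x4_t v)
    {
      /* Lane 0 (in little-endian layout) may now become
         "str s0, [x0, 12]" instead of ST1 plus a separate address add.  */
      vst1q_lane_f32 (p + 3, v, 0);
    }
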
--- a/src/gcc/config/aarch64/aarch64.c
+++ b/src/gcc/config/aarch64/aarch64.c
-@@ -193,10 +193,10 @@ static const struct aarch64_flag_desc aa
+@@ -193,10 +193,10 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
static const struct cpu_addrcost_table generic_addrcost_table =
{
{
@@ -137,7 +222,51 @@ Index: b/src/gcc/config/aarch64/aarch64.c
},
0, /* pre_modify */
0, /* post_modify */
-@@ -538,8 +538,8 @@ static const struct tune_params generic_
+@@ -526,6 +526,43 @@ static const cpu_approx_modes xgene1_approx_modes =
+ AARCH64_APPROX_ALL /* recip_sqrt */
+ };
+
++/* Generic prefetch settings (which disable prefetch). */
++static const cpu_prefetch_tune generic_prefetch_tune =
++{
++ 0, /* num_slots */
++ -1, /* l1_cache_size */
++ -1, /* l1_cache_line_size */
++ -1, /* l2_cache_size */
++ -1 /* default_opt_level */
++};
++
++static const cpu_prefetch_tune exynosm1_prefetch_tune =
++{
++ 0, /* num_slots */
++ -1, /* l1_cache_size */
++ 64, /* l1_cache_line_size */
++ -1, /* l2_cache_size */
++ -1 /* default_opt_level */
++};
++
++static const cpu_prefetch_tune qdf24xx_prefetch_tune =
++{
++ 4, /* num_slots */
++ 32, /* l1_cache_size */
++ 64, /* l1_cache_line_size */
++ 1024, /* l2_cache_size */
++ 3 /* default_opt_level */
++};
++
++static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
++{
++ 0, /* num_slots */
++ -1, /* l1_cache_size */
++ 64, /* l1_cache_line_size */
++ -1, /* l2_cache_size */
++ -1 /* default_opt_level */
++};
++
+ static const struct tune_params generic_tunings =
+ {
+ &cortexa57_extra_costs,
+@@ -538,17 +575,17 @@ static const struct tune_params generic_tunings =
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
8, /* function_align. */
@@ -148,526 +277,3959 @@ Index: b/src/gcc/config/aarch64/aarch64.c
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1, /* vec_reassoc_width. */
-@@ -547,7 +547,7 @@ static const struct tune_params generic_
+ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
+- 0, /* cache_line_size. */
- tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &generic_prefetch_tune
};
-Index: b/src/gcc/config/arm/arm-builtins.c
-===================================================================
---- a/src/gcc/config/arm/arm-builtins.c
-+++ b/src/gcc/config/arm/arm-builtins.c
-@@ -1893,10 +1893,10 @@ arm_init_builtins (void)
- = build_function_type_list (unsigned_type_node, NULL);
-
- arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
-- = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
-+ = add_builtin_function ("__builtin_arm_get_fpscr", ftype_get_fpscr,
- ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
- arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
-- = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
-+ = add_builtin_function ("__builtin_arm_set_fpscr", ftype_set_fpscr,
- ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
- }
+ static const struct tune_params cortexa35_tunings =
+@@ -564,7 +601,7 @@ static const struct tune_params cortexa35_tunings =
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
+ 16, /* function_align. */
+- 8, /* jump_align. */
++ 4, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+@@ -572,9 +609,9 @@ static const struct tune_params cortexa35_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &generic_prefetch_tune
+ };
-Index: b/src/gcc/config/arm/arm.c
-===================================================================
---- a/src/gcc/config/arm/arm.c
-+++ b/src/gcc/config/arm/arm.c
-@@ -28236,17 +28236,32 @@ arm_expand_compare_and_swap (rtx operand
- gcc_unreachable ();
- }
+ static const struct tune_params cortexa53_tunings =
+@@ -590,7 +627,7 @@ static const struct tune_params cortexa53_tunings =
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
+ 16, /* function_align. */
+- 8, /* jump_align. */
++ 4, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+@@ -598,9 +635,9 @@ static const struct tune_params cortexa53_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &generic_prefetch_tune
+ };
-- switch (mode)
-+ if (TARGET_THUMB1)
- {
-- case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
-- case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
-- case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
-- case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
-- default:
-- gcc_unreachable ();
-+ switch (mode)
+ static const struct tune_params cortexa57_tunings =
+@@ -616,7 +653,7 @@ static const struct tune_params cortexa57_tunings =
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
+ 16, /* function_align. */
+- 8, /* jump_align. */
++ 4, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+@@ -624,9 +661,9 @@ static const struct tune_params cortexa57_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
++ &generic_prefetch_tune
+ };
+
+ static const struct tune_params cortexa72_tunings =
+@@ -642,7 +679,7 @@ static const struct tune_params cortexa72_tunings =
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
+ 16, /* function_align. */
+- 8, /* jump_align. */
++ 4, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+@@ -650,9 +687,9 @@ static const struct tune_params cortexa72_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &generic_prefetch_tune
+ };
+
+ static const struct tune_params cortexa73_tunings =
+@@ -668,7 +705,7 @@ static const struct tune_params cortexa73_tunings =
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
+ 16, /* function_align. */
+- 8, /* jump_align. */
++ 4, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+@@ -676,11 +713,13 @@ static const struct tune_params cortexa73_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &generic_prefetch_tune
+ };
+
++
++
+ static const struct tune_params exynosm1_tunings =
+ {
+ &exynosm1_extra_costs,
+@@ -701,9 +740,9 @@ static const struct tune_params exynosm1_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 48, /* max_case_values. */
+- 64, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &exynosm1_prefetch_tune
+ };
+
+ static const struct tune_params thunderx_tunings =
+@@ -726,9 +765,9 @@ static const struct tune_params thunderx_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
++ &generic_prefetch_tune
+ };
+
+ static const struct tune_params xgene1_tunings =
+@@ -751,9 +790,9 @@ static const struct tune_params xgene1_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &generic_prefetch_tune
+ };
+
+ static const struct tune_params qdf24xx_tunings =
+@@ -777,9 +816,9 @@ static const struct tune_params qdf24xx_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 64, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &qdf24xx_prefetch_tune
+ };
+
+ static const struct tune_params thunderx2t99_tunings =
+@@ -802,9 +841,9 @@ static const struct tune_params thunderx2t99_tunings =
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+- 64, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
++ &thunderx2t99_prefetch_tune
+ };
+
+ /* Support for fine-grained override of the tuning structures. */
+@@ -2683,11 +2722,19 @@ aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+ plus_constant (Pmode, stack_pointer_rtx, -first));
+
+ /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
+- emit_set_insn (reg2,
+- plus_constant (Pmode, stack_pointer_rtx,
+- -(first + rounded_size)));
+-
+-
++ HOST_WIDE_INT adjustment = - (first + rounded_size);
++ if (! aarch64_uimm12_shift (adjustment))
+ {
-+ case QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
-+ case HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
-+ case SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
-+ case DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
-+ default:
-+ gcc_unreachable ();
++ aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
++ true, Pmode);
++ emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
+ }
-+ }
-+ else
-+ {
-+ switch (mode)
++ else
+ {
-+ case QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
-+ case HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
-+ case SImode: gen = gen_atomic_compare_and_swap32si_1; break;
-+ case DImode: gen = gen_atomic_compare_and_swap32di_1; break;
-+ default:
-+ gcc_unreachable ();
++ emit_set_insn (reg2,
++ plus_constant (Pmode, stack_pointer_rtx, adjustment));
+ }
- }
++
+ /* Step 3: the loop
-- bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
-+ bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
- emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
+ do
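
The change above falls back to building the adjustment with a full immediate
move whenever FIRST + ROUNDED_SIZE does not fit an add/sub immediate. A
standalone sketch of that fit test (assuming the usual AArch64 encoding, a
12-bit unsigned immediate optionally shifted left by 12; a rough analogue of
aarch64_uimm12_shift for non-negative values, not the exact GCC code):

    #include <stdbool.h>
    #include <stdio.h>

    static bool
    fits_uimm12_shift (long long val)
    {
      return (val & ~0xfffLL) == 0 || (val & ~(0xfffLL << 12)) == 0;
    }

    int main (void)
    {
      /* 4096 encodes as 1 << 12; 0x123456 needs a separate move.  */
      printf ("%d %d\n", fits_uimm12_shift (4096),
              fits_uimm12_shift (0x123456));
      return 0;
    }
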
+@@ -4549,6 +4596,24 @@ aarch64_classify_address (struct aarch64_address_info *info,
+ }
+ }
- if (mode == QImode || mode == HImode)
-Index: b/src/gcc/config/arm/iterators.md
-===================================================================
---- a/src/gcc/config/arm/iterators.md
-+++ b/src/gcc/config/arm/iterators.md
-@@ -45,6 +45,9 @@
- ;; A list of the 32bit and 64bit integer modes
- (define_mode_iterator SIDI [SI DI])
++/* Return true if the address X is valid for a PRFM instruction.
++ STRICT_P is true if we should do strict checking with
++ aarch64_classify_address. */
++
++bool
++aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
++{
++ struct aarch64_address_info addr;
++
++ /* PRFM accepts the same addresses as DImode... */
++ bool res = aarch64_classify_address (&addr, x, DImode, MEM, strict_p);
++ if (!res)
++ return false;
++
++ /* ... except writeback forms. */
++ return addr.type != ADDRESS_REG_WB;
++}
++
+ bool
+ aarch64_symbolic_address_p (rtx x)
+ {
+@@ -4633,6 +4698,50 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+ return true;
+ }
-+;; A list of atomic compare and swap success return modes
-+(define_mode_iterator CCSI [(CC_Z "TARGET_32BIT") (SI "TARGET_THUMB1")])
++/* This function is used by the call expanders of the machine description.
++ RESULT is the register in which the result is returned. It's NULL for
++ "call" and "sibcall".
++ MEM is the location of the function call.
++ SIBCALL indicates whether this function call is normal call or sibling call.
++ It will generate different pattern accordingly. */
+
- ;; A list of modes which the VFP unit can handle
- (define_mode_iterator SDF [(SF "") (DF "TARGET_VFP_DOUBLE")])
++void
++aarch64_expand_call (rtx result, rtx mem, bool sibcall)
++{
++ rtx call, callee, tmp;
++ rtvec vec;
++ machine_mode mode;
++
++ gcc_assert (MEM_P (mem));
++ callee = XEXP (mem, 0);
++ mode = GET_MODE (callee);
++ gcc_assert (mode == Pmode);
++
++ /* Decide if we should generate indirect calls by loading the
++ address of the callee into a register before performing
++ the branch-and-link. */
++ if (SYMBOL_REF_P (callee)
++ ? (aarch64_is_long_call_p (callee)
++ || aarch64_is_noplt_call_p (callee))
++ : !REG_P (callee))
++ XEXP (mem, 0) = force_reg (mode, callee);
++
++ call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
++
++ if (result != NULL_RTX)
++ call = gen_rtx_SET (result, call);
++
++ if (sibcall)
++ tmp = ret_rtx;
++ else
++ tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
++
++ vec = gen_rtvec (2, call, tmp);
++ call = gen_rtx_PARALLEL (VOIDmode, vec);
++
++ aarch64_emit_call_insn (call);
++}
++
+ /* Emit call insn with PAT and do aarch64-specific handling. */
-@@ -411,6 +414,10 @@
- ;; Mode attributes
- ;;----------------------------------------------------------------------------
+ void
+@@ -4705,7 +4814,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
+ the comparison will have to be swapped when we emit the assembly
+ code. */
+ if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+- && (REG_P (y) || GET_CODE (y) == SUBREG)
++ && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
+ && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
+ || GET_CODE (x) == LSHIFTRT
+ || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
+@@ -7482,17 +7591,13 @@ cost_plus:
+ case UMOD:
+ if (speed)
+ {
++ /* Slightly prefer UMOD over SMOD. */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
+ *cost += (extra_cost->mult[mode == DImode].add
+- + extra_cost->mult[mode == DImode].idiv);
+- else if (mode == DFmode)
+- *cost += (extra_cost->fp[1].mult
+- + extra_cost->fp[1].div);
+- else if (mode == SFmode)
+- *cost += (extra_cost->fp[0].mult
+- + extra_cost->fp[0].div);
++ + extra_cost->mult[mode == DImode].idiv
++ + (code == MOD ? 1 : 0));
+ }
+ return false; /* All arguments need to be in registers. */
-+;; Determine name of atomic compare and swap from success result mode. This
-+;; distinguishes between 16-bit Thumb and 32-bit Thumb/ARM.
-+(define_mode_attr arch [(CC_Z "32") (SI "t1")])
-+
- ;; Determine element size suffix from vector mode.
- (define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")])
+@@ -7506,7 +7611,9 @@ cost_plus:
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
+ /* There is no integer SQRT, so only DIV and UDIV can get
+ here. */
+- *cost += extra_cost->mult[mode == DImode].idiv;
++ *cost += (extra_cost->mult[mode == DImode].idiv
++ /* Slightly prefer UDIV over SDIV. */
++ + (code == DIV ? 1 : 0));
+ else
+ *cost += extra_cost->fp[mode == DFmode].div;
+ }
+@@ -8687,12 +8794,38 @@ aarch64_override_options_internal (struct gcc_options *opts)
+ opts->x_param_values,
+ global_options_set.x_param_values);
-Index: b/src/gcc/config/arm/sync.md
-===================================================================
---- a/src/gcc/config/arm/sync.md
-+++ b/src/gcc/config/arm/sync.md
-@@ -191,9 +191,9 @@
+- /* Set the L1 cache line size. */
+- if (selected_cpu->tune->cache_line_size != 0)
++ /* Set up parameters to be used in prefetching algorithm. Do not
++ override the defaults unless we are tuning for a core we have
++ researched values for. */
++ if (aarch64_tune_params.prefetch->num_slots > 0)
++ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
++ aarch64_tune_params.prefetch->num_slots,
++ opts->x_param_values,
++ global_options_set.x_param_values);
++ if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
++ maybe_set_param_value (PARAM_L1_CACHE_SIZE,
++ aarch64_tune_params.prefetch->l1_cache_size,
++ opts->x_param_values,
++ global_options_set.x_param_values);
++ if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+- selected_cpu->tune->cache_line_size,
++ aarch64_tune_params.prefetch->l1_cache_line_size,
+ opts->x_param_values,
+ global_options_set.x_param_values);
++ if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
++ maybe_set_param_value (PARAM_L2_CACHE_SIZE,
++ aarch64_tune_params.prefetch->l2_cache_size,
++ opts->x_param_values,
++ global_options_set.x_param_values);
++
++ /* Enable software prefetching at the specified optimization level for
++ CPUs that have prefetch. Lower the optimization level threshold by 1
++ when profiling is enabled. */
++ if (opts->x_flag_prefetch_loop_arrays < 0
++ && !opts->x_optimize_size
++ && aarch64_tune_params.prefetch->default_opt_level >= 0
++ && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
++ opts->x_flag_prefetch_loop_arrays = 1;
- ;; Constraints of this pattern must be at least as strict as those of the
- ;; cbranchsi operations in thumb1.md and aim to be as permissive.
--(define_insn_and_split "atomic_compare_and_swap<mode>_1"
-- [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
-- (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS))
-+(define_insn_and_split "atomic_compare_and_swap<CCSI:arch><NARROW:mode>_1"
-+ [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
-+ (unspec_volatile:CCSI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
- (set (match_operand:SI 1 "s_register_operand" "=&r,&l,&0,&l*h") ;; val out
- (zero_extend:SI
- (match_operand:NARROW 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua"))) ;; memory
-@@ -223,9 +223,9 @@
+ aarch64_override_options_after_change_1 (opts);
+ }
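
For a core with a populated prefetch struct this is roughly equivalent to
passing the parameters by hand. For example, the qdf24xx values above
correspond approximately to (illustrative command line):

    gcc -O3 -mcpu=qdf24xx ...
    # behaves about like:
    gcc -O3 -mcpu=qdf24xx -fprefetch-loop-arrays \
        --param simultaneous-prefetches=4 --param l1-cache-size=32 \
        --param l1-cache-line-size=64 --param l2-cache-size=1024 ...
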
+@@ -11647,6 +11780,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+ return;
+ }
- ;; Constraints of this pattern must be at least as strict as those of the
- ;; cbranchsi operations in thumb1.md and aim to be as permissive.
--(define_insn_and_split "atomic_compare_and_swap<mode>_1"
-- [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
-- (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS))
-+(define_insn_and_split "atomic_compare_and_swap<CCSI:arch><SIDI:mode>_1"
-+ [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
-+ (unspec_volatile:CCSI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
- (set (match_operand:SIDI 1 "s_register_operand" "=&r,&l,&0,&l*h") ;; val out
- (match_operand:SIDI 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua")) ;; memory
- (set (match_dup 2)
-Index: b/src/gcc/config/arm/t-aprofile
-===================================================================
---- a/src/gcc/config/arm/t-aprofile
-+++ b/src/gcc/config/arm/t-aprofile
-@@ -24,30 +24,13 @@
- # have their default values during the configure step. We enforce
- # this during the top-level configury.
++ enum insn_code icode = optab_handler (vec_set_optab, mode);
++ gcc_assert (icode != CODE_FOR_nothing);
++
++ /* If there are only variable elements, try to optimize
++ the insertion using dup for the most common element
++ followed by insertions. */
++
++ /* The algorithm will fill matches[*][0] with the earliest matching element,
++ and matches[X][1] with the count of duplicate elements (if X is the
++ earliest element which has duplicates). */
++
++ if (n_var == n_elts && n_elts <= 16)
++ {
++ int matches[16][2] = {0};
++ for (int i = 0; i < n_elts; i++)
++ {
++ for (int j = 0; j <= i; j++)
++ {
++ if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
++ {
++ matches[i][0] = j;
++ matches[j][1]++;
++ break;
++ }
++ }
++ }
++ int maxelement = 0;
++ int maxv = 0;
++ for (int i = 0; i < n_elts; i++)
++ if (matches[i][1] > maxv)
++ {
++ maxelement = i;
++ maxv = matches[i][1];
++ }
++
++ /* Create a duplicate of the most common element. */
++ rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
++ aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
++
++ /* Insert the rest. */
++ for (int i = 0; i < n_elts; i++)
++ {
++ rtx x = XVECEXP (vals, 0, i);
++ if (matches[i][0] == maxelement)
++ continue;
++ x = copy_to_mode_reg (inner_mode, x);
++ emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
++ }
++ return;
++ }
++
+ /* Initialise a vector which is part-variable. We want to first try
+ to build those lanes which are constant in the most efficient way we
+ can. */
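
The matches[][] bookkeeping above picks the most frequently occurring lane so
it can be broadcast with DUP before the remaining lanes are inserted. The
same bookkeeping as a standalone sketch, with plain ints standing in for the
RTX lane values:

    #include <stdio.h>

    int main (void)
    {
      int vals[] = { 7, 3, 7, 7 };   /* example lanes */
      int n_elts = 4;
      int matches[16][2] = { { 0 } };

      /* matches[i][0]: earliest lane equal to lane i;
         matches[j][1]: duplicate count for earliest occurrence j.  */
      for (int i = 0; i < n_elts; i++)
        for (int j = 0; j <= i; j++)
          if (vals[i] == vals[j])
            {
              matches[i][0] = j;
              matches[j][1]++;
              break;
            }

      int maxelement = 0, maxv = 0;
      for (int i = 0; i < n_elts; i++)
        if (matches[i][1] > maxv)
          {
            maxelement = i;
            maxv = matches[i][1];
          }

      /* Lane 0 (value 7) wins with 3 occurrences: dup it, insert lane 1.  */
      printf ("dup lane %d (value %d)\n", maxelement, vals[maxelement]);
      return 0;
    }
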
+@@ -11680,10 +11864,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
+ }
--MULTILIB_OPTIONS =
--MULTILIB_DIRNAMES =
--MULTILIB_EXCEPTIONS =
--MULTILIB_MATCHES =
--MULTILIB_REUSE =
+ /* Insert the variable lanes directly. */
-
--# We have the following hierachy:
--# ISA: A32 (.) or T32 (thumb)
--# Architecture: ARMv7-A (v7-a), ARMv7VE (v7ve), or ARMv8-A (v8-a).
--# FPU: VFPv3-D16 (fpv3), NEONv1 (simdv1), VFPv4-D16 (fpv4),
--# NEON-VFPV4 (simdvfpv4), NEON for ARMv8 (simdv8), or None (.).
--# Float-abi: Soft (.), softfp (softfp), or hard (hardfp).
+- enum insn_code icode = optab_handler (vec_set_optab, mode);
+- gcc_assert (icode != CODE_FOR_nothing);
-
--MULTILIB_OPTIONS += mthumb
--MULTILIB_DIRNAMES += thumb
-+# Arch and FPU variants to build libraries with
-
--MULTILIB_OPTIONS += march=armv7-a/march=armv7ve/march=armv8-a
--MULTILIB_DIRNAMES += v7-a v7ve v8-a
-+MULTI_ARCH_OPTS_A = march=armv7-a/march=armv7ve/march=armv8-a
-+MULTI_ARCH_DIRS_A = v7-a v7ve v8-a
+ for (int i = 0; i < n_elts; i++)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+@@ -12049,6 +12229,17 @@ aarch64_split_compare_and_swap (rtx operands[])
+ mode = GET_MODE (mem);
+ model = memmodel_from_int (INTVAL (model_rtx));
--MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=neon/mfpu=vfpv4-d16/mfpu=neon-vfpv4/mfpu=neon-fp-armv8
--MULTILIB_DIRNAMES += fpv3 simdv1 fpv4 simdvfpv4 simdv8
--
--MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard
--MULTILIB_DIRNAMES += softfp hard
-+MULTI_FPU_OPTS_A = mfpu=vfpv3-d16/mfpu=neon/mfpu=vfpv4-d16/mfpu=neon-vfpv4/mfpu=neon-fp-armv8
-+MULTI_FPU_DIRS_A = fpv3 simdv1 fpv4 simdvfpv4 simdv8
++ /* When OLDVAL is zero and we want the strong version we can emit a tighter
++ loop:
++ .label1:
++ LD[A]XR rval, [mem]
++ CBNZ rval, .label2
++ ST[L]XR scratch, newval, [mem]
++ CBNZ scratch, .label1
++ .label2:
++ CMP rval, 0. */
++ bool strong_zero_p = !is_weak && oldval == const0_rtx;
++
+ label1 = NULL;
+ if (!is_weak)
+ {
+@@ -12065,11 +12256,21 @@ aarch64_split_compare_and_swap (rtx operands[])
+ else
+ aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
+- cond = aarch64_gen_compare_reg (NE, rval, oldval);
+- x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
+- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
++ if (strong_zero_p)
++ {
++ x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
++ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
++ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
++ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
++ }
++ else
++ {
++ cond = aarch64_gen_compare_reg (NE, rval, oldval);
++ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
++ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
++ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
++ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
++ }
- # Option combinations to build library with
-@@ -71,7 +54,11 @@ MULTILIB_REQUIRED += *march=armv8-a
- MULTILIB_REQUIRED += *march=armv8-a/mfpu=neon-fp-armv8/mfloat-abi=*
+ aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
+@@ -12088,7 +12289,15 @@ aarch64_split_compare_and_swap (rtx operands[])
+ }
-+# Matches
+ emit_label (label2);
+-
++ /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
++ to set the condition flags. If this is not used it will be removed by
++ later passes. */
++ if (strong_zero_p)
++ {
++ cond = gen_rtx_REG (CCmode, CC_REGNUM);
++ x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
++ emit_insn (gen_rtx_SET (cond, x));
++ }
+ /* Emit any final barrier needed for a __sync operation. */
+ if (is_mm_sync (model))
+ aarch64_emit_post_barrier (model);
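
The strong_zero_p path above targets the common compare-and-swap-against-zero
idiom, e.g. a spinlock acquire written with C11 atomics (sketch; the actual
code generated depends on target options):

    #include <stdatomic.h>

    int
    try_lock (atomic_int *lock)
    {
      int expected = 0;
      /* Strong CAS with expected value zero: eligible for the tighter
         CBNZ-based loop shown in the comment above.  */
      return atomic_compare_exchange_strong (lock, &expected, 1);
    }
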
+--- a/src/gcc/config/aarch64/aarch64.md
++++ b/src/gcc/config/aarch64/aarch64.md
+@@ -519,27 +519,31 @@
+ )
+
+ (define_insn "prefetch"
+- [(prefetch (match_operand:DI 0 "register_operand" "r")
++ [(prefetch (match_operand:DI 0 "aarch64_prefetch_operand" "Dp")
+ (match_operand:QI 1 "const_int_operand" "")
+ (match_operand:QI 2 "const_int_operand" ""))]
+ ""
+ {
+- const char * pftype[2][4] =
++ const char * pftype[2][4] =
+ {
+- {"prfm\\tPLDL1STRM, %a0",
+- "prfm\\tPLDL3KEEP, %a0",
+- "prfm\\tPLDL2KEEP, %a0",
+- "prfm\\tPLDL1KEEP, %a0"},
+- {"prfm\\tPSTL1STRM, %a0",
+- "prfm\\tPSTL3KEEP, %a0",
+- "prfm\\tPSTL2KEEP, %a0",
+- "prfm\\tPSTL1KEEP, %a0"},
++ {"prfm\\tPLDL1STRM, %0",
++ "prfm\\tPLDL3KEEP, %0",
++ "prfm\\tPLDL2KEEP, %0",
++ "prfm\\tPLDL1KEEP, %0"},
++ {"prfm\\tPSTL1STRM, %0",
++ "prfm\\tPSTL3KEEP, %0",
++ "prfm\\tPSTL2KEEP, %0",
++ "prfm\\tPSTL1KEEP, %0"},
+ };
+
+ int locality = INTVAL (operands[2]);
+
+ gcc_assert (IN_RANGE (locality, 0, 3));
+
++ /* PRFM accepts the same addresses as a 64-bit LDR so wrap
++ the address into a DImode MEM so that aarch64_print_operand knows
++ how to print it. */
++ operands[0] = gen_rtx_MEM (DImode, operands[0]);
+ return pftype[INTVAL(operands[1])][locality];
+ }
+ [(set_attr "type" "load1")]
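
With the new Dp constraint the prefetch pattern can accept base-plus-offset
addresses directly instead of forcing them into a register first. A source
sketch that exercises it (the assembly outcome is illustrative):

    /* __builtin_prefetch (address, rw, locality): rw 0 = read,
       locality 3 = keep in L1.  May now emit "prfm PLDL1KEEP, [x0, 512]"
       without a separate address computation.  */
    void
    warm_cache (double *p)
    {
      __builtin_prefetch (p + 64, 0, 3);
    }
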
+@@ -713,12 +717,6 @@
+ ;; Subroutine calls and sibcalls
+ ;; -------------------------------------------------------------------
+
+-(define_expand "call_internal"
+- [(parallel [(call (match_operand 0 "memory_operand" "")
+- (match_operand 1 "general_operand" ""))
+- (use (match_operand 2 "" ""))
+- (clobber (reg:DI LR_REGNUM))])])
+-
+ (define_expand "call"
+ [(parallel [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))
+@@ -727,57 +725,22 @@
+ ""
+ "
+ {
+- rtx callee, pat;
+-
+- /* In an untyped call, we can get NULL for operand 2. */
+- if (operands[2] == NULL)
+- operands[2] = const0_rtx;
+-
+- /* Decide if we should generate indirect calls by loading the
+- 64-bit address of the callee into a register before performing
+- the branch-and-link. */
+- callee = XEXP (operands[0], 0);
+- if (GET_CODE (callee) == SYMBOL_REF
+- ? (aarch64_is_long_call_p (callee)
+- || aarch64_is_noplt_call_p (callee))
+- : !REG_P (callee))
+- XEXP (operands[0], 0) = force_reg (Pmode, callee);
+-
+- pat = gen_call_internal (operands[0], operands[1], operands[2]);
+- aarch64_emit_call_insn (pat);
++ aarch64_expand_call (NULL_RTX, operands[0], false);
+ DONE;
+ }"
+ )
+
+-(define_insn "*call_reg"
+- [(call (mem:DI (match_operand:DI 0 "register_operand" "r"))
++(define_insn "*call_insn"
++ [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf"))
+ (match_operand 1 "" ""))
+- (use (match_operand 2 "" ""))
+ (clobber (reg:DI LR_REGNUM))]
+ ""
+- "blr\\t%0"
+- [(set_attr "type" "call")]
+-)
+-
+-(define_insn "*call_symbol"
+- [(call (mem:DI (match_operand:DI 0 "" ""))
+- (match_operand 1 "" ""))
+- (use (match_operand 2 "" ""))
+- (clobber (reg:DI LR_REGNUM))]
+- "GET_CODE (operands[0]) == SYMBOL_REF
+- && !aarch64_is_long_call_p (operands[0])
+- && !aarch64_is_noplt_call_p (operands[0])"
+- "bl\\t%a0"
+- [(set_attr "type" "call")]
++ "@
++ blr\\t%0
++ bl\\t%a0"
++ [(set_attr "type" "call, call")]
+ )
+
+-(define_expand "call_value_internal"
+- [(parallel [(set (match_operand 0 "" "")
+- (call (match_operand 1 "memory_operand" "")
+- (match_operand 2 "general_operand" "")))
+- (use (match_operand 3 "" ""))
+- (clobber (reg:DI LR_REGNUM))])])
+-
+ (define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand" "")
+@@ -787,60 +750,23 @@
+ ""
+ "
+ {
+- rtx callee, pat;
+-
+- /* In an untyped call, we can get NULL for operand 3. */
+- if (operands[3] == NULL)
+- operands[3] = const0_rtx;
+-
+- /* Decide if we should generate indirect calls by loading the
+- 64-bit address of the callee into a register before performing
+- the branch-and-link. */
+- callee = XEXP (operands[1], 0);
+- if (GET_CODE (callee) == SYMBOL_REF
+- ? (aarch64_is_long_call_p (callee)
+- || aarch64_is_noplt_call_p (callee))
+- : !REG_P (callee))
+- XEXP (operands[1], 0) = force_reg (Pmode, callee);
+-
+- pat = gen_call_value_internal (operands[0], operands[1], operands[2],
+- operands[3]);
+- aarch64_emit_call_insn (pat);
++ aarch64_expand_call (operands[0], operands[1], false);
+ DONE;
+ }"
+ )
+
+-(define_insn "*call_value_reg"
++(define_insn "*call_value_insn"
+ [(set (match_operand 0 "" "")
+- (call (mem:DI (match_operand:DI 1 "register_operand" "r"))
++ (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf"))
+ (match_operand 2 "" "")))
+- (use (match_operand 3 "" ""))
+ (clobber (reg:DI LR_REGNUM))]
+ ""
+- "blr\\t%1"
+- [(set_attr "type" "call")]
+-
+-)
+-
+-(define_insn "*call_value_symbol"
+- [(set (match_operand 0 "" "")
+- (call (mem:DI (match_operand:DI 1 "" ""))
+- (match_operand 2 "" "")))
+- (use (match_operand 3 "" ""))
+- (clobber (reg:DI LR_REGNUM))]
+- "GET_CODE (operands[1]) == SYMBOL_REF
+- && !aarch64_is_long_call_p (operands[1])
+- && !aarch64_is_noplt_call_p (operands[1])"
+- "bl\\t%a1"
+- [(set_attr "type" "call")]
++ "@
++ blr\\t%1
++ bl\\t%a1"
++ [(set_attr "type" "call, call")]
+ )
+
+-(define_expand "sibcall_internal"
+- [(parallel [(call (match_operand 0 "memory_operand" "")
+- (match_operand 1 "general_operand" ""))
+- (return)
+- (use (match_operand 2 "" ""))])])
+-
+ (define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))
+@@ -848,29 +774,11 @@
+ (use (match_operand 2 "" ""))])]
+ ""
+ {
+- rtx pat;
+- rtx callee = XEXP (operands[0], 0);
+- if (!REG_P (callee)
+- && ((GET_CODE (callee) != SYMBOL_REF)
+- || aarch64_is_noplt_call_p (callee)))
+- XEXP (operands[0], 0) = force_reg (Pmode, callee);
+-
+- if (operands[2] == NULL_RTX)
+- operands[2] = const0_rtx;
+-
+- pat = gen_sibcall_internal (operands[0], operands[1], operands[2]);
+- aarch64_emit_call_insn (pat);
++ aarch64_expand_call (NULL_RTX, operands[0], true);
+ DONE;
+ }
+ )
+
+-(define_expand "sibcall_value_internal"
+- [(parallel [(set (match_operand 0 "" "")
+- (call (match_operand 1 "memory_operand" "")
+- (match_operand 2 "general_operand" "")))
+- (return)
+- (use (match_operand 3 "" ""))])])
+-
+ (define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand" "")
+@@ -879,19 +787,7 @@
+ (use (match_operand 3 "" ""))])]
+ ""
+ {
+- rtx pat;
+- rtx callee = XEXP (operands[1], 0);
+- if (!REG_P (callee)
+- && ((GET_CODE (callee) != SYMBOL_REF)
+- || aarch64_is_noplt_call_p (callee)))
+- XEXP (operands[1], 0) = force_reg (Pmode, callee);
+-
+- if (operands[3] == NULL_RTX)
+- operands[3] = const0_rtx;
+-
+- pat = gen_sibcall_value_internal (operands[0], operands[1], operands[2],
+- operands[3]);
+- aarch64_emit_call_insn (pat);
++ aarch64_expand_call (operands[0], operands[1], true);
+ DONE;
+ }
+ )
+@@ -899,8 +795,7 @@
+ (define_insn "*sibcall_insn"
+ [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf"))
+ (match_operand 1 "" ""))
+- (return)
+- (use (match_operand 2 "" ""))]
++ (return)]
+ "SIBLING_CALL_P (insn)"
+ "@
+ br\\t%0
+@@ -913,8 +808,7 @@
+ (call (mem:DI
+ (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf"))
+ (match_operand 2 "" "")))
+- (return)
+- (use (match_operand 3 "" ""))]
++ (return)]
+ "SIBLING_CALL_P (insn)"
+ "@
+ br\\t%1
+@@ -1026,8 +920,8 @@
+ )
+
+ (define_insn_and_split "*movsi_aarch64"
+- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w")
+- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w,r,*w")
++ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))]
+ "(register_operand (operands[0], SImode)
+ || aarch64_reg_or_zero (operands[1], SImode))"
+ "@
+@@ -1058,8 +952,8 @@
+ )
+
+ (define_insn_and_split "*movdi_aarch64"
+- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w")
+- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
++ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w,r,*w,w")
++ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
+ "(register_operand (operands[0], DImode)
+ || aarch64_reg_or_zero (operands[1], DImode))"
+ "@
+@@ -2340,6 +2234,55 @@
+ [(set_attr "type" "alus_sreg")]
+ )
+
++(define_insn "sub<mode>3_compare1_imm"
++ [(set (reg:CC CC_REGNUM)
++ (compare:CC
++ (match_operand:GPI 1 "register_operand" "r")
++ (match_operand:GPI 3 "const_int_operand" "n")))
++ (set (match_operand:GPI 0 "register_operand" "=r")
++ (plus:GPI (match_dup 1)
++ (match_operand:GPI 2 "aarch64_sub_immediate" "J")))]
++ "INTVAL (operands[3]) == -INTVAL (operands[2])"
++ "subs\\t%<w>0, %<w>1, #%n2"
++ [(set_attr "type" "alus_sreg")]
++)
+
- # CPU Matches
-+MULTILIB_MATCHES += march?armv7-a=mcpu?marvell-pj4
-+MULTILIB_MATCHES += march?armv7-a=mcpu?generic-armv7-a
- MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8
- MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9
- MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5
-Index: b/src/gcc/config/arm/t-multilib
-===================================================================
---- /dev/null
-+++ b/src/gcc/config/arm/t-multilib
-@@ -0,0 +1,69 @@
-+# Copyright (C) 2016 Free Software Foundation, Inc.
-+#
-+# This file is part of GCC.
-+#
-+# GCC is free software; you can redistribute it and/or modify
-+# it under the terms of the GNU General Public License as published by
-+# the Free Software Foundation; either version 3, or (at your option)
-+# any later version.
-+#
-+# GCC is distributed in the hope that it will be useful,
-+# but WITHOUT ANY WARRANTY; without even the implied warranty of
-+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+# GNU General Public License for more details.
-+#
-+# You should have received a copy of the GNU General Public License
-+# along with GCC; see the file COPYING3. If not see
-+# <http://www.gnu.org/licenses/>.
++(define_peephole2
++ [(set (match_operand:GPI 0 "register_operand")
++ (minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero")
++ (match_operand:GPI 2 "aarch64_reg_or_zero")))
++ (set (reg:CC CC_REGNUM)
++ (compare:CC
++ (match_dup 1)
++ (match_dup 2)))]
++ "!reg_overlap_mentioned_p (operands[0], operands[1])
++ && !reg_overlap_mentioned_p (operands[0], operands[2])"
++ [(const_int 0)]
++ {
++ emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
++ operands[2]));
++ DONE;
++ }
++)
+
-+# This is a target makefile fragment that attempts to get
-+# multilibs built for the range of CPU's, FPU's and ABI's that
-+# are relevant for the ARM architecture. It should not be used in
-+# conjunction with another make file fragment and assumes --with-arch,
-+# --with-cpu, --with-fpu, --with-float, --with-mode have their default
-+# values during the configure step. We enforce this during the
-+# top-level configury.
++(define_peephole2
++ [(set (match_operand:GPI 0 "register_operand")
++ (plus:GPI (match_operand:GPI 1 "register_operand")
++ (match_operand:GPI 2 "aarch64_sub_immediate")))
++ (set (reg:CC CC_REGNUM)
++ (compare:CC
++ (match_dup 1)
++ (match_operand:GPI 3 "const_int_operand")))]
++ "!reg_overlap_mentioned_p (operands[0], operands[1])
++ && INTVAL (operands[3]) == -INTVAL (operands[2])"
++ [(const_int 0)]
++ {
++ emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
++ operands[2], operands[3]));
++ DONE;
++ }
++)
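
The sub<mode>3_compare1_imm pattern and the two peepholes above fold a
subtraction followed by a comparison of the same operands into a single SUBS.
A source shape that can trigger the fold (sketch; whether the peephole fires
depends on register allocation and instruction ordering):

    long
    sub_and_compare (long a, long b, long *out)
    {
      *out = a - b;   /* sub  x2, x0, x1             */
      return a > b;   /* cmp  x0, x1 ... may combine  */
    }
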
+
-+MULTILIB_OPTIONS =
-+MULTILIB_DIRNAMES =
-+MULTILIB_EXCEPTIONS =
-+MULTILIB_MATCHES =
-+MULTILIB_REUSE =
+ (define_insn "*sub_<shift>_<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (minus:GPI (match_operand:GPI 3 "register_operand" "r")
+@@ -5030,14 +4973,16 @@
+ (match_operand:SF 2 "register_operand")]
+ "TARGET_FLOAT && TARGET_SIMD"
+ {
+- rtx mask = gen_reg_rtx (DImode);
++ rtx v_bitmask = gen_reg_rtx (V2SImode);
+
+ /* Juggle modes to get us in to a vector mode for BSL. */
+- rtx op1 = lowpart_subreg (V2SFmode, operands[1], SFmode);
++ rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);
+ rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);
+ rtx tmp = gen_reg_rtx (V2SFmode);
+- emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 31));
+- emit_insn (gen_aarch64_simd_bslv2sf (tmp, mask, op2, op1));
++ emit_move_insn (v_bitmask,
++ aarch64_simd_gen_const_vector_dup (V2SImode,
++ HOST_WIDE_INT_M1U << 31));
++ emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
+ emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
+ DONE;
+ }
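
The rewritten SFmode copysign expander above now builds the sign-bit mask as
a vector duplicate (a V2SI with bit 31 set in each element) feeding BSL. The
user-visible entry point is ordinary copysign (sketch):

    #include <math.h>

    /* Target options permitting, this goes through the copysign expander
       shown above.  */
    float
    apply_sign (float magnitude, float sign)
    {
      return copysignf (magnitude, sign);
    }
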
+--- a/src/gcc/config/aarch64/atomics.md
++++ b/src/gcc/config/aarch64/atomics.md
+@@ -25,7 +25,7 @@
+ (match_operand:ALLI 1 "register_operand" "") ;; val out
+ (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
+ (match_operand:ALLI 3 "general_operand" "") ;; expected
+- (match_operand:ALLI 4 "register_operand" "") ;; desired
++ (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
+ (match_operand:SI 5 "const_int_operand") ;; is_weak
+ (match_operand:SI 6 "const_int_operand") ;; mod_s
+ (match_operand:SI 7 "const_int_operand")] ;; mod_f
+@@ -45,7 +45,7 @@
+ (set (match_dup 1)
+ (unspec_volatile:SHORT
+ [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected
+- (match_operand:SHORT 3 "register_operand" "r") ;; desired
++ (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+@@ -69,7 +69,7 @@
+ (set (match_dup 1)
+ (unspec_volatile:GPI
+ [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect
+- (match_operand:GPI 3 "register_operand" "r") ;; desired
++ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+@@ -534,7 +534,7 @@
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+ (set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
+ (unspec_volatile:ALLI
+- [(match_operand:ALLI 2 "register_operand" "r")
++ [(match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPECV_SX))]
+ ""
+--- a/src/gcc/config/aarch64/constraints.md
++++ b/src/gcc/config/aarch64/constraints.md
+@@ -98,6 +98,14 @@
+ (and (match_code "high")
+ (match_test "aarch64_valid_symref (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
+
++(define_constraint "Usa"
++ "@internal
++ A constraint that matches an absolute symbolic address that can be
++ loaded by a single ADR."
++ (and (match_code "const,symbol_ref,label_ref")
++ (match_test "aarch64_symbolic_address_p (op)")
++ (match_test "aarch64_mov_operand_p (op, GET_MODE (op))")))
+
-+comma := ,
-+tm_multilib_list := $(subst $(comma), ,$(TM_MULTILIB_CONFIG))
+ (define_constraint "Uss"
+ "@internal
+ A constraint that matches an immediate shift constant in SImode."
+@@ -118,7 +126,8 @@
+ (define_constraint "Usf"
+ "@internal Usf is a symbol reference under the context where plt stub allowed."
+ (and (match_code "symbol_ref")
+- (match_test "!aarch64_is_noplt_call_p (op)")))
++ (match_test "!(aarch64_is_noplt_call_p (op)
++ || aarch64_is_long_call_p (op))")))
+
+ (define_constraint "UsM"
+ "@internal
+@@ -214,3 +223,8 @@
+ A constraint that matches an immediate operand valid for AdvSIMD scalar."
+ (and (match_code "const_int")
+ (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))")))
+
-+HAS_APROFILE := $(filter aprofile,$(tm_multilib_list))
-+HAS_RMPROFILE := $(filter rmprofile,$(tm_multilib_list))
++(define_address_constraint "Dp"
++ "@internal
++ An address valid for a prefetch instruction."
++ (match_test "aarch64_address_valid_for_prefetch_p (op, true)"))
+--- a/src/gcc/config/aarch64/predicates.md
++++ b/src/gcc/config/aarch64/predicates.md
+@@ -77,6 +77,10 @@
+ (define_predicate "aarch64_fp_vec_pow2"
+ (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
+
++(define_predicate "aarch64_sub_immediate"
++ (and (match_code "const_int")
++ (match_test "aarch64_uimm12_shift (-INTVAL (op))")))
+
-+ifneq (,$(HAS_APROFILE))
-+include $(srcdir)/config/arm/t-aprofile
-+endif
-+ifneq (,$(HAS_RMPROFILE))
-+include $(srcdir)/config/arm/t-rmprofile
-+endif
-+SEP := $(and $(HAS_APROFILE),$(HAS_RMPROFILE),/)
+ (define_predicate "aarch64_plus_immediate"
+ (and (match_code "const_int")
+ (ior (match_test "aarch64_uimm12_shift (INTVAL (op))")
+@@ -165,6 +169,9 @@
+ (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
+ 0)")))
+
++(define_predicate "aarch64_prefetch_operand"
++ (match_test "aarch64_address_valid_for_prefetch_p (op, false)"))
+
+ (define_predicate "aarch64_valid_symref"
+ (match_code "const, symbol_ref, label_ref")
+ {
+--- a/src/gcc/config/aarch64/thunderx2t99.md
++++ b/src/gcc/config/aarch64/thunderx2t99.md
+@@ -441,3 +441,23 @@
+ (and (eq_attr "tune" "thunderx2t99")
+ (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q"))
+ "thunderx2t99_ls01,thunderx2t99_f01")
+
-+# We have the following hierachy:
-+# ISA: A32 (.) or T16/T32 (thumb)
-+# Architecture: ARMv6-M (v6-m), ARMv7-M (v7-m), ARMv7E-M (v7e-m),
-+# ARMv7 (v7-ar), ARMv7-A (v7-a), ARMv7VE (v7ve),
-+# ARMv8-M Baseline (v8-m.base), ARMv8-M Mainline (v8-m.main)
-+# or ARMv8-A (v8-a).
-+# FPU: VFPv3-D16 (fpv3), NEONv1 (simdv1), FPV4-SP-D16 (fpv4-sp),
-+# VFPv4-D16 (fpv4), NEON-VFPV4 (simdvfpv4), FPV5-SP-D16 (fpv5-sp),
-+# VFPv5-D16 (fpv5), NEON for ARMv8 (simdv8), or None (.).
-+# Float-abi: Soft (.), softfp (softfp), or hard (hard).
++;; Crypto extensions.
+
-+MULTILIB_OPTIONS += mthumb
-+MULTILIB_DIRNAMES += thumb
++(define_insn_reservation "thunderx2t99_aes" 5
++ (and (eq_attr "tune" "thunderx2t99")
++ (eq_attr "type" "crypto_aese,crypto_aesmc"))
++ "thunderx2t99_f1")
+
-+MULTILIB_OPTIONS += $(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM)
-+MULTILIB_DIRNAMES += $(MULTI_ARCH_DIRS_A) $(MULTI_ARCH_DIRS_RM)
++(define_insn_reservation "thunderx2t99_sha" 7
++ (and (eq_attr "tune" "thunderx2t99")
++ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\
++ crypto_sha256_fast,crypto_sha256_slow"))
++ "thunderx2t99_f1")
+
-+MULTILIB_OPTIONS += $(MULTI_FPU_OPTS_A)$(SEP)$(MULTI_FPU_OPTS_RM)
-+MULTILIB_DIRNAMES += $(MULTI_FPU_DIRS_A) $(MULTI_FPU_DIRS_RM)
++;; CRC extension.
+
-+MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard
-+MULTILIB_DIRNAMES += softfp hard
-Index: b/src/gcc/config/arm/t-rmprofile
-===================================================================
---- a/src/gcc/config/arm/t-rmprofile
-+++ b/src/gcc/config/arm/t-rmprofile
-@@ -24,33 +24,14 @@
- # values during the configure step. We enforce this during the
- # top-level configury.
++(define_insn_reservation "thunderx2t99_crc" 4
++ (and (eq_attr "tune" "thunderx2t99")
++ (eq_attr "type" "crc"))
++ "thunderx2t99_i1")
+--- a/src/gcc/config/arm/aarch-common-protos.h
++++ b/src/gcc/config/arm/aarch-common-protos.h
+@@ -30,7 +30,9 @@ extern bool aarch_rev16_p (rtx);
+ extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
+ extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
+ extern int arm_early_load_addr_dep (rtx, rtx);
++extern int arm_early_load_addr_dep_ptr (rtx, rtx);
+ extern int arm_early_store_addr_dep (rtx, rtx);
++extern int arm_early_store_addr_dep_ptr (rtx, rtx);
+ extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
+ extern int arm_mac_accumulator_is_result (rtx, rtx);
+ extern int arm_no_early_alu_shift_dep (rtx, rtx);
+--- a/src/gcc/config/arm/aarch-common.c
++++ b/src/gcc/config/arm/aarch-common.c
+@@ -241,6 +241,24 @@ arm_early_load_addr_dep (rtx producer, rtx consumer)
+ return reg_overlap_mentioned_p (value, addr);
+ }
--MULTILIB_OPTIONS =
--MULTILIB_DIRNAMES =
--MULTILIB_EXCEPTIONS =
--MULTILIB_MATCHES =
--MULTILIB_REUSE =
--
--# We have the following hierachy:
--# ISA: A32 (.) or T16/T32 (thumb).
--# Architecture: ARMv6S-M (v6-m), ARMv7-M (v7-m), ARMv7E-M (v7e-m),
--# ARMv8-M Baseline (v8-m.base) or ARMv8-M Mainline (v8-m.main).
--# FPU: VFPv3-D16 (fpv3), FPV4-SP-D16 (fpv4-sp), FPV5-SP-D16 (fpv5-sp),
--# VFPv5-D16 (fpv5), or None (.).
--# Float-abi: Soft (.), softfp (softfp), or hard (hardfp).
--
--# Options to build libraries with
--
--MULTILIB_OPTIONS += mthumb
--MULTILIB_DIRNAMES += thumb
++/* Return nonzero if the CONSUMER instruction (a load) does need
++ a Pmode PRODUCER's value to calculate the address. */
++
++int
++arm_early_load_addr_dep_ptr (rtx producer, rtx consumer)
++{
++ rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
++ rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
++
++ if (!value || !addr || !MEM_P (SET_SRC (value)))
++ return 0;
++
++ value = SET_DEST (value);
++ addr = SET_SRC (addr);
++
++ return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
++}
++
+ /* Return nonzero if the CONSUMER instruction (an ALU op) does not
+ have an early register shift value or amount dependency on the
+ result of PRODUCER. */
+@@ -336,6 +354,24 @@ arm_early_store_addr_dep (rtx producer, rtx consumer)
+ return !arm_no_early_store_addr_dep (producer, consumer);
+ }
--MULTILIB_OPTIONS += march=armv6s-m/march=armv7-m/march=armv7e-m/march=armv7/march=armv8-m.base/march=armv8-m.main
--MULTILIB_DIRNAMES += v6-m v7-m v7e-m v7-ar v8-m.base v8-m.main
-+# Arch and FPU variants to build libraries with
++/* Return nonzero if the CONSUMER instruction (a store) does need
++ PRODUCER's Pmode value to calculate the address. */
++
++int
++arm_early_store_addr_dep_ptr (rtx producer, rtx consumer)
++{
++ rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
++ rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
++
++ if (!value || !addr || !MEM_P (SET_SRC (value)))
++ return 0;
++
++ value = SET_DEST (value);
++ addr = SET_DEST (addr);
++
++ return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
++}
++
+ /* Return non-zero iff the consumer (a multiply-accumulate or a
+ multiple-subtract instruction) has an accumulator dependency on the
+ result of the producer and no other dependency on that result. It
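
The two new _ptr helpers single out the case where a loaded pointer immediately feeds the address of a following load or store, i.e. pointer chasing. A minimal C sketch of code that creates exactly this dependency (illustrative only; at -O2 on a Cortex-A53 it benefits from the shorter bypass latency added to cortex-a53.md further down):

/* The load of n->next produces a Pmode value that the next iteration's
   load consumes as its address -- the pattern that
   arm_early_load_addr_dep_ptr recognises.  */
struct node { struct node *next; int val; };

int
sum_list (struct node *n)
{
  int s = 0;
  for (; n; n = n->next)
    s += n->val;
  return s;
}
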
+--- a/src/gcc/config/arm/aarch-cost-tables.h
++++ b/src/gcc/config/arm/aarch-cost-tables.h
+@@ -154,7 +154,7 @@ const struct cpu_cost_table cortexa53_extra_costs =
+ COSTS_N_INSNS (1), /* extend. */
+ COSTS_N_INSNS (1), /* add. */
+ COSTS_N_INSNS (1), /* extend_add. */
+- COSTS_N_INSNS (7) /* idiv. */
++ COSTS_N_INSNS (9) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+--- a/src/gcc/config/arm/arm-builtins.c
++++ b/src/gcc/config/arm/arm-builtins.c
+@@ -3058,15 +3058,15 @@ arm_expand_builtin (tree exp,
+ }
--MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=fpv4-sp-d16/mfpu=fpv5-sp-d16/mfpu=fpv5-d16
--MULTILIB_DIRNAMES += fpv3 fpv4-sp fpv5-sp fpv5
-+MULTI_ARCH_OPTS_RM = march=armv6s-m/march=armv7-m/march=armv7e-m/march=armv7/march=armv8-m.base/march=armv8-m.main
-+MULTI_ARCH_DIRS_RM = v6-m v7-m v7e-m v7-ar v8-m.base v8-m.main
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+- if (d->code == (const enum arm_builtins) fcode)
++ if (d->code == (enum arm_builtins) fcode)
+ return arm_expand_binop_builtin (d->icode, exp, target);
--MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard
--MULTILIB_DIRNAMES += softfp hard
-+MULTI_FPU_OPTS_RM = mfpu=vfpv3-d16/mfpu=fpv4-sp-d16/mfpu=fpv5-sp-d16/mfpu=fpv5-d16
-+MULTI_FPU_DIRS_RM = fpv3 fpv4-sp fpv5-sp fpv5
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+- if (d->code == (const enum arm_builtins) fcode)
++ if (d->code == (enum arm_builtins) fcode)
+ return arm_expand_unop_builtin (d->icode, exp, target, 0);
+ for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+- if (d->code == (const enum arm_builtins) fcode)
++ if (d->code == (enum arm_builtins) fcode)
+ return arm_expand_ternop_builtin (d->icode, exp, target);
- # Option combinations to build library with
-Index: b/src/gcc/configure
-===================================================================
---- a/src/gcc/configure
-+++ b/src/gcc/configure
-@@ -1717,7 +1717,8 @@ Optional Packages:
- --with-stabs arrange to use stabs instead of host debug format
- --with-dwarf2 force the default debug format to be DWARF 2
- --with-specs=SPECS add SPECS to driver command-line processing
-- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
-+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
-+ GCC `cat $srcdir/LINARO-VERSION`"
- --with-bugurl=URL Direct users to URL to report a bug
- --with-multilib-list select multilibs (AArch64, SH and x86-64 only)
- --with-gnu-ld assume the C compiler uses GNU ld default=no
-@@ -7637,7 +7638,7 @@ if test "${with_pkgversion+set}" = set;
- *) PKGVERSION="($withval) " ;;
- esac
- else
-- PKGVERSION="(GCC) "
-+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
+ /* @@@ Should really do something sensible here. */
+--- a/src/gcc/config/arm/arm.c
++++ b/src/gcc/config/arm/arm.c
+@@ -85,6 +85,7 @@ static bool arm_const_not_ok_for_debug_p (rtx);
+ static int arm_needs_doubleword_align (machine_mode, const_tree);
+ static int arm_compute_static_chain_stack_bytes (void);
+ static arm_stack_offsets *arm_get_frame_offsets (void);
++static void arm_compute_frame_layout (void);
+ static void arm_add_gc_roots (void);
+ static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
+ unsigned HOST_WIDE_INT, rtx, rtx, int, int);
+@@ -680,6 +681,9 @@ static const struct attribute_spec arm_attribute_table[] =
+ #undef TARGET_SCALAR_MODE_SUPPORTED_P
+ #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
- fi
++#undef TARGET_COMPUTE_FRAME_LAYOUT
++#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
++
+ #undef TARGET_FRAME_POINTER_REQUIRED
+ #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
-@@ -18433,7 +18434,7 @@ else
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<_LT_EOF
--#line 18436 "configure"
-+#line 18437 "configure"
- #include "confdefs.h"
+@@ -4009,6 +4013,10 @@ use_simple_return_p (void)
+ {
+ arm_stack_offsets *offsets;
- #if HAVE_DLFCN_H
-@@ -18539,7 +18540,7 @@ else
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<_LT_EOF
--#line 18542 "configure"
-+#line 18543 "configure"
- #include "confdefs.h"
++ /* Note this function can be called before or after reload. */
++ if (!reload_completed)
++ arm_compute_frame_layout ();
++
+ offsets = arm_get_frame_offsets ();
+ return offsets->outgoing_args != 0;
+ }
+@@ -9285,6 +9293,10 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
+ else
+ *cost = LIBCALL_COST (2);
++
++ /* Make the cost of sdiv more expensive so that when both sdiv and
++ udiv are possible, udiv is preferred. */
++ *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
+ return false; /* All arguments must be in registers. */
- #if HAVE_DLFCN_H
-Index: b/src/gcc/cppbuiltin.c
-===================================================================
---- a/src/gcc/cppbuiltin.c
-+++ b/src/gcc/cppbuiltin.c
-@@ -53,18 +53,41 @@ parse_basever (int *major, int *minor, i
- *patchlevel = s_patchlevel;
+ case MOD:
+@@ -9307,7 +9319,9 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
+
+ /* Fall-through. */
+ case UMOD:
+- *cost = LIBCALL_COST (2);
++ /* Make the cost of sdiv more expensive so that when both sdiv and
++ udiv are possible, udiv is preferred. */
++ *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
+ return false; /* All arguments must be in registers. */
+
+ case ROTATE:
+@@ -16857,9 +16871,10 @@ compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
+ return not_to_clear_mask;
}
-+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]"
-+ to create Linaro release number YYYYMM and spin version. */
-+static void
-+parse_linarover (int *release, int *spin)
-+{
-+ static int s_year = -1, s_month, s_spin;
+-/* Saves callee saved registers, clears callee saved registers and caller saved
+- registers not used to pass arguments before a cmse_nonsecure_call. And
+- restores the callee saved registers after. */
++/* Clears caller saved registers not used to pass arguments before a
++ cmse_nonsecure_call. Saving, clearing and restoring of callee saved
++ registers is done in the __gnu_cmse_nonsecure_call libcall.
++ See libgcc/config/arm/cmse_nonsecure_call.S. */
+
+ static void
+ cmse_nonsecure_call_clear_caller_saved (void)
+@@ -19094,7 +19109,7 @@ arm_compute_static_chain_stack_bytes (void)
+
+ /* Compute a bit mask of which registers need to be
+ saved on the stack for the current function.
+- This is used by arm_get_frame_offsets, which may add extra registers. */
++ This is used by arm_compute_frame_layout, which may add extra registers. */
+
+ static unsigned long
+ arm_compute_save_reg_mask (void)
+@@ -20728,12 +20743,25 @@ any_sibcall_could_use_r3 (void)
+ alignment. */
+
+
++/* Return cached stack offsets. */
+
-+ if (s_year == -1)
-+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3)
-+ {
-+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month);
-+ s_spin = 0;
-+ }
++static arm_stack_offsets *
++arm_get_frame_offsets (void)
++{
++ struct arm_stack_offsets *offsets;
+
-+ if (release)
-+ *release = s_year * 100 + s_month;
++ offsets = &cfun->machine->stack_offsets;
+
-+ if (spin)
-+ *spin = s_spin;
++ return offsets;
+}
++
++
+ /* Calculate stack offsets. These are used to calculate register elimination
+ offsets and in prologue/epilogue code. Also calculates which registers
+ should be saved. */
- /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */
- static void
- define__GNUC__ (cpp_reader *pfile)
+-static arm_stack_offsets *
+-arm_get_frame_offsets (void)
++static void
++arm_compute_frame_layout (void)
{
-- int major, minor, patchlevel;
-+ int major, minor, patchlevel, linaro_release, linaro_spin;
+ struct arm_stack_offsets *offsets;
+ unsigned long func_type;
+@@ -20744,9 +20772,6 @@ arm_get_frame_offsets (void)
- parse_basever (&major, &minor, &patchlevel);
-+ parse_linarover (&linaro_release, &linaro_spin);
- cpp_define_formatted (pfile, "__GNUC__=%d", major);
- cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
- cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
- cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
-+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release);
-+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin);
- cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
- cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
- cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
-Index: b/src/gcc/simplify-rtx.c
-===================================================================
+ offsets = &cfun->machine->stack_offsets;
+
+- if (reload_completed)
+- return offsets;
+-
+ /* Initially this is the size of the local variables. It will translated
+ into an offset once we have determined the size of preceding data. */
+ frame_size = ROUND_UP_WORD (get_frame_size ());
+@@ -20811,7 +20836,7 @@ arm_get_frame_offsets (void)
+ {
+ offsets->outgoing_args = offsets->soft_frame;
+ offsets->locals_base = offsets->soft_frame;
+- return offsets;
++ return;
+ }
+
+ /* Ensure SFP has the correct alignment. */
+@@ -20887,8 +20912,6 @@ arm_get_frame_offsets (void)
+ offsets->outgoing_args += 4;
+ gcc_assert (!(offsets->outgoing_args & 7));
+ }
+-
+- return offsets;
+ }
+
+
+@@ -21522,7 +21545,7 @@ arm_expand_prologue (void)
+ {
+ /* If no coprocessor registers are being pushed and we don't have
+ to worry about a frame pointer then push extra registers to
+- create the stack frame. This is done is a way that does not
++ create the stack frame. This is done in a way that does not
+ alter the frame layout, so is independent of the epilogue. */
+ int n;
+ int frame;
+@@ -28225,17 +28248,32 @@ arm_expand_compare_and_swap (rtx operands[])
+ gcc_unreachable ();
+ }
+
+- switch (mode)
++ if (TARGET_THUMB1)
+ {
+- case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
+- case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
+- case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
+- case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
+- default:
+- gcc_unreachable ();
++ switch (mode)
++ {
++ case QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
++ case HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
++ case SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
++ case DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
++ default:
++ gcc_unreachable ();
++ }
++ }
++ else
++ {
++ switch (mode)
++ {
++ case QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
++ case HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
++ case SImode: gen = gen_atomic_compare_and_swap32si_1; break;
++ case DImode: gen = gen_atomic_compare_and_swap32di_1; break;
++ default:
++ gcc_unreachable ();
++ }
+ }
+
+- bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
++ bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
+
+ if (mode == QImode || mode == HImode)
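
Note that the DIV/MOD costing tweak above is a tie-breaker rather than a latency change: signed division gets one extra COSTS_N_INSNS so that whenever the optimizers can legally use either form, the cheaper unsigned division wins. The aarch64 sdiv_costs_1.c test added later in this patch exercises the same udiv-over-sdiv preference on AArch64.
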
+--- a/src/gcc/config/arm/arm.h
++++ b/src/gcc/config/arm/arm.h
+@@ -682,7 +682,7 @@ extern int arm_arch_cmse;
+ /* Standard register usage. */
+
+ /* Register allocation in ARM Procedure Call Standard
+- (S - saved over call).
++ (S - saved over call, F - Frame-related).
+
+ r0 * argument word/integer result
+ r1-r3 argument word
+--- a/src/gcc/config/arm/arm_neon.h
++++ b/src/gcc/config/arm/arm_neon.h
+@@ -17069,14 +17069,22 @@ __extension__ extern __inline float16x4_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+ vadd_f16 (float16x4_t __a, float16x4_t __b)
+ {
++#ifdef __FAST_MATH__
++ return __a + __b;
++#else
+ return __builtin_neon_vaddv4hf (__a, __b);
++#endif
+ }
+
+ __extension__ extern __inline float16x8_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+ vaddq_f16 (float16x8_t __a, float16x8_t __b)
+ {
++#ifdef __FAST_MATH__
++ return __a + __b;
++#else
+ return __builtin_neon_vaddv8hf (__a, __b);
++#endif
+ }
+
+ __extension__ extern __inline uint16x4_t
+@@ -17587,7 +17595,11 @@ __extension__ extern __inline float16x4_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+ vmul_f16 (float16x4_t __a, float16x4_t __b)
+ {
++#ifdef __FAST_MATH__
++ return __a * __b;
++#else
+ return __builtin_neon_vmulfv4hf (__a, __b);
++#endif
+ }
+
+ __extension__ extern __inline float16x4_t
+@@ -17608,7 +17620,11 @@ __extension__ extern __inline float16x8_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+ vmulq_f16 (float16x8_t __a, float16x8_t __b)
+ {
++#ifdef __FAST_MATH__
++ return __a * __b;
++#else
+ return __builtin_neon_vmulfv8hf (__a, __b);
++#endif
+ }
+
+ __extension__ extern __inline float16x8_t
+@@ -17804,14 +17820,22 @@ __extension__ extern __inline float16x4_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+ vsub_f16 (float16x4_t __a, float16x4_t __b)
+ {
++#ifdef __FAST_MATH__
++ return __a - __b;
++#else
+ return __builtin_neon_vsubv4hf (__a, __b);
++#endif
+ }
+
+ __extension__ extern __inline float16x8_t
+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+ vsubq_f16 (float16x8_t __a, float16x8_t __b)
+ {
++#ifdef __FAST_MATH__
++ return __a - __b;
++#else
+ return __builtin_neon_vsubv8hf (__a, __b);
++#endif
+ }
+
+ #endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC. */
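
With the __FAST_MATH__ paths above, the f16 add/sub/mul intrinsics lower to plain vector operators instead of opaque builtins, so generic optimisations apply. A small usage sketch (assumes a target with ARMv8.2-A FP16 support and fast-math enabled, e.g. something like -march=armv8.2-a+fp16 -ffast-math; exact flags depend on the configuration):

#include <arm_neon.h>

/* Under __FAST_MATH__ this is now generic (a + b) * c on float16x8_t,
   visible to folding and reassociation, rather than two builtin calls.  */
float16x8_t
madd_f16 (float16x8_t a, float16x8_t b, float16x8_t c)
{
  return vmulq_f16 (vaddq_f16 (a, b), c);
}
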
+--- a/src/gcc/config/arm/cortex-a53.md
++++ b/src/gcc/config/arm/cortex-a53.md
+@@ -254,6 +254,16 @@
+ "cortex_a53_store*"
+ "arm_no_early_store_addr_dep")
+
++;; Model a bypass for load to load/store address.
++
++(define_bypass 3 "cortex_a53_load1"
++ "cortex_a53_load*"
++ "arm_early_load_addr_dep_ptr")
++
++(define_bypass 3 "cortex_a53_load1"
++ "cortex_a53_store*"
++ "arm_early_store_addr_dep_ptr")
++
+ ;; Model a GP->FP register move as similar to stores.
+
+ (define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*"
+--- a/src/gcc/config/arm/iterators.md
++++ b/src/gcc/config/arm/iterators.md
+@@ -45,6 +45,9 @@
+ ;; A list of the 32bit and 64bit integer modes
+ (define_mode_iterator SIDI [SI DI])
+
++;; A list of atomic compare and swap success return modes
++(define_mode_iterator CCSI [(CC_Z "TARGET_32BIT") (SI "TARGET_THUMB1")])
++
+ ;; A list of modes which the VFP unit can handle
+ (define_mode_iterator SDF [(SF "") (DF "TARGET_VFP_DOUBLE")])
+
+@@ -411,6 +414,10 @@
+ ;; Mode attributes
+ ;;----------------------------------------------------------------------------
+
++;; Determine name of atomic compare and swap from success result mode. This
++;; distinguishes between 16-bit Thumb and 32-bit Thumb/ARM.
++(define_mode_attr arch [(CC_Z "32") (SI "t1")])
++
+ ;; Determine element size suffix from vector mode.
+ (define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")])
+
+--- a/src/gcc/config/arm/neon.md
++++ b/src/gcc/config/arm/neon.md
+@@ -505,6 +505,23 @@
+ (const_string "neon_add<q>")))]
+ )
+
++;; As with SFmode, full support for HFmode vector arithmetic is only available
++;; when flag-unsafe-math-optimizations is enabled.
++
++(define_insn "add<mode>3"
++ [(set
++ (match_operand:VH 0 "s_register_operand" "=w")
++ (plus:VH
++ (match_operand:VH 1 "s_register_operand" "w")
++ (match_operand:VH 2 "s_register_operand" "w")))]
++ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
++ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ [(set (attr "type")
++ (if_then_else (match_test "<Is_float_mode>")
++ (const_string "neon_fp_addsub_s<q>")
++ (const_string "neon_add<q>")))]
++)
++
+ (define_insn "add<mode>3_fp16"
+ [(set
+ (match_operand:VH 0 "s_register_operand" "=w")
+@@ -557,6 +574,17 @@
+ (const_string "neon_sub<q>")))]
+ )
+
++(define_insn "sub<mode>3"
++ [(set
++ (match_operand:VH 0 "s_register_operand" "=w")
++ (minus:VH
++ (match_operand:VH 1 "s_register_operand" "w")
++ (match_operand:VH 2 "s_register_operand" "w")))]
++ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
++ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ [(set_attr "type" "neon_sub<q>")]
++)
++
+ (define_insn "sub<mode>3_fp16"
+ [(set
+ (match_operand:VH 0 "s_register_operand" "=w")
+@@ -664,8 +692,17 @@
+ [(set_attr "type" "neon_fp_mla_s<q>")]
+ )
+
+-;; There is limited support for unsafe-math optimizations using the NEON FP16
+-;; arithmetic instructions, so only the intrinsic is currently supported.
++(define_insn "fma<VH:mode>4"
++ [(set (match_operand:VH 0 "register_operand" "=w")
++ (fma:VH
++ (match_operand:VH 1 "register_operand" "w")
++ (match_operand:VH 2 "register_operand" "w")
++ (match_operand:VH 3 "register_operand" "0")))]
++ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
++ "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ [(set_attr "type" "neon_fp_mla_s<q>")]
++)
++
+ (define_insn "fma<VH:mode>4_intrinsic"
+ [(set (match_operand:VH 0 "register_operand" "=w")
+ (fma:VH
+@@ -2175,6 +2212,17 @@
+ (const_string "neon_mul_<V_elem_ch><q>")))]
+ )
+
++(define_insn "mul<mode>3"
++ [(set
++ (match_operand:VH 0 "s_register_operand" "=w")
++ (mult:VH
++ (match_operand:VH 1 "s_register_operand" "w")
++ (match_operand:VH 2 "s_register_operand" "w")))]
++ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
++ "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
++ [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
++)
++
+ (define_insn "neon_vmulf<mode>"
+ [(set
+ (match_operand:VH 0 "s_register_operand" "=w")
+--- a/src/gcc/config/arm/sync.md
++++ b/src/gcc/config/arm/sync.md
+@@ -191,9 +191,9 @@
+
+ ;; Constraints of this pattern must be at least as strict as those of the
+ ;; cbranchsi operations in thumb1.md and aim to be as permissive.
+-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+- [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
+- (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS))
++(define_insn_and_split "atomic_compare_and_swap<CCSI:arch><NARROW:mode>_1"
++ [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
++ (unspec_volatile:CCSI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
+ (set (match_operand:SI 1 "s_register_operand" "=&r,&l,&0,&l*h") ;; val out
+ (zero_extend:SI
+ (match_operand:NARROW 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua"))) ;; memory
+@@ -223,9 +223,9 @@
+
+ ;; Constraints of this pattern must be at least as strict as those of the
+ ;; cbranchsi operations in thumb1.md and aim to be as permissive.
+-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+- [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
+- (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS))
++(define_insn_and_split "atomic_compare_and_swap<CCSI:arch><SIDI:mode>_1"
++ [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out
++ (unspec_volatile:CCSI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
+ (set (match_operand:SIDI 1 "s_register_operand" "=&r,&l,&0,&l*h") ;; val out
+ (match_operand:SIDI 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua")) ;; memory
+ (set (match_dup 2)
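
For reference, the CCSI iterator and the new "arch" mode attribute from iterators.md expand each of these templates into two named patterns apiece, e.g. atomic_compare_and_swapt1si_1 (Thumb-1, SImode success flag) and atomic_compare_and_swap32si_1 (32-bit ARM/Thumb-2, CC_Zmode flags register) -- exactly the names the updated arm_expand_compare_and_swap selects between.
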
+--- a/src/gcc/config/arm/t-aprofile
++++ b/src/gcc/config/arm/t-aprofile
+@@ -24,30 +24,13 @@
+ # have their default values during the configure step. We enforce
+ # this during the top-level configury.
+
+-MULTILIB_OPTIONS =
+-MULTILIB_DIRNAMES =
+-MULTILIB_EXCEPTIONS =
+-MULTILIB_MATCHES =
+-MULTILIB_REUSE =
++# Arch and FPU variants to build libraries with
+
+-# We have the following hierachy:
+-# ISA: A32 (.) or T32 (thumb)
+-# Architecture: ARMv7-A (v7-a), ARMv7VE (v7ve), or ARMv8-A (v8-a).
+-# FPU: VFPv3-D16 (fpv3), NEONv1 (simdv1), VFPv4-D16 (fpv4),
+-# NEON-VFPV4 (simdvfpv4), NEON for ARMv8 (simdv8), or None (.).
+-# Float-abi: Soft (.), softfp (softfp), or hard (hardfp).
++MULTI_ARCH_OPTS_A = march=armv7-a/march=armv7ve/march=armv8-a
++MULTI_ARCH_DIRS_A = v7-a v7ve v8-a
+
+-MULTILIB_OPTIONS += mthumb
+-MULTILIB_DIRNAMES += thumb
+-
+-MULTILIB_OPTIONS += march=armv7-a/march=armv7ve/march=armv8-a
+-MULTILIB_DIRNAMES += v7-a v7ve v8-a
+-
+-MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=neon/mfpu=vfpv4-d16/mfpu=neon-vfpv4/mfpu=neon-fp-armv8
+-MULTILIB_DIRNAMES += fpv3 simdv1 fpv4 simdvfpv4 simdv8
+-
+-MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard
+-MULTILIB_DIRNAMES += softfp hard
++MULTI_FPU_OPTS_A = mfpu=vfpv3-d16/mfpu=neon/mfpu=vfpv4-d16/mfpu=neon-vfpv4/mfpu=neon-fp-armv8
++MULTI_FPU_DIRS_A = fpv3 simdv1 fpv4 simdvfpv4 simdv8
+
+
+ # Option combinations to build library with
+@@ -71,7 +54,11 @@ MULTILIB_REQUIRED += *march=armv8-a
+ MULTILIB_REQUIRED += *march=armv8-a/mfpu=neon-fp-armv8/mfloat-abi=*
+
+
++# Matches
++
+ # CPU Matches
++MULTILIB_MATCHES += march?armv7-a=mcpu?marvell-pj4
++MULTILIB_MATCHES += march?armv7-a=mcpu?generic-armv7-a
+ MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8
+ MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9
+ MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5
+--- /dev/null
++++ b/src/gcc/config/arm/t-multilib
+@@ -0,0 +1,69 @@
++# Copyright (C) 2016 Free Software Foundation, Inc.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3. If not see
++# <http://www.gnu.org/licenses/>.
++
++# This is a target makefile fragment that attempts to get
++# multilibs built for the range of CPUs, FPUs and ABIs that
++# are relevant for the ARM architecture. It should not be used in
++# conjunction with another makefile fragment and assumes --with-arch,
++# --with-cpu, --with-fpu, --with-float, --with-mode have their default
++# values during the configure step. We enforce this during the
++# top-level configury.
++
++MULTILIB_OPTIONS =
++MULTILIB_DIRNAMES =
++MULTILIB_EXCEPTIONS =
++MULTILIB_MATCHES =
++MULTILIB_REUSE =
++
++comma := ,
++tm_multilib_list := $(subst $(comma), ,$(TM_MULTILIB_CONFIG))
++
++HAS_APROFILE := $(filter aprofile,$(tm_multilib_list))
++HAS_RMPROFILE := $(filter rmprofile,$(tm_multilib_list))
++
++ifneq (,$(HAS_APROFILE))
++include $(srcdir)/config/arm/t-aprofile
++endif
++ifneq (,$(HAS_RMPROFILE))
++include $(srcdir)/config/arm/t-rmprofile
++endif
++SEP := $(and $(HAS_APROFILE),$(HAS_RMPROFILE),/)
++
++
++# We have the following hierarchy:
++# ISA: A32 (.) or T16/T32 (thumb)
++# Architecture: ARMv6-M (v6-m), ARMv7-M (v7-m), ARMv7E-M (v7e-m),
++# ARMv7 (v7-ar), ARMv7-A (v7-a), ARMv7VE (v7ve),
++# ARMv8-M Baseline (v8-m.base), ARMv8-M Mainline (v8-m.main)
++# or ARMv8-A (v8-a).
++# FPU: VFPv3-D16 (fpv3), NEONv1 (simdv1), FPV4-SP-D16 (fpv4-sp),
++# VFPv4-D16 (fpv4), NEON-VFPV4 (simdvfpv4), FPV5-SP-D16 (fpv5-sp),
++# VFPv5-D16 (fpv5), NEON for ARMv8 (simdv8), or None (.).
++# Float-abi: Soft (.), softfp (softfp), or hard (hard).
++
++MULTILIB_OPTIONS += mthumb
++MULTILIB_DIRNAMES += thumb
++
++MULTILIB_OPTIONS += $(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM)
++MULTILIB_DIRNAMES += $(MULTI_ARCH_DIRS_A) $(MULTI_ARCH_DIRS_RM)
++
++MULTILIB_OPTIONS += $(MULTI_FPU_OPTS_A)$(SEP)$(MULTI_FPU_OPTS_RM)
++MULTILIB_DIRNAMES += $(MULTI_FPU_DIRS_A) $(MULTI_FPU_DIRS_RM)
++
++MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard
++MULTILIB_DIRNAMES += softfp hard
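
This fragment keys off TM_MULTILIB_CONFIG: configuring with, say, --with-multilib-list=aprofile,rmprofile pulls in both t-aprofile and t-rmprofile and joins their march/mfpu option sets with the $(SEP) separator, while with a single profile $(SEP) is empty and the other profile's MULTI_* variables expand to nothing.
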
+--- a/src/gcc/config/arm/t-rmprofile
++++ b/src/gcc/config/arm/t-rmprofile
+@@ -24,33 +24,14 @@
+ # values during the configure step. We enforce this during the
+ # top-level configury.
+
+-MULTILIB_OPTIONS =
+-MULTILIB_DIRNAMES =
+-MULTILIB_EXCEPTIONS =
+-MULTILIB_MATCHES =
+-MULTILIB_REUSE =
+
+-# We have the following hierachy:
+-# ISA: A32 (.) or T16/T32 (thumb).
+-# Architecture: ARMv6S-M (v6-m), ARMv7-M (v7-m), ARMv7E-M (v7e-m),
+-# ARMv8-M Baseline (v8-m.base) or ARMv8-M Mainline (v8-m.main).
+-# FPU: VFPv3-D16 (fpv3), FPV4-SP-D16 (fpv4-sp), FPV5-SP-D16 (fpv5-sp),
+-# VFPv5-D16 (fpv5), or None (.).
+-# Float-abi: Soft (.), softfp (softfp), or hard (hardfp).
++# Arch and FPU variants to build libraries with
+
+-# Options to build libraries with
++MULTI_ARCH_OPTS_RM = march=armv6s-m/march=armv7-m/march=armv7e-m/march=armv7/march=armv8-m.base/march=armv8-m.main
++MULTI_ARCH_DIRS_RM = v6-m v7-m v7e-m v7-ar v8-m.base v8-m.main
+
+-MULTILIB_OPTIONS += mthumb
+-MULTILIB_DIRNAMES += thumb
+-
+-MULTILIB_OPTIONS += march=armv6s-m/march=armv7-m/march=armv7e-m/march=armv7/march=armv8-m.base/march=armv8-m.main
+-MULTILIB_DIRNAMES += v6-m v7-m v7e-m v7-ar v8-m.base v8-m.main
+-
+-MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=fpv4-sp-d16/mfpu=fpv5-sp-d16/mfpu=fpv5-d16
+-MULTILIB_DIRNAMES += fpv3 fpv4-sp fpv5-sp fpv5
+-
+-MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard
+-MULTILIB_DIRNAMES += softfp hard
++MULTI_FPU_OPTS_RM = mfpu=vfpv3-d16/mfpu=fpv4-sp-d16/mfpu=fpv5-sp-d16/mfpu=fpv5-d16
++MULTI_FPU_DIRS_RM = fpv3 fpv4-sp fpv5-sp fpv5
+
+
+ # Option combinations to build library with
+--- a/src/gcc/configure
++++ b/src/gcc/configure
+@@ -1717,7 +1717,8 @@ Optional Packages:
+ --with-stabs arrange to use stabs instead of host debug format
+ --with-dwarf2 force the default debug format to be DWARF 2
+ --with-specs=SPECS add SPECS to driver command-line processing
+- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
++ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
++ GCC `cat $srcdir/LINARO-VERSION`"
+ --with-bugurl=URL Direct users to URL to report a bug
+ --with-multilib-list select multilibs (AArch64, SH and x86-64 only)
+ --with-gnu-ld assume the C compiler uses GNU ld default=no
+@@ -7637,7 +7638,7 @@ if test "${with_pkgversion+set}" = set; then :
+ *) PKGVERSION="($withval) " ;;
+ esac
+ else
+- PKGVERSION="(GCC) "
++ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
+
+ fi
+
+@@ -18433,7 +18434,7 @@ else
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<_LT_EOF
+-#line 18436 "configure"
++#line 18437 "configure"
+ #include "confdefs.h"
+
+ #if HAVE_DLFCN_H
+@@ -18539,7 +18540,7 @@ else
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<_LT_EOF
+-#line 18542 "configure"
++#line 18543 "configure"
+ #include "confdefs.h"
+
+ #if HAVE_DLFCN_H
+--- a/src/gcc/configure.ac
++++ b/src/gcc/configure.ac
+@@ -929,7 +929,7 @@ AC_ARG_WITH(specs,
+ )
+ AC_SUBST(CONFIGURE_SPECS)
+
+-ACX_PKGVERSION([GCC])
++ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
+ ACX_BUGURL([https://gcc.gnu.org/bugs/])
+
+ # Sanity check enable_languages in case someone does not run the toplevel
+--- a/src/gcc/cppbuiltin.c
++++ b/src/gcc/cppbuiltin.c
+@@ -53,18 +53,41 @@ parse_basever (int *major, int *minor, int *patchlevel)
+ *patchlevel = s_patchlevel;
+ }
+
++/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]"
++ to create Linaro release number YYYYMM and spin version. */
++static void
++parse_linarover (int *release, int *spin)
++{
++ static int s_year = -1, s_month, s_spin;
++
++ if (s_year == -1)
++ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3)
++ {
++ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month);
++ s_spin = 0;
++ }
++
++ if (release)
++ *release = s_year * 100 + s_month;
++
++ if (spin)
++ *spin = s_spin;
++}
+
+ /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */
+ static void
+ define__GNUC__ (cpp_reader *pfile)
+ {
+- int major, minor, patchlevel;
++ int major, minor, patchlevel, linaro_release, linaro_spin;
+
+ parse_basever (&major, &minor, &patchlevel);
++ parse_linarover (&linaro_release, &linaro_spin);
+ cpp_define_formatted (pfile, "__GNUC__=%d", major);
+ cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
+ cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
+ cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
++ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release);
++ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin);
+ cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
+ cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
+ cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
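
A quick way to sanity-check the new predefines (illustrative only; the values depend on the LINAROVER string baked into the build):

/* For a LINAROVER of "7.1-2017.07-1" this prints "201707 1".  */
#include <stdio.h>

int
main (void)
{
#if defined (__LINARO_RELEASE__) && defined (__LINARO_SPIN__)
  printf ("%d %d\n", __LINARO_RELEASE__, __LINARO_SPIN__);
#else
  printf ("not a Linaro toolchain\n");
#endif
  return 0;
}
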
+--- a/src/gcc/dbgcnt.def
++++ b/src/gcc/dbgcnt.def
+@@ -174,6 +174,7 @@ DEBUG_COUNTER (merged_ipa_icf)
+ DEBUG_COUNTER (postreload_cse)
+ DEBUG_COUNTER (pre)
+ DEBUG_COUNTER (pre_insn)
++DEBUG_COUNTER (prefetch)
+ DEBUG_COUNTER (registered_jump_thread)
+ DEBUG_COUNTER (sched2_func)
+ DEBUG_COUNTER (sched_block)
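
The new counter plugs into the standard debug-counter machinery, so a flag such as -fdbg-cnt=prefetch:10 caps how many prefetch insertions fire and lets a suspect prefetch be bisected by halving the threshold (the pass that consumes the counter is not part of this hunk).
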
+--- a/src/gcc/expr.c
++++ b/src/gcc/expr.c
+@@ -8838,6 +8838,15 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
+ end_sequence ();
+ unsigned uns_cost = seq_cost (uns_insns, speed_p);
+ unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
++
++ /* If costs are the same then use as tie breaker the other
++ other factor. */
++ if (uns_cost == sgn_cost)
++ {
++ uns_cost = seq_cost (uns_insns, !speed_p);
++ sgn_cost = seq_cost (sgn_insns, !speed_p);
++ }
++
+ if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
+ {
+ emit_insn (uns_insns);
+--- a/src/gcc/gimple-fold.c
++++ b/src/gcc/gimple-fold.c
+@@ -3252,6 +3252,28 @@ gimple_fold_builtin_acc_on_device (gimple_stmt_iterator *gsi, tree arg0)
+ return true;
+ }
+
++/* Fold realloc (0, n) -> malloc (n). */
++
++static bool
++gimple_fold_builtin_realloc (gimple_stmt_iterator *gsi)
++{
++ gimple *stmt = gsi_stmt (*gsi);
++ tree arg = gimple_call_arg (stmt, 0);
++ tree size = gimple_call_arg (stmt, 1);
++
++ if (operand_equal_p (arg, null_pointer_node, 0))
++ {
++ tree fn_malloc = builtin_decl_implicit (BUILT_IN_MALLOC);
++ if (fn_malloc)
++ {
++ gcall *repl = gimple_build_call (fn_malloc, 1, size);
++ replace_call_with_call_and_fold (gsi, repl);
++ return true;
++ }
++ }
++ return false;
++}
++
+ /* Fold the non-target builtin at *GSI and return whether any simplification
+ was made. */
+
+@@ -3410,6 +3432,9 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi)
+ case BUILT_IN_ACC_ON_DEVICE:
+ return gimple_fold_builtin_acc_on_device (gsi,
+ gimple_call_arg (stmt, 0));
++ case BUILT_IN_REALLOC:
++ return gimple_fold_builtin_realloc (gsi);
++
+ default:;
+ }
+
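
In source terms, the fold rewrites a realloc whose pointer argument is a compile-time null into malloc, which in turn lets DCE delete the call when the result is unused, as the pr79697.c test below verifies. A minimal sketch:

#include <stdlib.h>

void *
make_buf (size_t n)
{
  /* Folded by gimple_fold_builtin_realloc into malloc (n).  */
  return realloc (0, n);
}
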
+--- a/src/gcc/lra-constraints.c
++++ b/src/gcc/lra-constraints.c
+@@ -5394,6 +5394,29 @@ choose_split_class (enum reg_class allocno_class,
+ #endif
+ }
+
++/* Copy any equivalence information from ORIGINAL_REGNO to NEW_REGNO.
++ It only makes sense to call this function if NEW_REGNO is always
++ equal to ORIGINAL_REGNO. */
++
++static void
++lra_copy_reg_equiv (unsigned int new_regno, unsigned int original_regno)
++{
++ if (!ira_reg_equiv[original_regno].defined_p)
++ return;
++
++ ira_expand_reg_equiv ();
++ ira_reg_equiv[new_regno].defined_p = true;
++ if (ira_reg_equiv[original_regno].memory)
++ ira_reg_equiv[new_regno].memory
++ = copy_rtx (ira_reg_equiv[original_regno].memory);
++ if (ira_reg_equiv[original_regno].constant)
++ ira_reg_equiv[new_regno].constant
++ = copy_rtx (ira_reg_equiv[original_regno].constant);
++ if (ira_reg_equiv[original_regno].invariant)
++ ira_reg_equiv[new_regno].invariant
++ = copy_rtx (ira_reg_equiv[original_regno].invariant);
++}
++
+ /* Do split transformations for insn INSN, which defines or uses
+ ORIGINAL_REGNO. NEXT_USAGE_INSNS specifies which instruction in
+ the EBB next uses ORIGINAL_REGNO; it has the same form as the
+@@ -5515,6 +5538,7 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn,
+ new_reg = lra_create_new_reg (mode, original_reg, rclass, "split");
+ reg_renumber[REGNO (new_reg)] = hard_regno;
+ }
++ int new_regno = REGNO (new_reg);
+ save = emit_spill_move (true, new_reg, original_reg);
+ if (NEXT_INSN (save) != NULL_RTX && !call_save_p)
+ {
+@@ -5523,7 +5547,7 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn,
+ fprintf
+ (lra_dump_file,
+ " Rejecting split %d->%d resulting in > 2 save insns:\n",
+- original_regno, REGNO (new_reg));
++ original_regno, new_regno);
+ dump_rtl_slim (lra_dump_file, save, NULL, -1, 0);
+ fprintf (lra_dump_file,
+ " ))))))))))))))))))))))))))))))))))))))))))))))))\n");
+@@ -5538,18 +5562,24 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn,
+ fprintf (lra_dump_file,
+ " Rejecting split %d->%d "
+ "resulting in > 2 restore insns:\n",
+- original_regno, REGNO (new_reg));
++ original_regno, new_regno);
+ dump_rtl_slim (lra_dump_file, restore, NULL, -1, 0);
+ fprintf (lra_dump_file,
+ " ))))))))))))))))))))))))))))))))))))))))))))))))\n");
+ }
+ return false;
+ }
++ /* Transfer equivalence information to the spill register, so that
++ if we fail to allocate the spill register, we have the option of
++ rematerializing the original value instead of spilling to the stack. */
++ if (!HARD_REGISTER_NUM_P (original_regno)
++ && mode == PSEUDO_REGNO_MODE (original_regno))
++ lra_copy_reg_equiv (new_regno, original_regno);
+ after_p = usage_insns[original_regno].after_p;
+- lra_reg_info[REGNO (new_reg)].restore_rtx = regno_reg_rtx[original_regno];
+- bitmap_set_bit (&check_only_regs, REGNO (new_reg));
++ lra_reg_info[new_regno].restore_rtx = regno_reg_rtx[original_regno];
++ bitmap_set_bit (&check_only_regs, new_regno);
+ bitmap_set_bit (&check_only_regs, original_regno);
+- bitmap_set_bit (&lra_split_regs, REGNO (new_reg));
++ bitmap_set_bit (&lra_split_regs, new_regno);
+ for (;;)
+ {
+ if (GET_CODE (next_usage_insns) != INSN_LIST)
+@@ -5565,7 +5595,7 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn,
+ if (lra_dump_file != NULL)
+ {
+ fprintf (lra_dump_file, " Split reuse change %d->%d:\n",
+- original_regno, REGNO (new_reg));
++ original_regno, new_regno);
+ dump_insn_slim (lra_dump_file, as_a <rtx_insn *> (usage_insn));
+ }
+ }
+--- a/src/gcc/lra-eliminations.c
++++ b/src/gcc/lra-eliminations.c
+@@ -1196,6 +1196,8 @@ update_reg_eliminate (bitmap insns_with_changed_offsets)
+ struct lra_elim_table *ep, *ep1;
+ HARD_REG_SET temp_hard_reg_set;
+
++ targetm.compute_frame_layout ();
++
+ /* Clear self elimination offsets. */
+ for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
+ self_elim_offsets[ep->from] = 0;
+--- a/src/gcc/reload1.c
++++ b/src/gcc/reload1.c
+@@ -3821,6 +3821,7 @@ verify_initial_elim_offsets (void)
+ if (!num_eliminable)
+ return true;
+
++ targetm.compute_frame_layout ();
+ for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
+ {
+ INITIAL_ELIMINATION_OFFSET (ep->from, ep->to, t);
+@@ -3838,6 +3839,7 @@ set_initial_elim_offsets (void)
+ {
+ struct elim_table *ep = reg_eliminate;
+
++ targetm.compute_frame_layout ();
+ for (; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
+ {
+ INITIAL_ELIMINATION_OFFSET (ep->from, ep->to, ep->initial_offset);
--- a/src/gcc/simplify-rtx.c
+++ b/src/gcc/simplify-rtx.c
-@@ -3345,19 +3345,21 @@ simplify_binary_operation_1 (enum rtx_co
+@@ -3345,19 +3345,21 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
&& UINTVAL (trueop0) == GET_MODE_MASK (mode)
&& ! side_effects_p (op1))
return op0;
+
-+ canonicalize_shift:
- /* Given:
- scalar modes M1, M2
- scalar constants c1, c2
- size (M2) > size (M1)
- c1 == size (M2) - size (M1)
- optimize:
-- (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
-+ ([a|l]shiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
- <low_part>)
- (const_int <c2>))
- to:
-- (subreg:M1 (ashiftrt:M2 (reg:M2) (const_int <c1 + c2>))
-+ (subreg:M1 ([a|l]shiftrt:M2 (reg:M2) (const_int <c1 + c2>))
- <low_part>). */
-- if (code == ASHIFTRT
-+ if ((code == ASHIFTRT || code == LSHIFTRT)
- && !VECTOR_MODE_P (mode)
- && SUBREG_P (op0)
- && CONST_INT_P (op1)
-@@ -3374,13 +3376,13 @@ simplify_binary_operation_1 (enum rtx_co
- rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
- + INTVAL (op1));
- machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
-- tmp = simplify_gen_binary (ASHIFTRT,
-+ tmp = simplify_gen_binary (code,
- GET_MODE (SUBREG_REG (op0)),
- XEXP (SUBREG_REG (op0), 0),
- tmp);
- return lowpart_subreg (mode, tmp, inner_mode);
- }
-- canonicalize_shift:
++ canonicalize_shift:
+ /* Given:
+ scalar modes M1, M2
+ scalar constants c1, c2
+ size (M2) > size (M1)
+ c1 == size (M2) - size (M1)
+ optimize:
+- (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
++ ([a|l]shiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
+ <low_part>)
+ (const_int <c2>))
+ to:
+- (subreg:M1 (ashiftrt:M2 (reg:M2) (const_int <c1 + c2>))
++ (subreg:M1 ([a|l]shiftrt:M2 (reg:M2) (const_int <c1 + c2>))
+ <low_part>). */
+- if (code == ASHIFTRT
++ if ((code == ASHIFTRT || code == LSHIFTRT)
+ && !VECTOR_MODE_P (mode)
+ && SUBREG_P (op0)
+ && CONST_INT_P (op1)
+@@ -3374,13 +3376,13 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
+ rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
+ + INTVAL (op1));
+ machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
+- tmp = simplify_gen_binary (ASHIFTRT,
++ tmp = simplify_gen_binary (code,
+ GET_MODE (SUBREG_REG (op0)),
+ XEXP (SUBREG_REG (op0), 0),
+ tmp);
+ return lowpart_subreg (mode, tmp, inner_mode);
+ }
+- canonicalize_shift:
++
+ if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
+ {
+ val = INTVAL (op1) & (GET_MODE_PRECISION (mode) - 1);
+--- a/src/gcc/target.def
++++ b/src/gcc/target.def
+@@ -5395,6 +5395,18 @@ five otherwise. This is best for most machines.",
+ unsigned int, (void),
+ default_case_values_threshold)
+
++/* Optional callback to advise the target to compute the frame layout. */
++DEFHOOK
++(compute_frame_layout,
++ "This target hook is called once each time the frame layout needs to be\n\
++recalculated. The calculations can be cached by the target and can then\n\
++be used by @code{INITIAL_ELIMINATION_OFFSET} instead of re-computing the\n\
++layout on every invocation of that hook. This is particularly useful\n\
++for targets that have an expensive frame layout function. Implementing\n\
++this callback is optional.",
++ void, (void),
++ hook_void_void)
++
+ /* Return true if a function must have and use a frame pointer. */
+ DEFHOOK
+ (frame_pointer_required,
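
For a backend, wiring the hook up follows the same shape as the arm.c change earlier in this patch. A hypothetical skeleton (all names illustrative):

/* Compute and cache the frame layout once; INITIAL_ELIMINATION_OFFSET
   then reads the cached offsets instead of recomputing them per query.  */
static void
mytarget_compute_frame_layout (void)
{
  /* ... fill in cfun->machine with save masks and frame offsets ...  */
}

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT mytarget_compute_frame_layout
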
+--- a/src/gcc/testsuite/g++.dg/other/i386-9.C
++++ b/src/gcc/testsuite/g++.dg/other/i386-9.C
+@@ -2,6 +2,7 @@
+ // Testcase by Zdenek Sojka <zsojka at seznam.cz>
+
+ // { dg-do run { target i?86-*-* x86_64-*-* } }
++/* { dg-require-stack-check "" } */
+ // { dg-options "-Os -mpreferred-stack-boundary=5 -fstack-check -fno-omit-frame-pointer" }
+
+ int main()
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.c-torture/compile/stack-check-1.c
+@@ -0,0 +1,4 @@
++/* { dg-require-effective-target untyped_assembly } */
++/* { dg-require-stack-check "" } */
++/* { dg-additional-options "-fstack-check" } */
++#include "20031023-1.c"
+--- a/src/gcc/testsuite/gcc.c-torture/execute/pr78622.c
++++ b/src/gcc/testsuite/gcc.c-torture/execute/pr78622.c
+@@ -1,6 +1,7 @@
+ /* PR middle-end/78622 - [7 Regression] -Wformat-overflow/-fprintf-return-value
+ incorrect with overflow/wrapping
+ { dg-skip-if "Requires %hhd format" { hppa*-*-hpux* } { "*" } { "" } }
++ { dg-require-effective-target c99_runtime }
+ { dg-additional-options "-Wformat-overflow=2" } */
+
+ __attribute__((noinline, noclone)) int
+--- a/src/gcc/testsuite/gcc.dg/graphite/run-id-pr47653.c
++++ b/src/gcc/testsuite/gcc.dg/graphite/run-id-pr47653.c
+@@ -1,3 +1,4 @@
++/* { dg-require-stack-check "generic" } */
+ /* { dg-options "-O -fstack-check=generic -ftree-pre -fgraphite-identity" } */
+ /* nvptx doesn't expose a stack. */
+ /* { dg-skip-if "" { nvptx-*-* } { "*" } { "" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/lsr-div1.c
+@@ -0,0 +1,57 @@
++/* Test that division by a const int generates only one shift. */
++/* { dg-do run } */
++/* { dg-options "-O2 -fdump-rtl-combine-all" } */
++/* { dg-options "-O2 -fdump-rtl-combine-all -mtune=cortex-a53" { target aarch64*-*-* } } */
++/* { dg-require-effective-target int32plus } */
++
++extern void abort (void);
++
++#define NOINLINE __attribute__((noinline))
++
++static NOINLINE int
++f1 (unsigned int n)
++{
++ return n % 0x33;
++}
++
++static NOINLINE int
++f2 (unsigned int n)
++{
++ return n % 0x12;
++}
++
++int
++main ()
++{
++ int a = 0xaaaaaaaa;
++ int b = 0x55555555;
++ int c;
++ c = f1 (a);
++ if (c != 0x11)
++ abort ();
++ c = f1 (b);
++ if (c != 0x22)
++ abort ();
++ c = f2 (a);
++ if (c != 0xE)
++ abort ();
++ c = f2 (b);
++ if (c != 0x7)
++ abort ();
++ return 0;
++}
++
++/* Following the replacement pattern for integer division by a constant,
++ GCC is expected to generate UMULL and (x)SHIFTRT. This test checks
++ that for division by the constant 0x33, gcc generates a single
++ LSHIFTRT by 37 instead of two - LSHIFTRT by 32 and LSHIFTRT by 5. */
++
++/* { dg-final { scan-rtl-dump "\\(set \\(subreg:DI \\(reg:SI" "combine" { target aarch64*-*-* } } } */
++/* { dg-final { scan-rtl-dump "\\(lshiftrt:DI \\(reg:DI" "combine" { target aarch64*-*-* } } } */
++/* { dg-final { scan-rtl-dump "\\(const_int 37 " "combine" { target aarch64*-*-* } } } */
++
++/* Similarly, for division by the constant 0x12, gcc generates a single
++ LSHIFTRT by 34 instead of two - LSHIFTRT by 32 and LSHIFTRT by 2. */
++
++/* { dg-final { scan-rtl-dump "\\(const_int 34 " "combine" { target aarch64*-*-* } } } */
++
+--- a/src/gcc/testsuite/gcc.dg/pr47443.c
++++ b/src/gcc/testsuite/gcc.dg/pr47443.c
+@@ -1,5 +1,6 @@
+ /* PR tree-optimization/47443 */
+ /* { dg-do compile } */
++/* { dg-require-stack-check "generic" } */
+ /* { dg-options "-O -fstack-check=generic" } */
+
+ static inline int bar (char *c, int i)
+--- a/src/gcc/testsuite/gcc.dg/pr48134.c
++++ b/src/gcc/testsuite/gcc.dg/pr48134.c
+@@ -1,4 +1,5 @@
+ /* { dg-do compile } */
++/* { dg-require-stack-check "specific" } */
+ /* { dg-options "-O2 -fstack-check=specific -fno-tree-dse -fno-tree-fre -fno-tree-loop-optimize -g" } */
+
+ struct S
+--- a/src/gcc/testsuite/gcc.dg/pr70017.c
++++ b/src/gcc/testsuite/gcc.dg/pr70017.c
+@@ -1,4 +1,5 @@
+ /* { dg-do compile } */
++/* { dg-require-stack-check "generic" } */
+ /* { dg-options "-fstack-check=generic" } */
+
+ /* Check that the expected warning is issued for large frames. */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr79697.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-tree-gimple -fdump-tree-cddce-details -fdump-tree-optimized" } */
++
++void f(void)
++{
++ __builtin_strdup ("abc");
++}
++
++void g(void)
++{
++ __builtin_strndup ("abc", 3);
++}
++
++void h(void)
++{
++ __builtin_realloc (0, 10);
++}
++
++/* { dg-final { scan-tree-dump "Deleting : __builtin_strdup" "cddce1" } } */
++/* { dg-final { scan-tree-dump "Deleting : __builtin_strndup" "cddce1" } } */
++/* { dg-final { scan-tree-dump "__builtin_malloc" "gimple" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int *a)
++{
++ int x = 3;
++ return __atomic_compare_exchange_n (a, &x, 0, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++}
++
++/* { dg-final { scan-assembler "stxr\\tw\[0-9\]+, wzr,.*" } } */
++/* { dg-final { scan-assembler-not "mov\\tw\[0-9\]+, 0" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int *a)
++{
++ int x = 0;
++ return __atomic_compare_exchange_n (a, &x, 4, 0,
++ __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
++}
++
++/* { dg-final { scan-assembler-times "cbnz\\tw\[0-9\]+" 2 } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 " } */
++
++int f3 (int x, int y)
++{
++ int res = x << 3;
++ return res != 0;
++}
++
++/* We should combine the shift and compare. */
++/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 3" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/hfmode_ins_1.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++/* Check that we can perform this in a single INS without doing any DUPs. */
++
++#include <arm_neon.h>
++
++float16x8_t
++foo (float16x8_t a, float16x8_t b)
++{
++ return vsetq_lane_f16 (vgetq_lane_f16 (b, 2), a, 3);
++}
++
++float16x4_t
++bar (float16x4_t a, float16x4_t b)
++{
++ return vset_lane_f16 (vget_lane_f16 (b, 2), a, 3);
++}
++
++/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
++/* { dg-final { scan-assembler-not "dup\\t" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++/* Check that we can generate the immediate-offset addressing
++ mode for PRFM. */
++
++#define ARRSIZE 65
++int *bad_addr[ARRSIZE];
++
++void
++prefetch_for_read (void)
++{
++ int i;
++ for (i = 0; i < ARRSIZE; i++)
++ __builtin_prefetch (bad_addr[i] + 2, 0, 0);
++}
++
++/* { dg-final { scan-assembler-times "prfm.*\\\[x\[0-9\]+, 8\\\]" 1 } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/sdiv_costs_1.c
+@@ -0,0 +1,38 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++/* Both sdiv and udiv can be used here, so prefer udiv. */
++int f1 (unsigned char *p)
++{
++ return 100 / p[1];
++}
++
++int f2 (unsigned char *p, unsigned short x)
++{
++ return x / p[0];
++}
++
++int f3 (unsigned char *p, int x)
++{
++ x &= 0x7fffffff;
++ return x / p[0];
++}
++
++int f5 (unsigned char *p, unsigned short x)
++{
++ return x % p[0];
++}
++
++/* This should only generate signed divisions. */
++int f4 (unsigned char *p)
++{
++ return -100 / p[1];
++}
++
++int f6 (unsigned char *p, short x)
++{
++ return x % p[0];
++}
++
++/* { dg-final { scan-assembler-times "udiv\tw\[0-9\]+, w\[0-9\]+" 4 } } */
++/* { dg-final { scan-assembler-times "sdiv\tw\[0-9\]+, w\[0-9\]+" 2 } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
+@@ -3,7 +3,7 @@
+ __attribute__((target ("arch=armv8-a-typo"))) void
+ foo ()
+ {
++ /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'armv8-a'?" "" { target *-*-* } .-1 } */
++ /* { dg-error "unknown value 'armv8-a-typo' for 'arch' target attribute" "" { target *-*-* } .-2 } */
++ /* { dg-error "target attribute 'arch=armv8-a-typo' is invalid" "" { target *-*-* } .-3 } */
+ }
+-/* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'armv8-a'?" "" { target *-*-* } 5 } */
+-/* { dg-error "unknown value 'armv8-a-typo' for 'arch' target attribute" "" { target *-*-* } 5 } */
+-/* { dg-error "target attribute 'arch=armv8-a-typo' is invalid" "" { target *-*-* } 5 } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c
+@@ -3,7 +3,7 @@
+ __attribute__((target ("cpu=cortex-a57-typo"))) void
+ foo ()
+ {
++ /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57?" "" { target *-*-* } .-1 } */
++ /* { dg-error "unknown value 'cortex-a57-typo' for 'cpu' target attribute" "" { target *-*-* } .-2 } */
++ /* { dg-error "target attribute 'cpu=cortex-a57-typo' is invalid" "" { target *-*-* } .-3 } */
+ }
+-/* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57?" "" { target *-*-* } 5 } */
+-/* { dg-error "unknown value 'cortex-a57-typo' for 'cpu' target attribute" "" { target *-*-* } 5 } */
+-/* { dg-error "target attribute 'cpu=cortex-a57-typo' is invalid" "" { target *-*-* } 5 } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c
+@@ -3,7 +3,7 @@
+ __attribute__((target ("tune=cortex-a57-typo"))) void
+ foo ()
+ {
++ /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57?" "" { target *-*-* } .-1 } */
++ /* { dg-error "unknown value 'cortex-a57-typo' for 'tune' target attribute" "" { target *-*-* } .-2 } */
++ /* { dg-error "target attribute 'tune=cortex-a57-typo' is invalid" "" { target *-*-* } .-3 } */
+ }
+-/* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57?" "" { target *-*-* } 5 } */
+-/* { dg-error "unknown value 'cortex-a57-typo' for 'tune' target attribute" "" { target *-*-* } 5 } */
+-/* { dg-error "target attribute 'tune=cortex-a57-typo' is invalid" "" { target *-*-* } 5 } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/spill_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++typedef int v4si __attribute__ ((vector_size (16)));
++
++void bar (void);
++void
++foo (void)
++{
++ v4si x = { 1, 1, 1, 1 };
++ asm ("# %0" :: "w" (x));
++ bar ();
++ asm ("# %0" :: "w" (x));
++}
++
++/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.4s,} 2 } } */
++/* { dg-final { scan-assembler-not {\tldr\t} } } */
++/* { dg-final { scan-assembler-not {\tstr\t} } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/stack-checking.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/stack-checking.c
+@@ -1,4 +1,5 @@
+ /* { dg-do run { target { *-*-linux* } } } */
++/* { dg-require-stack-check "" } */
+ /* { dg-options "-fstack-check" } */
+
+ int main(void)
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c
+@@ -0,0 +1,54 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++typedef int v2si __attribute__ ((vector_size (8)));
++typedef float v2sf __attribute__ ((vector_size (8)));
++typedef short v4hi __attribute__ ((vector_size (8)));
++typedef __fp16 v4hf __attribute__ ((vector_size (8)));
++typedef char v8qi __attribute__ ((vector_size (8)));
++
++typedef int v4si __attribute__ ((vector_size (16)));
++typedef float v4sf __attribute__ ((vector_size (16)));
++typedef short v8hi __attribute__ ((vector_size (16)));
++typedef __fp16 v8hf __attribute__ ((vector_size (16)));
++typedef char v16qi __attribute__ ((vector_size (16)));
++typedef long long v2di __attribute__ ((vector_size (16)));
++typedef double v2df __attribute__ ((vector_size (16)));
++
++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
++#define LANE(N) (N - 1)
++#else
++#define LANE(N) 0
++#endif
++
++#define FUNC(T, E, N) \
++void \
++store_lane_##T (T x, E *y) \
++{ \
++ y[0] = x[N - 1 - LANE (N)]; \
++ y[3] = x[LANE (N)]; \
++}
++
++FUNC (v2si, int, 2)
++FUNC (v2sf, float, 2)
++FUNC (v4hi, short, 4)
++FUNC (v4hf, __fp16, 4)
++FUNC (v8qi, char, 8)
++
++FUNC (v4si, int, 4)
++FUNC (v4sf, float, 4)
++FUNC (v8hi, short, 8)
++FUNC (v8hf, __fp16, 8)
++FUNC (v16qi, char, 16)
++FUNC (v2di, long long, 2)
++FUNC (v2df, double, 2)
++
++/* When storing lane zero of a vector we can use the scalar STR instruction
++ that supports more addressing modes. */
++
++/* { dg-final { scan-assembler-times "str\ts\[0-9\]+" 4 } } */
++/* { dg-final { scan-assembler-times "str\tb\[0-9\]+" 2 } } */
++/* { dg-final { scan-assembler-times "str\th\[0-9\]+" 4 } } */
++/* { dg-final { scan-assembler-times "str\td\[0-9\]+" 2 } } */
++/* { dg-final { scan-assembler-not "umov" } } */
++/* { dg-final { scan-assembler-not "dup" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int a, int b)
++{
++ int x = a - b;
++ if (a <= b)
++ return x;
++ else
++ return 0;
++}
++
++/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++foo (int a, int b)
++{
++ int x = a - 4;
++ if (a < 4)
++ return x;
++ else
++ return 0;
++}
++
++/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, #4" 1 } } */
++/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/vect-init-1.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#define vector __attribute__((vector_size(16)))
++
++vector float combine (float a, float b, float c, float d)
++{
++ return (vector float) { a, b, c, d };
++}
++
++/* { dg-final { scan-assembler-not "movi\t" } } */
++/* { dg-final { scan-assembler-not "orr\t" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/vect-init-2.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#define vector __attribute__((vector_size(16)))
++
++vector float combine (float a, float b, float d)
++{
++ return (vector float) { a, b, a, d };
++}
++
++/* { dg-final { scan-assembler-not "movi\t" } } */
++/* { dg-final { scan-assembler-not "orr\t" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/vect-init-3.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#define vector __attribute__((vector_size(16)))
++
++vector float combine (float a, float b)
++{
++ return (vector float) { a, b, a, b };
++}
++
++/* { dg-final { scan-assembler-not "movi\t" } } */
++/* { dg-final { scan-assembler-not "orr\t" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/vect-init-4.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#define vector __attribute__((vector_size(16)))
++
++vector float combine (float a, float b)
++{
++ return (vector float) { a, b, b, a };
++}
++
++/* { dg-final { scan-assembler-not "movi\t" } } */
++/* { dg-final { scan-assembler-not "orr\t" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/vect-init-5.c
+@@ -0,0 +1,12 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#define vector __attribute__((vector_size(16)))
++
++vector float combine (float a, float b)
++{
++ return (vector float) { a, b, a, a };
++}
++
++/* { dg-final { scan-assembler-not "movi\t" } } */
++/* { dg-final { scan-assembler-not "orr\t" } } */
+--- a/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
++++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
+@@ -3,7 +3,8 @@
+ /* { dg-options "-O2 -ffast-math" } */
+ /* { dg-add-options arm_v8_2a_fp16_neon } */
+
+-/* Test instructions generated for half-precision arithmetic. */
++/* Test instructions generated for half-precision arithmetic with
++ unsafe-math-optimizations enabled. */
+
+ typedef __fp16 float16_t;
+ typedef __simd64_float16_t float16x4_t;
+@@ -90,9 +91,18 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
+ /* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */
+ /* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */
+
+-/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
+-/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
+-/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
++/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
+ /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
+ /* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */
+ /* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */
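+
Note on the reworked counts above: the scalar vadd/vsub/vmul totals drop from 13 to 1 because the vector cases now stay in d- and q-registers instead of being scalarized into 12 extra s-register operations. A minimal sketch of the expected mapping (my illustration, not part of the patch; typedefs as in the test itself, built at -O2 -ffast-math with Armv8.2 FP16 NEON enabled):

    typedef __fp16 float16_t;
    typedef __simd64_float16_t float16x4_t;
    typedef __simd128_float16_t float16x8_t;

    /* Scalar: one vadd.f16 on s-registers.  */
    float16_t add_s (float16_t a, float16_t b) { return a + b; }
    /* 64-bit vector: one vadd.f16 on d-registers.  */
    float16x4_t add_d (float16x4_t a, float16x4_t b) { return a + b; }
    /* 128-bit vector: one vadd.f16 on q-registers.  */
    float16x8_t add_q (float16x8_t a, float16x8_t b) { return a + b; }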
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c
+@@ -0,0 +1,109 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
++/* { dg-options "-O2 -fno-fast-math" } */
++/* { dg-add-options arm_v8_2a_fp16_neon } */
++
++/* Test instructions generated for half-precision arithmetic without
++ unsafe-math-optimizations. */
++
++typedef __fp16 float16_t;
++typedef __simd64_float16_t float16x4_t;
++typedef __simd128_float16_t float16x8_t;
++
++typedef short int16x4_t __attribute__ ((vector_size (8)));
++typedef short int int16x8_t __attribute__ ((vector_size (16)));
++
++float16_t
++fp16_abs (float16_t a)
++{
++ return (a < 0) ? -a : a;
++}
++
++#define TEST_UNOP(NAME, OPERATOR, TY) \
++ TY test_##NAME##_##TY (TY a) \
++ { \
++ return OPERATOR (a); \
++ }
++
++#define TEST_BINOP(NAME, OPERATOR, TY) \
++ TY test_##NAME##_##TY (TY a, TY b) \
++ { \
++ return a OPERATOR b; \
++ }
++
++#define TEST_CMP(NAME, OPERATOR, RTY, TY) \
++ RTY test_##NAME##_##TY (TY a, TY b) \
++ { \
++ return a OPERATOR b; \
++ }
++
++/* Scalars. */
++
++TEST_UNOP (neg, -, float16_t)
++TEST_UNOP (abs, fp16_abs, float16_t)
++
++TEST_BINOP (add, +, float16_t)
++TEST_BINOP (sub, -, float16_t)
++TEST_BINOP (mult, *, float16_t)
++TEST_BINOP (div, /, float16_t)
++
++TEST_CMP (equal, ==, int, float16_t)
++TEST_CMP (unequal, !=, int, float16_t)
++TEST_CMP (lessthan, <, int, float16_t)
++TEST_CMP (greaterthan, >, int, float16_t)
++TEST_CMP (lessthanequal, <=, int, float16_t)
++TEST_CMP (greaterthanqual, >=, int, float16_t)
++
++/* Vectors of size 4. */
++
++TEST_UNOP (neg, -, float16x4_t)
++
++TEST_BINOP (add, +, float16x4_t)
++TEST_BINOP (sub, -, float16x4_t)
++TEST_BINOP (mult, *, float16x4_t)
++TEST_BINOP (div, /, float16x4_t)
++
++TEST_CMP (equal, ==, int16x4_t, float16x4_t)
++TEST_CMP (unequal, !=, int16x4_t, float16x4_t)
++TEST_CMP (lessthan, <, int16x4_t, float16x4_t)
++TEST_CMP (greaterthan, >, int16x4_t, float16x4_t)
++TEST_CMP (lessthanequal, <=, int16x4_t, float16x4_t)
++TEST_CMP (greaterthanqual, >=, int16x4_t, float16x4_t)
++
++/* Vectors of size 8. */
++
++TEST_UNOP (neg, -, float16x8_t)
++
++TEST_BINOP (add, +, float16x8_t)
++TEST_BINOP (sub, -, float16x8_t)
++TEST_BINOP (mult, *, float16x8_t)
++TEST_BINOP (div, /, float16x8_t)
++
++TEST_CMP (equal, ==, int16x8_t, float16x8_t)
++TEST_CMP (unequal, !=, int16x8_t, float16x8_t)
++TEST_CMP (lessthan, <, int16x8_t, float16x8_t)
++TEST_CMP (greaterthan, >, int16x8_t, float16x8_t)
++TEST_CMP (lessthanequal, <=, int16x8_t, float16x8_t)
++TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
++
++/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } */
++/* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
++/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
++/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
++/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
++/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */
++
++/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */
++/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, #0} 2 } } */
++
++/* { dg-final { scan-assembler-not {vabs\.f16} } } */
++
++/* { dg-final { scan-assembler-not {vadd\.f32} } } */
++/* { dg-final { scan-assembler-not {vsub\.f32} } } */
++/* { dg-final { scan-assembler-not {vmul\.f32} } } */
++/* { dg-final { scan-assembler-not {vdiv\.f32} } } */
++/* { dg-final { scan-assembler-not {vcmp\.f16} } } */
++/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */
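+
The scan directives above encode the key difference from the -ffast-math variant: without unsafe-math-optimizations, __fp16 comparisons are widened and performed in single precision, so the test requires vcmp.f32/vcmpe.f32 and forbids vcmp.f16. A minimal sketch of the pattern being checked (my example, inferred from the scan directives, not part of the patch):

    typedef __fp16 float16_t;

    int lt (float16_t a, float16_t b)
    {
      /* Operands are widened to float first, so the compare is a
         vcmp.f32/vcmpe.f32 on s-registers, never a vcmp.f16.  */
      return a < b;
    }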
+--- a/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c
++++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c
+@@ -137,7 +137,7 @@
+ }
+
+ VCMP1_TEST (vceqz)
+-/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-0]+, #0} 1 } } */
++/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
+ /* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
+
+ VCMP1_TEST (vcgtz)
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c
+@@ -0,0 +1,491 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
++/* { dg-options "-O2 -ffast-math" } */
++/* { dg-add-options arm_v8_2a_fp16_neon } */
++
++/* Test instructions generated for the FP16 vector intrinsics with
++ -ffast-math. */
++
++#include <arm_neon.h>
++
++#define MSTRCAT(L, str) L##str
++
++#define UNOP_TEST(insn) \
++ float16x4_t \
++ MSTRCAT (test_##insn, _16x4) (float16x4_t a) \
++ { \
++ return MSTRCAT (insn, _f16) (a); \
++ } \
++ float16x8_t \
++ MSTRCAT (test_##insn, _16x8) (float16x8_t a) \
++ { \
++ return MSTRCAT (insn, q_f16) (a); \
++ }
++
++#define BINOP_TEST(insn) \
++ float16x4_t \
++ MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \
++ { \
++ return MSTRCAT (insn, _f16) (a, b); \
++ } \
++ float16x8_t \
++ MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \
++ { \
++ return MSTRCAT (insn, q_f16) (a, b); \
++ }
++
++#define BINOP_LANE_TEST(insn, I) \
++ float16x4_t \
++ MSTRCAT (test_##insn##_lane, _16x4) (float16x4_t a, float16x4_t b) \
++ { \
++ return MSTRCAT (insn, _lane_f16) (a, b, I); \
++ } \
++ float16x8_t \
++ MSTRCAT (test_##insn##_lane, _16x8) (float16x8_t a, float16x4_t b) \
++ { \
++ return MSTRCAT (insn, q_lane_f16) (a, b, I); \
++ }
++
++#define BINOP_LANEQ_TEST(insn, I) \
++ float16x4_t \
++ MSTRCAT (test_##insn##_laneq, _16x4) (float16x4_t a, float16x8_t b) \
++ { \
++ return MSTRCAT (insn, _laneq_f16) (a, b, I); \
++ } \
++ float16x8_t \
++ MSTRCAT (test_##insn##_laneq, _16x8) (float16x8_t a, float16x8_t b) \
++ { \
++ return MSTRCAT (insn, q_laneq_f16) (a, b, I); \
++ } \
++
++#define BINOP_N_TEST(insn) \
++ float16x4_t \
++ MSTRCAT (test_##insn##_n, _16x4) (float16x4_t a, float16_t b) \
++ { \
++ return MSTRCAT (insn, _n_f16) (a, b); \
++ } \
++ float16x8_t \
++ MSTRCAT (test_##insn##_n, _16x8) (float16x8_t a, float16_t b) \
++ { \
++ return MSTRCAT (insn, q_n_f16) (a, b); \
++ }
++
++#define TERNOP_TEST(insn) \
++ float16_t \
++ MSTRCAT (test_##insn, _16) (float16_t a, float16_t b, float16_t c) \
++ { \
++ return MSTRCAT (insn, h_f16) (a, b, c); \
++ } \
++ float16x4_t \
++ MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b, \
++ float16x4_t c) \
++ { \
++ return MSTRCAT (insn, _f16) (a, b, c); \
++ } \
++ float16x8_t \
++ MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b, \
++ float16x8_t c) \
++ { \
++ return MSTRCAT (insn, q_f16) (a, b, c); \
++ }
++
++#define VCMP1_TEST(insn) \
++ uint16x4_t \
++ MSTRCAT (test_##insn, _16x4) (float16x4_t a) \
++ { \
++ return MSTRCAT (insn, _f16) (a); \
++ } \
++ uint16x8_t \
++ MSTRCAT (test_##insn, _16x8) (float16x8_t a) \
++ { \
++ return MSTRCAT (insn, q_f16) (a); \
++ }
++
++#define VCMP2_TEST(insn) \
++ uint16x4_t \
++ MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \
++ { \
++ return MSTRCAT (insn, _f16) (a, b); \
++ } \
++ uint16x8_t \
++ MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \
++ { \
++ return MSTRCAT (insn, q_f16) (a, b); \
++ }
++
++#define VCVT_TEST(insn, TY, TO, FR) \
++ MSTRCAT (TO, 16x4_t) \
++ MSTRCAT (test_##insn, TY) (MSTRCAT (FR, 16x4_t) a) \
++ { \
++ return MSTRCAT (insn, TY) (a); \
++ } \
++ MSTRCAT (TO, 16x8_t) \
++ MSTRCAT (test_##insn##_q, TY) (MSTRCAT (FR, 16x8_t) a) \
++ { \
++ return MSTRCAT (insn, q##TY) (a); \
++ }
++
++#define VCVT_N_TEST(insn, TY, TO, FR) \
++ MSTRCAT (TO, 16x4_t) \
++ MSTRCAT (test_##insn##_n, TY) (MSTRCAT (FR, 16x4_t) a) \
++ { \
++ return MSTRCAT (insn, _n##TY) (a, 1); \
++ } \
++ MSTRCAT (TO, 16x8_t) \
++ MSTRCAT (test_##insn##_n_q, TY) (MSTRCAT (FR, 16x8_t) a) \
++ { \
++ return MSTRCAT (insn, q_n##TY) (a, 1); \
++ }
++
++VCMP1_TEST (vceqz)
++/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
++/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
++
++VCMP1_TEST (vcgtz)
++/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
++/* { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
++
++VCMP1_TEST (vcgez)
++/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
++/* { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
++
++VCMP1_TEST (vcltz)
++/* { dg-final { scan-assembler-times {vclt\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
++/* { dg-final { scan-assembler-times {vclt\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
++
++VCMP1_TEST (vclez)
++/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */
++/* { dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */
++
++VCVT_TEST (vcvt, _f16_s16, float, int)
++VCVT_N_TEST (vcvt, _f16_s16, float, int)
++/* { dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+, #1} 1 } }
++ { dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+, #1} 1 } } */
++
++VCVT_TEST (vcvt, _f16_u16, float, uint)
++VCVT_N_TEST (vcvt, _f16_u16, float, uint)
++/* { dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+, #1} 1 } }
++ { dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+, #1} 1 } } */
++
++VCVT_TEST (vcvt, _s16_f16, int, float)
++VCVT_N_TEST (vcvt, _s16_f16, int, float)
++/* { dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+, #1} 1 } }
++ { dg-final { scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */
++
++VCVT_TEST (vcvt, _u16_f16, uint, float)
++VCVT_N_TEST (vcvt, _u16_f16, uint, float)
++/* { dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+} 2 } }
++ { dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+, #1} 1 } }
++ { dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */
++
++VCVT_TEST (vcvta, _s16_f16, int, float)
++/* { dg-final { scan-assembler-times {vcvta\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvta\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++VCVT_TEST (vcvta, _u16_f16, uint, float)
++/* { dg-final { scan-assembler-times {vcvta\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvta\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++VCVT_TEST (vcvtm, _s16_f16, int, float)
++/* { dg-final { scan-assembler-times {vcvtm\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvtm\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++VCVT_TEST (vcvtm, _u16_f16, uint, float)
++/* { dg-final { scan-assembler-times {vcvtm\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvtm\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++VCVT_TEST (vcvtn, _s16_f16, int, float)
++/* { dg-final { scan-assembler-times {vcvtn\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvtn\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++VCVT_TEST (vcvtn, _u16_f16, uint, float)
++/* { dg-final { scan-assembler-times {vcvtn\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvtn\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++VCVT_TEST (vcvtp, _s16_f16, int, float)
++/* { dg-final { scan-assembler-times {vcvtp\.s16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvtp\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++VCVT_TEST (vcvtp, _u16_f16, uint, float)
++/* { dg-final { scan-assembler-times {vcvtp\.u16\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcvtp\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } }
++*/
++
++UNOP_TEST (vabs)
++/* { dg-final { scan-assembler-times {vabs\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vabs\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vneg)
++/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrecpe)
++/* { dg-final { scan-assembler-times {vrecpe\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrecpe\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrnd)
++/* { dg-final { scan-assembler-times {vrintz\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrintz\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrnda)
++/* { dg-final { scan-assembler-times {vrinta\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrinta\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrndm)
++/* { dg-final { scan-assembler-times {vrintm\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrintm\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrndn)
++/* { dg-final { scan-assembler-times {vrintn\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrintn\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrndp)
++/* { dg-final { scan-assembler-times {vrintp\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrintp\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrndx)
++/* { dg-final { scan-assembler-times {vrintx\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrintx\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++UNOP_TEST (vrsqrte)
++/* { dg-final { scan-assembler-times {vrsqrte\.f16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrsqrte\.f16\tq[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vadd)
++/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vabd)
++/* { dg-final { scan-assembler-times {vabd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vabd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vcage)
++/* { dg-final { scan-assembler-times {vacge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vacge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vcagt)
++/* { dg-final { scan-assembler-times {vacgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vacgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vcale)
++/* { dg-final { scan-assembler-times {vacle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vacle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vcalt)
++/* { dg-final { scan-assembler-times {vaclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vaclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vceq)
++/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vcge)
++/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vcgt)
++/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vcle)
++/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++VCMP2_TEST (vclt)
++/* { dg-final { scan-assembler-times {vclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vmax)
++/* { dg-final { scan-assembler-times {vmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vmax\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vmin)
++/* { dg-final { scan-assembler-times {vmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vmin\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vmaxnm)
++/* { dg-final { scan-assembler-times {vmaxnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vmaxnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vminnm)
++/* { dg-final { scan-assembler-times {vminnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vminnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vmul)
++/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 3 } }
++ { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++BINOP_LANE_TEST (vmul, 2)
++/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[2\]} 1 } }
++ { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[2\]} 1 } } */
++BINOP_N_TEST (vmul)
++/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\]} 1 } }
++ { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\]} 1 } }*/
++
++float16x4_t
++test_vpadd_16x4 (float16x4_t a, float16x4_t b)
++{
++ return vpadd_f16 (a, b);
++}
++/* { dg-final { scan-assembler-times {vpadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
++
++float16x4_t
++test_vpmax_16x4 (float16x4_t a, float16x4_t b)
++{
++ return vpmax_f16 (a, b);
++}
++/* { dg-final { scan-assembler-times {vpmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
++
++float16x4_t
++test_vpmin_16x4 (float16x4_t a, float16x4_t b)
++{
++ return vpmin_f16 (a, b);
++}
++/* { dg-final { scan-assembler-times {vpmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
++
++BINOP_TEST (vsub)
++/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vrecps)
++/* { dg-final { scan-assembler-times {vrecps\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrecps\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++BINOP_TEST (vrsqrts)
++/* { dg-final { scan-assembler-times {vrsqrts\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrsqrts\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++TERNOP_TEST (vfma)
++/* { dg-final { scan-assembler-times {vfma\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vfma\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++TERNOP_TEST (vfms)
++/* { dg-final { scan-assembler-times {vfms\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++float16x4_t
++test_vmov_n_f16 (float16_t a)
++{
++ return vmov_n_f16 (a);
++}
++
++float16x4_t
++test_vdup_n_f16 (float16_t a)
++{
++ return vdup_n_f16 (a);
++}
++/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, r[0-9]+} 2 } } */
++
++float16x8_t
++test_vmovq_n_f16 (float16_t a)
++{
++ return vmovq_n_f16 (a);
++}
++
++float16x8_t
++test_vdupq_n_f16 (float16_t a)
++{
++ return vdupq_n_f16 (a);
++}
++/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, r[0-9]+} 2 } } */
++
++float16x4_t
++test_vdup_lane_f16 (float16x4_t a)
++{
++ return vdup_lane_f16 (a, 1);
++}
++/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, d[0-9]+\[1\]} 1 } } */
++
++float16x8_t
++test_vdupq_lane_f16 (float16x4_t a)
++{
++ return vdupq_lane_f16 (a, 1);
++}
++/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, d[0-9]+\[1\]} 1 } } */
++
++float16x4_t
++test_vext_f16 (float16x4_t a, float16x4_t b)
++{
++ return vext_f16 (a, b, 1);
++}
++/* { dg-final { scan-assembler-times {vext\.16\td[0-9]+, d[0-9]+, d[0-9]+, #1} 1 } } */
++
++float16x8_t
++test_vextq_f16 (float16x8_t a, float16x8_t b)
++{
++ return vextq_f16 (a, b, 1);
++}
++/* { dg-final { scan-assembler-times {vext\.16\tq[0-9]+, q[0-9]+, q[0-9]+, #1} 1 } } */
++
++UNOP_TEST (vrev64)
++/* { dg-final { scan-assembler-times {vrev64\.16\td[0-9]+, d[0-9]+} 1 } }
++ { dg-final { scan-assembler-times {vrev64\.16\tq[0-9]+, q[0-9]+} 1 } } */
++
++float16x4_t
++test_vbsl16x4 (uint16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vbsl_f16 (a, b, c);
++}
++/* { dg-final { scan-assembler-times {vbsl\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */
++
++float16x8_t
++test_vbslq16x8 (uint16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vbslq_f16 (a, b, c);
++}
++/*{ dg-final { scan-assembler-times {vbsl\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
++
++float16x4x2_t
++test_vzip16x4 (float16x4_t a, float16x4_t b)
++{
++ return vzip_f16 (a, b);
++}
++/* { dg-final { scan-assembler-times {vzip\.16\td[0-9]+, d[0-9]+} 1 } } */
++
++float16x8x2_t
++test_vzipq16x8 (float16x8_t a, float16x8_t b)
++{
++ return vzipq_f16 (a, b);
++}
++/*{ dg-final { scan-assembler-times {vzip\.16\tq[0-9]+, q[0-9]+} 1 } } */
++
++float16x4x2_t
++test_vuzp16x4 (float16x4_t a, float16x4_t b)
++{
++ return vuzp_f16 (a, b);
++}
++/* { dg-final { scan-assembler-times {vuzp\.16\td[0-9]+, d[0-9]+} 1 } } */
++
++float16x8x2_t
++test_vuzpq16x8 (float16x8_t a, float16x8_t b)
++{
++ return vuzpq_f16 (a, b);
++}
++/*{ dg-final { scan-assembler-times {vuzp\.16\tq[0-9]+, q[0-9]+} 1 } } */
++
++float16x4x2_t
++test_vtrn16x4 (float16x4_t a, float16x4_t b)
++{
++ return vtrn_f16 (a, b);
++}
++/* { dg-final { scan-assembler-times {vtrn\.16\td[0-9]+, d[0-9]+} 1 } } */
++
++float16x8x2_t
++test_vtrnq16x8 (float16x8_t a, float16x8_t b)
++{
++ return vtrnq_f16 (a, b);
++}
++/*{ dg-final { scan-assembler-times {vtrn\.16\tq[0-9]+, q[0-9]+} 1 } } */
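+
For reference, each of the helper macros in this test stamps out a d-register and a q-register variant of the intrinsic under test; e.g. BINOP_TEST (vadd) expands to roughly the following, which is what the paired d/q scan directives count (expansion written out by hand from the macro definition above):

    #include <arm_neon.h>

    float16x4_t test_vadd_16x4 (float16x4_t a, float16x4_t b)
    { return vadd_f16 (a, b); }   /* matches the d-register vadd.f16 scan */

    float16x8_t test_vadd_16x8 (float16x8_t a, float16x8_t b)
    { return vaddq_f16 (a, b); }  /* matches the q-register vadd.f16 scan */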
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-3.c
+@@ -0,0 +1,108 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */
++/* { dg-options "-O2 -ffast-math" } */
++/* { dg-add-options arm_v8_2a_fp16_neon } */
++
++/* Test compiler use of FP16 FMA/FMS instructions with -ffast-math. */
++
++#include <arm_neon.h>
++
++float16x4_t
++test_vfma_1 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vadd_f16 (vmul_f16 (a, b), c);
++}
++
++float16x4_t
++test_vfma_2 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vsub_f16 (vmul_f16 (a, b), vneg_f16 (c));
++}
++
++float16x4_t
++test_vfma_3 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vsub_f16 (vmul_f16 (vneg_f16 (a), vneg_f16 (b)), vneg_f16 (c));
++}
++
++float16x4_t
++test_vfma_4 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vsub_f16 (vmul_f16 (a, b), vneg_f16 (c));
++}
++/* { dg-final { scan-assembler-times {vfma\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 4 } } */
++
++float16x8_t
++test_vfmaq_1 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vaddq_f16 (vmulq_f16 (a, b), c);
++}
+
- if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
- {
- val = INTVAL (op1) & (GET_MODE_PRECISION (mode) - 1);
-Index: b/src/gcc/testsuite/gcc.c-torture/execute/pr78622.c
-===================================================================
---- a/src/gcc/testsuite/gcc.c-torture/execute/pr78622.c
-+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr78622.c
-@@ -1,6 +1,7 @@
- /* PR middle-end/78622 - [7 Regression] -Wformat-overflow/-fprintf-return-value
- incorrect with overflow/wrapping
- { dg-skip-if "Requires %hhd format" { hppa*-*-hpux* } { "*" } { "" } }
-+ { dg-require-effective-target c99_runtime }
- { dg-additional-options "-Wformat-overflow=2" } */
-
- __attribute__((noinline, noclone)) int
-Index: b/src/gcc/testsuite/gcc.dg/lsr-div1.c
-===================================================================
++float16x8_t
++test_vfmaq_2 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vsubq_f16 (vmulq_f16 (a, b), vnegq_f16 (c));
++}
++
++float16x8_t
++test_vfmaq_3 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vsubq_f16 (vmulq_f16 (vnegq_f16 (a), vnegq_f16 (b)), vnegq_f16 (c));
++}
++
++float16x8_t
++test_vfmaq_4 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vsubq_f16 (vmulq_f16 (a, b), vnegq_f16 (c));
++}
++/* { dg-final { scan-assembler-times {vfma\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
++
++float16x4_t
++test_vfms_1 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vsub_f16 (c, vmul_f16 (a, b));
++}
++
++float16x4_t
++test_vfms_2 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vsub_f16 (a, vmul_f16 (b, c));
++}
++
++float16x4_t
++test_vfms_3 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vadd_f16 (vmul_f16 (vneg_f16 (a), b), c);
++}
++
++float16x4_t
++test_vfms_4 (float16x4_t a, float16x4_t b, float16x4_t c)
++{
++ return vadd_f16 (vmul_f16 (a, vneg_f16 (b)), c);
++}
++/* { dg-final { scan-assembler-times {vfms\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 4 } } */
++
++float16x8_t
++test_vfmsq_1 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vsubq_f16 (c, vmulq_f16 (a, b));
++}
++
++float16x8_t
++test_vfmsq_2 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vsubq_f16 (a, vmulq_f16 (b, c));
++}
++
++float16x8_t
++test_vfmsq_3 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vaddq_f16 (vmulq_f16 (vnegq_f16 (a), b), c);
++}
++
++float16x8_t
++test_vfmsq_4 (float16x8_t a, float16x8_t b, float16x8_t c)
++{
++ return vaddq_f16 (vmulq_f16 (a, vnegq_f16 (b)), c);
++}
++/* { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 4 } } */
--- /dev/null
-+++ b/src/gcc/testsuite/gcc.dg/lsr-div1.c
-@@ -0,0 +1,57 @@
-+/* Test division by const int generates only one shift. */
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fdump-rtl-combine-all" } */
-+/* { dg-options "-O2 -fdump-rtl-combine-all -mtune=cortex-a53" { target aarch64*-*-* } } */
-+/* { dg-require-effective-target int32plus } */
++++ b/src/gcc/testsuite/gcc.target/arm/movdi_movt.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile { target { arm_cortex_m && { arm_thumb2_ok || arm_thumb1_movt_ok } } } } */
++/* { dg-options "-O2 -mslow-flash-data" } */
+
-+extern void abort (void);
++unsigned long long
++movdi_1 (int a)
++{
++ return 0xF0F00000LLU;
++}
+
-+#define NOINLINE __attribute__((noinline))
++unsigned long long
++movdi_2 (int a)
++{
++ return 0xF0F0000000000000LLU;
++}
+
-+static NOINLINE int
-+f1 (unsigned int n)
++/* Accept r1 because big endian targets put the low bits in the highest
++ numbered register of a pair. */
++/* { dg-final { scan-assembler-times "movt\tr\[01\], 61680" 2 } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/movsi_movt.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile { target { arm_cortex_m && { arm_thumb2_ok || arm_thumb1_movt_ok } } } } */
++/* { dg-options "-O2 -mslow-flash-data" } */
++
++unsigned
++movsi (void)
+{
-+ return n % 0x33;
++ return 0xF0F00000U;
+}
+
-+static NOINLINE int
-+f2 (unsigned int n)
++/* { dg-final { scan-assembler-times "movt\tr0, 61680" 1 } } */
+--- a/src/gcc/testsuite/gcc.target/arm/pr69180.c
++++ b/src/gcc/testsuite/gcc.target/arm/pr69180.c
+@@ -8,9 +8,10 @@
+ #pragma GCC target ("fpu=neon-fp-armv8")
+
+ #define __ARM_NEON_FP 0
++/* { dg-warning ".__ARM_NEON_FP. redefined" "" { target *-*-* } .-1 } */
++
+ #define __ARM_FP 0
+-#define __ARM_FEATURE_LDREX 0
++/* { dg-warning ".__ARM_FP. redefined" "" { target *-*-* } .-1 } */
+
+-/* { dg-warning ".__ARM_NEON_FP. redefined" "" { target *-*-* } 10 } */
+-/* { dg-warning ".__ARM_FP. redefined" "" { target *-*-* } 11 } */
+-/* { dg-warning ".__ARM_FEATURE_LDREX. redefined" "" { target *-*-* } 12 } */
++#define __ARM_FEATURE_LDREX 0
++/* { dg-warning ".__ARM_FEATURE_LDREX. redefined" "" { target *-*-* } .-1 } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/sdiv_costs_1.c
+@@ -0,0 +1,38 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -march=armv8-a" } */
++
++/* Both sdiv and udiv can be used here, so prefer udiv. */
++int f1 (unsigned char *p)
+{
-+ return n % 0x12;
++ return 100 / p[1];
+}
+
-+int
-+main ()
++int f2 (unsigned char *p, unsigned short x)
+{
-+ int a = 0xaaaaaaaa;
-+ int b = 0x55555555;
-+ int c;
-+ c = f1 (a);
-+ if (c != 0x11)
-+ abort ();
-+ c = f1 (b);
-+ if (c != 0x22)
-+ abort ();
-+ c = f2 (a);
-+ if (c != 0xE)
-+ abort ();
-+ c = f2 (b);
-+ if (c != 0x7)
-+ abort ();
-+ return 0;
++ return x / p[0];
+}
+
-+/* Following replacement pattern of intger division by constant, GCC is expected
-+ to generate UMULL and (x)SHIFTRT. This test checks that considering division
-+ by const 0x33, gcc generates a single LSHIFTRT by 37, instead of
-+ two - LSHIFTRT by 32 and LSHIFTRT by 5. */
++int f3 (unsigned char *p, int x)
++{
++ x &= 0x7fffffff;
++ return x / p[0];
++}
+
-+/* { dg-final { scan-rtl-dump "\\(set \\(subreg:DI \\(reg:SI" "combine" { target aarch64*-*-* } } } */
-+/* { dg-final { scan-rtl-dump "\\(lshiftrt:DI \\(reg:DI" "combine" { target aarch64*-*-* } } } */
-+/* { dg-final { scan-rtl-dump "\\(const_int 37 " "combine" { target aarch64*-*-* } } } */
++int f5 (unsigned char *p, unsigned short x)
++{
++ return x % p[0];
++}
+
-+/* Similarly, considering division by const 0x12, gcc generates a
-+ single LSHIFTRT by 34, instead of two - LSHIFTRT by 32 and LSHIFTRT by 2. */
++/* This should only generate signed divisions. */
++int f4 (unsigned char *p)
++{
++ return -100 / p[1];
++}
+
-+/* { dg-final { scan-rtl-dump "\\(const_int 34 " "combine" { target aarch64*-*-* } } } */
++int f6 (unsigned char *p, short x)
++{
++ return x % p[0];
++}
+
-Index: b/src/gcc/testsuite/gcc.target/arm/fpscr.c
-===================================================================
++/* { dg-final { scan-assembler-times "udiv\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 4 } } */
++/* { dg-final { scan-assembler-times "sdiv\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 2 } } */
+--- a/src/gcc/testsuite/gcc.target/arm/stack-checking.c
++++ b/src/gcc/testsuite/gcc.target/arm/stack-checking.c
+@@ -1,6 +1,6 @@
+ /* { dg-do run { target { *-*-linux* } } } */
++/* { dg-require-stack-check "" } */
+ /* { dg-options "-fstack-check" } */
+-/* { dg-skip-if "" { arm_thumb1 } } */
+
+ int main(void)
+ {
--- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/arm/fpscr.c
-@@ -0,0 +1,16 @@
-+/* Test the fpscr builtins. */
++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-1.c
+@@ -0,0 +1,73 @@
++/* The option -mslow-flash-data is just for performance tuning; it
++ doesn't totally disable the use of literal pools. But for the simple
++ cases below, the use of a literal pool should be replaced by
++ movw/movt or a read-only constant pool. */
+
+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_fp_ok } */
-+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
-+/* { dg-add-options arm_fp } */
++/* { dg-require-effective-target arm_cortex_m } */
++/* { dg-require-effective-target arm_thumb2_ok } */
++/* { dg-options "-O2 -mthumb -mslow-flash-data" } */
+
-+void
-+test_fpscr ()
++float sf;
++double df;
++long long l;
++static char *p = "Hello World";
++
++float
++testsf (float *p)
++{
++ if (*p > 1.1234f)
++ return 2.1234f;
++ else
++ return 3.1234f;
++}
++
++double
++testdf (double *p)
++{
++ if (*p > 4.1234)
++ return 2.1234;
++ else
++ return 3.1234;
++}
++
++long long
++testll (long long *p)
++{
++ if (*p > 0x123456789ABCDEFll)
++ return 0x111111111ll;
++ else
++ return 0x222222222ll;
++}
++
++char *
++testchar ()
++{
++ return p + 4;
++}
++
++int
++foo (int a, int b)
+{
-+ volatile unsigned int status = __builtin_arm_get_fpscr ();
-+ __builtin_arm_set_fpscr (status);
++ int i;
++ volatile int *labelref = &&label1;
++
++ if (a > b)
++ {
++ while (i < b)
++ {
++ a += *labelref;
++ i += 1;
++ }
++ goto *labelref;
++ }
++ else
++ b = b + 3;
++
++ a = a * b;
++
++label1:
++ return a + b;
++}
++
++/* { dg-final { scan-assembler-not "\\.(float|l\\?double|\d?byte|short|int|long|quad|word)\\s+\[^.\]" } } */
+--- a/src/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data.c
++++ b/src//dev/null
+@@ -1,73 +0,0 @@
+-/* The option -mslow-flash-data is just for performance tuning, it
+- doesn't totally disable the use of literal pools. But for below
+- simple cases, the use of literal pool should be replaced by
+- movw/movt or read-only constant pool. */
+-
+-/* { dg-do compile } */
+-/* { dg-require-effective-target arm_cortex_m } */
+-/* { dg-require-effective-target arm_thumb2_ok } */
+-/* { dg-options "-O2 -mthumb -mslow-flash-data" } */
+-
+-float sf;
+-double df;
+-long long l;
+-static char *p = "Hello World";
+-
+-float
+-testsf (float *p)
+-{
+- if (*p > 1.1234f)
+- return 2.1234f;
+- else
+- return 3.1234f;
+-}
+-
+-double
+-testdf (double *p)
+-{
+- if (*p > 4.1234)
+- return 2.1234;
+- else
+- return 3.1234;
+-}
+-
+-long long
+-testll (long long *p)
+-{
+- if (*p > 0x123456789ABCDEFll)
+- return 0x111111111ll;
+- else
+- return 0x222222222ll;
+-}
+-
+-char *
+-testchar ()
+-{
+- return p + 4;
+-}
+-
+-int
+-foo (int a, int b)
+-{
+- int i;
+- volatile int *labelref = &&label1;
+-
+- if (a > b)
+- {
+- while (i < b)
+- {
+- a += *labelref;
+- i += 1;
+- }
+- goto *labelref;
+- }
+- else
+- b = b + 3;
+-
+- a = a * b;
+-
+-label1:
+- return a + b;
+-}
+-
+-/* { dg-final { scan-assembler-not "\\.(float|l\\?double|\d?byte|short|int|long|quad|word)\\s+\[^.\]" } } */
+--- a/src/gcc/testsuite/gcc.target/i386/pr48723.c
++++ b/src/gcc/testsuite/gcc.target/i386/pr48723.c
+@@ -1,4 +1,5 @@
+ /* { dg-do compile } */
++/* { dg-require-stack-check "" } */
+ /* { dg-options "-fstack-check -mavx" } */
+
+ struct S0
+--- a/src/gcc/testsuite/gcc.target/i386/pr55672.c
++++ b/src/gcc/testsuite/gcc.target/i386/pr55672.c
+@@ -1,4 +1,5 @@
+ /* { dg-do compile } */
++/* { dg-require-stack-check "generic" } */
+ /* { dg-options "-O -fstack-check=generic" } */
+
+ int main ()
+--- a/src/gcc/testsuite/gcc.target/i386/pr67265-2.c
++++ b/src/gcc/testsuite/gcc.target/i386/pr67265-2.c
+@@ -1,4 +1,5 @@
+ /* { dg-do compile } */
++/* { dg-require-stack-check "" } */
+ /* { dg-options "-O -fstack-check" } */
+
+ void foo (int n)
+--- a/src/gcc/testsuite/gcc.target/i386/pr67265.c
++++ b/src/gcc/testsuite/gcc.target/i386/pr67265.c
+@@ -2,6 +2,7 @@
+ /* Reduced testcase by Johannes Dewender <gnu at JonnyJD.net> */
+
+ /* { dg-do compile } */
++/* { dg-require-stack-check "" } */
+ /* { dg-options "-O -fstack-check -fPIC" } */
+
+ int a, b, c, d, e;
+--- a/src/gcc/testsuite/gnat.dg/opt49.adb
++++ b/src/gcc/testsuite/gnat.dg/opt49.adb
+@@ -1,4 +1,5 @@
+ -- { dg-do run }
++-- { dg-require-stack-check "" }
+ -- { dg-options "-O -fstack-check" }
+
+ procedure Opt49 is
+--- a/src/gcc/testsuite/gnat.dg/stack_check1.adb
++++ b/src/gcc/testsuite/gnat.dg/stack_check1.adb
+@@ -1,4 +1,5 @@
+ -- { dg-do run }
++-- { dg-require-stack-check "" }
+ -- { dg-options "-fstack-check" }
+
+ -- This test requires architecture- and OS-specific support code for unwinding
+--- a/src/gcc/testsuite/gnat.dg/stack_check2.adb
++++ b/src/gcc/testsuite/gnat.dg/stack_check2.adb
+@@ -1,4 +1,5 @@
+ -- { dg-do run }
++-- { dg-require-stack-check "" }
+ -- { dg-options "-fstack-check" }
+
+ -- This test requires architecture- and OS-specific support code for unwinding
+--- a/src/gcc/testsuite/gnat.dg/stack_check3.adb
++++ b/src/gcc/testsuite/gnat.dg/stack_check3.adb
+@@ -1,4 +1,5 @@
+ -- { dg-do compile }
++-- { dg-require-stack-check "" }
+ -- { dg-options "-O -fstack-check" }
+
+ package body Stack_Check3 is
+--- a/src/gcc/testsuite/lib/target-supports-dg.exp
++++ b/src/gcc/testsuite/lib/target-supports-dg.exp
+@@ -265,6 +265,21 @@ proc dg-require-linker-plugin { args } {
+ }
+ }
+
++# If this target does not support the "stack-check" option, skip this
++# test.
++
++proc dg-require-stack-check { args } {
++ set stack_check_available [ check_stack_check_available [lindex $args 1 ] ]
++ if { $stack_check_available == -1 } {
++ upvar name name
++ unresolved "$name"
++ }
++ if { $stack_check_available != 1 } {
++ upvar dg-do-what dg-do-what
++ set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"]
++ }
++}
++
+ # Add any target-specific flags needed for accessing the given list
+ # of features. This must come after all dg-options.
+
+--- a/src/gcc/testsuite/lib/target-supports.exp
++++ b/src/gcc/testsuite/lib/target-supports.exp
+@@ -1029,6 +1029,17 @@ proc check_effective_target_fstack_protector {} {
+ } "-fstack-protector"]
+ }
+
++# Return 1 if the target supports -fstack-check or -fstack-check=$stack_kind
++proc check_stack_check_available { stack_kind } {
++ if [string match "" $stack_kind] then {
++ set stack_opt "-fstack-check"
++ } else { set stack_opt "-fstack-check=$stack_kind" }
++
++ return [check_no_compiler_messages stack_check executable {
++ int main (void) { return 0; }
++ } "$stack_opt"]
+}
+
-+/* { dg-final { scan-assembler "mrc\tp10, 7, r\[0-9\]+, cr1, cr0, 0" } } */
-+/* { dg-final { scan-assembler "mcr\tp10, 7, r\[0-9\]+, cr1, cr0, 0" } } */
+ # Return 1 if compilation with -freorder-blocks-and-partition is error-free
+ # for trivial code, 0 otherwise. As some targets (ARM for example) only
+ # warn when -fprofile-use is also supplied we test that combination too.
+--- a/src/gcc/tree-ssa-dce.c
++++ b/src/gcc/tree-ssa-dce.c
+@@ -233,6 +233,8 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
+ case BUILT_IN_CALLOC:
+ case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
++ case BUILT_IN_STRDUP:
++ case BUILT_IN_STRNDUP:
+ return;
+
+ default:;
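+
Adding BUILT_IN_STRDUP and BUILT_IN_STRNDUP to this early-return list means DCE no longer treats calls to them as inherently necessary, putting them on the same footing as malloc/calloc. A minimal sketch of the effect (my example, not from the patch):

    #include <string.h>

    void
    drop_copy (const char *s)
    {
      /* The result is never used; with strdup on the list above, DCE
         can now delete the call (and its allocation) outright instead
         of keeping it for its memory-allocating side effect.  */
      char *p = strdup (s);
      (void) p;
    }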
+--- a/src/gcc/tree-ssa-loop-prefetch.c
++++ b/src/gcc/tree-ssa-loop-prefetch.c
+@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "tree-inline.h"
+ #include "tree-data-ref.h"
+ #include "diagnostic-core.h"
++#include "dbgcnt.h"
+
+ /* This pass inserts prefetch instructions to optimize cache usage during
+ accesses to arrays in loops. It processes loops sequentially and:
+@@ -227,6 +228,7 @@ struct mem_ref_group
+ tree step; /* Step of the reference. */
+ struct mem_ref *refs; /* References in the group. */
+ struct mem_ref_group *next; /* Next group of references. */
++ unsigned int uid; /* Group UID, used only for debugging. */
+ };
+
+ /* Assigned to PREFETCH_BEFORE when all iterations are to be prefetched. */
+@@ -269,6 +271,7 @@ struct mem_ref
+ unsigned reuse_distance; /* The amount of data accessed before the first
+ reuse of this value. */
+ struct mem_ref *next; /* The next reference in the group. */
++ unsigned int uid; /* Ref UID, used only for debugging. */
+ unsigned write_p : 1; /* Is it a write? */
+ unsigned independent_p : 1; /* True if the reference is independent on
+ all other references inside the loop. */
+@@ -290,11 +293,8 @@ dump_mem_details (FILE *file, tree base, tree step,
+ else
+ print_generic_expr (file, step, TDF_TREE);
+ fprintf (file, ")\n");
+- fprintf (file, " delta ");
+- fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
+- fprintf (file, "\n");
+- fprintf (file, " %s\n", write_p ? "write" : "read");
+- fprintf (file, "\n");
++ fprintf (file, " delta " HOST_WIDE_INT_PRINT_DEC "\n", delta);
++ fprintf (file, " %s\n\n", write_p ? "write" : "read");
+ }
+
+ /* Dumps information about reference REF to FILE. */
+@@ -302,12 +302,9 @@ dump_mem_details (FILE *file, tree base, tree step,
+ static void
+ dump_mem_ref (FILE *file, struct mem_ref *ref)
+ {
+- fprintf (file, "Reference %p:\n", (void *) ref);
+-
+- fprintf (file, " group %p ", (void *) ref->group);
+-
+- dump_mem_details (file, ref->group->base, ref->group->step, ref->delta,
+- ref->write_p);
++ fprintf (file, "reference %u:%u (", ref->group->uid, ref->uid);
++ print_generic_expr (file, ref->mem, TDF_SLIM);
++ fprintf (file, ")\n");
+ }
+
+ /* Finds a group with BASE and STEP in GROUPS, or creates one if it does not
+@@ -316,6 +313,9 @@ dump_mem_ref (FILE *file, struct mem_ref *ref)
+ static struct mem_ref_group *
+ find_or_create_group (struct mem_ref_group **groups, tree base, tree step)
+ {
++ /* Global count for setting struct mem_ref_group->uid. */
++ static unsigned int last_mem_ref_group_uid = 0;
++
+ struct mem_ref_group *group;
+
+ for (; *groups; groups = &(*groups)->next)
+@@ -335,6 +335,7 @@ find_or_create_group (struct mem_ref_group **groups, tree base, tree step)
+ group->base = base;
+ group->step = step;
+ group->refs = NULL;
++ group->uid = ++last_mem_ref_group_uid;
+ group->next = *groups;
+ *groups = group;
+
+@@ -348,11 +349,14 @@ static void
+ record_ref (struct mem_ref_group *group, gimple *stmt, tree mem,
+ HOST_WIDE_INT delta, bool write_p)
+ {
++ unsigned int last_mem_ref_uid = 0;
+ struct mem_ref **aref;
+
+ /* Do not record the same address twice. */
+ for (aref = &group->refs; *aref; aref = &(*aref)->next)
+ {
++ last_mem_ref_uid = (*aref)->uid;
++
+ /* It does not have to be possible for write reference to reuse the read
+ prefetch, or vice versa. */
+ if (!WRITE_CAN_USE_READ_PREFETCH
+@@ -381,9 +385,16 @@ record_ref (struct mem_ref_group *group, gimple *stmt, tree mem,
+ (*aref)->next = NULL;
+ (*aref)->independent_p = false;
+ (*aref)->storent_p = false;
++ (*aref)->uid = last_mem_ref_uid + 1;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+- dump_mem_ref (dump_file, *aref);
++ {
++ dump_mem_ref (dump_file, *aref);
++
++ fprintf (dump_file, " group %u ", group->uid);
++ dump_mem_details (dump_file, group->base, group->step, delta,
++ write_p);
++ }
+ }
+
+ /* Release memory references in GROUPS. */
+@@ -938,7 +949,7 @@ prune_group_by_reuse (struct mem_ref_group *group)
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+- fprintf (dump_file, "Reference %p:", (void *) ref_pruned);
++ dump_mem_ref (dump_file, ref_pruned);
+
+ if (ref_pruned->prefetch_before == PREFETCH_ALL
+ && ref_pruned->prefetch_mod == 1)
+@@ -986,8 +997,8 @@ should_issue_prefetch_p (struct mem_ref *ref)
+ if (ref->prefetch_before != PREFETCH_ALL)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+- fprintf (dump_file, "Ignoring %p due to prefetch_before\n",
+- (void *) ref);
++ fprintf (dump_file, "Ignoring reference %u:%u due to prefetch_before\n",
++ ref->group->uid, ref->uid);
+ return false;
+ }
+
+@@ -995,7 +1006,7 @@ should_issue_prefetch_p (struct mem_ref *ref)
+ if (ref->storent_p)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+- fprintf (dump_file, "Ignoring nontemporal store %p\n", (void *) ref);
++ fprintf (dump_file, "Ignoring nontemporal store reference %u:%u\n", ref->group->uid, ref->uid);
+ return false;
+ }
+
+@@ -1058,7 +1069,14 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
+ if (2 * remaining_prefetch_slots < prefetch_slots)
+ continue;
+
++ /* Stop prefetching if debug counter is activated. */
++ if (!dbg_cnt (prefetch))
++ continue;
++
+ ref->issue_prefetch_p = true;
++ if (dump_file && (dump_flags & TDF_DETAILS))
++ fprintf (dump_file, "Decided to issue prefetch for reference %u:%u\n",
++ ref->group->uid, ref->uid);
+
+ if (remaining_prefetch_slots <= prefetch_slots)
+ return true;
+@@ -1122,9 +1140,9 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
+ bool nontemporal = ref->reuse_distance >= L2_CACHE_SIZE_BYTES;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+- fprintf (dump_file, "Issued%s prefetch for %p.\n",
++ fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n",
+ nontemporal ? " nontemporal" : "",
+- (void *) ref);
++ ref->group->uid, ref->uid);
+
+ bsi = gsi_for_stmt (ref->stmt);
+
+@@ -1144,8 +1162,8 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
+ delta = (ahead + ap * ref->prefetch_mod) *
+ int_cst_value (ref->group->step);
+ addr = fold_build_pointer_plus_hwi (addr_base, delta);
+- addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL,
+- true, GSI_SAME_STMT);
++ addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
++ NULL, true, GSI_SAME_STMT);
+ }
+ else
+ {
+@@ -1229,8 +1247,8 @@ mark_nontemporal_store (struct mem_ref *ref)
+ return false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+- fprintf (dump_file, "Marked reference %p as a nontemporal store.\n",
+- (void *) ref);
++ fprintf (dump_file, "Marked reference %u:%u as a nontemporal store.\n",
++ ref->group->uid, ref->uid);
+
+ gimple_assign_set_nontemporal_move (ref->stmt, true);
+ ref->storent_p = true;
+@@ -1340,7 +1358,7 @@ should_unroll_loop_p (struct loop *loop, struct tree_niter_desc *desc,
+
+ /* Determine the coefficient by that unroll LOOP, from the information
+ contained in the list of memory references REFS. Description of
+- umber of iterations of LOOP is stored to DESC. NINSNS is the number of
++ number of iterations of LOOP is stored to DESC. NINSNS is the number of
+ insns of the LOOP. EST_NITER is the estimated number of iterations of
+ the loop, or -1 if no estimate is available. */
+
+@@ -1715,8 +1733,8 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
+ fprintf (dump_file, "Reuse distances:\n");
+ for (gr = refs; gr; gr = gr->next)
+ for (ref = gr->refs; ref; ref = ref->next)
+- fprintf (dump_file, " ref %p distance %u\n",
+- (void *) ref, ref->reuse_distance);
++ fprintf (dump_file, " reference %u:%u distance %u\n",
++ ref->group->uid, ref->uid, ref->reuse_distance);
+ }
+
+ return true;
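+
With the new uid fields, the prefetch pass dump (e.g. via -fdump-tree-aprefetch-details; the pass name is an assumption, it is not shown in this hunk) refers to references by stable group:ref numbers instead of raw pointers. Hypothetical output lines, with invented values but following the fprintf format strings above:

    reference 1:2 (a[i_10])
    Decided to issue prefetch for reference 1:2
    Issued nontemporal prefetch for reference 1:2.

The new dbg_cnt (prefetch) guard also makes prefetch insertion bisectable with -fdbg-cnt, assuming a matching "prefetch" counter is registered in dbgcnt.def (that change is outside this hunk).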
+--- a/src/libgcc/config.host
++++ b/src/libgcc/config.host
+@@ -231,6 +231,10 @@ case ${host} in
+ ;;
+ esac
+ ;;
++*-*-fuchsia*)
++ tmake_file="$tmake_file t-crtstuff-pic t-libgcc-pic t-eh-dw2-dip t-slibgcc t-slibgcc-fuchsia"
++ extra_parts="crtbegin.o crtend.o"
++ ;;
+ *-*-linux* | frv-*-*linux* | *-*-kfreebsd*-gnu | *-*-gnu* | *-*-kopensolaris*-gnu)
+ tmake_file="$tmake_file t-crtstuff-pic t-libgcc-pic t-eh-dw2-dip t-slibgcc t-slibgcc-gld t-slibgcc-elf-ver t-linux"
+ extra_parts="crtbegin.o crtbeginS.o crtbeginT.o crtend.o crtendS.o"
+@@ -342,6 +346,10 @@ aarch64*-*-freebsd*)
+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
+ md_unwind_header=aarch64/freebsd-unwind.h
+ ;;
++aarch64*-*-fuchsia*)
++ tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
++ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
++ ;;
+ aarch64*-*-linux*)
+ extra_parts="$extra_parts crtfastmath.o"
+ md_unwind_header=aarch64/linux-unwind.h
+@@ -394,6 +402,12 @@ arm*-*-freebsd*) # ARM FreeBSD EABI
+ unwind_header=config/arm/unwind-arm.h
+ tmake_file="${tmake_file} t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
+ ;;
++arm*-*-fuchsia*)
++ tmake_file="${tmake_file} arm/t-arm arm/t-elf arm/t-bpabi"
++ tmake_file="${tmake_file} arm/tsoftfp t-softfp"
++ tm_file="${tm_file} arm/bpabi-lib.h"
++ unwind_header=config/arm/unwind-arm.h
++ ;;
+ arm*-*-netbsdelf*)
+ tmake_file="$tmake_file arm/t-arm arm/t-netbsd t-slibgcc-gld-nover"
+ ;;
+@@ -588,6 +602,9 @@ i[34567]86-*-elf*)
+ x86_64-*-elf* | x86_64-*-rtems*)
+ tmake_file="$tmake_file i386/t-crtstuff t-crtstuff-pic t-libgcc-pic"
+ ;;
++x86_64-*-fuchsia*)
++ tmake_file="$tmake_file t-libgcc-pic"
++ ;;
+ i[34567]86-*-dragonfly*)
+ tmake_file="${tmake_file} i386/t-dragonfly i386/t-crtstuff"
+ md_unwind_header=i386/dragonfly-unwind.h
+--- a/src/libgcc/config/arm/unwind-arm.h
++++ b/src/libgcc/config/arm/unwind-arm.h
+@@ -49,7 +49,7 @@ extern "C" {
+ return 0;
+
+ #if (defined(linux) && !defined(__uClinux__)) || defined(__NetBSD__) \
+- || defined(__FreeBSD__)
++ || defined(__FreeBSD__) || defined(__fuchsia__)
+ /* Pc-relative indirect. */
+ #define _GLIBCXX_OVERRIDE_TTYPE_ENCODING (DW_EH_PE_pcrel | DW_EH_PE_indirect)
+ tmp += ptr;
+--- /dev/null
++++ b/src/libgcc/config/t-slibgcc-fuchsia
+@@ -0,0 +1,22 @@
++# Copyright (C) 2017 Free Software Foundation, Inc.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3. If not see
++# <http://www.gnu.org/licenses/>.
++
++# Fuchsia-specific shared library overrides.
++
++SHLIB_LDFLAGS = -Wl,--soname=$(SHLIB_SONAME) \
++ $(LDFLAGS)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/gcc-7.git