[gcc-6] 195/401: * Update the Linaro support to the 6-2016.08 snapshot.
Ximin Luo
infinity0 at debian.org
Wed Apr 5 15:49:18 UTC 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch pu/reproducible_builds
in repository gcc-6.
commit 7688e557f84af11294e3ce6a84750049d7e1c5b2
Author: doko <doko at 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca>
Date: Tue Aug 30 17:43:43 2016 +0000
* Update the Linaro support to the 6-2016.08 snapshot.
git-svn-id: svn://anonscm.debian.org/gcccvs/branches/sid/gcc-6@8959 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
debian/changelog | 1 +
debian/patches/gcc-linaro-doc.diff | 58 +-
debian/patches/gcc-linaro-no-macros.diff | 2 +-
debian/patches/gcc-linaro.diff | 4056 ++++++++++++++++++++++++++++--
debian/patches/vulcan-costs.diff | 259 --
debian/patches/vulcan-cpu-doc.diff | 27 -
debian/patches/vulcan-cpu.diff | 39 -
debian/rules.patch | 3 -
8 files changed, 3911 insertions(+), 534 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index f09147a..d063b15 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -9,6 +9,7 @@ gcc-6 (6.2.0-2) UNRELEASED; urgency=medium
* Fix install location of D header files for cross builds (YunQiang Su).
Closes: #835847.
* Fix PR c++/77379, taken from the trunk.
+ * Update the Linaro support to the 6-2016.08 snapshot.
-- Matthias Klose <doko at debian.org> Wed, 24 Aug 2016 08:07:34 +0200
diff --git a/debian/patches/gcc-linaro-doc.diff b/debian/patches/gcc-linaro-doc.diff
index 2909dd6..dd2b39c 100644
--- a/debian/patches/gcc-linaro-doc.diff
+++ b/debian/patches/gcc-linaro-doc.diff
@@ -1,4 +1,4 @@
-# DP: Changes for the Linaro 6-2016.07 release (documentation).
+# DP: Changes for the Linaro 6-2016.08 release (documentation).
--- a/src/gcc/doc/cpp.texi
+++ b/src/gcc/doc/cpp.texi
@@ -13,7 +13,16 @@
like this:
--- a/src/gcc/doc/invoke.texi
+++ b/src/gcc/doc/invoke.texi
-@@ -9478,6 +9478,11 @@ Size of minimal partition for WHOPR (in estimated instructions).
+@@ -573,6 +573,8 @@ Objective-C and Objective-C++ Dialects}.
+ -mfix-cortex-a53-835769 -mno-fix-cortex-a53-835769 @gol
+ -mfix-cortex-a53-843419 -mno-fix-cortex-a53-843419 @gol
+ -mlow-precision-recip-sqrt -mno-low-precision-recip-sqrt @gol
++-mlow-precision-sqrt -mno-low-precision-sqrt @gol
++-mlow-precision-div -mno-low-precision-div @gol
+ -march=@var{name} -mcpu=@var{name} -mtune=@var{name}}
+
+ @emph{Adapteva Epiphany Options}
+@@ -9478,6 +9480,11 @@ Size of minimal partition for WHOPR (in estimated instructions).
This prevents expenses of splitting very small programs into too many
partitions.
@@ -25,7 +34,7 @@
@item cxx-max-namespaces-for-diagnostic-help
The maximum number of namespaces to consult for suggestions when C++
name lookup fails for an identifier. The default is 1000.
-@@ -12828,9 +12833,9 @@ These options are defined for AArch64 implementations:
+@@ -12828,9 +12835,9 @@ These options are defined for AArch64 implementations:
@item -mabi=@var{name}
@opindex mabi
Generate code for the specified data model. Permissible values
@@ -38,7 +47,7 @@
The default depends on the specific target configuration. Note that
the LP64 and ILP32 ABIs are not link-compatible; you must compile your
-@@ -12855,25 +12860,24 @@ Generate little-endian code. This is the default when GCC is configured for an
+@@ -12855,25 +12862,24 @@ Generate little-endian code. This is the default when GCC is configured for an
@item -mcmodel=tiny
@opindex mcmodel=tiny
Generate code for the tiny code model. The program and its statically defined
@@ -71,7 +80,7 @@
@item -momit-leaf-frame-pointer
@itemx -mno-omit-leaf-frame-pointer
-@@ -12895,7 +12899,7 @@ of TLS variables.
+@@ -12895,7 +12901,7 @@ of TLS variables.
@item -mtls-size=@var{size}
@opindex mtls-size
Specify bit size of immediate TLS offsets. Valid values are 12, 24, 32, 48.
@@ -80,7 +89,7 @@
@item -mfix-cortex-a53-835769
@itemx -mno-fix-cortex-a53-835769
-@@ -12915,12 +12919,13 @@ corresponding flag to the linker.
+@@ -12915,12 +12921,34 @@ corresponding flag to the linker.
@item -mlow-precision-recip-sqrt
@item -mno-low-precision-recip-sqrt
@@ -92,21 +101,42 @@
-approximation, which in turn depends on the target processor.
+@opindex mlow-precision-recip-sqrt
+@opindex mno-low-precision-recip-sqrt
-+Enable or disable reciprocal square root approximation.
++Enable or disable the reciprocal square root approximation.
+This option only has an effect if @option{-ffast-math} or
+@option{-funsafe-math-optimizations} is used as well. Enabling this reduces
+precision of reciprocal square root results to about 16 bits for
+single precision and to 32 bits for double precision.
++
++@item -mlow-precision-sqrt
++@item -mno-low-precision-sqrt
++@opindex -mlow-precision-sqrt
++@opindex -mno-low-precision-sqrt
++Enable or disable the square root approximation.
++This option only has an effect if @option{-ffast-math} or
++@option{-funsafe-math-optimizations} is used as well. Enabling this reduces
++precision of square root results to about 16 bits for
++single precision and to 32 bits for double precision.
++If enabled, it implies @option{-mlow-precision-recip-sqrt}.
++
++@item -mlow-precision-div
++@item -mno-low-precision-div
++@opindex -mlow-precision-div
++@opindex -mno-low-precision-div
++Enable or disable the division approximation.
++This option only has an effect if @option{-ffast-math} or
++@option{-funsafe-math-optimizations} is used as well. Enabling this reduces
++precision of division results to about 16 bits for
++single precision and to 32 bits for double precision.
@item -march=@var{name}
@opindex march
-@@ -12957,17 +12962,15 @@ Specify the name of the target processor for which GCC should tune the
+@@ -12957,17 +12985,15 @@ Specify the name of the target processor for which GCC should tune the
performance of the code. Permissible values for this option are:
@samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a57},
@samp{cortex-a72}, @samp{exynos-m1}, @samp{qdf24xx}, @samp{thunderx},
-@samp{xgene1}.
-+@samp{xgene1}, @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
-+@samp{native}.
++@samp{xgene1}, @samp{vulcan}, @samp{cortex-a57.cortex-a53},
++@samp{cortex-a72.cortex-a53}, @samp{native}.
-Additionally, this option can specify that GCC should tune the performance
-of the code for a big.LITTLE system. Permissible values for this
@@ -124,7 +154,7 @@
Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=}
are specified, the code is tuned to perform well across a range
-@@ -12987,12 +12990,6 @@ documented in the sub-section on
+@@ -12987,12 +13013,6 @@ documented in the sub-section on
Feature Modifiers}. Where conflicting feature modifiers are
specified, the right-most feature is used.
@@ -137,7 +167,7 @@
GCC uses @var{name} to determine what kind of instructions it can emit when
generating assembly code (as if by @option{-march}) and to determine
the target processor for which to tune for performance (as if
-@@ -13010,11 +13007,11 @@ across releases.
+@@ -13010,11 +13030,11 @@ across releases.
This option is only intended to be useful when developing GCC.
@item -mpc-relative-literal-loads
@@ -154,7 +184,7 @@
@end table
-@@ -13045,9 +13042,9 @@ Enable Large System Extension instructions. This is on by default for
+@@ -13045,9 +13065,9 @@ Enable Large System Extension instructions. This is on by default for
@end table
@@ -167,7 +197,7 @@
@node Adapteva Epiphany Options
@subsection Adapteva Epiphany Options
-@@ -18082,7 +18079,7 @@ IEEE 754 floating-point data.
+@@ -18082,7 +18102,7 @@ IEEE 754 floating-point data.
The @option{-mnan=legacy} option selects the legacy encoding. In this
case quiet NaNs (qNaNs) are denoted by the first bit of their trailing
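As a reading aid: the three AArch64 options documented in the hunks above only
take effect together with unsafe FP math. A minimal usage sketch, assuming a
compiler built from this snapshot; the file name and exact flag combination
are illustrative, not part of the patch:

/* approx.c -- illustrative only.  The approximations are additionally
   gated on -ffast-math (or -funsafe-math-optimizations).  */
#include <math.h>

/* May be compiled to FRSQRTE plus Newton steps.  */
float rsqrtf_demo (float x) { return 1.0f / sqrtf (x); }

/* May be compiled to FRECPE plus Newton steps.  */
float div_demo (float a, float b) { return a / b; }

/* Hypothetical invocation:
   gcc -O2 -ffast-math -mlow-precision-recip-sqrt -mlow-precision-div approx.c */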
diff --git a/debian/patches/gcc-linaro-no-macros.diff b/debian/patches/gcc-linaro-no-macros.diff
index df3d913..9da5f40 100644
--- a/debian/patches/gcc-linaro-no-macros.diff
+++ b/debian/patches/gcc-linaro-no-macros.diff
@@ -89,7 +89,7 @@ Index: b/src/gcc/LINARO-VERSION
--- a/src/gcc/LINARO-VERSION
+++ /dev/null
@@ -1 +0,0 @@
--Snapshot 6.1-2016.07
+-6.1-2016.08~dev
Index: b/src/gcc/configure.ac
===================================================================
--- a/src/gcc/configure.ac
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index 2ad91f2..3494a03 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,8 +1,8 @@
-# DP: Changes for the Linaro 6-2016.07 release.
+# DP: Changes for the Linaro 6-2016.08 release.
MSG=$(git log origin/linaro/gcc-6-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-6-branch --format=format:"%H" -n 1 --grep "gcc-6-branch@${SVN%.}"
-LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b8175015c74b948ff1e32197 \
+LANG=C git diff ac6fe0ee825550e1dfefffd649d49133011d5eb8..91b11ff9859dee06a84ac410a5588dd1faf3462a \
| egrep -v '^(diff|index) ' \
| filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ \
| sed 's,a/src//dev/null,/dev/null,'
@@ -10,7 +10,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
--- /dev/null
+++ b/src/gcc/LINARO-VERSION
@@ -0,0 +1 @@
-+Snapshot 6.1-2016.07
++6.1-2016.08~dev
--- a/src/gcc/Makefile.in
+++ b/src/gcc/Makefile.in
@@ -832,10 +832,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
@@ -114,6 +114,143 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
fi
;;
+--- a/src/gcc/config/aarch64/aarch64-builtins.c
++++ b/src/gcc/config/aarch64/aarch64-builtins.c
+@@ -173,6 +173,10 @@ aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_none, qualifier_immediate };
+ #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
+ static enum aarch64_type_qualifiers
++aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
++ = { qualifier_none, qualifier_unsigned, qualifier_immediate };
++#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
++static enum aarch64_type_qualifiers
+ aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
+ #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+--- a/src/gcc/config/aarch64/aarch64-cores.def
++++ b/src/gcc/config/aarch64/aarch64-cores.def
+@@ -49,6 +49,10 @@ AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AA
+ AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
+ AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
+
++/* V8.1 Architecture Processors. */
++
++AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
++
+ /* V8 big.LITTLE implementations. */
+
+ AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
+--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
++++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
+@@ -127,6 +127,108 @@ const struct cpu_cost_table thunderx_extra_costs =
+ }
+ };
+
++const struct cpu_cost_table vulcan_extra_costs =
++{
++ /* ALU */
++ {
++ 0, /* Arith. */
++ 0, /* Logical. */
++ 0, /* Shift. */
++ 0, /* Shift_reg. */
++ COSTS_N_INSNS (1), /* Arith_shift. */
++ COSTS_N_INSNS (1), /* Arith_shift_reg. */
++ COSTS_N_INSNS (1), /* Log_shift. */
++ COSTS_N_INSNS (1), /* Log_shift_reg. */
++ 0, /* Extend. */
++ COSTS_N_INSNS (1), /* Extend_arith. */
++ 0, /* Bfi. */
++ 0, /* Bfx. */
++ COSTS_N_INSNS (3), /* Clz. */
++ 0, /* Rev. */
++ 0, /* Non_exec. */
++ true /* Non_exec_costs_exec. */
++ },
++ {
++ /* MULT SImode */
++ {
++ COSTS_N_INSNS (4), /* Simple. */
++ COSTS_N_INSNS (4), /* Flag_setting. */
++ COSTS_N_INSNS (4), /* Extend. */
++ COSTS_N_INSNS (5), /* Add. */
++ COSTS_N_INSNS (5), /* Extend_add. */
++ COSTS_N_INSNS (18) /* Idiv. */
++ },
++ /* MULT DImode */
++ {
++ COSTS_N_INSNS (4), /* Simple. */
++ 0, /* Flag_setting. */
++ COSTS_N_INSNS (4), /* Extend. */
++ COSTS_N_INSNS (5), /* Add. */
++ COSTS_N_INSNS (5), /* Extend_add. */
++ COSTS_N_INSNS (26) /* Idiv. */
++ }
++ },
++ /* LD/ST */
++ {
++ COSTS_N_INSNS (4), /* Load. */
++ COSTS_N_INSNS (4), /* Load_sign_extend. */
++ COSTS_N_INSNS (5), /* Ldrd. */
++ COSTS_N_INSNS (4), /* Ldm_1st. */
++ 1, /* Ldm_regs_per_insn_1st. */
++ 1, /* Ldm_regs_per_insn_subsequent. */
++ COSTS_N_INSNS (4), /* Loadf. */
++ COSTS_N_INSNS (4), /* Loadd. */
++ COSTS_N_INSNS (4), /* Load_unaligned. */
++ 0, /* Store. */
++ 0, /* Strd. */
++ 0, /* Stm_1st. */
++ 1, /* Stm_regs_per_insn_1st. */
++ 1, /* Stm_regs_per_insn_subsequent. */
++ 0, /* Storef. */
++ 0, /* Stored. */
++ 0, /* Store_unaligned. */
++ COSTS_N_INSNS (1), /* Loadv. */
++ COSTS_N_INSNS (1) /* Storev. */
++ },
++ {
++ /* FP SFmode */
++ {
++ COSTS_N_INSNS (4), /* Div. */
++ COSTS_N_INSNS (1), /* Mult. */
++ COSTS_N_INSNS (1), /* Mult_addsub. */
++ COSTS_N_INSNS (1), /* Fma. */
++ COSTS_N_INSNS (1), /* Addsub. */
++ COSTS_N_INSNS (1), /* Fpconst. */
++ COSTS_N_INSNS (1), /* Neg. */
++ COSTS_N_INSNS (1), /* Compare. */
++ COSTS_N_INSNS (2), /* Widen. */
++ COSTS_N_INSNS (2), /* Narrow. */
++ COSTS_N_INSNS (2), /* Toint. */
++ COSTS_N_INSNS (2), /* Fromint. */
++ COSTS_N_INSNS (2) /* Roundint. */
++ },
++ /* FP DFmode */
++ {
++ COSTS_N_INSNS (6), /* Div. */
++ COSTS_N_INSNS (1), /* Mult. */
++ COSTS_N_INSNS (1), /* Mult_addsub. */
++ COSTS_N_INSNS (1), /* Fma. */
++ COSTS_N_INSNS (1), /* Addsub. */
++ COSTS_N_INSNS (1), /* Fpconst. */
++ COSTS_N_INSNS (1), /* Neg. */
++ COSTS_N_INSNS (1), /* Compare. */
++ COSTS_N_INSNS (2), /* Widen. */
++ COSTS_N_INSNS (2), /* Narrow. */
++ COSTS_N_INSNS (2), /* Toint. */
++ COSTS_N_INSNS (2), /* Fromint. */
++ COSTS_N_INSNS (2) /* Roundint. */
++ }
++ },
++ /* Vector */
++ {
++ COSTS_N_INSNS (1) /* Alu. */
++ }
++};
+
+
+ #endif
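The vulcan numbers above are relative costs: COSTS_N_INSNS (n) is GCC's
standard macro (rtl.h defines it as (n) * 4, i.e. n average instructions),
and a 0 entry marks the operation as no more expensive than a plain ALU
instruction. A toy sketch of how such a table is consumed; the struct and
helper names are invented for illustration, the real consumer being
aarch64_rtx_costs:

#define COSTS_N_INSNS(N) ((N) * 4)   /* genuine GCC macro from rtl.h */

struct toy_alu_costs { int arith; int arith_shift; };

/* Cost of an add, optionally with a shifted operand: base cost of one
   instruction plus the per-CPU extra cost from the table.  */
static int
toy_add_cost (int shifted, const struct toy_alu_costs *c)
{
  return COSTS_N_INSNS (1) + (shifted ? c->arith_shift : c->arith);
}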
--- a/src/gcc/config/aarch64/aarch64-elf.h
+++ b/src/gcc/config/aarch64/aarch64-elf.h
@@ -25,15 +25,6 @@
@@ -145,7 +282,46 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
CC_MODE (CC_C); /* Only C bit of condition flags is valid. */
--- a/src/gcc/config/aarch64/aarch64-protos.h
+++ b/src/gcc/config/aarch64/aarch64-protos.h
-@@ -290,6 +290,7 @@ bool aarch64_constant_address_p (rtx);
+@@ -178,6 +178,25 @@ struct cpu_branch_cost
+ const int unpredictable; /* Unpredictable branch or optimizing for speed. */
+ };
+
++/* Control approximate alternatives to certain FP operators. */
++#define AARCH64_APPROX_MODE(MODE) \
++ ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
++ ? (1 << ((MODE) - MIN_MODE_FLOAT)) \
++ : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
++ ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
++ + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
++ : (0))
++#define AARCH64_APPROX_NONE (0)
++#define AARCH64_APPROX_ALL (-1)
++
++/* Allowed modes for approximations. */
++struct cpu_approx_modes
++{
++ const unsigned int division; /* Division. */
++ const unsigned int sqrt; /* Square root. */
++ const unsigned int recip_sqrt; /* Reciprocal square root. */
++};
++
+ struct tune_params
+ {
+ const struct cpu_cost_table *insn_extra_cost;
+@@ -185,6 +204,7 @@ struct tune_params
+ const struct cpu_regmove_cost *regmove_cost;
+ const struct cpu_vector_cost *vec_costs;
+ const struct cpu_branch_cost *branch_costs;
++ const struct cpu_approx_modes *approx_modes;
+ int memmov_cost;
+ int issue_rate;
+ unsigned int fusible_ops;
+@@ -287,9 +307,12 @@ bool aarch64_cannot_change_mode_class (machine_mode,
+ enum reg_class);
+ bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
+ bool aarch64_constant_address_p (rtx);
++bool aarch64_emit_approx_div (rtx, rtx, rtx);
++bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
bool aarch64_expand_movmem (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
@@ -153,7 +329,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
bool aarch64_gen_movmemqi (rtx *);
bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
bool aarch64_is_extend_from_extract (machine_mode, rtx, rtx);
-@@ -335,11 +336,9 @@ machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
+@@ -335,11 +358,9 @@ machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
machine_mode);
int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
int aarch64_hard_regno_nregs (unsigned, machine_mode);
@@ -165,9 +341,41 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
rtx aarch64_mask_from_zextract_ops (rtx, rtx);
const char *aarch64_output_move_struct (rtx *operands);
rtx aarch64_return_addr (int, rtx);
+@@ -369,7 +390,6 @@ void aarch64_register_pragmas (void);
+ void aarch64_relayout_simd_types (void);
+ void aarch64_reset_previous_fndecl (void);
+ void aarch64_save_restore_target_globals (tree);
+-void aarch64_emit_approx_rsqrt (rtx, rtx);
+
+ /* Initialize builtins for SIMD intrinsics. */
+ void init_aarch64_simd_builtins (void);
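The AARCH64_APPROX_MODE macro added above packs one bit per floating-point
machine mode (scalar FP modes first, then vector FP modes), so each field of
cpu_approx_modes is a bit mask and AARCH64_APPROX_ALL (-1) enables every
mode. A self-contained C sketch of the same scheme with toy mode numbers; the
real mode enum comes from GCC's machmode machinery, not these values:

enum toy_mode { TOY_SF, TOY_DF, TOY_V2SF, TOY_V4SF, TOY_V2DF };

#define TOY_APPROX_MODE(M) (1u << (M))   /* one bit per FP mode */
#define TOY_APPROX_NONE    0u
#define TOY_APPROX_ALL     (~0u)

struct toy_approx_modes { unsigned division, sqrt, recip_sqrt; };

/* X-Gene 1-style tuning: approximate rsqrt in all modes, nothing else.  */
static const struct toy_approx_modes xgene1_like =
  { TOY_APPROX_NONE, TOY_APPROX_NONE, TOY_APPROX_ALL };

static int
use_approx_rsqrt_p (enum toy_mode m, const struct toy_approx_modes *t)
{
  return (t->recip_sqrt & TOY_APPROX_MODE (m)) != 0;
}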
+--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
++++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
+@@ -449,3 +449,21 @@
+ /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>. */
+ BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0)
+ BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0)
++
++ /* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
++ BUILTIN_VSDQ_SDI (SHIFTIMM, scvtf, 3)
++ BUILTIN_VSDQ_SDI (FCVTIMM_SUS, ucvtf, 3)
++ BUILTIN_VALLF (SHIFTIMM, fcvtzs, 3)
++ BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
++
++ /* Implemented by aarch64_rsqrte<mode>. */
++ BUILTIN_VALLF (UNOP, rsqrte, 0)
++
++ /* Implemented by aarch64_rsqrts<mode>. */
++ BUILTIN_VALLF (BINOP, rsqrts, 0)
++
++ /* Implemented by fabd<mode>3. */
++ BUILTIN_VALLF (BINOP, fabd, 3)
++
++ /* Implemented by aarch64_faddp<mode>. */
++ BUILTIN_VDQF (BINOP, faddp, 0)
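The new scvtf/ucvtf/fcvtzs/fcvtzu builtins above take an immediate count of
fractional bits, i.e. they convert between floating point and fixed-point
values scaled by 2^fbits. A plain-C model of the scalar semantics, ignoring
saturation and out-of-range behaviour; the function names are made up for
illustration and are not the intrinsic API:

#include <stdint.h>

/* SCVTF-with-#fbits model: signed fixed-point -> float.  */
static float scvtf_model (int32_t x, int fbits)
{
  return (float) x / (float) (1u << fbits);
}

/* FCVTZS-with-#fbits model: float -> signed fixed-point, truncating
   toward zero like the instruction.  */
static int32_t fcvtzs_model (float f, int fbits)
{
  return (int32_t) (f * (float) (1u << fbits));
}

/* Example: scvtf_model (32, 4) == 2.0f, fcvtzs_model (2.5f, 4) == 40.  */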
--- a/src/gcc/config/aarch64/aarch64-simd.md
+++ b/src/gcc/config/aarch64/aarch64-simd.md
-@@ -371,15 +371,15 @@
+@@ -371,18 +371,18 @@
[(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
@@ -190,8 +398,82 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
+ [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
- (define_insn "aarch64_rsqrte_<mode>2"
-@@ -1579,16 +1579,16 @@
+-(define_insn "aarch64_rsqrte_<mode>2"
++(define_insn "aarch64_rsqrte<mode>"
+ [(set (match_operand:VALLF 0 "register_operand" "=w")
+ (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+ UNSPEC_RSQRTE))]
+@@ -390,7 +390,7 @@
+ "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
+ [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
+
+-(define_insn "aarch64_rsqrts_<mode>3"
++(define_insn "aarch64_rsqrts<mode>"
+ [(set (match_operand:VALLF 0 "register_operand" "=w")
+ (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
+ (match_operand:VALLF 2 "register_operand" "w")]
+@@ -405,7 +405,7 @@
+ UNSPEC_RSQRT))]
+ "TARGET_SIMD"
+ {
+- aarch64_emit_approx_rsqrt (operands[0], operands[1]);
++ aarch64_emit_approx_sqrt (operands[0], operands[1], true);
+ DONE;
+ })
+
+@@ -474,23 +474,14 @@
+ [(set_attr "type" "neon_arith_acc<q>")]
+ )
+
+-(define_insn "fabd<mode>_3"
+- [(set (match_operand:VDQF 0 "register_operand" "=w")
+- (abs:VDQF (minus:VDQF
+- (match_operand:VDQF 1 "register_operand" "w")
+- (match_operand:VDQF 2 "register_operand" "w"))))]
+- "TARGET_SIMD"
+- "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+- [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
+-)
+-
+-(define_insn "*fabd_scalar<mode>3"
+- [(set (match_operand:GPF 0 "register_operand" "=w")
+- (abs:GPF (minus:GPF
+- (match_operand:GPF 1 "register_operand" "w")
+- (match_operand:GPF 2 "register_operand" "w"))))]
++(define_insn "fabd<mode>3"
++ [(set (match_operand:VALLF 0 "register_operand" "=w")
++ (abs:VALLF
++ (minus:VALLF
++ (match_operand:VALLF 1 "register_operand" "w")
++ (match_operand:VALLF 2 "register_operand" "w"))))]
+ "TARGET_SIMD"
+- "fabd\t%<s>0, %<s>1, %<s>2"
++ "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+ [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
+ )
+
+@@ -1509,7 +1500,19 @@
+ [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
+ )
+
+-(define_insn "div<mode>3"
++(define_expand "div<mode>3"
++ [(set (match_operand:VDQF 0 "register_operand")
++ (div:VDQF (match_operand:VDQF 1 "general_operand")
++ (match_operand:VDQF 2 "register_operand")))]
++ "TARGET_SIMD"
++{
++ if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
++ DONE;
++
++ operands[1] = force_reg (<MODE>mode, operands[1]);
++})
++
++(define_insn "*div<mode>3"
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+ (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
+ (match_operand:VDQF 2 "register_operand" "w")))]
+@@ -1579,16 +1582,16 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
@@ -217,7 +499,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "*aarch64_fma4_elt_to_64v2df"
-@@ -1656,17 +1656,17 @@
+@@ -1656,17 +1659,17 @@
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
@@ -246,7 +528,36 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "*aarch64_fnma4_elt_to_64v2df"
-@@ -1979,19 +1979,6 @@
+@@ -1778,6 +1781,28 @@
+ [(set_attr "type" "neon_fp_cvt_widen_s")]
+ )
+
++;; Convert between fixed-point and floating-point (vector modes)
++
++(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VDQF:mode>3"
++ [(set (match_operand:<VDQF:FCVT_TARGET> 0 "register_operand" "=w")
++ (unspec:<VDQF:FCVT_TARGET> [(match_operand:VDQF 1 "register_operand" "w")
++ (match_operand:SI 2 "immediate_operand" "i")]
++ FCVT_F2FIXED))]
++ "TARGET_SIMD"
++ "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
++ [(set_attr "type" "neon_fp_to_int_<VDQF:Vetype><q>")]
++)
++
++(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_SDI:mode>3"
++ [(set (match_operand:<VDQ_SDI:FCVT_TARGET> 0 "register_operand" "=w")
++ (unspec:<VDQ_SDI:FCVT_TARGET> [(match_operand:VDQ_SDI 1 "register_operand" "w")
++ (match_operand:SI 2 "immediate_operand" "i")]
++ FCVT_FIXED2F))]
++ "TARGET_SIMD"
++ "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
++ [(set_attr "type" "neon_int_to_fp_<VDQ_SDI:Vetype><q>")]
++)
++
+ ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
+ ;; is inconsistent with vector ordering elsewhere in the compiler, in that
+ ;; the meaning of HI and LO changes depending on the target endianness.
+@@ -1979,17 +2004,14 @@
}
)
@@ -261,12 +572,18 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
- emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
- DONE;
- }
--)
--
++(define_insn "aarch64_faddp<mode>"
++ [(set (match_operand:VDQF 0 "register_operand" "=w")
++ (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
++ (match_operand:VDQF 2 "register_operand" "w")]
++ UNSPEC_FADDV))]
++ "TARGET_SIMD"
++ "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
++ [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
+ )
+
(define_insn "aarch64_reduc_plus_internal<mode>"
- [(set (match_operand:VDQV 0 "register_operand" "=w")
- (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
-@@ -2010,9 +1997,9 @@
+@@ -2010,24 +2032,15 @@
[(set_attr "type" "neon_reduc_add")]
)
@@ -279,7 +596,33 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
UNSPEC_FADDV))]
"TARGET_SIMD"
"faddp\\t%<Vetype>0, %1.<Vtype>"
-@@ -2635,7 +2622,7 @@
+ [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
+ )
+
+-(define_insn "aarch64_addpv4sf"
+- [(set (match_operand:V4SF 0 "register_operand" "=w")
+- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
+- UNSPEC_FADDV))]
+- "TARGET_SIMD"
+- "faddp\\t%0.4s, %1.4s, %1.4s"
+- [(set_attr "type" "neon_fp_reduc_add_s_q")]
+-)
+-
+ (define_expand "reduc_plus_scal_v4sf"
+ [(set (match_operand:SF 0 "register_operand")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
+@@ -2036,8 +2049,8 @@
+ {
+ rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
+ rtx scratch = gen_reg_rtx (V4SFmode);
+- emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
+- emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
++ emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
++ emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
+ emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
+ DONE;
+ })
+@@ -2635,7 +2648,7 @@
(define_insn "*aarch64_combinez<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
@@ -288,7 +631,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
(match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
-@@ -2651,7 +2638,7 @@
+@@ -2651,7 +2664,7 @@
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
(match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
@@ -297,7 +640,25 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"@
mov\\t%0.8b, %1.8b
-@@ -4652,7 +4639,7 @@
+@@ -4297,7 +4310,16 @@
+
+ ;; sqrt
+
+-(define_insn "sqrt<mode>2"
++(define_expand "sqrt<mode>2"
++ [(set (match_operand:VDQF 0 "register_operand")
++ (sqrt:VDQF (match_operand:VDQF 1 "register_operand")))]
++ "TARGET_SIMD"
++{
++ if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
++ DONE;
++})
++
++(define_insn "*sqrt<mode>2"
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+ (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+@@ -4652,7 +4674,7 @@
ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
[(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
neon_load<nregs>_<nregs>reg_q")
@@ -306,7 +667,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "aarch64_be_ld1<mode>"
-@@ -4685,7 +4672,7 @@
+@@ -4685,7 +4707,7 @@
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
@@ -315,7 +676,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "*aarch64_be_movci"
-@@ -4696,7 +4683,7 @@
+@@ -4696,7 +4718,7 @@
|| register_operand (operands[1], CImode))"
"#"
[(set_attr "type" "multiple")
@@ -324,7 +685,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "*aarch64_be_movxi"
-@@ -4707,7 +4694,7 @@
+@@ -4707,7 +4729,7 @@
|| register_operand (operands[1], XImode))"
"#"
[(set_attr "type" "multiple")
@@ -333,7 +694,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_split
-@@ -5414,13 +5401,25 @@
+@@ -5414,13 +5436,25 @@
[(set_attr "type" "crypto_aese")]
)
@@ -362,9 +723,235 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
;; sha1
+--- a/src/gcc/config/aarch64/aarch64-tune.md
++++ b/src/gcc/config/aarch64/aarch64-tune.md
+@@ -1,5 +1,5 @@
+ ;; -*- buffer-read-only: t -*-
+ ;; Generated automatically by gentune.sh from aarch64-cores.def
+ (define_attr "tune"
+- "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53"
++ "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,vulcan,cortexa57cortexa53,cortexa72cortexa53"
+ (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
+--- a/src/gcc/config/aarch64/aarch64-tuning-flags.def
++++ b/src/gcc/config/aarch64/aarch64-tuning-flags.def
+@@ -29,5 +29,3 @@
+ AARCH64_TUNE_ to give an enum name. */
+
+ AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
+-AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
+-
--- a/src/gcc/config/aarch64/aarch64.c
+++ b/src/gcc/config/aarch64/aarch64.c
-@@ -3582,7 +3582,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+@@ -250,6 +250,22 @@ static const struct cpu_addrcost_table xgene1_addrcost_table =
+ 0, /* imm_offset */
+ };
+
++static const struct cpu_addrcost_table vulcan_addrcost_table =
++{
++ {
++ 0, /* hi */
++ 0, /* si */
++ 0, /* di */
++ 2, /* ti */
++ },
++ 0, /* pre_modify */
++ 0, /* post_modify */
++ 2, /* register_offset */
++ 3, /* register_sextend */
++ 3, /* register_zextend */
++ 0, /* imm_offset */
++};
++
+ static const struct cpu_regmove_cost generic_regmove_cost =
+ {
+ 1, /* GP2GP */
+@@ -308,6 +324,15 @@ static const struct cpu_regmove_cost xgene1_regmove_cost =
+ 2 /* FP2FP */
+ };
+
++static const struct cpu_regmove_cost vulcan_regmove_cost =
++{
++ 1, /* GP2GP */
++ /* Avoid the use of int<->fp moves for spilling. */
++ 8, /* GP2FP */
++ 8, /* FP2GP */
++ 4 /* FP2FP */
++};
++
+ /* Generic costs for vector insn classes. */
+ static const struct cpu_vector_cost generic_vector_cost =
+ {
+@@ -379,6 +404,24 @@ static const struct cpu_vector_cost xgene1_vector_cost =
+ 1 /* cond_not_taken_branch_cost */
+ };
+
++/* Costs for vector insn classes for Vulcan. */
++static const struct cpu_vector_cost vulcan_vector_cost =
++{
++ 6, /* scalar_stmt_cost */
++ 4, /* scalar_load_cost */
++ 1, /* scalar_store_cost */
++ 6, /* vec_stmt_cost */
++ 3, /* vec_permute_cost */
++ 6, /* vec_to_scalar_cost */
++ 5, /* scalar_to_vec_cost */
++ 8, /* vec_align_load_cost */
++ 8, /* vec_unalign_load_cost */
++ 4, /* vec_unalign_store_cost */
++ 4, /* vec_store_cost */
++ 2, /* cond_taken_branch_cost */
++ 1 /* cond_not_taken_branch_cost */
++};
++
+ /* Generic costs for branch instructions. */
+ static const struct cpu_branch_cost generic_branch_cost =
+ {
+@@ -393,6 +436,37 @@ static const struct cpu_branch_cost cortexa57_branch_cost =
+ 3 /* Unpredictable. */
+ };
+
++/* Branch costs for Vulcan. */
++static const struct cpu_branch_cost vulcan_branch_cost =
++{
++ 1, /* Predictable. */
++ 3 /* Unpredictable. */
++};
++
++/* Generic approximation modes. */
++static const cpu_approx_modes generic_approx_modes =
++{
++ AARCH64_APPROX_NONE, /* division */
++ AARCH64_APPROX_NONE, /* sqrt */
++ AARCH64_APPROX_NONE /* recip_sqrt */
++};
++
++/* Approximation modes for Exynos M1. */
++static const cpu_approx_modes exynosm1_approx_modes =
++{
++ AARCH64_APPROX_NONE, /* division */
++ AARCH64_APPROX_ALL, /* sqrt */
++ AARCH64_APPROX_ALL /* recip_sqrt */
++};
++
++/* Approximation modes for X-Gene 1. */
++static const cpu_approx_modes xgene1_approx_modes =
++{
++ AARCH64_APPROX_NONE, /* division */
++ AARCH64_APPROX_NONE, /* sqrt */
++ AARCH64_APPROX_ALL /* recip_sqrt */
++};
++
+ static const struct tune_params generic_tunings =
+ {
+ &cortexa57_extra_costs,
+@@ -400,6 +474,7 @@ static const struct tune_params generic_tunings =
+ &generic_regmove_cost,
+ &generic_vector_cost,
+ &generic_branch_cost,
++ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 2, /* issue_rate */
+ AARCH64_FUSE_NOTHING, /* fusible_ops */
+@@ -424,6 +499,7 @@ static const struct tune_params cortexa35_tunings =
+ &cortexa53_regmove_cost,
+ &generic_vector_cost,
+ &generic_branch_cost,
++ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 1, /* issue_rate */
+ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+@@ -449,6 +525,7 @@ static const struct tune_params cortexa53_tunings =
+ &cortexa53_regmove_cost,
+ &generic_vector_cost,
+ &generic_branch_cost,
++ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 2, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+@@ -474,6 +551,7 @@ static const struct tune_params cortexa57_tunings =
+ &cortexa57_regmove_cost,
+ &cortexa57_vector_cost,
+ &cortexa57_branch_cost,
++ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 3, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+@@ -499,6 +577,7 @@ static const struct tune_params cortexa72_tunings =
+ &cortexa57_regmove_cost,
+ &cortexa57_vector_cost,
+ &generic_branch_cost,
++ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 3, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+@@ -524,6 +603,7 @@ static const struct tune_params exynosm1_tunings =
+ &exynosm1_regmove_cost,
+ &exynosm1_vector_cost,
+ &generic_branch_cost,
++ &exynosm1_approx_modes,
+ 4, /* memmov_cost */
+ 3, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
+@@ -538,7 +618,7 @@ static const struct tune_params exynosm1_tunings =
+ 48, /* max_case_values. */
+ 64, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ };
+
+ static const struct tune_params thunderx_tunings =
+@@ -548,6 +628,7 @@ static const struct tune_params thunderx_tunings =
+ &thunderx_regmove_cost,
+ &generic_vector_cost,
+ &generic_branch_cost,
++ &generic_approx_modes,
+ 6, /* memmov_cost */
+ 2, /* issue_rate */
+ AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
+@@ -572,6 +653,7 @@ static const struct tune_params xgene1_tunings =
+ &xgene1_regmove_cost,
+ &xgene1_vector_cost,
+ &generic_branch_cost,
++ &xgene1_approx_modes,
+ 6, /* memmov_cost */
+ 4, /* issue_rate */
+ AARCH64_FUSE_NOTHING, /* fusible_ops */
+@@ -586,7 +668,32 @@ static const struct tune_params xgene1_tunings =
+ 0, /* max_case_values. */
+ 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+- (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
++};
++
++static const struct tune_params vulcan_tunings =
++{
++ &vulcan_extra_costs,
++ &vulcan_addrcost_table,
++ &vulcan_regmove_cost,
++ &vulcan_vector_cost,
++ &vulcan_branch_cost,
++ &generic_approx_modes,
++ 4, /* memmov_cost. */
++ 4, /* issue_rate. */
++ AARCH64_FUSE_NOTHING, /* fuseable_ops. */
++ 16, /* function_align. */
++ 8, /* jump_align. */
++ 16, /* loop_align. */
++ 3, /* int_reassoc_width. */
++ 2, /* fp_reassoc_width. */
++ 2, /* vec_reassoc_width. */
++ 2, /* min_div_recip_mul_sf. */
++ 2, /* min_div_recip_mul_df. */
++ 0, /* max_case_values. */
++ 64, /* cache_line_size. */
++ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ };
+
+ /* Support for fine-grained override of the tuning structures. */
+@@ -3582,7 +3689,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
return aarch64_tls_referenced_p (x);
}
@@ -378,7 +965,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
static unsigned int
aarch64_case_values_threshold (void)
-@@ -3593,7 +3598,7 @@ aarch64_case_values_threshold (void)
+@@ -3593,7 +3705,7 @@ aarch64_case_values_threshold (void)
&& selected_cpu->tune->max_case_values != 0)
return selected_cpu->tune->max_case_values;
else
@@ -387,7 +974,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
}
/* Return true if register REGNO is a valid index register.
-@@ -4232,14 +4237,6 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
+@@ -4232,14 +4344,6 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
&& GET_CODE (x) == NEG)
return CC_Zmode;
@@ -402,7 +989,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* A test for unsigned overflow. */
if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
&& code == NE
-@@ -4308,8 +4305,6 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
+@@ -4308,8 +4412,6 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
break;
case CC_SWPmode:
@@ -411,7 +998,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
switch (comp_code)
{
case NE: return AARCH64_NE;
-@@ -5022,120 +5017,6 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
+@@ -5022,120 +5124,6 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
return x;
}
@@ -532,7 +1119,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* Return the reload icode required for a constant pool in mode. */
static enum insn_code
aarch64_constant_pool_reload_icode (machine_mode mode)
-@@ -6411,10 +6292,6 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
+@@ -6411,10 +6399,6 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
/* TODO: A write to the CC flags possibly costs extra, this
needs encoding in the cost tables. */
@@ -543,7 +1130,390 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
mode = GET_MODE (op0);
/* ANDS. */
if (GET_CODE (op0) == AND)
-@@ -10851,33 +10728,6 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
+@@ -7452,12 +7436,12 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
+ to optimize 1.0/sqrt. */
+
+ static bool
+-use_rsqrt_p (void)
++use_rsqrt_p (machine_mode mode)
+ {
+ return (!flag_trapping_math
+ && flag_unsafe_math_optimizations
+- && ((aarch64_tune_params.extra_tuning_flags
+- & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
++ && ((aarch64_tune_params.approx_modes->recip_sqrt
++ & AARCH64_APPROX_MODE (mode))
+ || flag_mrecip_low_precision_sqrt));
+ }
+
+@@ -7467,89 +7451,217 @@ use_rsqrt_p (void)
+ static tree
+ aarch64_builtin_reciprocal (tree fndecl)
+ {
+- if (!use_rsqrt_p ())
++ machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
++
++ if (!use_rsqrt_p (mode))
+ return NULL_TREE;
+ return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
+ }
+
+ typedef rtx (*rsqrte_type) (rtx, rtx);
+
+-/* Select reciprocal square root initial estimate
+- insn depending on machine mode. */
++/* Select reciprocal square root initial estimate insn depending on machine
++ mode. */
+
+-rsqrte_type
++static rsqrte_type
+ get_rsqrte_type (machine_mode mode)
+ {
+ switch (mode)
+ {
+- case DFmode: return gen_aarch64_rsqrte_df2;
+- case SFmode: return gen_aarch64_rsqrte_sf2;
+- case V2DFmode: return gen_aarch64_rsqrte_v2df2;
+- case V2SFmode: return gen_aarch64_rsqrte_v2sf2;
+- case V4SFmode: return gen_aarch64_rsqrte_v4sf2;
++ case DFmode: return gen_aarch64_rsqrtedf;
++ case SFmode: return gen_aarch64_rsqrtesf;
++ case V2DFmode: return gen_aarch64_rsqrtev2df;
++ case V2SFmode: return gen_aarch64_rsqrtev2sf;
++ case V4SFmode: return gen_aarch64_rsqrtev4sf;
+ default: gcc_unreachable ();
+ }
+ }
+
+ typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
+
+-/* Select reciprocal square root Newton-Raphson step
+- insn depending on machine mode. */
++/* Select reciprocal square root series step insn depending on machine mode. */
+
+-rsqrts_type
++static rsqrts_type
+ get_rsqrts_type (machine_mode mode)
+ {
+ switch (mode)
+ {
+- case DFmode: return gen_aarch64_rsqrts_df3;
+- case SFmode: return gen_aarch64_rsqrts_sf3;
+- case V2DFmode: return gen_aarch64_rsqrts_v2df3;
+- case V2SFmode: return gen_aarch64_rsqrts_v2sf3;
+- case V4SFmode: return gen_aarch64_rsqrts_v4sf3;
++ case DFmode: return gen_aarch64_rsqrtsdf;
++ case SFmode: return gen_aarch64_rsqrtssf;
++ case V2DFmode: return gen_aarch64_rsqrtsv2df;
++ case V2SFmode: return gen_aarch64_rsqrtsv2sf;
++ case V4SFmode: return gen_aarch64_rsqrtsv4sf;
+ default: gcc_unreachable ();
+ }
+ }
+
+-/* Emit instruction sequence to compute the reciprocal square root using the
+- Newton-Raphson series. Iterate over the series twice for SF
+- and thrice for DF. */
++/* Emit instruction sequence to compute either the approximate square root
++ or its approximate reciprocal, depending on the flag RECP, and return
++ whether the sequence was emitted or not. */
+
+-void
+-aarch64_emit_approx_rsqrt (rtx dst, rtx src)
++bool
++aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
+ {
+- machine_mode mode = GET_MODE (src);
+- gcc_assert (
+- mode == SFmode || mode == V2SFmode || mode == V4SFmode
+- || mode == DFmode || mode == V2DFmode);
+-
+- rtx xsrc = gen_reg_rtx (mode);
+- emit_move_insn (xsrc, src);
+- rtx x0 = gen_reg_rtx (mode);
++ machine_mode mode = GET_MODE (dst);
++ machine_mode mmsk = mode_for_vector
++ (int_mode_for_mode (GET_MODE_INNER (mode)),
++ GET_MODE_NUNITS (mode));
++ bool use_approx_sqrt_p = (!recp
++ && (flag_mlow_precision_sqrt
++ || (aarch64_tune_params.approx_modes->sqrt
++ & AARCH64_APPROX_MODE (mode))));
++ bool use_approx_rsqrt_p = (recp
++ && (flag_mrecip_low_precision_sqrt
++ || (aarch64_tune_params.approx_modes->recip_sqrt
++ & AARCH64_APPROX_MODE (mode))));
++
++ if (!flag_finite_math_only
++ || flag_trapping_math
++ || !flag_unsafe_math_optimizations
++ || !(use_approx_sqrt_p || use_approx_rsqrt_p)
++ || optimize_function_for_size_p (cfun))
++ return false;
+
+- emit_insn ((*get_rsqrte_type (mode)) (x0, xsrc));
++ rtx xmsk = gen_reg_rtx (mmsk);
++ if (!recp)
++ /* When calculating the approximate square root, compare the argument with
++ 0.0 and create a mask. */
++ emit_insn (gen_rtx_SET (xmsk, gen_rtx_NEG (mmsk, gen_rtx_EQ (mmsk, src,
++ CONST0_RTX (mode)))));
+
+- bool double_mode = (mode == DFmode || mode == V2DFmode);
++ /* Estimate the approximate reciprocal square root. */
++ rtx xdst = gen_reg_rtx (mode);
++ emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
+
+- int iterations = double_mode ? 3 : 2;
++ /* Iterate over the series twice for SF and thrice for DF. */
++ int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
+
+- /* Optionally iterate over the series one less time than otherwise. */
+- if (flag_mrecip_low_precision_sqrt)
++ /* Optionally iterate over the series once less for faster performance
++ while sacrificing the accuracy. */
++ if ((recp && flag_mrecip_low_precision_sqrt)
++ || (!recp && flag_mlow_precision_sqrt))
+ iterations--;
+
+- for (int i = 0; i < iterations; ++i)
++ /* Iterate over the series to calculate the approximate reciprocal square
++ root. */
++ rtx x1 = gen_reg_rtx (mode);
++ while (iterations--)
+ {
+- rtx x1 = gen_reg_rtx (mode);
+ rtx x2 = gen_reg_rtx (mode);
+- rtx x3 = gen_reg_rtx (mode);
+- emit_set_insn (x2, gen_rtx_MULT (mode, x0, x0));
++ emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
++
++ emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
+
+- emit_insn ((*get_rsqrts_type (mode)) (x3, xsrc, x2));
++ if (iterations > 0)
++ emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
++ }
++
++ if (!recp)
++ {
++ /* Qualify the approximate reciprocal square root when the argument is
++ 0.0 by squashing the intermediary result to 0.0. */
++ rtx xtmp = gen_reg_rtx (mmsk);
++ emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
++ gen_rtx_SUBREG (mmsk, xdst, 0)));
++ emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
+
+- emit_set_insn (x1, gen_rtx_MULT (mode, x0, x3));
+- x0 = x1;
++ /* Calculate the approximate square root. */
++ emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
+ }
+
+- emit_move_insn (dst, x0);
++ /* Finalize the approximation. */
++ emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
++
++ return true;
++}
++
++typedef rtx (*recpe_type) (rtx, rtx);
++
++/* Select reciprocal initial estimate insn depending on machine mode. */
++
++static recpe_type
++get_recpe_type (machine_mode mode)
++{
++ switch (mode)
++ {
++ case SFmode: return (gen_aarch64_frecpesf);
++ case V2SFmode: return (gen_aarch64_frecpev2sf);
++ case V4SFmode: return (gen_aarch64_frecpev4sf);
++ case DFmode: return (gen_aarch64_frecpedf);
++ case V2DFmode: return (gen_aarch64_frecpev2df);
++ default: gcc_unreachable ();
++ }
++}
++
++typedef rtx (*recps_type) (rtx, rtx, rtx);
++
++/* Select reciprocal series step insn depending on machine mode. */
++
++static recps_type
++get_recps_type (machine_mode mode)
++{
++ switch (mode)
++ {
++ case SFmode: return (gen_aarch64_frecpssf);
++ case V2SFmode: return (gen_aarch64_frecpsv2sf);
++ case V4SFmode: return (gen_aarch64_frecpsv4sf);
++ case DFmode: return (gen_aarch64_frecpsdf);
++ case V2DFmode: return (gen_aarch64_frecpsv2df);
++ default: gcc_unreachable ();
++ }
++}
++
++/* Emit the instruction sequence to compute the approximation for the division
++ of NUM by DEN in QUO and return whether the sequence was emitted or not. */
++
++bool
++aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
++{
++ machine_mode mode = GET_MODE (quo);
++ bool use_approx_division_p = (flag_mlow_precision_div
++ || (aarch64_tune_params.approx_modes->division
++ & AARCH64_APPROX_MODE (mode)));
++
++ if (!flag_finite_math_only
++ || flag_trapping_math
++ || !flag_unsafe_math_optimizations
++ || optimize_function_for_size_p (cfun)
++ || !use_approx_division_p)
++ return false;
++
++ /* Estimate the approximate reciprocal. */
++ rtx xrcp = gen_reg_rtx (mode);
++ emit_insn ((*get_recpe_type (mode)) (xrcp, den));
++
++ /* Iterate over the series twice for SF and thrice for DF. */
++ int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
++
++ /* Optionally iterate over the series once less for faster performance,
++ while sacrificing the accuracy. */
++ if (flag_mlow_precision_div)
++ iterations--;
++
++ /* Iterate over the series to calculate the approximate reciprocal. */
++ rtx xtmp = gen_reg_rtx (mode);
++ while (iterations--)
++ {
++ emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
++
++ if (iterations > 0)
++ emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
++ }
++
++ if (num != CONST1_RTX (mode))
++ {
++ /* As the approximate reciprocal of DEN is already calculated, only
++ calculate the approximate division when NUM is not 1.0. */
++ rtx xnum = force_reg (mode, num);
++ emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
++ }
++
++ /* Finalize the approximation. */
++ emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
++ return true;
+ }
+
+ /* Return the number of instructions that can be issued per cycle. */
+@@ -8079,6 +8191,12 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts)
+ && (aarch64_cmodel == AARCH64_CMODEL_TINY
+ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC))
+ aarch64_nopcrelative_literal_loads = false;
++
++ /* When enabling the lower precision Newton series for the square root, also
++ enable it for the reciprocal square root, since the latter is an
++ intermediary step for the former. */
++ if (flag_mlow_precision_sqrt)
++ flag_mrecip_low_precision_sqrt = true;
+ }
+
+ /* 'Unpack' up the internal tuning structs and update the options
+@@ -9463,6 +9581,13 @@ aarch64_build_builtin_va_list (void)
+ FIELD_DECL, get_identifier ("__vr_offs"),
+ integer_type_node);
+
++ /* Tell tree-stdarg pass about our internal offset fields.
++ NOTE: va_list_gpr/fpr_counter_field are only used for tree comparision
++ purpose to identify whether the code is updating va_list internal
++ offset fields through irregular way. */
++ va_list_gpr_counter_field = f_groff;
++ va_list_fpr_counter_field = f_vroff;
++
+ DECL_ARTIFICIAL (f_stack) = 1;
+ DECL_ARTIFICIAL (f_grtop) = 1;
+ DECL_ARTIFICIAL (f_vrtop) = 1;
+@@ -9495,15 +9620,17 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
+ tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
+ tree stack, grtop, vrtop, groff, vroff;
+ tree t;
+- int gr_save_area_size;
+- int vr_save_area_size;
++ int gr_save_area_size = cfun->va_list_gpr_size;
++ int vr_save_area_size = cfun->va_list_fpr_size;
+ int vr_offset;
+
+ cum = &crtl->args.info;
+- gr_save_area_size
+- = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
+- vr_save_area_size
+- = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
++ if (cfun->va_list_gpr_size)
++ gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
++ cfun->va_list_gpr_size);
++ if (cfun->va_list_fpr_size)
++ vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
++ * UNITS_PER_VREG, cfun->va_list_fpr_size);
+
+ if (!TARGET_FLOAT)
+ {
+@@ -9832,7 +9959,8 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
+ {
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ CUMULATIVE_ARGS local_cum;
+- int gr_saved, vr_saved;
++ int gr_saved = cfun->va_list_gpr_size;
++ int vr_saved = cfun->va_list_fpr_size;
+
+ /* The caller has advanced CUM up to, but not beyond, the last named
+ argument. Advance a local copy of CUM past the last "real" named
+@@ -9840,9 +9968,14 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
+ local_cum = *cum;
+ aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
+
+- /* Found out how many registers we need to save. */
+- gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
+- vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
++ /* Found out how many registers we need to save.
++ Honor tree-stdvar analysis results. */
++ if (cfun->va_list_gpr_size)
++ gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
++ cfun->va_list_gpr_size / UNITS_PER_WORD);
++ if (cfun->va_list_fpr_size)
++ vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
++ cfun->va_list_fpr_size / UNITS_PER_VREG);
+
+ if (!TARGET_FLOAT)
+ {
+@@ -9870,7 +10003,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
+ /* We can't use move_block_from_reg, because it will use
+ the wrong mode, storing D regs only. */
+ machine_mode mode = TImode;
+- int off, i;
++ int off, i, vr_start;
+
+ /* Set OFF to the offset from virtual_incoming_args_rtx of
+ the first vector register. The VR save area lies below
+@@ -9879,14 +10012,15 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
+ STACK_BOUNDARY / BITS_PER_UNIT);
+ off -= vr_saved * UNITS_PER_VREG;
+
+- for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
++ vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
++ for (i = 0; i < vr_saved; ++i)
+ {
+ rtx ptr, mem;
+
+ ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
+ mem = gen_frame_mem (mode, ptr);
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+- aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
++ aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
+ off += UNITS_PER_VREG;
+ }
+ }
+@@ -10848,33 +10982,6 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
gen_rtx_REG (mode, rsrc + count - i - 1));
}
@@ -577,7 +1547,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
one of VSTRUCT modes: OI, CI, or XI. */
int
-@@ -11959,12 +11809,11 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
+@@ -11956,12 +12063,11 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
info.value = GEN_INT (0);
else
{
@@ -591,7 +1561,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
if (lane_count == 1)
snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
-@@ -13317,6 +13166,14 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+@@ -13314,6 +13420,14 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
return false;
}
@@ -606,7 +1576,23 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* If MEM is in the form of [base+offset], extract the two parts
of address and set to BASE and OFFSET, otherwise return false
after clearing BASE and OFFSET. */
-@@ -14232,6 +14089,9 @@ aarch64_optab_supported_p (int op, machine_mode, machine_mode,
+@@ -13886,13 +14000,13 @@ aarch64_promoted_type (const_tree t)
+ /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
+
+ static bool
+-aarch64_optab_supported_p (int op, machine_mode, machine_mode,
++aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
+ optimization_type opt_type)
+ {
+ switch (op)
+ {
+ case rsqrt_optab:
+- return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
++ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
+
+ default:
+ return true;
+@@ -14229,6 +14343,9 @@ aarch64_optab_supported_p (int op, machine_mode, machine_mode,
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
@@ -651,7 +1637,114 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
required size of load/store. */
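The approximation routines added to aarch64.c above share one Newton-Raphson
shape: a hardware estimate (FRSQRTE or FRECPE) refined by fused steps
(FRSQRTS computes (3 - a*b)/2, FRECPS computes 2 - a*b), iterating twice for
single precision, three times for double, and once less under the
-mlow-precision-* flags. A scalar C sketch of the reciprocal square root
case, using the well-known bit trick as a software stand-in for FRSQRTE;
purely illustrative, not the compiler's emitted code:

#include <stdint.h>
#include <string.h>

static float
approx_rsqrt (float d, int steps)   /* steps: 2 for SF, 1 with low precision */
{
  uint32_t i;
  float x;
  memcpy (&i, &d, sizeof i);
  i = 0x5f3759df - (i >> 1);        /* software stand-in for FRSQRTE */
  memcpy (&x, &i, sizeof x);
  while (steps--)
    x = x * (3.0f - d * x * x) * 0.5f;   /* one FRSQRTS-style step */
  return x;
}

/* sqrt(d) is then recovered as d * approx_rsqrt (d, steps), with the
   d == 0.0 input masked back to 0.0, much as aarch64_emit_approx_sqrt
   does with its xmsk register.  */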
--- a/src/gcc/config/aarch64/aarch64.md
+++ b/src/gcc/config/aarch64/aarch64.md
-@@ -1783,7 +1783,7 @@
+@@ -75,6 +75,8 @@
+ UNSPEC_CRC32H
+ UNSPEC_CRC32W
+ UNSPEC_CRC32X
++ UNSPEC_FCVTZS
++ UNSPEC_FCVTZU
+ UNSPEC_URECPE
+ UNSPEC_FRECPE
+ UNSPEC_FRECPS
+@@ -105,6 +107,7 @@
+ UNSPEC_NOP
+ UNSPEC_PRLG_STK
+ UNSPEC_RBIT
++ UNSPEC_SCVTF
+ UNSPEC_SISD_NEG
+ UNSPEC_SISD_SSHL
+ UNSPEC_SISD_USHL
+@@ -122,6 +125,7 @@
+ UNSPEC_TLSLE24
+ UNSPEC_TLSLE32
+ UNSPEC_TLSLE48
++ UNSPEC_UCVTF
+ UNSPEC_USHL_2S
+ UNSPEC_VSTRUCTDUMMY
+ UNSPEC_SP_SET
+@@ -1178,11 +1182,12 @@
+ )
+
+ (define_insn "*movhf_aarch64"
+- [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
+- (match_operand:HF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))]
++ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
++ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
+ "TARGET_FLOAT && (register_operand (operands[0], HFmode)
+ || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+ "@
++ movi\\t%0.4h, #0
+ mov\\t%0.h[0], %w1
+ umov\\t%w0, %1.h[0]
+ mov\\t%0.h[0], %1.h[0]
+@@ -1191,18 +1196,18 @@
+ ldrh\\t%w0, %1
+ strh\\t%w1, %0
+ mov\\t%w0, %w1"
+- [(set_attr "type" "neon_from_gp,neon_to_gp,neon_move,\
++ [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
+ f_loads,f_stores,load1,store1,mov_reg")
+- (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
+- (set_attr "fp" "*,*,*,yes,yes,*,*,*")]
++ (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+ )
+
+ (define_insn "*movsf_aarch64"
+- [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r")
+- (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))]
++ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
++ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ "TARGET_FLOAT && (register_operand (operands[0], SFmode)
+ || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+ "@
++ movi\\t%0.2s, #0
+ fmov\\t%s0, %w1
+ fmov\\t%w0, %s1
+ fmov\\t%s0, %s1
+@@ -1212,16 +1217,18 @@
+ ldr\\t%w0, %1
+ str\\t%w1, %0
+ mov\\t%w0, %w1"
+- [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
+- f_loads,f_stores,load1,store1,mov_reg")]
++ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
++ f_loads,f_stores,load1,store1,mov_reg")
++ (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ )
+
+ (define_insn "*movdf_aarch64"
+- [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r")
+- (match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))]
++ [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
++ (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ "TARGET_FLOAT && (register_operand (operands[0], DFmode)
+ || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+ "@
++ movi\\t%d0, #0
+ fmov\\t%d0, %x1
+ fmov\\t%x0, %d1
+ fmov\\t%d0, %d1
+@@ -1231,8 +1238,9 @@
+ ldr\\t%x0, %1
+ str\\t%x1, %0
+ mov\\t%x0, %x1"
+- [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
+- f_loadd,f_stored,load1,store1,mov_reg")]
++ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
++ f_loadd,f_stored,load1,store1,mov_reg")
++ (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ )
+
+ (define_insn "*movtf_aarch64"
+@@ -1257,7 +1265,6 @@
+ [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
+ f_loadd,f_stored,load2,store2,store2")
+ (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
+- (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*,*")
+ (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
+ )
+
+@@ -1783,7 +1790,7 @@
"aarch64_zero_extend_const_eq (<DWI>mode, operands[2],
<MODE>mode, operands[1])"
"@
@@ -660,7 +1753,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
cmp\\t%<w>0, #%n1"
[(set_attr "type" "alus_imm")]
)
-@@ -1815,11 +1815,11 @@
+@@ -1815,11 +1822,11 @@
"aarch64_zero_extend_const_eq (<DWI>mode, operands[3],
<MODE>mode, operands[2])"
"@
@@ -674,7 +1767,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
(define_insn "add<mode>3_compareC"
[(set (reg:CC_C CC_REGNUM)
(ne:CC_C
-@@ -3422,7 +3422,9 @@
+@@ -3422,7 +3429,9 @@
(LOGICAL:SI (match_operand:SI 1 "register_operand" "%r,r")
(match_operand:SI 2 "aarch64_logical_operand" "r,K"))))]
""
@@ -685,7 +1778,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
[(set_attr "type" "logic_reg,logic_imm")]
)
-@@ -3435,7 +3437,9 @@
+@@ -3435,7 +3444,9 @@
(set (match_operand:GPI 0 "register_operand" "=r,r")
(and:GPI (match_dup 1) (match_dup 2)))]
""
@@ -696,7 +1789,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
[(set_attr "type" "logics_reg,logics_imm")]
)
-@@ -3449,7 +3453,9 @@
+@@ -3449,7 +3460,9 @@
(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
""
@@ -707,7 +1800,40 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
[(set_attr "type" "logics_reg,logics_imm")]
)
-@@ -3803,7 +3809,9 @@
+@@ -3775,16 +3788,23 @@
+ [(set_attr "type" "rbit")]
+ )
+
+-(define_expand "ctz<mode>2"
+- [(match_operand:GPI 0 "register_operand")
+- (match_operand:GPI 1 "register_operand")]
++;; Split after reload into RBIT + CLZ. Since RBIT is represented as an UNSPEC
++;; it is unlikely to fold with any other operation, so keep this as a CTZ
++;; expression and split after reload to enable scheduling them apart if
++;; needed.
++
++(define_insn_and_split "ctz<mode>2"
++ [(set (match_operand:GPI 0 "register_operand" "=r")
++ (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))]
+ ""
+- {
+- emit_insn (gen_rbit<mode>2 (operands[0], operands[1]));
+- emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
+- DONE;
+- }
+-)
++ "#"
++ "reload_completed"
++ [(const_int 0)]
++ "
++ emit_insn (gen_rbit<mode>2 (operands[0], operands[1]));
++ emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
++ DONE;
++")
+
+ (define_insn "*and<mode>_compare0"
+ [(set (reg:CC_NZ CC_REGNUM)
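
The comment in the ctz hunk above rests on one identity: counting trailing zeros equals bit-reversing the operand and counting leading zeros, which is exactly what the RBIT/CLZ pair computes. A minimal host-side C check of that identity (names here are mine, not from the patch):

    #include <assert.h>
    #include <stdint.h>

    /* ctz(x) == clz(reverse_bits(x)) for nonzero x: reversing the bits
       turns trailing zeros into leading zeros.  */
    static uint32_t reverse_bits (uint32_t x)
    {
      uint32_t r = 0;
      for (int i = 0; i < 32; i++)
        r |= ((x >> i) & 1u) << (31 - i);
      return r;
    }

    int main (void)
    {
      for (uint32_t x = 1; x < (1u << 20); x++)
        assert (__builtin_ctz (x) == __builtin_clz (reverse_bits (x)));
      return 0;
    }
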
+@@ -3803,7 +3823,9 @@
(match_operand:GPI 1 "aarch64_logical_operand" "r,<lconst>"))
(const_int 0)))]
""
@@ -718,7 +1844,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
[(set_attr "type" "logics_reg,logics_imm")]
)
-@@ -3869,22 +3877,16 @@
+@@ -3869,22 +3891,16 @@
(define_expand "ashl<mode>3"
[(set (match_operand:SHORT 0 "register_operand")
(ashift:SHORT (match_operand:SHORT 1 "register_operand")
@@ -747,7 +1873,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
}
)
-@@ -3933,33 +3935,35 @@
+@@ -3933,33 +3949,35 @@
;; Logical left shift using SISD or Integer instruction
(define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
@@ -795,7 +1921,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_split
-@@ -3994,18 +3998,19 @@
+@@ -3994,18 +4012,19 @@
;; Arithmetic right shift using SISD or Integer instruction
(define_insn "*aarch64_ashr_sisd_or_int_<mode>3"
@@ -820,7 +1946,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_split
-@@ -4097,21 +4102,25 @@
+@@ -4097,21 +4116,25 @@
[(set (match_operand:GPI 0 "register_operand" "=r,r")
(rotatert:GPI
(match_operand:GPI 1 "register_operand" "r,r")
@@ -854,7 +1980,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "*<optab><mode>3_insn"
-@@ -4135,7 +4144,7 @@
+@@ -4135,7 +4158,7 @@
"UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode) &&
(UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))"
"extr\\t%<w>0, %<w>1, %<w>2, %4"
@@ -863,7 +1989,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
;; There are no canonicalisation rules for ashift and lshiftrt inside an ior
-@@ -4150,7 +4159,7 @@
+@@ -4150,7 +4173,7 @@
&& (UINTVAL (operands[3]) + UINTVAL (operands[4])
== GET_MODE_BITSIZE (<MODE>mode))"
"extr\\t%<w>0, %<w>1, %<w>2, %4"
@@ -872,7 +1998,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
;; zero_extend version of the above
-@@ -4164,7 +4173,7 @@
+@@ -4164,7 +4187,7 @@
"UINTVAL (operands[3]) < 32 &&
(UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
"extr\\t%w0, %w1, %w2, %4"
@@ -881,7 +2007,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "*extrsi5_insn_uxtw_alt"
-@@ -4177,7 +4186,7 @@
+@@ -4177,7 +4200,7 @@
"UINTVAL (operands[3]) < 32 &&
(UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)"
"extr\\t%w0, %w1, %w2, %4"
@@ -890,7 +2016,89 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "*ror<mode>3_insn"
-@@ -5191,7 +5200,7 @@
+@@ -4608,6 +4631,36 @@
+ [(set_attr "type" "f_cvti2f")]
+ )
+
++;; Convert between fixed-point and floating-point (scalar modes)
++
++(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><GPF:mode>3"
++ [(set (match_operand:<GPF:FCVT_TARGET> 0 "register_operand" "=r, w")
++ (unspec:<GPF:FCVT_TARGET> [(match_operand:GPF 1 "register_operand" "w, w")
++ (match_operand:SI 2 "immediate_operand" "i, i")]
++ FCVT_F2FIXED))]
++ ""
++ "@
++ <FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPF:w1>0, %<GPF:s>1, #%2
++ <FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPF:s>0, %<GPF:s>1, #%2"
++ [(set_attr "type" "f_cvtf2i, neon_fp_to_int_<GPF:Vetype>")
++ (set_attr "fp" "yes, *")
++ (set_attr "simd" "*, yes")]
++)
++
++(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><GPI:mode>3"
++ [(set (match_operand:<GPI:FCVT_TARGET> 0 "register_operand" "=w, w")
++ (unspec:<GPI:FCVT_TARGET> [(match_operand:GPI 1 "register_operand" "r, w")
++ (match_operand:SI 2 "immediate_operand" "i, i")]
++ FCVT_FIXED2F))]
++ ""
++ "@
++ <FCVT_FIXED2F:fcvt_fixed_insn>\t%<GPI:v>0, %<GPI:w>1, #%2
++ <FCVT_FIXED2F:fcvt_fixed_insn>\t%<GPI:v>0, %<GPI:v>1, #%2"
++ [(set_attr "type" "f_cvti2f, neon_int_to_fp_<GPI:Vetype>")
++ (set_attr "fp" "yes, *")
++ (set_attr "simd" "*, yes")]
++)
++
+ ;; -------------------------------------------------------------------
+ ;; Floating-point arithmetic
+ ;; -------------------------------------------------------------------
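
The new scalar patterns above map SCVTF/UCVTF/FCVTZS/FCVTZU with an immediate fractional-bit count. As a rough reference model of the intended semantics (my reading of the instructions, ignoring saturation and rounding-mode detail, valid for 0 < b < 32): converting from fixed-point divides by 2^b, converting to fixed-point multiplies by 2^b and truncates.

    #include <stdint.h>

    /* Assumed reference semantics for scvtf/fcvtzs with #b fractional bits.  */
    static float scvtf_ref (int32_t a, int b)
    {
      return (float) a / (float) (1u << b);
    }

    static int32_t fcvtzs_ref (float a, int b)
    {
      return (int32_t) (a * (float) (1u << b));  /* truncates toward zero */
    }
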
+@@ -4662,11 +4715,22 @@
+ [(set_attr "type" "fmul<s>")]
+ )
+
+-(define_insn "div<mode>3"
++(define_expand "div<mode>3"
++ [(set (match_operand:GPF 0 "register_operand")
++ (div:GPF (match_operand:GPF 1 "general_operand")
++ (match_operand:GPF 2 "register_operand")))]
++ "TARGET_SIMD"
++{
++ if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
++ DONE;
++
++ operands[1] = force_reg (<MODE>mode, operands[1]);
++})
++
++(define_insn "*div<mode>3"
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+- (div:GPF
+- (match_operand:GPF 1 "register_operand" "w")
+- (match_operand:GPF 2 "register_operand" "w")))]
++ (div:GPF (match_operand:GPF 1 "register_operand" "w")
++ (match_operand:GPF 2 "register_operand" "w")))]
+ "TARGET_FLOAT"
+ "fdiv\\t%<s>0, %<s>1, %<s>2"
+ [(set_attr "type" "fdiv<s>")]
+@@ -4680,7 +4744,16 @@
+ [(set_attr "type" "ffarith<s>")]
+ )
+
+-(define_insn "sqrt<mode>2"
++(define_expand "sqrt<mode>2"
++ [(set (match_operand:GPF 0 "register_operand")
++ (sqrt:GPF (match_operand:GPF 1 "register_operand")))]
++ "TARGET_FLOAT"
++{
++ if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
++ DONE;
++})
++
++(define_insn "*sqrt<mode>2"
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))]
+ "TARGET_FLOAT"
+@@ -5191,7 +5264,7 @@
UNSPEC_SP_TEST))
(clobber (match_scratch:PTR 3 "=&r"))]
""
@@ -899,79 +2107,77 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
[(set_attr "length" "12")
(set_attr "type" "multiple")])
+--- a/src/gcc/config/aarch64/aarch64.opt
++++ b/src/gcc/config/aarch64/aarch64.opt
+@@ -151,5 +151,19 @@ PC relative literal loads.
+
+ mlow-precision-recip-sqrt
+ Common Var(flag_mrecip_low_precision_sqrt) Optimization
+-When calculating the reciprocal square root approximation,
+-uses one less step than otherwise, thus reducing latency and precision.
++Enable the reciprocal square root approximation. Enabling this reduces
++precision of reciprocal square root results to about 16 bits for
++single precision and to 32 bits for double precision.
++
++mlow-precision-sqrt
++Common Var(flag_mlow_precision_sqrt) Optimization
++Enable the square root approximation. Enabling this reduces
++precision of square root results to about 16 bits for
++single precision and to 32 bits for double precision.
++If enabled, it implies -mlow-precision-recip-sqrt.
++
++mlow-precision-div
++Common Var(flag_mlow_precision_div) Optimization
++Enable the division approximation. Enabling this reduces
++precision of division results to about 16 bits for
++single precision and to 32 bits for double precision.
--- a/src/gcc/config/aarch64/arm_neon.h
+++ b/src/gcc/config/aarch64/arm_neon.h
-@@ -7938,61 +7938,6 @@ vmovn_u64 (uint64x2_t a)
+@@ -5440,17 +5440,6 @@ vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vmul_n_f32 (float32x2_t a, float32_t b)
+-vabd_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
-- __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vmul_n_s16 (int16x4_t a, int16_t b)
--{
-- int16x4_t result;
-- __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
-- : "=w"(result)
-- : "w"(a), "x"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vmul_n_s32 (int32x2_t a, int32_t b)
--{
-- int32x2_t result;
-- __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
+- __asm__ ("fabd %0.2s, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vmul_n_u16 (uint16x4_t a, uint16_t b)
--{
-- uint16x4_t result;
-- __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
-- : "=w"(result)
-- : "w"(a), "x"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vmul_n_u32 (uint32x2_t a, uint32_t b)
+ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+ vabd_s8 (int8x8_t a, int8x8_t b)
+ {
+@@ -5517,17 +5506,6 @@ vabd_u32 (uint32x2_t a, uint32x2_t b)
+ return result;
+ }
+
+-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+-vabdd_f64 (float64_t a, float64_t b)
-{
-- uint32x2_t result;
-- __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
+- float64_t result;
+- __asm__ ("fabd %d0, %d1, %d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
- #define vmull_high_lane_s16(a, b, c) \
- __extension__ \
- ({ \
-@@ -8443,227 +8388,6 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
+ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+ vabdl_high_s8 (int8x16_t a, int8x16_t b)
+ {
+@@ -5660,28 +5638,6 @@ vabdl_u32 (uint32x2_t a, uint32x2_t b)
return result;
}
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vmulq_n_f32 (float32x4_t a, float32_t b)
+-vabdq_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
-- __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
+- __asm__ ("fabd %0.4s, %1.4s, %2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
@@ -979,34 +2185,390 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vmulq_n_f64 (float64x2_t a, float64_t b)
+-vabdq_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
-- __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
+- __asm__ ("fabd %0.2d, %1.2d, %2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vmulq_n_s16 (int16x8_t a, int16_t b)
+ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+ vabdq_s8 (int8x16_t a, int8x16_t b)
+ {
+@@ -5748,17 +5704,6 @@ vabdq_u32 (uint32x4_t a, uint32x4_t b)
+ return result;
+ }
+
+-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+-vabds_f32 (float32_t a, float32_t b)
-{
-- int16x8_t result;
-- __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
+- float32_t result;
+- __asm__ ("fabd %s0, %s1, %s2"
- : "=w"(result)
-- : "w"(a), "x"(b)
+- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vmulq_n_s32 (int32x4_t a, int32_t b)
--{
-- int32x4_t result;
-- __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
-- : "=w"(result)
-- : "w"(a), "w"(b)
+ __extension__ static __inline int16_t __attribute__ ((__always_inline__))
+ vaddlv_s8 (int8x8_t a)
+ {
+@@ -6025,246 +5970,6 @@ vaddlvq_u32 (uint32x4_t a)
+ result; \
+ })
+
+-#define vcvt_n_f32_s32(a, b) \
+- __extension__ \
+- ({ \
+- int32x2_t a_ = (a); \
+- float32x2_t result; \
+- __asm__ ("scvtf %0.2s, %1.2s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvt_n_f32_u32(a, b) \
+- __extension__ \
+- ({ \
+- uint32x2_t a_ = (a); \
+- float32x2_t result; \
+- __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvt_n_s32_f32(a, b) \
+- __extension__ \
+- ({ \
+- float32x2_t a_ = (a); \
+- int32x2_t result; \
+- __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvt_n_u32_f32(a, b) \
+- __extension__ \
+- ({ \
+- float32x2_t a_ = (a); \
+- uint32x2_t result; \
+- __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtd_n_f64_s64(a, b) \
+- __extension__ \
+- ({ \
+- int64_t a_ = (a); \
+- float64_t result; \
+- __asm__ ("scvtf %d0,%d1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtd_n_f64_u64(a, b) \
+- __extension__ \
+- ({ \
+- uint64_t a_ = (a); \
+- float64_t result; \
+- __asm__ ("ucvtf %d0,%d1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtd_n_s64_f64(a, b) \
+- __extension__ \
+- ({ \
+- float64_t a_ = (a); \
+- int64_t result; \
+- __asm__ ("fcvtzs %d0,%d1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtd_n_u64_f64(a, b) \
+- __extension__ \
+- ({ \
+- float64_t a_ = (a); \
+- uint64_t result; \
+- __asm__ ("fcvtzu %d0,%d1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_f32_s32(a, b) \
+- __extension__ \
+- ({ \
+- int32x4_t a_ = (a); \
+- float32x4_t result; \
+- __asm__ ("scvtf %0.4s, %1.4s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_f32_u32(a, b) \
+- __extension__ \
+- ({ \
+- uint32x4_t a_ = (a); \
+- float32x4_t result; \
+- __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_f64_s64(a, b) \
+- __extension__ \
+- ({ \
+- int64x2_t a_ = (a); \
+- float64x2_t result; \
+- __asm__ ("scvtf %0.2d, %1.2d, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_f64_u64(a, b) \
+- __extension__ \
+- ({ \
+- uint64x2_t a_ = (a); \
+- float64x2_t result; \
+- __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_s32_f32(a, b) \
+- __extension__ \
+- ({ \
+- float32x4_t a_ = (a); \
+- int32x4_t result; \
+- __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_s64_f64(a, b) \
+- __extension__ \
+- ({ \
+- float64x2_t a_ = (a); \
+- int64x2_t result; \
+- __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_u32_f32(a, b) \
+- __extension__ \
+- ({ \
+- float32x4_t a_ = (a); \
+- uint32x4_t result; \
+- __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvtq_n_u64_f64(a, b) \
+- __extension__ \
+- ({ \
+- float64x2_t a_ = (a); \
+- uint64x2_t result; \
+- __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvts_n_f32_s32(a, b) \
+- __extension__ \
+- ({ \
+- int32_t a_ = (a); \
+- float32_t result; \
+- __asm__ ("scvtf %s0,%s1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvts_n_f32_u32(a, b) \
+- __extension__ \
+- ({ \
+- uint32_t a_ = (a); \
+- float32_t result; \
+- __asm__ ("ucvtf %s0,%s1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvts_n_s32_f32(a, b) \
+- __extension__ \
+- ({ \
+- float32_t a_ = (a); \
+- int32_t result; \
+- __asm__ ("fcvtzs %s0,%s1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+-#define vcvts_n_u32_f32(a, b) \
+- __extension__ \
+- ({ \
+- float32_t a_ = (a); \
+- uint32_t result; \
+- __asm__ ("fcvtzu %s0,%s1,%2" \
+- : "=w"(result) \
+- : "w"(a_), "i"(b) \
+- : /* No clobbers */); \
+- result; \
+- })
+-
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vcvtx_f32_f64 (float64x2_t a)
+ {
+@@ -7938,61 +7643,6 @@ vmovn_u64 (uint64x2_t a)
+ return result;
+ }
+
+-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+-vmul_n_f32 (float32x2_t a, float32_t b)
+-{
+- float32x2_t result;
+- __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+-vmul_n_s16 (int16x4_t a, int16_t b)
+-{
+- int16x4_t result;
+- __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
+- : "=w"(result)
+- : "w"(a), "x"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+-vmul_n_s32 (int32x2_t a, int32_t b)
+-{
+- int32x2_t result;
+- __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+-vmul_n_u16 (uint16x4_t a, uint16_t b)
+-{
+- uint16x4_t result;
+- __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
+- : "=w"(result)
+- : "w"(a), "x"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+-vmul_n_u32 (uint32x2_t a, uint32_t b)
+-{
+- uint32x2_t result;
+- __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+ #define vmull_high_lane_s16(a, b, c) \
+ __extension__ \
+ ({ \
+@@ -8443,227 +8093,6 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
+ return result;
+ }
+
+-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+-vmulq_n_f32 (float32x4_t a, float32_t b)
+-{
+- float32x4_t result;
+- __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+-vmulq_n_f64 (float64x2_t a, float64_t b)
+-{
+- float64x2_t result;
+- __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+-vmulq_n_s16 (int16x8_t a, int16_t b)
+-{
+- int16x8_t result;
+- __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
+- : "=w"(result)
+- : "w"(a), "x"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+-vmulq_n_s32 (int32x4_t a, int32_t b)
+-{
+- int32x4_t result;
+- __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
+- : "=w"(result)
+- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
@@ -1111,87 +2673,600 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vmvnq_p8 (poly8x16_t a)
--{
-- poly8x16_t result;
-- __asm__ ("mvn %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
+-vmvnq_p8 (poly8x16_t a)
+-{
+- poly8x16_t result;
+- __asm__ ("mvn %0.16b,%1.16b"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+-vmvnq_s8 (int8x16_t a)
+-{
+- int8x16_t result;
+- __asm__ ("mvn %0.16b,%1.16b"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+-vmvnq_s16 (int16x8_t a)
+-{
+- int16x8_t result;
+- __asm__ ("mvn %0.16b,%1.16b"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+-vmvnq_s32 (int32x4_t a)
+-{
+- int32x4_t result;
+- __asm__ ("mvn %0.16b,%1.16b"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+-vmvnq_u8 (uint8x16_t a)
+-{
+- uint8x16_t result;
+- __asm__ ("mvn %0.16b,%1.16b"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+-vmvnq_u16 (uint16x8_t a)
+-{
+- uint16x8_t result;
+- __asm__ ("mvn %0.16b,%1.16b"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+-vmvnq_u32 (uint32x4_t a)
+-{
+- uint32x4_t result;
+- __asm__ ("mvn %0.16b,%1.16b"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-
+ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+ vpadal_s8 (int16x4_t a, int8x8_t b)
+ {
+@@ -8785,24 +8214,13 @@ vpadalq_u16 (uint32x4_t a, uint16x8_t b)
+ return result;
+ }
+
+-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+-vpadalq_u32 (uint64x2_t a, uint32x4_t b)
+-{
+- uint64x2_t result;
+- __asm__ ("uadalp %0.2d,%2.4s"
+- : "=w"(result)
+- : "0"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+-vpadd_f32 (float32x2_t a, float32x2_t b)
++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
++vpadalq_u32 (uint64x2_t a, uint32x4_t b)
+ {
+- float32x2_t result;
+- __asm__ ("faddp %0.2s,%1.2s,%2.2s"
++ uint64x2_t result;
++ __asm__ ("uadalp %0.2d,%2.4s"
+ : "=w"(result)
+- : "w"(a), "w"(b)
++ : "0"(a), "w"(b)
+ : /* No clobbers */);
+ return result;
+ }
+@@ -8939,28 +8357,6 @@ vpaddlq_u32 (uint32x4_t a)
+ return result;
+ }
+
+-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+-vpaddq_f32 (float32x4_t a, float32x4_t b)
+-{
+- float32x4_t result;
+- __asm__ ("faddp %0.4s,%1.4s,%2.4s"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+-vpaddq_f64 (float64x2_t a, float64x2_t b)
+-{
+- float64x2_t result;
+- __asm__ ("faddp %0.2d,%1.2d,%2.2d"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+ vpaddq_s8 (int8x16_t a, int8x16_t b)
+ {
+@@ -9049,17 +8445,6 @@ vpaddq_u64 (uint64x2_t a, uint64x2_t b)
+ return result;
+ }
+
+-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+-vpadds_f32 (float32x2_t a)
+-{
+- float32_t result;
+- __asm__ ("faddp %s0,%1.2s"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+ vqdmulh_n_s16 (int16x4_t a, int16_t b)
+ {
+@@ -9679,28 +9064,6 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
+ result; \
+ })
+
+-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+-vrsqrte_f32 (float32x2_t a)
+-{
+- float32x2_t result;
+- __asm__ ("frsqrte %0.2s,%1.2s"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+-vrsqrte_f64 (float64x1_t a)
+-{
+- float64x1_t result;
+- __asm__ ("frsqrte %d0,%d1"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+ vrsqrte_u32 (uint32x2_t a)
+ {
+@@ -9712,39 +9075,6 @@ vrsqrte_u32 (uint32x2_t a)
+ return result;
+ }
+
+-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+-vrsqrted_f64 (float64_t a)
+-{
+- float64_t result;
+- __asm__ ("frsqrte %d0,%d1"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+-vrsqrteq_f32 (float32x4_t a)
+-{
+- float32x4_t result;
+- __asm__ ("frsqrte %0.4s,%1.4s"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+-vrsqrteq_f64 (float64x2_t a)
+-{
+- float64x2_t result;
+- __asm__ ("frsqrte %0.2d,%1.2d"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+ vrsqrteq_u32 (uint32x4_t a)
+ {
+@@ -9756,72 +9086,6 @@ vrsqrteq_u32 (uint32x4_t a)
+ return result;
+ }
+
+-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+-vrsqrtes_f32 (float32_t a)
+-{
+- float32_t result;
+- __asm__ ("frsqrte %s0,%s1"
+- : "=w"(result)
+- : "w"(a)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+-vrsqrts_f32 (float32x2_t a, float32x2_t b)
+-{
+- float32x2_t result;
+- __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+-vrsqrtsd_f64 (float64_t a, float64_t b)
+-{
+- float64_t result;
+- __asm__ ("frsqrts %d0,%d1,%d2"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+-vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
+-{
+- float32x4_t result;
+- __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+-vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
+-{
+- float64x2_t result;
+- __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+-vrsqrtss_f32 (float32_t a, float32_t b)
+-{
+- float32_t result;
+- __asm__ ("frsqrts %s0,%s1,%s2"
+- : "=w"(result)
+- : "w"(a), "w"(b)
+- : /* No clobbers */);
+- return result;
+-}
+-
+ #define vshrn_high_n_s16(a, b, c) \
+ __extension__ \
+ ({ \
+@@ -10872,6 +10136,45 @@ vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
+
+ /* Start of optimal implementations in approved order. */
+
++/* vabd. */
++
++__extension__ static __inline float32_t __attribute__ ((__always_inline__))
++vabds_f32 (float32_t __a, float32_t __b)
++{
++ return __builtin_aarch64_fabdsf (__a, __b);
++}
++
++__extension__ static __inline float64_t __attribute__ ((__always_inline__))
++vabdd_f64 (float64_t __a, float64_t __b)
++{
++ return __builtin_aarch64_fabddf (__a, __b);
++}
++
++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
++vabd_f32 (float32x2_t __a, float32x2_t __b)
++{
++ return __builtin_aarch64_fabdv2sf (__a, __b);
++}
++
++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
++vabd_f64 (float64x1_t __a, float64x1_t __b)
++{
++ return (float64x1_t) {vabdd_f64 (vget_lane_f64 (__a, 0),
++ vget_lane_f64 (__b, 0))};
++}
++
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vabdq_f32 (float32x4_t __a, float32x4_t __b)
++{
++ return __builtin_aarch64_fabdv4sf (__a, __b);
++}
++
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vabdq_f64 (float64x2_t __a, float64x2_t __b)
++{
++ return __builtin_aarch64_fabdv2df (__a, __b);
++}
++
+ /* vabs */
+
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
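
The float vabd* intrinsics removed as inline asm earlier in this file reappear below (after "Start of optimal implementations") as builtins, which lets the compiler schedule and optimize across them. A usage sketch (function name is mine):

    #include <arm_neon.h>

    /* Element-wise |a - b|; with this patch it lowers through
       __builtin_aarch64_fabdv2sf rather than opaque inline asm.  */
    float32x2_t abs_diff (float32x2_t a, float32x2_t b)
    {
      return vabd_f32 (a, b);
    }
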
+@@ -13026,84 +12329,208 @@ vcnt_p8 (poly8x8_t __a)
+ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+ vcnt_s8 (int8x8_t __a)
+ {
+- return __builtin_aarch64_popcountv8qi (__a);
++ return __builtin_aarch64_popcountv8qi (__a);
++}
++
++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
++vcnt_u8 (uint8x8_t __a)
++{
++ return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
++}
++
++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
++vcntq_p8 (poly8x16_t __a)
++{
++ return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
++}
++
++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
++vcntq_s8 (int8x16_t __a)
++{
++ return __builtin_aarch64_popcountv16qi (__a);
++}
++
++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
++vcntq_u8 (uint8x16_t __a)
++{
++ return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
++}
++
++/* vcvt (double -> float). */
++
++__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
++vcvt_f16_f32 (float32x4_t __a)
++{
++ return __builtin_aarch64_float_truncate_lo_v4hf (__a);
++}
++
++__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
++vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b)
++{
++ return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b);
++}
++
++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
++vcvt_f32_f64 (float64x2_t __a)
++{
++ return __builtin_aarch64_float_truncate_lo_v2sf (__a);
++}
++
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
++{
++ return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
++}
++
++/* vcvt (float -> double). */
++
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vcvt_f32_f16 (float16x4_t __a)
++{
++ return __builtin_aarch64_float_extend_lo_v4sf (__a);
++}
++
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vcvt_f64_f32 (float32x2_t __a)
++{
++
++ return __builtin_aarch64_float_extend_lo_v2df (__a);
++}
++
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vcvt_high_f32_f16 (float16x8_t __a)
++{
++ return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
++}
++
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vcvt_high_f64_f32 (float32x4_t __a)
++{
++ return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
++}
++
++/* vcvt (<u>fixed-point -> float). */
++
++__extension__ static __inline float64_t __attribute__ ((__always_inline__))
++vcvtd_n_f64_s64 (int64_t __a, const int __b)
++{
++ return __builtin_aarch64_scvtfdi (__a, __b);
++}
++
++__extension__ static __inline float64_t __attribute__ ((__always_inline__))
++vcvtd_n_f64_u64 (uint64_t __a, const int __b)
++{
++ return __builtin_aarch64_ucvtfdi_sus (__a, __b);
++}
++
++__extension__ static __inline float32_t __attribute__ ((__always_inline__))
++vcvts_n_f32_s32 (int32_t __a, const int __b)
++{
++ return __builtin_aarch64_scvtfsi (__a, __b);
++}
++
++__extension__ static __inline float32_t __attribute__ ((__always_inline__))
++vcvts_n_f32_u32 (uint32_t __a, const int __b)
++{
++ return __builtin_aarch64_ucvtfsi_sus (__a, __b);
++}
++
++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
++vcvt_n_f32_s32 (int32x2_t __a, const int __b)
++{
++ return __builtin_aarch64_scvtfv2si (__a, __b);
++}
++
++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
++vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
++{
++ return __builtin_aarch64_ucvtfv2si_sus (__a, __b);
+ }
+
+-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+-vcnt_u8 (uint8x8_t __a)
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
+ {
+- return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
++ return __builtin_aarch64_scvtfv4si (__a, __b);
+ }
+
+-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+-vcntq_p8 (poly8x16_t __a)
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vcvtq_n_f32_u32 (uint32x4_t __a, const int __b)
+ {
+- return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
++ return __builtin_aarch64_ucvtfv4si_sus (__a, __b);
+ }
+
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vmvnq_s8 (int8x16_t a)
--{
-- int8x16_t result;
-- __asm__ ("mvn %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vmvnq_s16 (int16x8_t a)
--{
-- int16x8_t result;
-- __asm__ ("mvn %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vmvnq_s32 (int32x4_t a)
--{
-- int32x4_t result;
-- __asm__ ("mvn %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
+-vcntq_s8 (int8x16_t __a)
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vcvtq_n_f64_s64 (int64x2_t __a, const int __b)
+ {
+- return __builtin_aarch64_popcountv16qi (__a);
++ return __builtin_aarch64_scvtfv2di (__a, __b);
+ }
+
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vmvnq_u8 (uint8x16_t a)
--{
-- uint8x16_t result;
-- __asm__ ("mvn %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vmvnq_u16 (uint16x8_t a)
--{
-- uint16x8_t result;
-- __asm__ ("mvn %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vmvnq_u32 (uint32x4_t a)
--{
-- uint32x4_t result;
-- __asm__ ("mvn %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vpadal_s8 (int16x4_t a, int8x8_t b)
+-vcntq_u8 (uint8x16_t __a)
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vcvtq_n_f64_u64 (uint64x2_t __a, const int __b)
+ {
+- return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
++ return __builtin_aarch64_ucvtfv2di_sus (__a, __b);
+ }
+
+-/* vcvt (double -> float). */
++/* vcvt (float -> <u>fixed-point). */
+
+-__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+-vcvt_f16_f32 (float32x4_t __a)
++__extension__ static __inline int64_t __attribute__ ((__always_inline__))
++vcvtd_n_s64_f64 (float64_t __a, const int __b)
+ {
+- return __builtin_aarch64_float_truncate_lo_v4hf (__a);
++ return __builtin_aarch64_fcvtzsdf (__a, __b);
+ }
+
+-__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+-vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b)
++__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
++vcvtd_n_u64_f64 (float64_t __a, const int __b)
+ {
+- return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b);
++ return __builtin_aarch64_fcvtzudf_uss (__a, __b);
+ }
+
+-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+-vcvt_f32_f64 (float64x2_t __a)
++__extension__ static __inline int32_t __attribute__ ((__always_inline__))
++vcvts_n_s32_f32 (float32_t __a, const int __b)
+ {
+- return __builtin_aarch64_float_truncate_lo_v2sf (__a);
++ return __builtin_aarch64_fcvtzssf (__a, __b);
+ }
+
+-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+-vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++vcvts_n_u32_f32 (float32_t __a, const int __b)
+ {
+- return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
++ return __builtin_aarch64_fcvtzusf_uss (__a, __b);
+ }
+
+-/* vcvt (float -> double). */
++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
++vcvt_n_s32_f32 (float32x2_t __a, const int __b)
++{
++ return __builtin_aarch64_fcvtzsv2sf (__a, __b);
++}
+
+-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+-vcvt_f32_f16 (float16x4_t __a)
++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
++vcvt_n_u32_f32 (float32x2_t __a, const int __b)
+ {
+- return __builtin_aarch64_float_extend_lo_v4sf (__a);
++ return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b);
+ }
+
+-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+-vcvt_f64_f32 (float32x2_t __a)
++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
++vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
+ {
++ return __builtin_aarch64_fcvtzsv4sf (__a, __b);
++}
+
+- return __builtin_aarch64_float_extend_lo_v2df (__a);
++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
++vcvtq_n_u32_f32 (float32x4_t __a, const int __b)
++{
++ return __builtin_aarch64_fcvtzuv4sf_uss (__a, __b);
+ }
+
+-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+-vcvt_high_f32_f16 (float16x8_t __a)
++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
++vcvtq_n_s64_f64 (float64x2_t __a, const int __b)
+ {
+- return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
++ return __builtin_aarch64_fcvtzsv2df (__a, __b);
+ }
+
+-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+-vcvt_high_f64_f32 (float32x4_t __a)
++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
++vcvtq_n_u64_f64 (float64x2_t __a, const int __b)
{
-@@ -14456,6 +14180,12 @@ vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
+- return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
++ return __builtin_aarch64_fcvtzuv2df_uss (__a, __b);
+ }
+
+ /* vcvt (<u>int -> float) */
+@@ -14456,6 +13883,12 @@ vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a);
}
@@ -1204,7 +3279,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
-@@ -14597,6 +14327,29 @@ vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
+@@ -14597,6 +14030,29 @@ vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
return __builtin_aarch64_fmav2df (-__b, __c, __a);
}
@@ -1234,7 +3309,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* vfms_lane */
-@@ -18895,6 +18648,160 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
+@@ -18895,6 +18351,160 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
return __a * __aarch64_vget_lane_any (__b, __lane);
}
@@ -1395,9 +3470,160 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* vneg */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+@@ -18971,6 +18581,24 @@ vnegq_s64 (int64x2_t __a)
+
+ /* vpadd */
+
++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
++vpadd_f32 (float32x2_t __a, float32x2_t __b)
++{
++ return __builtin_aarch64_faddpv2sf (__a, __b);
++}
++
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vpaddq_f32 (float32x4_t __a, float32x4_t __b)
++{
++ return __builtin_aarch64_faddpv4sf (__a, __b);
++}
++
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vpaddq_f64 (float64x2_t __a, float64x2_t __b)
++{
++ return __builtin_aarch64_faddpv2df (__a, __b);
++}
++
+ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+ vpadd_s8 (int8x8_t __a, int8x8_t __b)
+ {
+@@ -19010,6 +18638,12 @@ vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
+ (int32x2_t) __b);
+ }
+
++__extension__ static __inline float32_t __attribute__ ((__always_inline__))
++vpadds_f32 (float32x2_t __a)
++{
++ return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
++}
++
+ __extension__ static __inline float64_t __attribute__ ((__always_inline__))
+ vpaddd_f64 (float64x2_t __a)
+ {
+@@ -21713,6 +21347,83 @@ vrshrd_n_u64 (uint64_t __a, const int __b)
+ return __builtin_aarch64_urshr_ndi_uus (__a, __b);
+ }
+
++/* vrsqrte. */
++
++__extension__ static __inline float32_t __attribute__ ((__always_inline__))
++vrsqrtes_f32 (float32_t __a)
++{
++ return __builtin_aarch64_rsqrtesf (__a);
++}
++
++__extension__ static __inline float64_t __attribute__ ((__always_inline__))
++vrsqrted_f64 (float64_t __a)
++{
++ return __builtin_aarch64_rsqrtedf (__a);
++}
++
++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
++vrsqrte_f32 (float32x2_t __a)
++{
++ return __builtin_aarch64_rsqrtev2sf (__a);
++}
++
++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
++vrsqrte_f64 (float64x1_t __a)
++{
++ return (float64x1_t) {vrsqrted_f64 (vget_lane_f64 (__a, 0))};
++}
++
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vrsqrteq_f32 (float32x4_t __a)
++{
++ return __builtin_aarch64_rsqrtev4sf (__a);
++}
++
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vrsqrteq_f64 (float64x2_t __a)
++{
++ return __builtin_aarch64_rsqrtev2df (__a);
++}
++
++/* vrsqrts. */
++
++__extension__ static __inline float32_t __attribute__ ((__always_inline__))
++vrsqrtss_f32 (float32_t __a, float32_t __b)
++{
++ return __builtin_aarch64_rsqrtssf (__a, __b);
++}
++
++__extension__ static __inline float64_t __attribute__ ((__always_inline__))
++vrsqrtsd_f64 (float64_t __a, float64_t __b)
++{
++ return __builtin_aarch64_rsqrtsdf (__a, __b);
++}
++
++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
++vrsqrts_f32 (float32x2_t __a, float32x2_t __b)
++{
++ return __builtin_aarch64_rsqrtsv2sf (__a, __b);
++}
++
++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
++vrsqrts_f64 (float64x1_t __a, float64x1_t __b)
++{
++ return (float64x1_t) {vrsqrtsd_f64 (vget_lane_f64 (__a, 0),
++ vget_lane_f64 (__b, 0))};
++}
++
++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
++vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b)
++{
++ return __builtin_aarch64_rsqrtsv4sf (__a, __b);
++}
++
++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
++vrsqrtsq_f64 (float64x2_t __a, float64x2_t __b)
++{
++ return __builtin_aarch64_rsqrtsv2df (__a, __b);
++}
++
+ /* vrsra */
+
+ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
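
The builtin-backed vrsqrte*/vrsqrts* pairs above compose in the usual way: FRSQRTE yields a rough estimate and each FRSQRTS application performs one Newton step, since vrsqrts(a, b) computes (3 - a*b)/2. A common idiom (a sketch, not taken from the patch):

    #include <arm_neon.h>

    /* Refine the FRSQRTE estimate: x' = x * (3 - a*x*x)/2
       == x * vrsqrtss_f32 (a * x, x).  */
    float rsqrt (float a)
    {
      float x = vrsqrtes_f32 (a);
      x = x * vrsqrtss_f32 (a * x, x);
      x = x * vrsqrtss_f32 (a * x, x);
      return x;
    }
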
--- a/src/gcc/config/aarch64/iterators.md
+++ b/src/gcc/config/aarch64/iterators.md
-@@ -715,6 +715,7 @@
+@@ -154,6 +154,12 @@
+ ;; Vector modes for S type.
+ (define_mode_iterator VDQ_SI [V2SI V4SI])
+
++;; Vector modes for S and D
++(define_mode_iterator VDQ_SDI [V2SI V4SI V2DI])
++
++;; Scalar and Vector modes for S and D
++(define_mode_iterator VSDQ_SDI [V2SI V4SI V2DI SI DI])
++
+ ;; Vector modes for Q and H types.
+ (define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
+
+@@ -648,8 +654,13 @@
+ (define_mode_attr atomic_sfx
+ [(QI "b") (HI "h") (SI "") (DI "")])
+
+-(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (SF "si") (DF "di")])
+-(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (SF "SI") (DF "DI")])
++(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")
++ (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
++ (SF "si") (DF "di") (SI "sf") (DI "df")])
++(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
++ (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
++ (SF "SI") (DF "DI") (SI "SF") (DI "DF")])
++
+
+ ;; for the inequal width integer to fp conversions
+ (define_mode_attr fcvt_iesize [(SF "di") (DF "si")])
+@@ -715,6 +726,7 @@
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
@@ -1405,9 +3631,42 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
(define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")])
;; -fpic small model GOT reloc modifers: gotpage_lo15/lo14 for ILP64/32.
+@@ -1001,6 +1013,9 @@
+ (define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
+ UNSPEC_FRINTA UNSPEC_FRINTN])
+
++(define_int_iterator FCVT_F2FIXED [UNSPEC_FCVTZS UNSPEC_FCVTZU])
++(define_int_iterator FCVT_FIXED2F [UNSPEC_SCVTF UNSPEC_UCVTF])
++
+ (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
+
+ (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
+@@ -1137,6 +1152,11 @@
+ (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")
+ (UNSPEC_FRINTN "frintn")])
+
++(define_int_attr fcvt_fixed_insn [(UNSPEC_SCVTF "scvtf")
++ (UNSPEC_UCVTF "ucvtf")
++ (UNSPEC_FCVTZS "fcvtzs")
++ (UNSPEC_FCVTZU "fcvtzu")])
++
+ (define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
+ (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
+ (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
--- a/src/gcc/config/arm/arm-protos.h
+++ b/src/gcc/config/arm/arm-protos.h
-@@ -319,6 +319,7 @@ extern int vfp3_const_double_for_bits (rtx);
+@@ -50,7 +50,9 @@ extern tree arm_builtin_decl (unsigned code, bool initialize_p
+ ATTRIBUTE_UNUSED);
+ extern void arm_init_builtins (void);
+ extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
+-
++extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
++extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
++ bool high);
+ #ifdef RTX_CODE
+ extern bool arm_vector_mode_supported_p (machine_mode);
+ extern bool arm_small_register_classes_for_mode_p (machine_mode);
+@@ -319,6 +321,7 @@ extern int vfp3_const_double_for_bits (rtx);
extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
rtx);
@@ -1415,7 +3674,7 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
#endif /* RTX_CODE */
-@@ -601,6 +602,9 @@ extern int arm_tune_cortex_a9;
+@@ -601,6 +604,9 @@ extern int arm_tune_cortex_a9;
interworking clean. */
extern int arm_cpp_interwork;
@@ -1501,6 +3760,87 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
static unsigned HOST_WIDE_INT
+@@ -30311,4 +30322,80 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
+ return;
+ }
+
++
++/* Construct and return a PARALLEL RTX vector with elements numbering the
++ lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
++ the vector - from the perspective of the architecture. This does not
++ line up with GCC's perspective on lane numbers, so we end up with
++ different masks depending on our target endian-ness. The diagram
++ below may help. We must draw the distinction when building masks
++ which select one half of the vector. An instruction selecting
++ architectural low-lanes for a big-endian target, must be described using
++ a mask selecting GCC high-lanes.
++
++ Big-Endian Little-Endian
++
++GCC 0 1 2 3 3 2 1 0
++ | x | x | x | x | | x | x | x | x |
++Architecture 3 2 1 0 3 2 1 0
++
++Low Mask: { 2, 3 } { 0, 1 }
++High Mask: { 0, 1 } { 2, 3 }
++*/
++
++rtx
++arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
++{
++ int nunits = GET_MODE_NUNITS (mode);
++ rtvec v = rtvec_alloc (nunits / 2);
++ int high_base = nunits / 2;
++ int low_base = 0;
++ int base;
++ rtx t1;
++ int i;
++
++ if (BYTES_BIG_ENDIAN)
++ base = high ? low_base : high_base;
++ else
++ base = high ? high_base : low_base;
++
++ for (i = 0; i < nunits / 2; i++)
++ RTVEC_ELT (v, i) = GEN_INT (base + i);
++
++ t1 = gen_rtx_PARALLEL (mode, v);
++ return t1;
++}
++
++/* Check OP for validity as a PARALLEL RTX vector with elements
++ numbering the lanes of either the high (HIGH == TRUE) or low lanes,
++ from the perspective of the architecture. See the diagram above
++ arm_simd_vect_par_cnst_half_p for more details. */
++
++bool
++arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
++ bool high)
++{
++ rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
++ HOST_WIDE_INT count_op = XVECLEN (op, 0);
++ HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
++ int i = 0;
++
++ if (!VECTOR_MODE_P (mode))
++ return false;
++
++ if (count_op != count_ideal)
++ return false;
++
++ for (i = 0; i < count_ideal; i++)
++ {
++ rtx elt_op = XVECEXP (op, 0, i);
++ rtx elt_ideal = XVECEXP (ideal, 0, i);
++
++ if (!CONST_INT_P (elt_op)
++ || INTVAL (elt_ideal) != INTVAL (elt_op))
++ return false;
++ }
++ return true;
++}
++
+ #include "gt-arm.h"
--- a/src/gcc/config/arm/arm.h
+++ b/src/gcc/config/arm/arm.h
@@ -478,6 +478,9 @@ extern int arm_tune_cortex_a9;
@@ -1531,7 +3871,27 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
profile. */
--- a/src/gcc/config/arm/arm.md
+++ b/src/gcc/config/arm/arm.md
-@@ -8152,8 +8152,8 @@
+@@ -121,7 +121,7 @@
+ ; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is
+ ; used to compute attribute "enabled", use type "any" to enable an
+ ; alternative in all cases.
+-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3"
++(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
+ (const_string "any"))
+
+ (define_attr "arch_enabled" "no,yes"
+@@ -177,6 +177,10 @@
+ (and (eq_attr "arch" "armv6_or_vfpv3")
+ (match_test "arm_arch6 || TARGET_VFP3"))
+ (const_string "yes")
++
++ (and (eq_attr "arch" "neon")
++ (match_test "TARGET_NEON"))
++ (const_string "yes")
+ ]
+
+ (const_string "no")))
+@@ -8152,8 +8156,8 @@
)
(define_insn "probe_stack"
@@ -1542,8 +3902,96 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
"TARGET_32BIT"
"str%?\\tr0, %0"
[(set_attr "type" "store1")
+@@ -10821,19 +10825,22 @@
+ (set_attr "predicable_short_it" "no")
+ (set_attr "type" "clz")])
+
+-(define_expand "ctzsi2"
+- [(set (match_operand:SI 0 "s_register_operand" "")
+- (ctz:SI (match_operand:SI 1 "s_register_operand" "")))]
++;; Keep this as a CTZ expression until after reload and then split
++;; into RBIT + CLZ. Since RBIT is represented as an UNSPEC it is unlikely
++;; to fold with any other expression.
++
++(define_insn_and_split "ctzsi2"
++ [(set (match_operand:SI 0 "s_register_operand" "=r")
++ (ctz:SI (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT && arm_arch_thumb2"
++ "#"
++ "&& reload_completed"
++ [(const_int 0)]
+ "
+- {
+- rtx tmp = gen_reg_rtx (SImode);
+- emit_insn (gen_rbitsi2 (tmp, operands[1]));
+- emit_insn (gen_clzsi2 (operands[0], tmp));
+- }
+- DONE;
+- "
+-)
++ emit_insn (gen_rbitsi2 (operands[0], operands[1]));
++ emit_insn (gen_clzsi2 (operands[0], operands[0]));
++ DONE;
++")
+
+ ;; V5E instructions.
+
--- a/src/gcc/config/arm/arm_neon.h
+++ b/src/gcc/config/arm/arm_neon.h
+@@ -530,7 +530,7 @@ vadd_s32 (int32x2_t __a, int32x2_t __b)
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vadd_f32 (float32x2_t __a, float32x2_t __b)
+ {
+-#ifdef __FAST_MATH
++#ifdef __FAST_MATH__
+ return __a + __b;
+ #else
+ return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b);
+@@ -594,7 +594,7 @@ vaddq_s64 (int64x2_t __a, int64x2_t __b)
+ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+ vaddq_f32 (float32x4_t __a, float32x4_t __b)
+ {
+-#ifdef __FAST_MATH
++#ifdef __FAST_MATH__
+ return __a + __b;
+ #else
+ return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b);
+@@ -1030,7 +1030,7 @@ vmul_s32 (int32x2_t __a, int32x2_t __b)
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vmul_f32 (float32x2_t __a, float32x2_t __b)
+ {
+-#ifdef __FAST_MATH
++#ifdef __FAST_MATH__
+ return __a * __b;
+ #else
+ return (float32x2_t) __builtin_neon_vmulfv2sf (__a, __b);
+@@ -1077,7 +1077,7 @@ vmulq_s32 (int32x4_t __a, int32x4_t __b)
+ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+ vmulq_f32 (float32x4_t __a, float32x4_t __b)
+ {
+-#ifdef __FAST_MATH
++#ifdef __FAST_MATH__
+ return __a * __b;
+ #else
+ return (float32x4_t) __builtin_neon_vmulfv4sf (__a, __b);
+@@ -1678,7 +1678,7 @@ vsub_s32 (int32x2_t __a, int32x2_t __b)
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vsub_f32 (float32x2_t __a, float32x2_t __b)
+ {
+-#ifdef __FAST_MATH
++#ifdef __FAST_MATH__
+ return __a - __b;
+ #else
+ return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b);
+@@ -1742,7 +1742,7 @@ vsubq_s64 (int64x2_t __a, int64x2_t __b)
+ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+ vsubq_f32 (float32x4_t __a, float32x4_t __b)
+ {
+-#ifdef __FAST_MATH
++#ifdef __FAST_MATH__
+ return __a - __b;
+ #else
+ return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b);
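
These hunks fix a plain misspelling: GCC predefines __FAST_MATH__ (with both trailing underscores) under -ffast-math, so the old #ifdef __FAST_MATH test never fired and these intrinsics always took the builtin path. A quick compile-time check of the macro name:

    /* Compile once plain and once with -ffast-math: only the first
       #error ever triggers, and only under -ffast-math.  */
    #ifdef __FAST_MATH__
    #error "__FAST_MATH__ is defined"
    #endif
    #ifdef __FAST_MATH
    #error "__FAST_MATH is defined"  /* dead: GCC never defines this */
    #endif
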
@@ -2607,6 +2607,12 @@ vtst_p8 (poly8x8_t __a, poly8x8_t __b)
return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b);
}
@@ -1603,6 +4051,209 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
)
(define_insn "crypto_<crypto_pattern>"
+--- a/src/gcc/config/arm/neon.md
++++ b/src/gcc/config/arm/neon.md
+@@ -1204,16 +1204,133 @@
+
+ ;; Widening operations
+
++(define_expand "widen_ssum<mode>3"
++ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
++ (plus:<V_double_width>
++ (sign_extend:<V_double_width>
++ (match_operand:VQI 1 "s_register_operand" ""))
++ (match_operand:<V_double_width> 2 "s_register_operand" "")))]
++ "TARGET_NEON"
++ {
++ machine_mode mode = GET_MODE (operands[1]);
++ rtx p1, p2;
++
++ p1 = arm_simd_vect_par_cnst_half (mode, false);
++ p2 = arm_simd_vect_par_cnst_half (mode, true);
++
++ if (operands[0] != operands[2])
++ emit_move_insn (operands[0], operands[2]);
++
++ emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
++ operands[1],
++ p1,
++ operands[0]));
++ emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
++ operands[1],
++ p2,
++ operands[0]));
++ DONE;
++ }
++)
++
++(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
++ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
++ (plus:<VW:V_widen>
++ (sign_extend:<VW:V_widen>
++ (vec_select:VW
++ (match_operand:VQI 1 "s_register_operand" "%w")
++ (match_operand:VQI 2 "vect_par_constant_low" "")))
++ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
++ "TARGET_NEON"
++{
++ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
++ "vaddw.<V_s_elem>\t%q0, %q3, %e1";
++}
++ [(set_attr "type" "neon_add_widen")])
++
++(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
++ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
++ (plus:<VW:V_widen>
++ (sign_extend:<VW:V_widen>
++ (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
++ (match_operand:VQI 2 "vect_par_constant_high" "")))
++ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
++ "TARGET_NEON"
++{
++ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
++ "vaddw.<V_s_elem>\t%q0, %q3, %f1";
++}
++ [(set_attr "type" "neon_add_widen")])
++
+ (define_insn "widen_ssum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+- (plus:<V_widen> (sign_extend:<V_widen>
+- (match_operand:VW 1 "s_register_operand" "%w"))
+- (match_operand:<V_widen> 2 "s_register_operand" "w")))]
++ (plus:<V_widen>
++ (sign_extend:<V_widen>
++ (match_operand:VW 1 "s_register_operand" "%w"))
++ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.<V_s_elem>\t%q0, %q2, %P1"
+ [(set_attr "type" "neon_add_widen")]
+ )
+
++(define_expand "widen_usum<mode>3"
++ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
++ (plus:<V_double_width>
++ (zero_extend:<V_double_width>
++ (match_operand:VQI 1 "s_register_operand" ""))
++ (match_operand:<V_double_width> 2 "s_register_operand" "")))]
++ "TARGET_NEON"
++ {
++ machine_mode mode = GET_MODE (operands[1]);
++ rtx p1, p2;
++
++ p1 = arm_simd_vect_par_cnst_half (mode, false);
++ p2 = arm_simd_vect_par_cnst_half (mode, true);
++
++ if (operands[0] != operands[2])
++ emit_move_insn (operands[0], operands[2]);
++
++ emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
++ operands[1],
++ p1,
++ operands[0]));
++ emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
++ operands[1],
++ p2,
++ operands[0]));
++ DONE;
++ }
++)
++
++(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
++ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
++ (plus:<VW:V_widen>
++ (zero_extend:<VW:V_widen>
++ (vec_select:VW
++ (match_operand:VQI 1 "s_register_operand" "%w")
++ (match_operand:VQI 2 "vect_par_constant_low" "")))
++ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
++ "TARGET_NEON"
++{
++ return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
++ "vaddw.<V_u_elem>\t%q0, %q3, %e1";
++}
++ [(set_attr "type" "neon_add_widen")])
++
++(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
++ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
++ (plus:<VW:V_widen>
++ (zero_extend:<VW:V_widen>
++ (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
++ (match_operand:VQI 2 "vect_par_constant_high" "")))
++ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
++ "TARGET_NEON"
++{
++ return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
++ "vaddw.<V_u_elem>\t%q0, %q3, %f1";
++}
++ [(set_attr "type" "neon_add_widen")])
++
+ (define_insn "widen_usum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (zero_extend:<V_widen>
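The new expanders above let the vectorizer widen directly from a 128-bit input: the quad-word source is split with arm_simd_vect_par_cnst_half and accumulated half at a time through vaddw, with the lo/hi insns honouring endianness. A sketch of the reduction idiom this targets, mirroring the new neon-vaddws16.c test further down (assumes -O3 with NEON enabled):

int
sum_widen (int len, short *__restrict x)
{
  int result = 0;
  /* The widening accumulation can now be vectorized as vaddw.s16
     rather than widening each element before the add.  */
  for (int i = 0; i < (len & ~31); i++)
    result += x[i];
  return result;
}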
+--- a/src/gcc/config/arm/predicates.md
++++ b/src/gcc/config/arm/predicates.md
+@@ -612,59 +612,13 @@
+ (define_special_predicate "vect_par_constant_high"
+ (match_code "parallel")
+ {
+- HOST_WIDE_INT count = XVECLEN (op, 0);
+- int i;
+- int base = GET_MODE_NUNITS (mode);
+-
+- if ((count < 1)
+- || (count != base/2))
+- return false;
+-
+- if (!VECTOR_MODE_P (mode))
+- return false;
+-
+- for (i = 0; i < count; i++)
+- {
+- rtx elt = XVECEXP (op, 0, i);
+- int val;
+-
+- if (!CONST_INT_P (elt))
+- return false;
+-
+- val = INTVAL (elt);
+- if (val != (base/2) + i)
+- return false;
+- }
+- return true;
++ return arm_simd_check_vect_par_cnst_half_p (op, mode, true);
+ })
+
+ (define_special_predicate "vect_par_constant_low"
+ (match_code "parallel")
+ {
+- HOST_WIDE_INT count = XVECLEN (op, 0);
+- int i;
+- int base = GET_MODE_NUNITS (mode);
+-
+- if ((count < 1)
+- || (count != base/2))
+- return false;
+-
+- if (!VECTOR_MODE_P (mode))
+- return false;
+-
+- for (i = 0; i < count; i++)
+- {
+- rtx elt = XVECEXP (op, 0, i);
+- int val;
+-
+- if (!CONST_INT_P (elt))
+- return false;
+-
+- val = INTVAL (elt);
+- if (val != i)
+- return false;
+- }
+- return true;
++ return arm_simd_check_vect_par_cnst_half_p (op, mode, false);
+ })
+
+ (define_predicate "const_double_vcvt_power_of_two_reciprocal"
--- a/src/gcc/config/arm/sync.md
+++ b/src/gcc/config/arm/sync.md
@@ -452,14 +452,13 @@
@@ -1657,6 +4308,117 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
(define_insn "*thumb_mulsi3"
[(set (match_operand:SI 0 "register_operand" "=&l,&l,&l")
(mult:SI (match_operand:SI 1 "register_operand" "%l,*h,0")
+--- a/src/gcc/config/arm/vfp.md
++++ b/src/gcc/config/arm/vfp.md
+@@ -394,8 +394,8 @@
+ ;; DFmode moves
+
+ (define_insn "*movdf_vfp"
+- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r")
+- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
++ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r, m,w,r")
++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+ && ( register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+@@ -410,39 +410,43 @@
+ case 2:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"vmov%?.f64\\t%P0, %1\";
+- case 3: case 4:
++ case 3:
++ gcc_assert (TARGET_VFP_DOUBLE);
++ return \"vmov.i64\\t%P0, #0\\t%@ float\";
++ case 4: case 5:
+ return output_move_vfp (operands);
+- case 5: case 6:
++ case 6: case 7:
+ return output_move_double (operands, true, NULL);
+- case 7:
++ case 8:
+ if (TARGET_VFP_SINGLE)
+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
+ else
+ return \"vmov%?.f64\\t%P0, %P1\";
+- case 8:
++ case 9:
+ return \"#\";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ "
+- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\
++ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
+ load2,store2,ffarithd,multiple")
+- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
+- (eq_attr "alternative" "7")
++ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
++ (eq_attr "alternative" "8")
+ (if_then_else
+ (match_test "TARGET_VFP_SINGLE")
+ (const_int 8)
+ (const_int 4))]
+ (const_int 4)))
+- (set_attr "predicable" "yes")
+- (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
+- (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")]
++ (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes")
++ (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*")
++ (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*")
++ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
+ )
+
+ (define_insn "*thumb2_movdf_vfp"
+- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r")
+- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))]
++ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r ,m,w,r")
++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w, mF,r, w,r"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
+ && ( register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+@@ -457,11 +461,14 @@
+ case 2:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"vmov%?.f64\\t%P0, %1\";
+- case 3: case 4:
++ case 3:
++ gcc_assert (TARGET_VFP_DOUBLE);
++ return \"vmov.i64\\t%P0, #0\\t%@ float\";
++ case 4: case 5:
+ return output_move_vfp (operands);
+- case 5: case 6: case 8:
++ case 6: case 7: case 9:
+ return output_move_double (operands, true, NULL);
+- case 7:
++ case 8:
+ if (TARGET_VFP_SINGLE)
+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
+ else
+@@ -471,17 +478,18 @@
+ }
+ }
+ "
+- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\
++ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
+ f_stored,load2,store2,ffarithd,multiple")
+- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
+- (eq_attr "alternative" "7")
++ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
++ (eq_attr "alternative" "8")
+ (if_then_else
+ (match_test "TARGET_VFP_SINGLE")
+ (const_int 8)
+ (const_int 4))]
+ (const_int 4)))
+- (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*")
+- (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
++ (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*")
++ (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*")
++ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
+ )
+
+
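Both movdf patterns gain a 'G' alternative so a +0.0 DFmode constant can be materialized in a VFP register with a single vmov.i64 when NEON is available, instead of a literal-pool load; the matching AArch64 expectation change appears in fmovd-zero-reg.c below. A sketch of the affected shape (assumes hard-float with NEON):

void bar (double);

void
foo (void)
{
  /* The 0.0 argument is now emitted as "vmov.i64 d0, #0"
     rather than being loaded from the constant pool.  */
  bar (0.0);
}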
--- a/src/gcc/configure
+++ b/src/gcc/configure
@@ -1711,7 +1711,8 @@ Optional Packages:
@@ -1752,6 +4514,100 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
+--- a/src/gcc/ifcvt.c
++++ b/src/gcc/ifcvt.c
+@@ -817,6 +817,7 @@ struct noce_if_info
+
+ static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int);
+ static int noce_try_move (struct noce_if_info *);
++static int noce_try_ifelse_collapse (struct noce_if_info *);
+ static int noce_try_store_flag (struct noce_if_info *);
+ static int noce_try_addcc (struct noce_if_info *);
+ static int noce_try_store_flag_constants (struct noce_if_info *);
+@@ -1120,6 +1121,37 @@ noce_try_move (struct noce_if_info *if_info)
+ return FALSE;
+ }
+
++/* Try forming an IF_THEN_ELSE (cond, b, a) and collapsing that
++ through simplify_rtx. Sometimes that can eliminate the IF_THEN_ELSE.
++ If that is the case, emit the result into x. */
++
++static int
++noce_try_ifelse_collapse (struct noce_if_info * if_info)
++{
++ if (!noce_simple_bbs (if_info))
++ return FALSE;
++
++ machine_mode mode = GET_MODE (if_info->x);
++ rtx if_then_else = simplify_gen_ternary (IF_THEN_ELSE, mode, mode,
++ if_info->cond, if_info->b,
++ if_info->a);
++
++ if (GET_CODE (if_then_else) == IF_THEN_ELSE)
++ return FALSE;
++
++ rtx_insn *seq;
++ start_sequence ();
++ noce_emit_move_insn (if_info->x, if_then_else);
++ seq = end_ifcvt_sequence (if_info);
++ if (!seq)
++ return FALSE;
++
++ emit_insn_before_setloc (seq, if_info->jump,
++ INSN_LOCATION (if_info->insn_a));
++ return TRUE;
++}
++
++
+ /* Convert "if (test) x = 1; else x = 0".
+
+ Only try 0 and STORE_FLAG_VALUE here. Other combinations will be
+@@ -2364,28 +2396,32 @@ noce_get_alt_condition (struct noce_if_info *if_info, rtx target,
+ switch (code)
+ {
+ case LT:
+- if (actual_val == desired_val + 1)
++ if (desired_val != HOST_WIDE_INT_MAX
++ && actual_val == desired_val + 1)
+ {
+ code = LE;
+ op_b = GEN_INT (desired_val);
+ }
+ break;
+ case LE:
+- if (actual_val == desired_val - 1)
++ if (desired_val != HOST_WIDE_INT_MIN
++ && actual_val == desired_val - 1)
+ {
+ code = LT;
+ op_b = GEN_INT (desired_val);
+ }
+ break;
+ case GT:
+- if (actual_val == desired_val - 1)
++ if (desired_val != HOST_WIDE_INT_MIN
++ && actual_val == desired_val - 1)
+ {
+ code = GE;
+ op_b = GEN_INT (desired_val);
+ }
+ break;
+ case GE:
+- if (actual_val == desired_val + 1)
++ if (desired_val != HOST_WIDE_INT_MAX
++ && actual_val == desired_val + 1)
+ {
+ code = GT;
+ op_b = GEN_INT (desired_val);
+@@ -3493,6 +3529,8 @@ noce_process_if_block (struct noce_if_info *if_info)
+
+ if (noce_try_move (if_info))
+ goto success;
++ if (noce_try_ifelse_collapse (if_info))
++ goto success;
+ if (noce_try_store_flag (if_info))
+ goto success;
+ if (noce_try_bitop (if_info))
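Two independent ifcvt changes land here: noce_try_ifelse_collapse forms the whole IF_THEN_ELSE and hands it to simplify_gen_ternary, so collapses discovered by simplify-rtx (such as the clz/ctz one below) are picked up early; and noce_get_alt_condition now refuses to adjust a comparison constant past HOST_WIDE_INT_MAX or HOST_WIDE_INT_MIN, where the old code silently wrapped. A sketch of the overflow-prone shape, along the lines of the new pr66940.c execute test:

long long
clamp_sign (long long v)
{
  /* With desired_val at the type extreme, the old code could turn
     the comparison into one against a wrapped constant.  */
  return v <= 0 ? -0x7fffffffffffffffLL - 1 : 0x7fffffffffffffffLL;
}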
--- a/src/gcc/internal-fn.c
+++ b/src/gcc/internal-fn.c
@@ -1807,11 +1807,7 @@ expand_arith_overflow (enum tree_code code, gimple *stmt)
@@ -1767,6 +4623,32 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
enum machine_mode m = smallest_mode_for_size (p, MODE_INT);
tree optype = build_nonstandard_integer_type (GET_MODE_PRECISION (m),
uns0_p && uns1_p
+--- a/src/gcc/lra-constraints.c
++++ b/src/gcc/lra-constraints.c
+@@ -1303,7 +1303,22 @@ process_addr_reg (rtx *loc, bool check_only_p, rtx_insn **before, rtx_insn **aft
+
+ subreg_p = GET_CODE (*loc) == SUBREG;
+ if (subreg_p)
+- loc = &SUBREG_REG (*loc);
++ {
++ reg = SUBREG_REG (*loc);
++ mode = GET_MODE (reg);
++
++ /* For a mode whose size is bigger than ptr_mode, there is unlikely to be
++ a "mov" between two registers with different classes, but there normally
++ will be a "mov" which transfers an element of a vector register into a
++ general register, and that will normally be a subreg which should be
++ reloaded as a whole. This is particularly likely to be triggered when
++ -fno-split-wide-types is specified. */
++ if (!REG_P (reg)
++ || in_class_p (reg, cl, &new_class)
++ || GET_MODE_SIZE (mode) <= GET_MODE_SIZE (ptr_mode))
++ loc = &SUBREG_REG (*loc);
++ }
++
+ reg = *loc;
+ mode = GET_MODE (reg);
+ if (! REG_P (reg))
--- a/src/gcc/lto/lto-partition.c
+++ b/src/gcc/lto/lto-partition.c
@@ -447,7 +447,7 @@ add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
@@ -1840,6 +4722,98 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
/* Diagnostic parameters. */
+--- a/src/gcc/rtlanal.c
++++ b/src/gcc/rtlanal.c
+@@ -3657,6 +3657,16 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
+ info->offset = offset / regsize_xmode;
+ return;
+ }
++ /* It's not valid to extract a subreg of mode YMODE at OFFSET that
++ would go outside of XMODE. */
++ if (!rknown
++ && GET_MODE_SIZE (ymode) + offset > GET_MODE_SIZE (xmode))
++ {
++ info->representable_p = false;
++ info->nregs = nregs_ymode;
++ info->offset = offset / regsize_xmode;
++ return;
++ }
+ /* Quick exit for the simple and common case of extracting whole
+ subregisters from a multiregister value. */
+ /* ??? It would be better to integrate this into the code below,
+--- a/src/gcc/simplify-rtx.c
++++ b/src/gcc/simplify-rtx.c
+@@ -5267,6 +5267,50 @@ simplify_const_relational_operation (enum rtx_code code,
+
+ return 0;
+ }
++
++/* Recognize expressions of the form (X CMP 0) ? VAL : OP (X),
++ where OP is CLZ or CTZ and VAL is the value from CLZ_DEFINED_VALUE_AT_ZERO
++ or CTZ_DEFINED_VALUE_AT_ZERO respectively. Return OP (X) if the expression
++ can be simplified to that, or NULL_RTX if not.
++ Assume X is compared against zero with CMP_CODE and that the true
++ arm is TRUE_VAL and the false arm is FALSE_VAL. */
++
++static rtx
++simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
++{
++ if (cmp_code != EQ && cmp_code != NE)
++ return NULL_RTX;
++
++ /* Result on X == 0 and X != 0 respectively. */
++ rtx on_zero, on_nonzero;
++ if (cmp_code == EQ)
++ {
++ on_zero = true_val;
++ on_nonzero = false_val;
++ }
++ else
++ {
++ on_zero = false_val;
++ on_nonzero = true_val;
++ }
++
++ rtx_code op_code = GET_CODE (on_nonzero);
++ if ((op_code != CLZ && op_code != CTZ)
++ || !rtx_equal_p (XEXP (on_nonzero, 0), x)
++ || !CONST_INT_P (on_zero))
++ return NULL_RTX;
++
++ HOST_WIDE_INT op_val;
++ if (((op_code == CLZ
++ && CLZ_DEFINED_VALUE_AT_ZERO (GET_MODE (on_nonzero), op_val))
++ || (op_code == CTZ
++ && CTZ_DEFINED_VALUE_AT_ZERO (GET_MODE (on_nonzero), op_val)))
++ && op_val == INTVAL (on_zero))
++ return on_nonzero;
++
++ return NULL_RTX;
++}
++
+
+ /* Simplify CODE, an operation with result mode MODE and three operands,
+ OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became
+@@ -5400,6 +5444,19 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
+ }
+ }
+
++ /* Convert x == 0 ? N : clz (x) into clz (x) when
++ CLZ_DEFINED_VALUE_AT_ZERO is defined to N for the mode of x.
++ Similarly for ctz (x). */
++ if (COMPARISON_P (op0) && !side_effects_p (op0)
++ && XEXP (op0, 1) == const0_rtx)
++ {
++ rtx simplified
++ = simplify_cond_clz_ctz (XEXP (op0, 0), GET_CODE (op0),
++ op1, op2);
++ if (simplified)
++ return simplified;
++ }
++
+ if (COMPARISON_P (op0) && ! side_effects_p (op0))
+ {
+ machine_mode cmp_mode = (GET_MODE (XEXP (op0, 0)) == VOIDmode
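Combined with the ifcvt hook above, this simplification removes the guard from the common "defined at zero" idiom whenever the target's CLZ_DEFINED_VALUE_AT_ZERO / CTZ_DEFINED_VALUE_AT_ZERO matches the selected constant, as on AArch64. The shape it recognizes, taken from the new pr37780 tests below:

int
fooctz (int i)
{
  /* On targets where ctz is defined to 32 at zero, the conditional
     collapses to a bare __builtin_ctz.  */
  return (i == 0) ? 32 : __builtin_ctz (i);
}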
--- a/src/gcc/testsuite/g++.dg/lto/pr69589_0.C
+++ b/src/gcc/testsuite/g++.dg/lto/pr69589_0.C
@@ -1,6 +1,8 @@
@@ -1852,6 +4826,97 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
#pragma GCC visibility push(hidden)
struct A { int &operator[] (long); };
template <typename> struct B;
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.c-torture/compile/pr71295.c
+@@ -0,0 +1,12 @@
++extern void fn2 (long long);
++int a;
++
++void
++fn1 ()
++{
++ long long b[3];
++ a = 0;
++ for (; a < 3; a++)
++ b[a] = 1;
++ fn2 (b[1]);
++}
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.c-torture/execute/pr37780.c
+@@ -0,0 +1,49 @@
++/* PR middle-end/37780. */
++
++#define VAL (8 * sizeof (int))
++
++int __attribute__ ((noinline, noclone))
++fooctz (int i)
++{
++ return (i == 0) ? VAL : __builtin_ctz (i);
++}
++
++int __attribute__ ((noinline, noclone))
++fooctz2 (int i)
++{
++ return (i != 0) ? __builtin_ctz (i) : VAL;
++}
++
++unsigned int __attribute__ ((noinline, noclone))
++fooctz3 (unsigned int i)
++{
++ return (i > 0) ? __builtin_ctz (i) : VAL;
++}
++
++int __attribute__ ((noinline, noclone))
++fooclz (int i)
++{
++ return (i == 0) ? VAL : __builtin_clz (i);
++}
++
++int __attribute__ ((noinline, noclone))
++fooclz2 (int i)
++{
++ return (i != 0) ? __builtin_clz (i) : VAL;
++}
++
++unsigned int __attribute__ ((noinline, noclone))
++fooclz3 (unsigned int i)
++{
++ return (i > 0) ? __builtin_clz (i) : VAL;
++}
++
++int
++main (void)
++{
++ if (fooctz (0) != VAL || fooctz2 (0) != VAL || fooctz3 (0) != VAL
++ || fooclz (0) != VAL || fooclz2 (0) != VAL || fooclz3 (0) != VAL)
++ __builtin_abort ();
++
++ return 0;
++}
+\ No newline at end of file
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.c-torture/execute/pr66940.c
+@@ -0,0 +1,20 @@
++long long __attribute__ ((noinline, noclone))
++foo (long long ival)
++{
++ if (ival <= 0)
++ return -0x7fffffffffffffffL - 1;
++
++ return 0x7fffffffffffffffL;
++}
++
++int
++main (void)
++{
++ if (foo (-1) != (-0x7fffffffffffffffL - 1))
++ __builtin_abort ();
++
++ if (foo (1) != 0x7fffffffffffffffL)
++ __builtin_abort ();
++
++ return 0;
++}
--- a/src/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/src/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -87,6 +87,12 @@ foreach plugin_test $plugin_test_list {
@@ -1931,6 +4996,317 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
+
+
+
+--- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
+@@ -25,6 +25,7 @@ f1 (int i, ...)
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -45,6 +46,7 @@ f2 (int i, ...)
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -60,6 +62,7 @@ f3 (int i, ...)
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
+@@ -78,6 +81,7 @@ f4 (int i, ...)
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -96,6 +100,7 @@ f5 (int i, ...)
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -116,6 +121,7 @@ f6 (int i, ...)
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -133,6 +139,7 @@ f7 (int i, ...)
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -152,6 +159,7 @@ f8 (int i, ...)
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -169,6 +177,7 @@ f9 (int i, ...)
+ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -188,6 +197,7 @@ f10 (int i, ...)
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -208,6 +218,7 @@ f11 (int i, ...)
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -228,6 +239,7 @@ f12 (int i, ...)
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -248,6 +260,7 @@ f13 (int i, ...)
+ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -268,6 +281,7 @@ f14 (int i, ...)
+ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -291,6 +305,7 @@ f15 (int i, ...)
+ /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+ /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+
+ /* We may be able to improve upon this after fixing PR66010/PR66013. */
+ /* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+--- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
+@@ -24,6 +24,7 @@ f1 (int i, ...)
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -39,6 +40,7 @@ f2 (int i, ...)
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -57,6 +59,7 @@ f3 (int i, ...)
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -73,6 +76,7 @@ f4 (int i, ...)
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -89,6 +93,7 @@ f5 (int i, ...)
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -107,6 +112,7 @@ f6 (int i, ...)
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -123,6 +129,7 @@ f7 (int i, ...)
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -139,6 +146,7 @@ f8 (int i, ...)
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -155,6 +163,7 @@ f10 (int i, ...)
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -171,6 +180,7 @@ f11 (int i, ...)
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -187,6 +197,7 @@ f12 (int i, ...)
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+--- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
+@@ -27,6 +27,7 @@ f1 (int i, ...)
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -44,6 +45,7 @@ f2 (int i, ...)
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -67,6 +69,7 @@ f3 (int i, ...)
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+@@ -88,6 +91,7 @@ f4 (int i, ...)
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
+--- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
+@@ -25,6 +25,7 @@ f1 (int i, ...)
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+
+ void
+ f2 (int i, ...)
+@@ -38,6 +39,7 @@ f2 (int i, ...)
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+
+ /* Here va_arg can be executed at most as many times as va_start. */
+ void
+@@ -56,6 +58,7 @@ f3 (int i, ...)
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+
+ void
+ f4 (int i, ...)
+@@ -74,6 +77,7 @@ f4 (int i, ...)
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+
+ void
+ f5 (int i, ...)
+@@ -88,6 +92,7 @@ f5 (int i, ...)
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save (4|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
+
+ void
+ f6 (int i, ...)
+@@ -102,6 +107,7 @@ f6 (int i, ...)
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */
+
+ void
+ f7 (int i, ...)
+@@ -116,3 +122,4 @@ f7 (int i, ...)
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target aarch64*-*-* } } } */
+--- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c
++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c
+@@ -30,6 +30,7 @@ bar (int x, char const *y, ...)
+ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
+ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
++/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
+ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */
+ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
--- /dev/null
+++ b/src/gcc/testsuite/gcc.dg/vect/pr57206.c
@@ -0,0 +1,11 @@
@@ -4900,6 +8276,71 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2?d\\\[\[0-9\]+\\\]" 3 } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-reg.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-reg.c
+@@ -8,4 +8,4 @@ foo (void)
+ bar (0.0);
+ }
+
+-/* { dg-final { scan-assembler "fmov\\td0, xzr" } } */
++/* { dg-final { scan-assembler "movi\\td0, #0" } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-reg.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-reg.c
+@@ -8,4 +8,4 @@ foo (void)
+ bar (0.0);
+ }
+
+-/* { dg-final { scan-assembler "fmov\\ts0, wzr" } } */
++/* { dg-final { scan-assembler "movi\\tv0\.2s, #0" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/pr37780_1.c
+@@ -0,0 +1,46 @@
++/* Test that we can remove the conditional move due to CLZ
++ and CTZ being defined at zero. */
++
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++int
++fooctz (int i)
++{
++ return (i == 0) ? 32 : __builtin_ctz (i);
++}
++
++int
++fooctz2 (int i)
++{
++ return (i != 0) ? __builtin_ctz (i) : 32;
++}
++
++unsigned int
++fooctz3 (unsigned int i)
++{
++ return (i > 0) ? __builtin_ctz (i) : 32;
++}
++
++/* { dg-final { scan-assembler-times "rbit\t*" 3 } } */
++
++int
++fooclz (int i)
++{
++ return (i == 0) ? 32 : __builtin_clz (i);
++}
++
++int
++fooclz2 (int i)
++{
++ return (i != 0) ? __builtin_clz (i) : 32;
++}
++
++unsigned int
++fooclz3 (unsigned int i)
++{
++ return (i > 0) ? __builtin_clz (i) : 32;
++}
++
++/* { dg-final { scan-assembler-times "clz\t" 6 } } */
++/* { dg-final { scan-assembler-not "cmp\t.*0" } } */
--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vmul_elem_1.c
@@ -0,0 +1,541 @@
@@ -5479,6 +8920,70 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
+
+/* { dg-final { scan-assembler-not "mov\tx0, x8" } } */
--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/va_arg_1.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 --save-temps" } */
++
++int
++f (int a, ...)
++{
++ /* { dg-final { scan-assembler-not "str" } } */
++ return a;
++}
++
++/* { dg-final { cleanup-saved-temps } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/va_arg_2.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 --save-temps" } */
++
++int
++foo (char *fmt, ...)
++{
++ int d;
++ __builtin_va_list ap;
++
++ __builtin_va_start (ap, fmt);
++ d = __builtin_va_arg (ap, int);
++ __builtin_va_end (ap);
++
++ /* { dg-final { scan-assembler-not "x7" } } */
++ return d;
++}
++
++/* { dg-final { cleanup-saved-temps } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/aarch64/va_arg_3.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 --save-temps" } */
++
++int d2i (double a);
++
++int
++foo (char *fmt, ...)
++{
++ int d, e;
++ double f, g;
++ __builtin_va_list ap;
++
++ __builtin_va_start (ap, fmt);
++ d = __builtin_va_arg (ap, int);
++ f = __builtin_va_arg (ap, double);
++ g = __builtin_va_arg (ap, double);
++ d += d2i (f);
++ d += d2i (g);
++ __builtin_va_end (ap);
++
++ /* { dg-final { scan-assembler-not "x7" } } */
++ /* { dg-final { scan-assembler-not "q7" } } */
++ return d;
++}
++
++/* { dg-final { cleanup-saved-temps } } */
+--- /dev/null
+++ b/src/gcc/testsuite/gcc.target/arm/armv5_thumb_isa.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target arm_arch_v5_ok } */
@@ -5489,6 +8994,164 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
+#endif
+
+int foo;
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-O3" } */
++/* { dg-add-options arm_neon } */
++
++
++
++int
++t6 (int len, void * dummy, short * __restrict x)
++{
++ len = len & ~31;
++ int result = 0;
++ __asm volatile ("");
++ for (int i = 0; i < len; i++)
++ result += x[i];
++ return result;
++}
++
++/* { dg-final { scan-assembler "vaddw\.s16" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-O3" } */
++/* { dg-add-options arm_neon } */
++
++
++int
++t6 (int len, void * dummy, int * __restrict x)
++{
++ len = len & ~31;
++ long long result = 0;
++ __asm volatile ("");
++ for (int i = 0; i < len; i++)
++ result += x[i];
++ return result;
++}
++
++/* { dg-final { scan-assembler "vaddw\.s32" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-O3" } */
++/* { dg-add-options arm_neon } */
++
++
++int
++t6 (int len, void * dummy, unsigned short * __restrict x)
++{
++ len = len & ~31;
++ unsigned int result = 0;
++ __asm volatile ("");
++ for (int i = 0; i < len; i++)
++ result += x[i];
++ return result;
++}
++
++/* { dg-final { scan-assembler "vaddw.u16" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-O3" } */
++/* { dg-add-options arm_neon } */
++
++
++int
++t6 (int len, void * dummy, unsigned int * __restrict x)
++{
++ len = len & ~31;
++ unsigned long long result = 0;
++ __asm volatile ("");
++ for (int i = 0; i < len; i++)
++ result += x[i];
++ return result;
++}
++
++/* { dg-final { scan-assembler "vaddw\.u32" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-O3" } */
++/* { dg-add-options arm_neon } */
++
++
++
++int
++t6 (int len, void * dummy, char * __restrict x)
++{
++ len = len & ~31;
++ unsigned short result = 0;
++ __asm volatile ("");
++ for (int i = 0; i < len; i++)
++ result += x[i];
++ return result;
++}
++
++/* { dg-final { scan-assembler "vaddw\.u8" } } */
+--- /dev/null
++++ b/src/gcc/testsuite/gcc.target/arm/pr37780_1.c
+@@ -0,0 +1,48 @@
++/* Test that we can remove the conditional move due to CLZ
++ being defined at zero. */
++
++/* { dg-do compile } */
++/* { dg-require-effective-target arm_arch_v6t2_ok } */
++/* { dg-options "-O2" } */
++/* { dg-add-options arm_arch_v6t2 } */
++
++int
++fooctz (int i)
++{
++ return (i == 0) ? 32 : __builtin_ctz (i);
++}
++
++int
++fooctz2 (int i)
++{
++ return (i != 0) ? __builtin_ctz (i) : 32;
++}
++
++unsigned int
++fooctz3 (unsigned int i)
++{
++ return (i > 0) ? __builtin_ctz (i) : 32;
++}
++
++/* { dg-final { scan-assembler-times "rbit\t*" 3 } } */
++
++int
++fooclz (int i)
++{
++ return (i == 0) ? 32 : __builtin_clz (i);
++}
++
++int
++fooclz2 (int i)
++{
++ return (i != 0) ? __builtin_clz (i) : 32;
++}
++
++unsigned int
++fooclz3 (unsigned int i)
++{
++ return (i > 0) ? __builtin_clz (i) : 32;
++}
++
++/* { dg-final { scan-assembler-times "clz\t" 6 } } */
++/* { dg-final { scan-assembler-not "cmp\t.*0" } } */
--- a/src/gcc/testsuite/lib/gcc-dg.exp
+++ b/src/gcc/testsuite/lib/gcc-dg.exp
@@ -403,6 +403,7 @@ if { [info procs ${tool}_load] != [list] \
@@ -5499,6 +9162,17 @@ LANG=C git diff 45abd6d7c64044c2b09b971d3af06edfc5e1635e bb0c4802e3ecbe29b817501
}
set result [list $status [lindex $result 1]]
}
+--- a/src/gcc/testsuite/lib/target-supports.exp
++++ b/src/gcc/testsuite/lib/target-supports.exp
+@@ -4382,6 +4382,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
+ set et_vect_widen_sum_hi_to_si_pattern_saved 0
+ if { [istarget powerpc*-*-*]
+ || [istarget aarch64*-*-*]
++ || ([istarget arm*-*-*] &&
++ [check_effective_target_arm_neon_ok])
+ || [istarget ia64-*-*] } {
+ set et_vect_widen_sum_hi_to_si_pattern_saved 1
+ }
--- a/src/gcc/tree-scalar-evolution.c
+++ b/src/gcc/tree-scalar-evolution.c
@@ -1937,6 +1937,36 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt,
diff --git a/debian/patches/vulcan-costs.diff b/debian/patches/vulcan-costs.diff
deleted file mode 100644
index ca5c90b..0000000
--- a/debian/patches/vulcan-costs.diff
+++ /dev/null
@@ -1,259 +0,0 @@
-# DP: Add cost model for vulcan CPU
-
-From: jgreenhalgh <jgreenhalgh at 138bc75d-0d04-0410-961f-82ee72b054a4>
-Date: Fri, 15 Jul 2016 11:17:53 +0000
-Subject: [PATCH] [PATCH/AARCH64] Add rtx_costs routine for vulcan.
-
-gcc/ChangeLog:
-
-2016-07-15 Virendra Pathak <virendra.pathak at broadcom.com>
- Julian Brown <julian at codesourcery.com>
-
- * config/aarch64/aarch64-cores.def: Update vulcan COSTS.
- * config/aarch64/aarch64-cost-tables.h
- (vulcan_extra_costs): New variable.
- * config/aarch64/aarch64.c
- (vulcan_addrcost_table): Likewise.
- (vulcan_regmove_cost): Likewise.
- (vulcan_vector_cost): Likewise.
- (vulcan_branch_cost): Likewise.
- (vulcan_tunings): Likewise.
-
-[dannf: backported by removing approx_modes function pointer]
-
-diff -urpN a/src/gcc/config/aarch64/aarch64.c b/src/gcc/config/aarch64/aarch64.c
---- a/src/gcc/config/aarch64/aarch64.c 2016-07-15 16:14:24.268328586 +0000
-+++ b/src/gcc/config/aarch64/aarch64.c 2016-07-15 16:15:52.603299822 +0000
-@@ -250,6 +250,22 @@ static const struct cpu_addrcost_table x
- 0, /* imm_offset */
- };
-
-+static const struct cpu_addrcost_table vulcan_addrcost_table =
-+{
-+ {
-+ 0, /* hi */
-+ 0, /* si */
-+ 0, /* di */
-+ 2, /* ti */
-+ },
-+ 0, /* pre_modify */
-+ 0, /* post_modify */
-+ 2, /* register_offset */
-+ 3, /* register_sextend */
-+ 3, /* register_zextend */
-+ 0, /* imm_offset */
-+};
-+
- static const struct cpu_regmove_cost generic_regmove_cost =
- {
- 1, /* GP2GP */
-@@ -308,6 +324,15 @@ static const struct cpu_regmove_cost xge
- 2 /* FP2FP */
- };
-
-+static const struct cpu_regmove_cost vulcan_regmove_cost =
-+{
-+ 1, /* GP2GP */
-+ /* Avoid the use of int<->fp moves for spilling. */
-+ 8, /* GP2FP */
-+ 8, /* FP2GP */
-+ 4 /* FP2FP */
-+};
-+
- /* Generic costs for vector insn classes. */
- static const struct cpu_vector_cost generic_vector_cost =
- {
-@@ -379,6 +404,24 @@ static const struct cpu_vector_cost xgen
- 1 /* cond_not_taken_branch_cost */
- };
-
-+/* Costs for vector insn classes for Vulcan. */
-+static const struct cpu_vector_cost vulcan_vector_cost =
-+{
-+ 6, /* scalar_stmt_cost */
-+ 4, /* scalar_load_cost */
-+ 1, /* scalar_store_cost */
-+ 6, /* vec_stmt_cost */
-+ 3, /* vec_permute_cost */
-+ 6, /* vec_to_scalar_cost */
-+ 5, /* scalar_to_vec_cost */
-+ 8, /* vec_align_load_cost */
-+ 8, /* vec_unalign_load_cost */
-+ 4, /* vec_unalign_store_cost */
-+ 4, /* vec_store_cost */
-+ 2, /* cond_taken_branch_cost */
-+ 1 /* cond_not_taken_branch_cost */
-+};
-+
- /* Generic costs for branch instructions. */
- static const struct cpu_branch_cost generic_branch_cost =
- {
-@@ -393,6 +436,13 @@ static const struct cpu_branch_cost cort
- 3 /* Unpredictable. */
- };
-
-+/* Branch costs for Vulcan. */
-+static const struct cpu_branch_cost vulcan_branch_cost =
-+{
-+ 1, /* Predictable. */
-+ 3 /* Unpredictable. */
-+};
-+
- static const struct tune_params generic_tunings =
- {
- &cortexa57_extra_costs,
-@@ -589,6 +639,30 @@ static const struct tune_params xgene1_t
- (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
- };
-
-+static const struct tune_params vulcan_tunings =
-+{
-+ &vulcan_extra_costs,
-+ &vulcan_addrcost_table,
-+ &vulcan_regmove_cost,
-+ &vulcan_vector_cost,
-+ &vulcan_branch_cost,
-+ 4, /* memmov_cost. */
-+ 4, /* issue_rate. */
-+ AARCH64_FUSE_NOTHING, /* fuseable_ops. */
-+ 16, /* function_align. */
-+ 8, /* jump_align. */
-+ 16, /* loop_align. */
-+ 3, /* int_reassoc_width. */
-+ 2, /* fp_reassoc_width. */
-+ 2, /* vec_reassoc_width. */
-+ 2, /* min_div_recip_mul_sf. */
-+ 2, /* min_div_recip_mul_df. */
-+ 0, /* max_case_values. */
-+ 0, /* cache_line_size. */
-+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
-+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
-+};
-+
- /* Support for fine-grained override of the tuning structures. */
- struct aarch64_tuning_override_function
- {
-diff -urpN a/src/gcc/config/aarch64/aarch64-cores.def b/src/gcc/config/aarch64/aarch64-cores.def
---- a/src/gcc/config/aarch64/aarch64-cores.def 2016-07-15 16:14:24.272328721 +0000
-+++ b/src/gcc/config/aarch64/aarch64-cores.def 2016-07-15 16:15:26.730430056 +0000
-@@ -51,7 +51,7 @@ AARCH64_CORE("xgene1", xgene1, x
-
- /* V8.1 Architecture Processors. */
-
--AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, cortexa57, "0x42", "0x516")
-+AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
-
- /* V8 big.LITTLE implementations. */
-
-diff -urpN a/src/gcc/config/aarch64/aarch64-cost-tables.h b/src/gcc/config/aarch64/aarch64-cost-tables.h
---- a/src/gcc/config/aarch64/aarch64-cost-tables.h 2016-07-15 16:14:24.272328721 +0000
-+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h 2016-07-15 16:15:26.730430056 +0000
-@@ -127,6 +127,108 @@ const struct cpu_cost_table thunderx_ext
- }
- };
-
-+const struct cpu_cost_table vulcan_extra_costs =
-+{
-+ /* ALU */
-+ {
-+ 0, /* Arith. */
-+ 0, /* Logical. */
-+ 0, /* Shift. */
-+ 0, /* Shift_reg. */
-+ COSTS_N_INSNS (1), /* Arith_shift. */
-+ COSTS_N_INSNS (1), /* Arith_shift_reg. */
-+ COSTS_N_INSNS (1), /* Log_shift. */
-+ COSTS_N_INSNS (1), /* Log_shift_reg. */
-+ 0, /* Extend. */
-+ COSTS_N_INSNS (1), /* Extend_arith. */
-+ 0, /* Bfi. */
-+ 0, /* Bfx. */
-+ COSTS_N_INSNS (3), /* Clz. */
-+ 0, /* Rev. */
-+ 0, /* Non_exec. */
-+ true /* Non_exec_costs_exec. */
-+ },
-+ {
-+ /* MULT SImode */
-+ {
-+ COSTS_N_INSNS (4), /* Simple. */
-+ COSTS_N_INSNS (4), /* Flag_setting. */
-+ COSTS_N_INSNS (4), /* Extend. */
-+ COSTS_N_INSNS (5), /* Add. */
-+ COSTS_N_INSNS (5), /* Extend_add. */
-+ COSTS_N_INSNS (18) /* Idiv. */
-+ },
-+ /* MULT DImode */
-+ {
-+ COSTS_N_INSNS (4), /* Simple. */
-+ 0, /* Flag_setting. */
-+ COSTS_N_INSNS (4), /* Extend. */
-+ COSTS_N_INSNS (5), /* Add. */
-+ COSTS_N_INSNS (5), /* Extend_add. */
-+ COSTS_N_INSNS (26) /* Idiv. */
-+ }
-+ },
-+ /* LD/ST */
-+ {
-+ COSTS_N_INSNS (4), /* Load. */
-+ COSTS_N_INSNS (4), /* Load_sign_extend. */
-+ COSTS_N_INSNS (5), /* Ldrd. */
-+ COSTS_N_INSNS (4), /* Ldm_1st. */
-+ 1, /* Ldm_regs_per_insn_1st. */
-+ 1, /* Ldm_regs_per_insn_subsequent. */
-+ COSTS_N_INSNS (4), /* Loadf. */
-+ COSTS_N_INSNS (4), /* Loadd. */
-+ COSTS_N_INSNS (4), /* Load_unaligned. */
-+ 0, /* Store. */
-+ 0, /* Strd. */
-+ 0, /* Stm_1st. */
-+ 1, /* Stm_regs_per_insn_1st. */
-+ 1, /* Stm_regs_per_insn_subsequent. */
-+ 0, /* Storef. */
-+ 0, /* Stored. */
-+ 0, /* Store_unaligned. */
-+ COSTS_N_INSNS (1), /* Loadv. */
-+ COSTS_N_INSNS (1) /* Storev. */
-+ },
-+ {
-+ /* FP SFmode */
-+ {
-+ COSTS_N_INSNS (4), /* Div. */
-+ COSTS_N_INSNS (1), /* Mult. */
-+ COSTS_N_INSNS (1), /* Mult_addsub. */
-+ COSTS_N_INSNS (1), /* Fma. */
-+ COSTS_N_INSNS (1), /* Addsub. */
-+ COSTS_N_INSNS (1), /* Fpconst. */
-+ COSTS_N_INSNS (1), /* Neg. */
-+ COSTS_N_INSNS (1), /* Compare. */
-+ COSTS_N_INSNS (2), /* Widen. */
-+ COSTS_N_INSNS (2), /* Narrow. */
-+ COSTS_N_INSNS (2), /* Toint. */
-+ COSTS_N_INSNS (2), /* Fromint. */
-+ COSTS_N_INSNS (2) /* Roundint. */
-+ },
-+ /* FP DFmode */
-+ {
-+ COSTS_N_INSNS (6), /* Div. */
-+ COSTS_N_INSNS (1), /* Mult. */
-+ COSTS_N_INSNS (1), /* Mult_addsub. */
-+ COSTS_N_INSNS (1), /* Fma. */
-+ COSTS_N_INSNS (1), /* Addsub. */
-+ COSTS_N_INSNS (1), /* Fpconst. */
-+ COSTS_N_INSNS (1), /* Neg. */
-+ COSTS_N_INSNS (1), /* Compare. */
-+ COSTS_N_INSNS (2), /* Widen. */
-+ COSTS_N_INSNS (2), /* Narrow. */
-+ COSTS_N_INSNS (2), /* Toint. */
-+ COSTS_N_INSNS (2), /* Fromint. */
-+ COSTS_N_INSNS (2) /* Roundint. */
-+ }
-+ },
-+ /* Vector */
-+ {
-+ COSTS_N_INSNS (1) /* Alu. */
-+ }
-+};
-
-
- #endif
diff --git a/debian/patches/vulcan-cpu-doc.diff b/debian/patches/vulcan-cpu-doc.diff
deleted file mode 100644
index 4656259..0000000
--- a/debian/patches/vulcan-cpu-doc.diff
+++ /dev/null
@@ -1,27 +0,0 @@
-# DP: Accept vulcan as a cpu name for the AArch64 port of GCC (documentation)
-
-From: jgreenhalgh <jgreenhalgh at 138bc75d-0d04-0410-961f-82ee72b054a4>
-Date: Tue, 21 Jun 2016 13:43:29 +0000 (+0000)
-Subject: [PATCH/AARCH64] Accept vulcan as a cpu name for the AArch64 port of GCC
-X-Git-Url: https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff_plain;h=2c6ac78145ac8ff2fd83271d093e23ab80a15e4f
-
-[PATCH/AARCH64] Accept vulcan as a cpu name for the AArch64 port of GCC
-
-gcc/ChangeLog
-
- * config/aarch64/aarch64-cores.def (vulcan): New core.
- * config/aarch64/aarch64-tune.md: Regenerate.
- * doc/invoke.texi: Document vulcan as an available option.
-
-diff -urpN a/src/gcc/doc/invoke.texi b/src/gcc/doc/invoke.texi
---- a/src/gcc/doc/invoke.texi 2016-06-21 10:31:29.994143994 -0600
-+++ b/src/gcc/doc/invoke.texi 2016-06-21 10:35:51.136081208 -0600
-@@ -12988,7 +12988,7 @@ Specify the name of the target processor
- performance of the code. Permissible values for this option are:
- @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a57},
- @samp{cortex-a72}, @samp{exynos-m1}, @samp{qdf24xx}, @samp{thunderx},
--@samp{xgene1}.
-+@samp{vulcan}, @samp{xgene1}.
-
- Additionally, this option can specify that GCC should tune the performance
- of the code for a big.LITTLE system. Permissible values for this
diff --git a/debian/patches/vulcan-cpu.diff b/debian/patches/vulcan-cpu.diff
deleted file mode 100644
index 29edebe..0000000
--- a/debian/patches/vulcan-cpu.diff
+++ /dev/null
@@ -1,39 +0,0 @@
-# DP: Accept vulcan as a cpu name for the AArch64 port of GCC
-
-From: jgreenhalgh <jgreenhalgh at 138bc75d-0d04-0410-961f-82ee72b054a4>
-Date: Tue, 21 Jun 2016 13:43:29 +0000 (+0000)
-Subject: [PATCH/AARCH64] Accept vulcan as a cpu name for the AArch64 port of GCC
-X-Git-Url: https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff_plain;h=2c6ac78145ac8ff2fd83271d093e23ab80a15e4f
-
-[PATCH/AARCH64] Accept vulcan as a cpu name for the AArch64 port of GCC
-
-gcc/ChangeLog
-
- * config/aarch64/aarch64-cores.def (vulcan): New core.
- * config/aarch64/aarch64-tune.md: Regenerate.
- * doc/invoke.texi: Document vulcan as an available option.
-
-diff -urpN a/src/gcc/config/aarch64/aarch64-cores.def b/src/gcc/config/aarch64/aarch64-cores.def
---- a/src/gcc/config/aarch64/aarch64-cores.def 2016-01-04 07:30:50.000000000 -0700
-+++ b/src/gcc/config/aarch64/aarch64-cores.def 2016-06-21 10:32:59.191974071 -0600
-@@ -49,6 +49,10 @@ AARCH64_CORE("qdf24xx", qdf24xx, c
- AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
- AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
-
-+/* V8.1 Architecture Processors. */
-+
-+AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, cortexa57, "0x42", "0x516")
-+
- /* V8 big.LITTLE implementations. */
-
- AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
-diff -urpN a/src/gcc/config/aarch64/aarch64-tune.md b/src/gcc/config/aarch64/aarch64-tune.md
---- a/src/gcc/config/aarch64/aarch64-tune.md 2016-04-27 02:22:11.000000000 -0600
-+++ b/src/gcc/config/aarch64/aarch64-tune.md 2016-06-21 10:32:59.191974071 -0600
-@@ -1,5 +1,5 @@
- ;; -*- buffer-read-only: t -*-
- ;; Generated automatically by gentune.sh from aarch64-cores.def
- (define_attr "tune"
-- "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53"
-+ "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,vulcan,cortexa57cortexa53,cortexa72cortexa53"
- (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/debian/rules.patch b/debian/rules.patch
index b7f9161..a9010b5 100644
--- a/debian/rules.patch
+++ b/debian/rules.patch
@@ -30,7 +30,6 @@ ifneq ($(GFDL_INVARIANT_FREE),yes)
rename-info-files \
gcc-SOURCE_DATE_EPOCH-doc \
gcc-SOURCE_DATE_EPOCH-2-doc \
- vulcan-cpu-doc \
# svn-doc-updates \
# $(if $(with_linaro_branch),,svn-doc-updates) \
@@ -85,8 +84,6 @@ debian_patches += \
gcc-SOURCE_DATE_EPOCH \
gcc-SOURCE_DATE_EPOCH-2 \
cmd-go-combine-gccgo-s-ld-and-ldShared-methods \
- vulcan-cpu \
- vulcan-costs \
libjava-mips64el \
PR55947-revert \
gccgo-issue16780 \
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/gcc-6.git