[gcc-7] 286/354: * Fix PR target/81833 (PPC), taken from the trunk. Closes: #871565.

Thu Nov 23 15:51:18 UTC 2017

This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch master
in repository gcc-7.

commit bf41cd6927ca2a210834655b8b4c06667a1ccdd7
Author: doko <doko at 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca>
Date:   Wed Sep 6 08:49:45 2017 +0000

      * Fix PR target/81833 (PPC), taken from the trunk. Closes: #871565.
    
    
    git-svn-id: svn+ssh://svn.debian.org/svn/gcccvs/branches/sid/gcc-7@9658 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
 debian/changelog            |   1 +
 debian/patches/pr81833.diff | 248 ++++++++++++++++++++++++++++++++++++++++++++
 debian/rules.patch          |   1 +
 3 files changed, 250 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index 9c1738c..dadbae6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,7 @@ gcc-7 (7.2.0-4) UNRELEASED; urgency=medium
     - Fix PR c++/82039, PR libstdc++/81912, PR libstdc++/81891,
       PR libstdc++/81599, PR libstdc++/81338, PR tree-optimization/81503,
       PR ada/79542, PR ada/62235, PR fortran/81770.
+  * Fix PR target/81833 (PPC), taken from the trunk. Closes: #871565.
 
  -- Matthias Klose <doko at debian.org>  Wed, 06 Sep 2017 10:38:05 +0200
 
diff --git a/debian/patches/pr81833.diff b/debian/patches/pr81833.diff
new file mode 100644
index 0000000..29d029f
--- /dev/null
+++ b/debian/patches/pr81833.diff
@@ -0,0 +1,248 @@
+# DP: Fix PR target/81833 (PPC), taken from the trunk.
+
+gcc/
+
+2017-09-05  Bill Schmidt  <wschmidt at linux.vnet.ibm.com>
+
+	PR target/81833
+	* config/rs6000/altivec.md (altivec_vsum2sws): Convert from a
+	define_insn to a define_expand.
+	(altivec_vsum2sws_direct): New define_insn.
+	(altivec_vsumsws): Convert from a define_insn to a define_expand.
+
+gcc/testsuite/
+
+2017-09-05  Bill Schmidt  <wschmidt at linux.vnet.ibm.com>
+
+	PR target/81833
+	* gcc.target/powerpc/pr81833-1.c: New file.
+	* gcc.target/powerpc/pr81833-2.c: New file.
+
+ 
+Index: gcc/testsuite/gcc.target/powerpc/pr81833-2.c
+===================================================================
+--- a/src/gcc/testsuite/gcc.target/powerpc/pr81833-2.c	(nonexistent)
++++ a/src/gcc/testsuite/gcc.target/powerpc/pr81833-2.c	(revision 251723)
+@@ -0,0 +1,59 @@
++/* PR81833: This used to fail due to improper implementation of vec_sums.  */
++/* Test case relies on -mcpu=power7 or later.  Currently we don't have
++   machinery to express that, so we have two separate tests for -mcpu=power7
++   and -mcpu=power8 to catch 32-bit BE on P7 and 64-bit BE/LE on P8.  */
++
++/* { dg-do run } */
++/* { dg-require-effective-target vsx_hw } */
++/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
++/* { dg-options "-mcpu=power7 -O2" } */
++
++#include <altivec.h>
++
++#define vec_u8  vector unsigned char
++#define vec_s8  vector signed char
++#define vec_u16 vector unsigned short
++#define vec_s16 vector signed short
++#define vec_u32 vector unsigned int
++#define vec_s32 vector signed int
++#define vec_f   vector float
++
++#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8 (0)
++
++#define zero_u8v  (vec_u8)  zerov
++#define zero_s8v  (vec_s8)  zerov
++#define zero_u16v (vec_u16) zerov
++#define zero_s16v (vec_s16) zerov
++#define zero_u32v (vec_u32) zerov
++#define zero_s32v (vec_s32) zerov
++
++signed int __attribute__((noinline))
++scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
++			 int order)
++{
++  int i;
++  LOAD_ZERO;
++  register vec_s16 vec1;
++  register vec_s32 res = vec_splat_s32 (0), t;
++  signed int ires;
++
++  for (i = 0; i < order; i += 8) {
++    vec1 = vec_vsx_ld (0, v1);
++    t    = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
++    res  = vec_sums (t, res);
++    v1  += 8;
++    v2  += 8;
++  }
++  res = vec_splat (res, 3);
++  vec_ste (res, 0, &ires);
++
++  return ires;
++}
++
++int main(void)
++{
++  const signed short test_vec[] = { 1, 1, 1, 1, 1, 1, 1, 1 };
++  if (scalarproduct_int16_vsx (test_vec, test_vec, 8) != 8)
++    __builtin_abort ();
++  return 0;
++}
+Index: gcc/testsuite/gcc.target/powerpc/pr81833-1.c
+===================================================================
+--- a/src/gcc/testsuite/gcc.target/powerpc/pr81833-1.c	(nonexistent)
++++ a/src/gcc/testsuite/gcc.target/powerpc/pr81833-1.c	(revision 251723)
+@@ -0,0 +1,59 @@
++/* PR81833: This used to fail due to improper implementation of vec_sums.  */
++/* Test case relies on -mcpu=power7 or later.  Currently we don't have
++   machinery to express that, so we have two separate tests for -mcpu=power7
++   and -mcpu=power8 to catch 32-bit BE on P7 and 64-bit BE/LE on P8.  */
++
++/* { dg-do run } */
++/* { dg-require-effective-target p8vector_hw } */
++/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
++/* { dg-options "-mcpu=power8 -O2" } */
++
++#include <altivec.h>
++
++#define vec_u8  vector unsigned char
++#define vec_s8  vector signed char
++#define vec_u16 vector unsigned short
++#define vec_s16 vector signed short
++#define vec_u32 vector unsigned int
++#define vec_s32 vector signed int
++#define vec_f   vector float
++
++#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8 (0)
++
++#define zero_u8v  (vec_u8)  zerov
++#define zero_s8v  (vec_s8)  zerov
++#define zero_u16v (vec_u16) zerov
++#define zero_s16v (vec_s16) zerov
++#define zero_u32v (vec_u32) zerov
++#define zero_s32v (vec_s32) zerov
++
++signed int __attribute__((noinline))
++scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
++			 int order)
++{
++  int i;
++  LOAD_ZERO;
++  register vec_s16 vec1;
++  register vec_s32 res = vec_splat_s32 (0), t;
++  signed int ires;
++
++  for (i = 0; i < order; i += 8) {
++    vec1 = vec_vsx_ld (0, v1);
++    t    = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
++    res  = vec_sums (t, res);
++    v1  += 8;
++    v2  += 8;
++  }
++  res = vec_splat (res, 3);
++  vec_ste (res, 0, &ires);
++
++  return ires;
++}
++
++int main(void)
++{
++  const signed short test_vec[] = { 1, 1, 1, 1, 1, 1, 1, 1 };
++  if (scalarproduct_int16_vsx (test_vec, test_vec, 8) != 8)
++    __builtin_abort ();
++  return 0;
++}
+Index: gcc/config/rs6000/altivec.md
+===================================================================
+--- a/src/gcc/config/rs6000/altivec.md	(revision 251722)
++++ a/src/gcc/config/rs6000/altivec.md	(revision 251723)
+@@ -1804,51 +1804,61 @@
+   "vsum4s<VI_char>s %0,%1,%2"
+   [(set_attr "type" "veccomplex")])
+ 
+-;; FIXME: For the following two patterns, the scratch should only be
+-;; allocated for !VECTOR_ELT_ORDER_BIG, and the instructions should
+-;; be emitted separately.
+-(define_insn "altivec_vsum2sws"
+-  [(set (match_operand:V4SI 0 "register_operand" "=v")
+-        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+-                      (match_operand:V4SI 2 "register_operand" "v")]
+-		     UNSPEC_VSUM2SWS))
+-   (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+-   (clobber (match_scratch:V4SI 3 "=v"))]
++(define_expand "altivec_vsum2sws"
++  [(use (match_operand:V4SI 0 "register_operand"))
++   (use (match_operand:V4SI 1 "register_operand"))
++   (use (match_operand:V4SI 2 "register_operand"))]
+   "TARGET_ALTIVEC"
+ {
+   if (VECTOR_ELT_ORDER_BIG)
+-    return "vsum2sws %0,%1,%2";
++    emit_insn (gen_altivec_vsum2sws_direct (operands[0], operands[1],
++                                            operands[2]));
+   else
+-    return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4";
+-}
+-  [(set_attr "type" "veccomplex")
+-   (set (attr "length")
+-     (if_then_else
+-       (match_test "VECTOR_ELT_ORDER_BIG")
+-       (const_string "4")
+-       (const_string "12")))])
++    {
++      rtx tmp1 = gen_reg_rtx (V4SImode);
++      rtx tmp2 = gen_reg_rtx (V4SImode);
++      emit_insn (gen_altivec_vsldoi_v4si (tmp1, operands[2],
++                                          operands[2], GEN_INT (12)));
++      emit_insn (gen_altivec_vsum2sws_direct (tmp2, operands[1], tmp1));
++      emit_insn (gen_altivec_vsldoi_v4si (operands[0], tmp2, tmp2,
++                                          GEN_INT (4)));
++    }
++  DONE;
++})
+ 
+-(define_insn "altivec_vsumsws"
++; FIXME: This can probably be expressed without an UNSPEC.
++(define_insn "altivec_vsum2sws_direct"
+   [(set (match_operand:V4SI 0 "register_operand" "=v")
+         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+-                      (match_operand:V4SI 2 "register_operand" "v")]
+-		     UNSPEC_VSUMSWS))
+-   (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+-   (clobber (match_scratch:V4SI 3 "=v"))]
++	              (match_operand:V4SI 2 "register_operand" "v")]
++		     UNSPEC_VSUM2SWS))
++   (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+   "TARGET_ALTIVEC"
++  "vsum2sws %0,%1,%2"
++  [(set_attr "type" "veccomplex")])
++
++(define_expand "altivec_vsumsws"
++  [(use (match_operand:V4SI 0 "register_operand"))
++   (use (match_operand:V4SI 1 "register_operand"))
++   (use (match_operand:V4SI 2 "register_operand"))]
++  "TARGET_ALTIVEC"
+ {
+   if (VECTOR_ELT_ORDER_BIG)
+-    return "vsumsws %0,%1,%2";
++    emit_insn (gen_altivec_vsumsws_direct (operands[0], operands[1],
++                                           operands[2]));
+   else
+-    return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvsldoi %0,%3,%3,12";
+-}
+-  [(set_attr "type" "veccomplex")
+-   (set (attr "length")
+-     (if_then_else
+-       (match_test "(VECTOR_ELT_ORDER_BIG)")
+-       (const_string "4")
+-       (const_string "12")))])
++    {
++      rtx tmp1 = gen_reg_rtx (V4SImode);
++      rtx tmp2 = gen_reg_rtx (V4SImode);
++      emit_insn (gen_altivec_vspltw_direct (tmp1, operands[2], const0_rtx));
++      emit_insn (gen_altivec_vsumsws_direct (tmp2, operands[1], tmp1));
++      emit_insn (gen_altivec_vsldoi_v4si (operands[0], tmp2, tmp2,
++                                          GEN_INT (12)));
++    }
++  DONE;
++})
+ 
++; FIXME: This can probably be expressed without an UNSPEC.
+ (define_insn "altivec_vsumsws_direct"
+   [(set (match_operand:V4SI 0 "register_operand" "=v")
+         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
diff --git a/debian/rules.patch b/debian/rules.patch
index 83f761a..f045bca 100644
--- a/debian/rules.patch
+++ b/debian/rules.patch
@@ -72,6 +72,7 @@ debian_patches += \
 	gcc-fuse-ld-lld \
 	libgo-s390x-default-isa \
 	pr81829 \
+	pr81833 \
 
 
 #	$(if $(filter yes, $(DEB_CROSS)),,gcc-print-file-name) \

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/gcc-7.git