[libclc] 100/291: R600: Set the noduplicate attribute on barrier() intrinsics

Andreas Beckmann anbe at moszumanska.debian.org
Tue Sep 8 10:53:38 UTC 2015


This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch master
in repository libclc.

commit 6e12753d8c78cc3ac93c0cda76e287f24322dcdb
Author: Tom Stellard <thomas.stellard at amd.com>
Date:   Thu Oct 31 15:50:48 2013 +0000

    R600: Set the noduplicate attribute on barrier() intrinsics
    
    This will prevent LLVM optimization passes from creating illegal uses
    of the barrier() intrinsic (e.g. calling barrier() from a conditional
    that is not executed by all threads).
    
    git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@193753 91177308-0d34-0410-b5e6-96231b3b80d8
---
 r600/lib/SOURCES                         |  1 -
 r600/lib/synchronization/barrier.cl      | 15 +++++----------
 r600/lib/synchronization/barrier_impl.ll | 33 ++++++++++++++++++++++++--------
 3 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES
index aac6d8f..d9fc897 100644
--- a/r600/lib/SOURCES
+++ b/r600/lib/SOURCES
@@ -8,4 +8,3 @@ workitem/get_global_size.ll
 synchronization/barrier.cl
 synchronization/barrier_impl.ll
 shared/vload.cl
-shared/vstore.cl
\ No newline at end of file
diff --git a/r600/lib/synchronization/barrier.cl b/r600/lib/synchronization/barrier.cl
index ac0b4b3..6f2900b 100644
--- a/r600/lib/synchronization/barrier.cl
+++ b/r600/lib/synchronization/barrier.cl
@@ -1,15 +1,10 @@
 
 #include <clc/clc.h>
 
-void barrier_local(void);
-void barrier_global(void);
-
-void barrier(cl_mem_fence_flags flags) {
-  if (flags & CLK_LOCAL_MEM_FENCE) {
-    barrier_local();
-  }
+_CLC_DEF int __clc_clk_local_mem_fence() {
+  return CLK_LOCAL_MEM_FENCE;
+}
 
-  if (flags & CLK_GLOBAL_MEM_FENCE) {
-    barrier_global();
-  }
+_CLC_DEF int __clc_clk_global_mem_fence() {
+  return CLK_GLOBAL_MEM_FENCE;
 }
diff --git a/r600/lib/synchronization/barrier_impl.ll b/r600/lib/synchronization/barrier_impl.ll
index 99ac018..3d8ee66 100644
--- a/r600/lib/synchronization/barrier_impl.ll
+++ b/r600/lib/synchronization/barrier_impl.ll
@@ -1,12 +1,29 @@
-declare void @llvm.AMDGPU.barrier.local() nounwind
-declare void @llvm.AMDGPU.barrier.global() nounwind
+declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
+declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
+declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
 
-define void @barrier_local() nounwind alwaysinline {
-  call void @llvm.AMDGPU.barrier.local()
-  ret void
-}
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+barrier_local_test:
+  %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
+  %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
+  %1 = icmp ne i32 %0, 0
+  br i1 %1, label %barrier_local, label %barrier_global_test
+
+barrier_local:
+  call void @llvm.AMDGPU.barrier.local() noduplicate
+  br label %barrier_global_test
+
+barrier_global_test:
+  %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
+  %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
+  %3 = icmp ne i32 %2, 0
+  br i1 %3, label %barrier_global, label %done
+
+barrier_global:
+  call void @llvm.AMDGPU.barrier.global() noduplicate
+  br label %done
 
-define void @barrier_global() nounwind alwaysinline {
-  call void @llvm.AMDGPU.barrier.global()
+done:
   ret void
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/libclc.git



More information about the Pkg-opencl-commits mailing list