[viennacl] 01/04: New upstream version 1.5.2
Toby St Clere Smithe
tsmithe-guest at moszumanska.debian.org
Fri May 16 11:43:09 UTC 2014
This is an automated email from the git hooks/post-receive script.
tsmithe-guest pushed a commit to branch master
in repository viennacl.
commit 15d0b5a96d64cf43267bdbedfcc95f5f9b2a949b
Author: Toby Smithe <git at tsmithe.net>
Date: Fri May 16 11:04:58 2014 +0100
New upstream version 1.5.2
---
CMakeLists.txt | 2 +-
README | 6 ++--
changelog | 9 ++++-
doc/Doxyfile.in | 2 +-
doc/manual/changelogs.tex | 10 ++++++
doc/manual/cover.tex | 2 +-
doc/manual/viennacl.tex | 4 +--
viennacl/forwards.h | 2 +-
viennacl/generator/profiles.hpp | 23 +++++++++++++
viennacl/linalg/detail/ilu/block_ilu.hpp | 4 +--
viennacl/linalg/vector_operations.hpp | 23 +++++++++++++
viennacl/ocl/device.hpp | 44 ++++++++++++++++++++++++
viennacl/ocl/infos.hpp | 4 +++
viennacl/scheduler/execute.hpp | 4 +++
viennacl/scheduler/execute_matrix_dispatcher.hpp | 44 ++++++++++++++++++++++++
viennacl/vector.hpp | 22 ------------
16 files changed, 171 insertions(+), 34 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bd38b04..a1eee31 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,7 +31,7 @@ ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(VERSION_MAJOR 1)
set(VERSION_MINOR 5)
-set(VERSION_PATCH 1)
+set(VERSION_PATCH 2)
set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
diff --git a/README b/README
index b4ea993..4a66899 100644
--- a/README
+++ b/README
@@ -26,9 +26,9 @@ ViennaCL requires the following:
The first step is to extract the file:
Unix-based OS:
-$> gunzip ViennaCL-1.5.1.tar.gz
-$> tar -xf ViennaCL-1.5.1.tar
-$> cd ViennaCL-1.5.1
+$> gunzip ViennaCL-1.5.2.tar.gz
+$> tar -xf ViennaCL-1.5.2.tar
+$> cd ViennaCL-1.5.2
Windows:
Extract the file using your favorite compressor/decompressor, e.g. 7-zip.
diff --git a/changelog b/changelog
index c3fdc04..205cd47 100644
--- a/changelog
+++ b/changelog
@@ -4,6 +4,14 @@
*** Version 1.5.x ***
+-- Version 1.5.2 --
+While work on the upcoming 1.6.0 release is in full swing, this maintenance release fixes a couple of bugs and performance regressions reported to us:
+ - Fixed compilation problems on Visual Studio for the operations y += prod(A, x) and y -= prod(A, x) with dense matrix A.
+ - Added a better performance profile for NVIDIA Kepler GPUs. For example, this increases the performance of matrix-matrix multiplications to 600 GFLOPs in single precision on a GeForce GTX 680. Thanks to Paul Dufort for bringing this to our attention.
+ - Added support for the operation A = trans(B) for matrices A and B to the scheduler.
+ - Fixed compilation problems in block-ILU preconditioners when passing block boundaries manually.
+ - Ensured compatibility with OpenCL 1.0, which may still be available on older devices.
+
-- Version 1.5.1 --
This maintenance release fixes a few nasty bugs:
- Fixed a memory leak in the OpenCL kernel generator. Thanks to GitHub user dxyzab for spotting this.
@@ -15,7 +23,6 @@ This maintenance release fixes a few nasty bugs:
- Corrected a weak check used in two tests. Thanks to Walter Mascarenhas for providing a fix.
- Fixed a wrong global work size inside the SPAI preconditioner. Thanks to Andreas Rost.
-
-- Version 1.5.0 --
This new minor release number update focuses on a more powerful API, and on first steps in making ViennaCL more accessible from languages other than C++.
In addition to many internal improvements both in terms of performance and flexibility, the following changes are visible to users:
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 2edec42..70a9174 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -31,7 +31,7 @@ PROJECT_NAME = "ViennaCL - The Vienna Computing Library"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 1.5.1
+PROJECT_NUMBER = 1.5.2
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
diff --git a/doc/manual/changelogs.tex b/doc/manual/changelogs.tex
index 1daaafe..90faf4a 100644
--- a/doc/manual/changelogs.tex
+++ b/doc/manual/changelogs.tex
@@ -3,6 +3,16 @@
\section*{Version 1.5.x}
+\subsection*{Version 1.5.2}
+While work on the upcoming 1.6.0 release is in full swing, this maintenance release fixes a couple of bugs and performance regressions reported to us:
+\begin{itemize}
+ \item Fixed compilation problems on Visual Studio for the operations \lstinline|y += prod(A, x)| and \lstinline|y -= prod(A, x)| with dense matrix A.
+ \item Added a better performance profile for NVIDIA Kepler GPUs. For example, this increases the performance of matrix-matrix multiplications to 600 GFLOPs in single precision on a GeForce GTX 680. Thanks to Paul Dufort for bringing this to our attention.
+ \item Added support for the operation \lstinline|A = trans(B)| for matrices A and B to the scheduler.
+ \item Fixed compilation problems in block-ILU preconditioners when passing block boundaries manually.
+ \item Ensured compatibility with OpenCL 1.0, which may still be available on older devices.
+\end{itemize}
+
\subsection*{Version 1.5.1}
This maintenance release fixes a few nasty bugs:
\begin{itemize}
diff --git a/doc/manual/cover.tex b/doc/manual/cover.tex
index b9e5fe5..cd914f3 100644
--- a/doc/manual/cover.tex
+++ b/doc/manual/cover.tex
@@ -2,7 +2,7 @@
\begin{titlepage}
\vspace*{3cm}
-\Huge{ViennaCL 1.5.1}
+\Huge{ViennaCL 1.5.2}
\rule[0.0cm]{9.5cm}{0.05cm}
\begin{flushright}
\Large{User Manual}
diff --git a/doc/manual/viennacl.tex b/doc/manual/viennacl.tex
index 85c60f8..8935634 100644
--- a/doc/manual/viennacl.tex
+++ b/doc/manual/viennacl.tex
@@ -20,7 +20,7 @@
\usepackage[pdfauthor={Karl Rupp et al.},
- pdftitle={ViennaCL 1.5.1 Manual},
+ pdftitle={ViennaCL 1.5.2 Manual},
colorlinks=true,
linktocpage=true]{hyperref}
@@ -61,7 +61,7 @@
\newcommand{\OpenCL} {\texttt{OpenCL}}
\newcommand{\CUDA} {\texttt{CUDA}}
\newcommand{\ViennaCL} {\texttt{ViennaCL}}
-\newcommand{\ViennaCLversion} {\texttt{ViennaCL 1.5.1}}
+\newcommand{\ViennaCLversion} {\texttt{ViennaCL 1.5.2}}
\newcommand{\ViennaCLminorversion} {\texttt{ViennaCL 1.5.x}}
\newcommand{\Boost} {\texttt{Boost}}
\newcommand{\ublas} {\texttt{uBLAS}}
diff --git a/viennacl/forwards.h b/viennacl/forwards.h
index d2ba91f..e370616 100644
--- a/viennacl/forwards.h
+++ b/viennacl/forwards.h
@@ -24,7 +24,7 @@
*/
/**
- @mainpage Source Code Documentation for ViennaCL 1.5.1
+ @mainpage Source Code Documentation for ViennaCL 1.5.2
This is the source code documentation of ViennaCL. Detailed information about the functions in ViennaCL can be found here.
diff --git a/viennacl/generator/profiles.hpp b/viennacl/generator/profiles.hpp
index 3755fdd..fd472fe 100644
--- a/viennacl/generator/profiles.hpp
+++ b/viennacl/generator/profiles.hpp
@@ -268,6 +268,29 @@ namespace viennacl{
//default
set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Fermi,"GeForce GTX 470");
+ //Geforce GTX 680
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,1,256,true));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(4,64,512,true));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,64,4,64));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,2,64,64,8,4,2,1,0));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,16,4,4,8,0,0));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
+
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(2,16,16,16,16,true));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,64,512,true));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,128,1024));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,16,32,1024));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,64,32,2,2,8,1,0));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,64,128,4,2,2,8,0,1));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
+ map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
+
+ //default
+ set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Kepler,"GeForce GTX 680");
return map;
diff --git a/viennacl/linalg/detail/ilu/block_ilu.hpp b/viennacl/linalg/detail/ilu/block_ilu.hpp
index 406553a..1a0acc5 100644
--- a/viennacl/linalg/detail/ilu/block_ilu.hpp
+++ b/viennacl/linalg/detail/ilu/block_ilu.hpp
@@ -294,10 +294,10 @@ namespace viennacl
index_vector_type const & block_boundaries
) : tag_(tag),
block_indices_(block_boundaries),
- gpu_block_indices(viennacl::traits::context(mat)),
+ gpu_block_indices(),
gpu_L_trans(0,0,viennacl::traits::context(mat)),
gpu_U_trans(0,0,viennacl::traits::context(mat)),
- gpu_D(0,viennacl::traits::context(mat)),
+ gpu_D(mat.size1(),viennacl::traits::context(mat)),
LU_blocks(block_boundaries.size())
{
//initialize preconditioner:
diff --git a/viennacl/linalg/vector_operations.hpp b/viennacl/linalg/vector_operations.hpp
index 9b6eb51..5692bee 100644
--- a/viennacl/linalg/vector_operations.hpp
+++ b/viennacl/linalg/vector_operations.hpp
@@ -876,6 +876,29 @@ namespace viennacl
}
} //namespace linalg
+
+ template <typename T, typename LHS, typename RHS, typename OP> // Free-function form of 'v1 += <vector expression>' for any vector_base<T> left-hand side.
+ vector_base<T> & operator += (vector_base<T> & v1, const vector_expression<const LHS, const RHS, OP> & proxy)
+ {
+ assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!")); // the expression must have exactly as many entries as v1 (checked only when assert() is active)
+ assert( (v1.size() > 0) && bool("Vector not yet initialized!") ); // v1 must already have a nonzero size (checked only when assert() is active)
+
+ linalg::detail::op_executor<vector_base<T>, op_inplace_add, vector_expression<const LHS, const RHS, OP> >::apply(v1, proxy); // the actual in-place addition is delegated to op_executor<...>::apply with the op_inplace_add tag
+
+ return v1; // hand back the modified left-hand side by reference
+ }
+
+ template <typename T, typename LHS, typename RHS, typename OP> // Free-function form of 'v1 -= <vector expression>' for any vector_base<T> left-hand side.
+ vector_base<T> & operator -= (vector_base<T> & v1, const vector_expression<const LHS, const RHS, OP> & proxy)
+ {
+ assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!")); // the expression must have exactly as many entries as v1 (checked only when assert() is active)
+ assert( (v1.size() > 0) && bool("Vector not yet initialized!") ); // v1 must already have a nonzero size (checked only when assert() is active)
+
+ linalg::detail::op_executor<vector_base<T>, op_inplace_sub, vector_expression<const LHS, const RHS, OP> >::apply(v1, proxy); // the actual in-place subtraction is delegated to op_executor<...>::apply with the op_inplace_sub tag
+
+ return v1; // hand back the modified left-hand side by reference
+ }
+
} //namespace viennacl
diff --git a/viennacl/ocl/device.hpp b/viennacl/ocl/device.hpp
index f04cf53..196a8c8 100644
--- a/viennacl/ocl/device.hpp
+++ b/viennacl/ocl/device.hpp
@@ -271,6 +271,7 @@ namespace viennacl
#endif
/** @brief Is CL_TRUE if the device and the host have a unified memory subsystem and is CL_FALSE otherwise. */
+#ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
cl_bool host_unified_memory() const
{
if (!host_unified_memory_valid_)
@@ -281,6 +282,7 @@ namespace viennacl
}
return host_unified_memory_;
}
+#endif
/** @brief Is CL_TRUE if images are supported by the OpenCL device and CL_FALSE otherwise. */
cl_bool image_support() const
@@ -583,6 +585,7 @@ namespace viennacl
return architecture_family_;
}
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR
/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_char() const
{
@@ -594,7 +597,9 @@ namespace viennacl
}
return native_vector_width_char_;
}
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT
/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_short() const
{
@@ -606,7 +611,9 @@ namespace viennacl
}
return native_vector_width_short_;
}
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_INT
/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_int() const
{
@@ -618,7 +625,9 @@ namespace viennacl
}
return native_vector_width_int_;
}
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG
/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_long() const
{
@@ -630,7 +639,9 @@ namespace viennacl
}
return native_vector_width_long_;
}
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT
/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector. */
cl_uint native_vector_width_float() const
{
@@ -642,7 +653,9 @@ namespace viennacl
}
return native_vector_width_float_;
}
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector.
*
* If the cl_khr_fp64 extension is not supported, this function returns 0.
@@ -657,7 +670,9 @@ namespace viennacl
}
return native_vector_width_double_;
}
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF
/** @brief Returns the native ISA vector width. The vector width is defined as the number of scalar elements that can be stored in the vector.
*
* If the cl_khr_fp16 extension is not supported, this function returns 0.
@@ -672,7 +687,9 @@ namespace viennacl
}
return native_vector_width_half_;
}
+#endif
+#if CL_DEVICE_OPENCL_C_VERSION
/** @brief OpenCL C version string. Returns the highest OpenCL C version supported by the compiler for this device.
*
* This version string has the following format:
@@ -691,6 +708,7 @@ namespace viennacl
}
return opencl_c_version_;
}
+#endif
/** @brief The platform associated with this device. */
cl_platform_id platform() const
@@ -783,6 +801,7 @@ namespace viennacl
*
* If the cl_khr_fp16 extension is not supported, this function returns 0.
*/
+#ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
cl_uint preferred_vector_width_half() const
{
if (!preferred_vector_width_half_valid_)
@@ -793,6 +812,7 @@ namespace viennacl
}
return preferred_vector_width_half_;
}
+#endif
/** @brief OpenCL profile string. Returns the profile name supported by the device.
*
@@ -985,7 +1005,9 @@ namespace viennacl
oss << line_indent << "Global Mem Size: " << global_mem_size() << std::endl;
oss << line_indent << "Local Mem Size: " << local_mem_size() << std::endl;
oss << line_indent << "Local Mem Type: " << local_mem_type() << std::endl;
+#ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
oss << line_indent << "Host Unified Memory: " << host_unified_memory() << std::endl;
+#endif
return oss.str();
}
@@ -1016,7 +1038,9 @@ namespace viennacl
#ifdef CL_DEVICE_HALF_FP_CONFIG
oss << line_indent << "Half PF Config: " << fp_config_to_string(half_fp_config()) << std::endl;
#endif
+#ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
oss << line_indent << "Host Unified Memory: " << host_unified_memory() << std::endl;
+#endif
oss << line_indent << "Image Support: " << image_support() << std::endl;
oss << line_indent << "Image2D Max Height: " << image2d_max_height() << std::endl;
oss << line_indent << "Image2D Max Width: " << image2d_max_width() << std::endl;
@@ -1040,14 +1064,30 @@ namespace viennacl
oss << line_indent << "Mem Base Addr Align: " << mem_base_addr_align() << std::endl;
oss << line_indent << "Min Data Type Align Size: " << min_data_type_align_size() << " Bytes" << std::endl;
oss << line_indent << "Name: " << name() << std::endl;
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR
oss << line_indent << "Native Vector Width char: " << native_vector_width_char() << std::endl;
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT
oss << line_indent << "Native Vector Width short: " << native_vector_width_short() << std::endl;
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_INT
oss << line_indent << "Native Vector Width int: " << native_vector_width_int() << std::endl;
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG
oss << line_indent << "Native Vector Width long: " << native_vector_width_long() << std::endl;
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT
oss << line_indent << "Native Vector Width float: " << native_vector_width_float() << std::endl;
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
oss << line_indent << "Native Vector Width double: " << native_vector_width_double() << std::endl;
+#endif
+#ifdef CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF
oss << line_indent << "Native Vector Width half: " << native_vector_width_half() << std::endl;
+#endif
+#ifdef CL_DEVICE_OPENCL_C_VERSION
oss << line_indent << "OpenCL C Version: " << opencl_c_version() << std::endl;
+#endif
oss << line_indent << "Platform: " << platform() << std::endl;
oss << line_indent << "Preferred Vector Width char: " << preferred_vector_width_char() << std::endl;
oss << line_indent << "Preferred Vector Width short: " << preferred_vector_width_short() << std::endl;
@@ -1055,7 +1095,9 @@ namespace viennacl
oss << line_indent << "Preferred Vector Width long: " << preferred_vector_width_long() << std::endl;
oss << line_indent << "Preferred Vector Width float: " << preferred_vector_width_float() << std::endl;
oss << line_indent << "Preferred Vector Width double: " << preferred_vector_width_double() << std::endl;
+#ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
oss << line_indent << "Preferred Vector Width half: " << preferred_vector_width_half() << std::endl;
+#endif
oss << line_indent << "Profile: " << profile() << std::endl;
oss << line_indent << "Profiling Timer Resolution: " << profiling_timer_resolution() << " ns" << std::endl;
oss << line_indent << "Queue Properties: " << queue_properties_to_string(queue_properties()) << std::endl;
@@ -1097,8 +1139,10 @@ namespace viennacl
oss << "CL_FP_ROUND_TO_INF ";
if (conf & CL_FP_FMA)
oss << "CL_FP_FMA ";
+#ifdef CL_FP_SOFT_FLOAT
if (conf & CL_FP_SOFT_FLOAT)
oss << "CL_FP_SOFT_FLOAT ";
+#endif
return oss.str();
}
diff --git a/viennacl/ocl/infos.hpp b/viennacl/ocl/infos.hpp
index 3534f6c..c311d7b 100644
--- a/viennacl/ocl/infos.hpp
+++ b/viennacl/ocl/infos.hpp
@@ -212,9 +212,13 @@ namespace viennacl{
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_WORK_GROUP_SIZE, size_t);
// SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_COMPILE_WORK_GROUP_SIZE, size_t[3]);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong);
+#ifdef CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_t);
+#endif
+#ifdef CL_CONTEXT_NUM_DEVICES
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_NUM_DEVICES, cl_uint);
+#endif
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_REFERENCE_COUNT, cl_uint);
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_PROPERTIES, cl_context_properties);
diff --git a/viennacl/scheduler/execute.hpp b/viennacl/scheduler/execute.hpp
index c96066b..7e93831 100644
--- a/viennacl/scheduler/execute.hpp
+++ b/viennacl/scheduler/execute.hpp
@@ -175,6 +175,10 @@ namespace viennacl
{
execute_matrix_prod(s, root_node);
}
+ else if ( leaf.op.type == OPERATION_UNARY_TRANS_TYPE)
+ {
+ assign_trans(root_node.lhs, leaf.lhs);
+ }
else
throw statement_not_supported_exception("Unsupported binary operator");
}
diff --git a/viennacl/scheduler/execute_matrix_dispatcher.hpp b/viennacl/scheduler/execute_matrix_dispatcher.hpp
index 8367855..764bc37 100644
--- a/viennacl/scheduler/execute_matrix_dispatcher.hpp
+++ b/viennacl/scheduler/execute_matrix_dispatcher.hpp
@@ -201,6 +201,50 @@ namespace viennacl
}
}
+ /** @brief Scheduler unwrapper for A = trans(B): dispatches on A's subtype (DENSE_ROW_MATRIX_TYPE or DENSE_COL_MATRIX_TYPE) and numeric type (FLOAT_TYPE or DOUBLE_TYPE) and throws statement_not_supported_exception for any other combination. */
+ inline void assign_trans(lhs_rhs_element const & A,
+ lhs_rhs_element const & B)
+ {
+ assert( A.type_family == MATRIX_TYPE_FAMILY && B.type_family == MATRIX_TYPE_FAMILY
+ && bool("Arguments are not matrix types!")); // both operands must be tagged as matrices (checked only when assert() is active)
+
+ assert(A.numeric_type == B.numeric_type && bool("Matrices do not have the same scalar type")); // no mixed-precision transpose: A and B must share a numeric type
+
+ if (A.subtype == DENSE_ROW_MATRIX_TYPE) // NOTE(review): only A.subtype is inspected; B is read through the matching row-major member, so B presumably must have the same subtype as A - confirm against callers
+ {
+ switch (A.numeric_type)
+ {
+ case FLOAT_TYPE:
+ *A.matrix_row_float = viennacl::trans(*B.matrix_row_float); // assign the transposed expression of B to the matrix A points to
+ break;
+ case DOUBLE_TYPE:
+ *A.matrix_row_double = viennacl::trans(*B.matrix_row_double);
+ break;
+
+ default:
+ throw statement_not_supported_exception("Invalid arguments in scheduler when calling assign_trans()"); // numeric types other than float/double are rejected
+ }
+ }
+ else if (A.subtype == DENSE_COL_MATRIX_TYPE) // same dispatch as above, using the column-major members of both operands
+ {
+ switch (A.numeric_type)
+ {
+ case FLOAT_TYPE:
+ *A.matrix_col_float = viennacl::trans(*B.matrix_col_float);
+ break;
+ case DOUBLE_TYPE:
+ *A.matrix_col_double = viennacl::trans(*B.matrix_col_double);
+ break;
+
+ default:
+ throw statement_not_supported_exception("Invalid arguments in scheduler when calling assign_trans()"); // numeric types other than float/double are rejected
+ }
+ }
+ else
+ {
+ throw statement_not_supported_exception("Invalid arguments in scheduler when calling assign_trans()"); // A is neither a dense row-major nor a dense column-major matrix
+ }
+ }
} // namespace detail
} // namespace scheduler
diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp
index 23e4906..1619f51 100644
--- a/viennacl/vector.hpp
+++ b/viennacl/vector.hpp
@@ -750,28 +750,6 @@ namespace viennacl
return *this;
}
- template <typename LHS, typename RHS, typename OP>
- self_type & operator += (const vector_expression<const LHS, const RHS, OP> & proxy)
- {
- assert( (viennacl::traits::size(proxy) == size()) && bool("Incompatible vector sizes!"));
- assert( (size() > 0) && bool("Vector not yet initialized!") );
-
- linalg::detail::op_executor<self_type, op_inplace_add, vector_expression<const LHS, const RHS, OP> >::apply(*this, proxy);
-
- return *this;
- }
-
- template <typename LHS, typename RHS, typename OP>
- self_type & operator -= (const vector_expression<const LHS, const RHS, OP> & proxy)
- {
- assert( (viennacl::traits::size(proxy) == size()) && bool("Incompatible vector sizes!"));
- assert( (size() > 0) && bool("Vector not yet initialized!") );
-
- linalg::detail::op_executor<self_type, op_inplace_sub, vector_expression<const LHS, const RHS, OP> >::apply(*this, proxy);
-
- return *this;
- }
-
/** @brief Scales a vector (or proxy) by a CPU scalar value
*/
self_type & operator *= (SCALARTYPE val)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/viennacl.git
More information about the debian-science-commits
mailing list